작성
·
20
0
(1) 연습문제 1
-- 문제 1) USER의 다음 접속월, 다다음 접속 월
SELECT
user_id,
visit_month,
LEAD(visit_month,1) OVER(PARTITION BY user_id ORDER BY visit_month) AS next_month,
LEAD(visit_month,2) OVER(PARTITION BY user_id ORDER BY visit_month) AS the_month_after_next
FROM
`avdanced.analytics_function_01`
(2) 연습문제 2
-- 문제 2) USER의 다음 접속월, 다다음 접속 월, 이전 접속 월
SELECT
user_id,
visit_month,
LEAD(visit_month,1) OVER(PARTITION BY user_id ORDER BY visit_month) AS next_month,
LEAD(visit_month,2) OVER(PARTITION BY user_id ORDER BY visit_month) AS the_month_after_next,
LAG(visit_month,1) OVER(PARTITION BY user_id ORDER BY visit_month) AS last_month
FROM
`avdanced.analytics_function_01`
연습문제 (1~4)
SELECT
-- 1)모든 주문량
SUM(amount) OVER() AS amount_total,
-- 2)특정주문시점에서 누적주문량
#SUM(amount) OVER(partition by order_date) AS cumulative_sum,
SUM(amount) OVER (ORDER BY order_date) AS cumulative_sum,
-- 3)고객별 주문 시점에서 누적 주문량
#SUM(amount) OVER(partition by user_id) AS cumulative_sum_by_user,
SUM(amount) OVER(partition by user_id ORDER BY order_id) AS cumulative_sum_by_user,
-- 4) 최근 직전 5개 평균 주문량
AVG(amount) OVER(ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING) AS last_5_orders_avg_amount,
AVG(amount) OVER(ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS last_5_unbounded_orders_avg_amount,
AVG(amount) OVER(ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS all_orders_avg_amount
FROM `avdanced.orders`
(1) 연습문제 1
-- 연습문제1) 사용자별 쿼리 실행 횟수
WITH base AS(
SELECT
user,
team,
query_date,
COUNT(*) OVER(PARTITION BY user) AS total_query_cnt,
FROM
`avdanced.query_logs`
)
SELECT
*
FROM
base
(2) 연습문제 2
-- 연습문제2) 주차별 팀내 쿼리 실행한 수 (RANK 1만 보이도록)
WITH base2 AS(
SELECT
EXTRACT(WEEK FROM query_date) AS week_number,
team,
user,
COUNT(*) OVER(PARTITION BY EXTRACT(WEEK FROM query_date) ,user ORDER BY EXTRACT(WEEK FROM query_date) ) AS query_cnt,
FROM
`avdanced.query_logs`
ORDER BY EXTRACT(WEEK FROM query_date)
)
SELECT
DISTINCT *,
RANK() OVER(PARTITION BY team,week_number ORDER BY total_query_cnt DESC) AS team_rank
FROM base2
QUALIFY team_rank = 1
ORDER BY week_number, team
(3) 연습문제 3
WITH base2 AS(
SELECT
EXTRACT(WEEK FROM query_date) AS week_number,
team,
user,
COUNT(*) OVER(PARTITION BY EXTRACT(WEEK FROM query_date) ,user ORDER BY EXTRACT(WEEK FROM query_date) ) AS query_cnt,
FROM
`avdanced.query_logs`
#QUALIFY team_rank = 1
ORDER BY EXTRACT(WEEK FROM query_date)
), base3 AS(
SELECT
DISTINCT *,
RANK() OVER(PARTITION BY team,week_number ORDER BY query_cnt DESC) AS team_rank
FROM base2
QUALIFY team_rank = 1
ORDER BY week_number, team
)
-- 연습문제3) 쿼리 실행 시점 1주전 쿼리 실행
SELECT
DISTINCT *,
LAG(query_cnt,1) OVER(PARTITION BY user ORDER BY week_number) AS prev_week_query_count
FROM
base2
GROUP BY ALL
ORDER BY user, week_number
(4) 연습문제 4
--연습문제4)
SELECT
*,
SUM(query_count) OVER(PARTITION BY user ORDER BY query_date) AS culmulative_query_count,
SUM(query_cnt) OVER(PARTITION BY user ORDER BY query_date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cumulative_sum2
FROM(
SELECT
DISTINCT *,
COUNT(user) OVER(PARTITION BY query_date, user) AS query_count,
FROM `avdanced.query_logs`
)
ORDER BY user,query_date
(5) 연습문제 5
WITH raw_data AS (
SELECT DATE '2024-05-01' AS date, 15 AS number_of_orders UNION ALL
SELECT DATE '2024-05-02', 13 UNION ALL
SELECT DATE '2024-05-03', NULL UNION ALL
SELECT DATE '2024-05-04', 16 UNION ALL
SELECT DATE '2024-05-05', NULL UNION ALL
SELECT DATE '2024-05-06', 18 UNION ALL
SELECT DATE '2024-05-07', 20 UNION ALL
SELECT DATE '2024-05-08', NULL UNION ALL
SELECT DATE '2024-05-09', 13 UNION ALL
SELECT DATE '2024-05-10', 14 UNION ALL
SELECT DATE '2024-05-11', NULL UNION ALL
SELECT DATE '2024-05-12', NULL
)
--연습문제 5) null에 이전 값 삽입
SELECT
raw_data.date,
IF(raw_data.number_of_orders IS NULL, LAG(raw_data.number_of_orders,1) OVER(ORDER BY date), raw_data.number_of_orders)
FROM raw_data
(6) 연습문제 6
WITH raw_data AS (
SELECT DATE '2024-05-01' AS date, 15 AS number_of_orders UNION ALL
SELECT DATE '2024-05-02', 13 UNION ALL
SELECT DATE '2024-05-03', NULL UNION ALL
SELECT DATE '2024-05-04', 16 UNION ALL
SELECT DATE '2024-05-05', NULL UNION ALL
SELECT DATE '2024-05-06', 18 UNION ALL
SELECT DATE '2024-05-07', 20 UNION ALL
SELECT DATE '2024-05-08', NULL UNION ALL
SELECT DATE '2024-05-09', 13 UNION ALL
SELECT DATE '2024-05-10', 14 UNION ALL
SELECT DATE '2024-05-11', NULL UNION ALL
SELECT DATE '2024-05-12', NULL
), null_is_lag AS(
--연습문제 5) null에 이전 값 삽입
SELECT
raw_data.date,
IF(raw_data.number_of_orders IS NULL, LAG(raw_data.number_of_orders,1) OVER(ORDER BY date), raw_data.number_of_orders) AS number_of_orders
FROM raw_data
)
-- 연습문제 6) 이동평균
SELECT
*,
AVG(nl.number_of_orders) OVER(ORDER BY nl.date ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS moving_avg
FROM null_is_lag AS nl
(7) 연습문제 7
-- 1. TIMESTAMP → DATETIME
-- 2. SECOND_DIFF 생성 : uSER로 묶어서 -
-- 3. SESSION_START생성 : USER로 묶어서 LAG(DATA,1)이 NULL이면 1, SECOND_DIFF가 20이상이면 +1
-- 4. SESSION_ID생성: SESSION_START가 1일 경우 SESSION_ID +1, NULL일 경우 LAG(DATA,1)
WITH add_date AS (
-- 1. TIMESTAMP → DATETIME
SELECT
event_date,
event_timestamp,
DATETIME(TIMESTAMP_MICROS(event_timestamp)) AS event_datetime,
event_name,
user_id,
user_pseudo_id,
LAG(DATETIME(TIMESTAMP_MICROS(event_timestamp))) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp) AS before_event_datetime
FROM
`avdanced.app_logs_temp`
--,UNNEST(event_params) AS param -- FROM 절 안에서 UNNEST를 사용
WHERE event_date ="2022-08-18" AND user_pseudo_id = "1997494153.8491999091"
), add_diff AS (
-- 2. SECOND_DIFF 생성 : uSER로 묶어서 -
SELECT
*,
DATE_DIFF(event_datetime, before_event_datetime,SECOND) AS second_diff,
FROM add_date
), add_session AS(
-- 3. SESSION_START생성 : USER로 묶어서 LAG(DATA,1)이 NULL이면 1, SECOND_DIFF가 20이상이면 +1
SELECT
*,
IF(second_diff IS NULL OR second_diff >=20, 1, NULL) AS session_start
FROM
add_diff
)
-- 4. SESSION_ID생성
*,
SUM(session_start) OVER(PARTITION BY user_pseudo_id ORDER BY event_datetime) AS session_num
FROM add_session
ORDER BY event_datetime
답변 1
0
안녕하세요, 인프런 AI 인턴입니다. 김성호님.
윈도우 함수와 관련된 다양한 연습 문제를 통해 SQL에서의 데이터 다루기에 능숙해지시는 과정에 계신 것 같습니다. 그러한 연습을 통해 데이터 분석 및 처리에서의 다양한 패턴과 기법을 배워가는 모습이 인상적입니다.
이와 관련하여 프레임 설정과 데이터의 흐름을 잘 이해하신 것처럼 보입니다. 아래에 윈도우 함수와 관련하여 유사한 질문과 그에 대한 답변을 담은 링크를 제공해 드리겠습니다. 김성호님께서 코드 작성 시 직면하는 궁금증을 해결하는 데 도움이 될 수 있을 것입니다.
Bigquery 활용편(PDF)파일 P.229 페이지 코드 오류
퍼널별 유저들의 새로운 세션 시작 횟수의 총합 _ 윈도우 함수 마지막 문제 응용
windows function default 값에 대하여 range between과 rows between의 차이
각 링크는 김성호님의 연습 문제에 직접적으로 관련될 수 있는 예시와 설명을 포함하고 있습니다.
저는 질문자님의 이해를 돕고자 지식공유자님의 기존 답변을 참고하여 유사한 답변 링크를 추천해드리고 있습니다. 현재 베타 기능으로 답변이 만족스럽지 않을 수 있는 점 양해 부탁드립니다. 🙏 추가적으로 궁금한 점이 있으시면, 이어서 질문해 주세요. 곧 지식공유자께서 답변해 주실 것입니다.