작성
·
20
·
수정됨
0
강의 문제 탐색 함수, Frame 연습 문제 (1~3)
1번
SELECT
user_id,
visit_month,
LEAD(visit_month,1) OVER(PARTITION BY user_id ORDER BY visit_month) AS next_month,
LEAD(visit_month,2) OVER(PARTITION BY user_id ORDER BY visit_month) AS the_month_after_next
FROM
`avdanced.analytics_function_01`
2번
SELECT
user_id,
visit_month,
LEAD(visit_month,1) OVER(PARTITION BY user_id ORDER BY visit_month) AS next_month,
LEAD(visit_month,2) OVER(PARTITION BY user_id ORDER BY visit_month) AS the_month_after_next,
LAG(visit_month,1) OVER(PARTITION BY user_id ORDER BY visit_month) AS last_month
FROM
`avdanced.analytics_function_01`
Frame
SELECT
-- 1번 모든 주문량
SUM(amount) OVER() AS amount_total,
-- 2번 특정주문시점 누적주문량
#SUM(amount) OVER(partition by order_date) AS cumulative_sum,
SUM(amount) OVER (ORDER BY order_date) AS cumulative_sum,
-- 3번 고객별 주문 시점에서 누적 주문량
#SUM(amount) OVER(partition by user_id) AS cumulative_sum_by_user,
SUM(amount) OVER(partition by user_id ORDER BY order_id) AS cumulative_sum_by_user,
-- 4번 최근 직전 5개 평균 주문량
AVG(amount) OVER(ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING) AS last_5_orders_avg_amount,
AVG(amount) OVER(ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS last_5_unbounded_orders_avg_amount,
AVG(amount) OVER(ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS all_orders_avg_amount
FROM `avdanced.orders`
윈도우함수 연습문제(1~7)
1번
SELECT
user,
team,
query_date,
count(user) over(PARTITION BY user) AS total_query_cnt
FROM
advanced.query_logs
ORDER BY
query_date,
user
2번
WITH query_cnt_by_team AS (
SELECT
EXTRACT(WEEK FROM query_date) AS week_number,
team,
user,
COUNT(user) AS query_cnt
FROM advanced.query_logs
GROUP BY ALL
)
SELECT
*,
RANK() OVER (PARTITION BY week_number, team ORDER BY query_cnt DESC) AS rk
FROM query_cnt_by_team
QUALIFY rk = 1
ORDER BY
week_number,
team,
query_cnt DESC
3번
WITH query_cnt_by_team AS (
SELECT
EXTRACT(WEEK FROM query_date) AS week_number,
team,
user,
COUNT(user) AS query_cnt
FROM advanced.query_logs
GROUP BY ALL
)
SELECT
*,
LAG(query_cnt, 1) OVER (PARTITION BY user ORDER BY week_number) AS prev_week_query_cnt
FROM query_cnt_by_team
ORDER BY
user,
week_number
4번
SELECT
query_date,
team,
user,
query_cnt,
SUM(query_cnt) OVER (PARTITION BY user ORDER BY query_date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cumulative_sum
FROM (
SELECT
query_date,
team,
user,
COUNT(user) AS query_cnt
FROM advanced.query_logs
GROUP BY ALL
)
ORDER BY
user,
query_date
5번
WITH raw_data AS(
SELECT DATE'2024-05-01'AS date,15 AS number_of_orders UNION ALL
SELECT DATE'2024-05-02', 13 UNION ALL
SELECT DATE'2024-05-03', NULL UNION ALL
SELECT DATE'2024-05-04', 16 UNION ALL
SELECT DATE'2024-05-05', NULL UNION ALL
SELECT DATE'2024-05-06', 18 UNION ALL
SELECT DATE'2024-05-07', 20 UNION ALL
SELECT DATE'2024-05-08', NULL UNION ALL
SELECT DATE'2024-05-09', 13 UNION ALL
SELECT DATE'2024-05-10', 14 UNION ALL
SELECT DATE'2024-05-11', NULL UNION ALL
SELECT DATE'2024-05-12', NULL
)
SELECT
date,
IF(number_of_orders is null , last_value(number_of_orders IGNORE NULLS) OVER(ORDER BY date asc), number_of_orders) AS number_of_orders_not_null
FROM
raw_data;
6번
WITH raw_data AS(
SELECT DATE'2024-05-01'AS date,15 AS number_of_orders UNION ALL
SELECT DATE'2024-05-02', 13 UNION ALL
SELECT DATE'2024-05-03', NULL UNION ALL
SELECT DATE'2024-05-04', 16 UNION ALL
SELECT DATE'2024-05-05', NULL UNION ALL
SELECT DATE'2024-05-06', 18 UNION ALL
SELECT DATE'2024-05-07', 20 UNION ALL
SELECT DATE'2024-05-08', NULL UNION ALL
SELECT DATE'2024-05-09', 13 UNION ALL
SELECT DATE'2024-05-10', 14 UNION ALL
SELECT DATE'2024-05-11', NULL UNION ALL
SELECT DATE'2024-05-12', NULL
),
filled_data AS (
SELECT
* EXCEPT(number_of_orders),
LAST_VALUE(number_of_orders IGNORE NULLS) OVER (ORDER BY date) AS number_of_orders
FROM raw_data
)
SELECT
*,
AVG(number_of_orders) OVER (ORDER BY date ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS moving_avg
FROM filled_data
7번
WITH base AS(
SELECT
event_date,
event_timestamp,
DATETIME(TIMESTAMP_MICROS(event_timestamp), 'Asia/Seoul') AS event_datetime,
event_name,
user_id,
user_pseudo_id,
DATETIME(TIMESTAMP_MICROS(LAG(event_timestamp) OVER (PARTITION BY user_pseudo_id ORDER BY event_timestamp)), 'Asia/Seoul') AS before_event_datetime
FROM advanced.app_logs
WHERE event_date = '2022-08-18'
AND user_pseudo_id = '1997494153.8491999091'
),
session_info AS(
SELECT
*,
TIMESTAMP_DIFF(event_datetime, before_event_datetime, SECOND) AS second_diff,
CASE
WHEN TIMESTAMP_DIFF(event_datetime, before_event_datetime, SECOND) >= 20
OR TIMESTAMP_DIFF(event_datetime, before_event_datetime, SECOND) IS NULL THEN 1
ELSE NULL
END AS session_start
FROM base
)
SELECT
*,
SUM(session_start) OVER (PARTITION BY user_pseudo_id ORDER BY event_datetime) AS session_id
FROM session_info
ORDER BY event_date, event_timestamp;
답변 1
0
안녕하세요, 인프런 AI 인턴입니다. 권혁범님.
윈도우 함수와 NULL 값을 다루는 다양한 SQL 쿼리 연습 문제에 대해 작성해 주셨군요. 이러한 연습을 통해 데이터 분석과 SQL 쿼리 작성 능력을 강화하는 것은 매우 중요합니다.
비슷한 주제에 대한 질문들이 여러 개 있었습니다. 아래 링크를 통해 유사한 질문과 답변을 확인할 수 있습니다. 이 자료들이 권혁범님의 스터디에 도움이 되길 바랍니다.
Bigquery 활용편(PDF)파일 P.229 페이지 코드 오류
퍼널별 유저들의 새로운 세션 시작 횟수의 총합 _ 윈도우 함수 마지막 문제 응용
윈도우 함수 사용시 Rank()와 Row_number()의 차이
RANK()
와 ROW_NUMBER()
함수의 차이 및 활용에 대한 설명이 포함되어 있습니다. 비슷한 함수 활용에 대해 도움이 될 수 있습니다.이처럼 다양한 쿼리 문제를 풀어보며 실력을 쌓아가셔서, 데이터 분석 과정에서의 윈도우 함수 활용 능력을 더욱 강화하실 수 있을 것입니다.
저는 질문자님의 이해를 돕고자 지식공유자님의 기존 답변을 참고하여 유사한 답변 링크를 추천해드리고 있습니다. 현재 베타 기능으로 답변이 만족스럽지 않을 수 있는 점 양해 부탁드립니다. 🙏 추가적으로 궁금한 점이 있으시면, 이어서 질문해 주세요. 곧 지식공유자께서 답변해 주실 것입니다.