작성
·
50
·
수정됨
0
윈도우 함수 (탐색 함수) 연습 문제
1번
SELECT
user_id,
visit_month,
lead(visit_month, 1) OVER(PARTITION BY user_id ORDER BY visit_month) AS next_1month,
lead(visit_month, 2) OVER(PARTITION BY user_id ORDER BY visit_month) AS next_2month,
FROM
advanced.analytics_function_01
ORDER BY
1,2;
2번
SELECT
user_id,
visit_month,
lead(visit_month, 1) OVER(PARTITION BY user_id ORDER BY visit_month) AS next_1month,
lead(visit_month, 2) OVER(PARTITION BY user_id ORDER BY visit_month) AS next_2month,
lag(visit_month, 1) OVER(PARTITION BY user_id ORDER BY visit_month) AS pre_1month
FROM
advanced.analytics_function_01
ORDER BY
1,2;
윈도우 함수 연습 문제
1번
SELECT
user,
team,
query_date,
count(user) over(PARTITION BY user) AS total_query_cnt
FROM
advanced.query_logs
ORDER BY
query_date, user
2번
WITH base AS (
SELECT
EXTRACT(week FROM query_date) AS week_number,
team,
user,
count(user) AS query_cnt
FROM
advanced.query_logs
GROUP BY
1,2,3
)
SELECT
*,
rank() OVER(PARTITION BY week_number, team ORDER BY query_cnt desc) AS team_rank
FROM
base
QUALIFY team_rank = 1
ORDER BY
week_number, query_cnt desc;
3번
WITH base AS (
SELECT
EXTRACT(week FROM query_date) AS week_number,
team,
user,
count(user) AS query_cnt
FROM
advanced.query_logs
GROUP BY
1,2,3
)
SELECT
*,
lag(query_cnt) OVER(PARTITION BY team, user ORDER BY week_number asc) AS prev_week_query_cnt
FROM
base
ORDER BY
team, user, week_number;
4번
WITH base AS (
SELECT
user,
team,
query_date,
COUNT(user) as query_count
FROM
advanced.query_logs
GROUP BY
1,2,3
)
SELECT
user,
team,
query_date,
query_count,
SUM(query_count) OVER(PARTITION BY team, user ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cumulative_query_count
FROM
base
ORDER BY
team, user, query_date;
5번
WITH raw_data AS(
SELECT DATE'2024-05-01'AS date,15 AS number_of_orders UNION ALL
SELECT DATE'2024-05-02',13 UNION ALL
SELECT DATE'2024-05-03',NULL UNION ALL
SELECT DATE'2024-05-04',16 UNION ALL
SELECT DATE'2024-05-05',NULL UNION ALL
SELECT DATE'2024-05-06',18 UNION ALL
SELECT DATE'2024-05-07',20 UNION ALL
SELECT DATE'2024-05-08',NULL UNION ALL
SELECT DATE'2024-05-09',13 UNION ALL
SELECT DATE'2024-05-10',14 UNION ALL
SELECT DATE'2024-05-11',NULL UNION ALL
SELECT DATE'2024-05-12',NULL
)
SELECT
date,
IF(number_of_orders is null , last_value(number_of_orders IGNORE NULLS) OVER(ORDER BY date asc), number_of_orders) AS number_of_orders_not_null
FROM
raw_data;
6번
WITH raw_data AS(
SELECT DATE'2024-05-01'AS date,15 AS number_of_orders UNION ALL
SELECT DATE'2024-05-02',13 UNION ALL
SELECT DATE'2024-05-03',NULL UNION ALL
SELECT DATE'2024-05-04',16 UNION ALL
SELECT DATE'2024-05-05',NULL UNION ALL
SELECT DATE'2024-05-06',18 UNION ALL
SELECT DATE'2024-05-07',20 UNION ALL
SELECT DATE'2024-05-08',NULL UNION ALL
SELECT DATE'2024-05-09',13 UNION ALL
SELECT DATE'2024-05-10',14 UNION ALL
SELECT DATE'2024-05-11',NULL UNION ALL
SELECT DATE'2024-05-12',NULL
),
fill_null AS (
SELECT
date,
IF(number_of_orders is null , last_value(number_of_orders IGNORE NULLS) OVER(ORDER BY date asc), number_of_orders) AS number_of_orders_not_null
FROM
raw_data
)
SELECT
date,
number_of_orders_not_null,
avg(number_of_orders_not_null) OVER(ORDER BY date ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS moving_avg
FROM
fill_null;
7번
WITH base AS (
SELECT
event_date,
event_timestamp,
DATETIME(TIMESTAMP_MICROS(event_timestamp), 'Asia/Seoul') AS event_datetime,
event_name,
user_id,
user_pseudo_id
FROM
advanced.app_logs
WHERE
event_date = '2022-08-18'
), diff_date AS (
SELECT
*,
DATETIME_DIFF(event_datetime, pre_event_time, second) AS date_diff_sec
FROM (
SELECT
*,
LAG(event_datetime, 1) OVER(PARTITION BY user_pseudo_id ORDER BY event_datetime asc) AS pre_event_time
FROM
base
)
), session_start AS (
SELECT
*,
CASE
WHEN pre_event_time IS NULL THEN 1
WHEN date_diff_sec >= 20 THEN 1
END AS start_session
FROM
diff_date
)
SELECT
event_date,
event_datetime,
event_name,
user_id,
user_pseudo_id,
date_diff_sec,
SUM(start_session) OVER(PARTITION BY user_pseudo_id ORDER BY event_datetime) AS session_id
FROM
session_start
ORDER BY
user_pseudo_id, event_datetime;
답변 1
0
안녕하세요, 인프런 AI 인턴입니다. 이재헌님.
BigQuery에서 윈도우 함수와 관련된 문제를 푸는 것은 SQL의 강력한 기능을 이해하는 데 매우 중요합니다. 윈도우 함수는 데이터 분석에서 다양한 통계와 패턴을 추출하는 데 유용하게 사용됩니다.
기존에 주어진 질문들 중에 빅쿼리 강의에서 윈도우 함수를 다루는 유사한 문제와 해결책을 찾는다면 아래 링크들을 참고해 보시길 추천합니다:
LAST_VALUE 와 FIRST_VALUE 함수 사용 시 주의점:
이 링크들은 BigQuery에서 복잡해 보일 수 있는 윈도우 함수를 명확하게 이해하며 연습 문제를 해결하는 데 큰 도움이 될 것입니다. 추가적으로 궁금한 점이나 어려운 점이 있다면, 질문을 남겨주세요. 지식공유자님께서는 곧 답변을 제공해 주실 것입니다.
저는 질문자님의 이해를 돕고자 지식공유자님의 기존 답변을 참고하여 유사한 답변 링크를 추천해드렸습니다. 현재 베타 기능으로 답변이 만족스럽지 않을 수 있는 점 양해 부탁드립니다. 🙏 추가적으로 궁금한 점이 있으시면, 이어서 질문해 주세요. 곧 지식공유자께서 답변해 주실 것입니다.