작성
·
30
0
-- 1)
SELECT user_id
, visit_month
, LEAD(visit_month, 1) OVER (PARTITION BY user_id ORDER BY visit_month) AS lead_visit_month
, LEAD(visit_month, 2) OVER (PARTITION BY user_id ORDER BY visit_month) AS lead2_visit_month
FROM advanced.analytics_function_01
ORDER BY user_id
-- 2)
SELECT user_id
, visit_month
, LEAD(visit_month) OVER (PARTITION BY user_id ORDER BY visit_month) AS after_visit_month
, LEAD(visit_month, 2) OVER (PARTITION BY user_id ORDER BY visit_month) AS after2_visit_month
, LAG(visit_month) OVER (PARTITION BY user_id ORDER BY visit_month) AS before_visit_month
FROM advanced.analytics_function_01
ORDER BY user_id, visit_month
-- 3) 유저가 접속했을 때, 다음 접속까지의 간격을 구하시오.
SELECT *
, after_visit_month - visit_month AS diff_month
FROM (
SELECT user_id
, visit_month
, LEAD(visit_month) OVER (PARTITION BY user_id ORDER BY visit_month) AS after_visit_month
FROM advanced.analytics_function_01
)
ORDER BY user_id
, visit_month
-- 4) 이 데이터셋을 기준으로 user_id의 첫번째 방문 월, 마지막 방문 월을 구하는 쿼리를 작성해주세요.
SELECT *
, FIRST_VALUE(visit_month) OVER (PARTITION BY user_id ORDER BY visit_month) AS first_month
, LAST_VALUE(visit_month) OVER (PARTITION BY user_id ORDER BY visit_month) AS last_month
FROM advanced.analytics_function_01
ORDER BY user_id
, visit_month
-- 윈도우 함수 - 데이터 범위 지정
SELECT *
, SUM(amount) OVER () AS amount_sum
, SUM(amount) OVER (ORDER BY order_id) AS cumulative_sum
, SUM(amount) OVER (PARTITION BY user_id ORDER BY order_id) AS cumulative_sum_by_user
, AVG(amount) OVER (ORDER BY order_id ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING) AS last_5_orders_avg_amount
FROM advanced.orders
ORDER BY order_id
-- 1)
SELECT *
, COUNT(user) OVER (PARTITION BY user) AS total_query_cnt
FROM advanced.query_logs
ORDER BY user
-- 2)
WITH query_cnt_by_team AS (
SELECT EXTRACT(WEEK FROM query_date) AS week_number
, team
, user
, COUNT(user) AS query_cnt
FROM advanced.query_logs
GROUP BY ALL
)
SELECT *
, RANK() OVER (PARTITION BY week_number, team ORDER BY query_cnt DESC) AS rk
FROM query_cnt_by_team
QUALIFY rk = 1
ORDER BY week_number
, team
, query_cnt DESC
-- 3)
WITH query_cnt_by_team AS (
SELECT EXTRACT(WEEK FROM query_date) AS week_number
, team
, user
, COUNT(user) AS query_cnt
FROM advanced.query_logs
GROUP BY ALL
)
SELECT *
, LAG(query_cnt, 1) OVER (PARTITION BY user ORDER BY week_number) AS prev_week_query_cnt
FROM query_cnt_by_team
ORDER BY user
, week_number
-- 4)
SELECT query_date
, team
, user
, query_cnt
, SUM(query_cnt) OVER (PARTITION BY user ORDER BY query_date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cumulative_sum
FROM (
SELECT query_date
, team
, user
, COUNT(user) AS query_cnt
FROM advanced.query_logs
GROUP BY ALL
)
ORDER BY user
, query_date
-- 5)
WITH raw_data AS (
SELECT DATE '2024-05-01' AS date, 15 AS number_of_orders UNION ALL
SELECT DATE '2024-05-02', 13 UNION ALL
SELECT DATE '2024-05-03', NULL UNION ALL
SELECT DATE '2024-05-04', 16 UNION ALL
SELECT DATE '2024-05-05', NULL UNION ALL
SELECT DATE '2024-05-06', 18 UNION ALL
SELECT DATE '2024-05-07', 20 UNION ALL
SELECT DATE '2024-05-08', NULL UNION ALL
SELECT DATE '2024-05-09', 13 UNION ALL
SELECT DATE '2024-05-10', 14 UNION ALL
SELECT DATE '2024-05-11', NULL UNION ALL
SELECT DATE '2024-05-12', NULL
)
SELECT *
, LAST_VALUE(number_of_orders IGNORE NULLS) OVER (ORDER BY date) AS before_number_of_orders
FROM raw_data
-- 6)
WITH raw_data AS (
SELECT DATE '2024-05-01' AS date, 15 AS number_of_orders UNION ALL
SELECT DATE '2024-05-02', 13 UNION ALL
SELECT DATE '2024-05-03', NULL UNION ALL
SELECT DATE '2024-05-04', 16 UNION ALL
SELECT DATE '2024-05-05', NULL UNION ALL
SELECT DATE '2024-05-06', 18 UNION ALL
SELECT DATE '2024-05-07', 20 UNION ALL
SELECT DATE '2024-05-08', NULL UNION ALL
SELECT DATE '2024-05-09', 13 UNION ALL
SELECT DATE '2024-05-10', 14 UNION ALL
SELECT DATE '2024-05-11', NULL UNION ALL
SELECT DATE '2024-05-12', NULL
),
filled_data AS (
SELECT * EXCEPT(number_of_orders)
, LAST_VALUE(number_of_orders IGNORE NULLS) OVER (ORDER BY date) AS number_of_orders
FROM raw_data
)
SELECT *
, AVG(number_of_orders) OVER (ORDER BY date ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS moving_avg
FROM filled_data
-- 7)
WITH step1 AS (
SELECT * EXCEPT(event_params, platform)
, DATETIME(TIMESTAMP_MICROS(event_timestamp), "Asia/Seoul") AS event_datetime
FROM advanced.app_logs
WHERE user_pseudo_id = "1997494153.8491999091"
AND event_date = '2022-08-18'
),
step2 AS (
SELECT *
, DATETIME_DIFF(event_datetime, prev_event_datetime, SECOND) AS second_diff
FROM (
SELECT *
, LAG(event_datetime) OVER (ORDER BY event_datetime) AS prev_event_datetime
FROM step1
)
ORDER BY event_datetime
)
SELECT *
, SUM(session_start) OVER (ORDER BY event_datetime) AS session_num
FROM (
SELECT *
, CASE WHEN second_diff IS NULL THEN 1
WHEN second_diff >= 20 THEN 1
ELSE 0
END AS session_start
FROM step2
)
ORDER BY event_datetime
답변 1
0
안녕하세요, 인프런 AI 인턴입니다. H J님.
윈도우 함수의 다양한 활용을 통해 데이터 분석에 필요한 정보를 추출하시는 과정에서 질문을 주셨군요. 윈도우 함수는 SQL에서 매우 강력한 도구로, 특히 데이터 분석에서는 데이터의 흐름이나 누적 계산, 전후 관계 분석에 유용합니다.
H J님이 주신 연습 문제는 BigQuery에서 주로 사용되는 패턴을 잘 다루고 있습니다. 관련된 유사한 질문과 답변을 통해 이미 다루어진 주제를 확인하실 수 있으며, 보다 깊이 있는 이해를 위해 추천합니다.
LAST_VALUE 함수와 IGNORE NULLS 사용법에 관한 토론
LAST_VALUE
함수는 주어진 범위 내에서의 값을 다루는 방법에 대한 설명이 잘 되어 있습니다. IGNORE NULLS
옵션을 통해 NULL 값을 무시하고 랜덤하게 깨질 수 있는 결과를 방지할 수 있습니다.각 연습 문제는 특정 함수나 개념을 잘 설명하고 있으며, H J님이 데이터를 어떻게 분석하고자 하는지에 대한 명확한 목표가 있다면 더욱 효율적인 분석이 가능할 것입니다.
제가 드린 링크들이 H J님께 도움이 되기를 바라며, 추가적으로 궁금한 사항이 있으시면 언제든지 질문해 주세요. 질문자님의 이해를 돕기 위해 최선을 다하겠습니다.
현재 베타 기능으로 답변이 만족스럽지 않을 수 있는 점 양해 부탁드립니다. 🙏 추가적으로 궁금한 점이 있으시면, 이어서 질문해 주세요. 곧 지식공유자께서 답변해 주실 것입니다.