작성
·
29
·
수정됨
0
문제 1) user들의 다음 접속 월과 다다음 접속 월을 구하는 쿼리를 작성해주세요.
LEAD는 반드시 정렬이 먼저 되어야 함 -> ORDER BY 추가
SELECT *,
LEAD(visit_month) OVER (PARTITION BY user_id ORDER BY visit_month ASC) AS lead_visit_month,
LEAD(visit_month,2) OVER (PARTITION BY user_id ORDER BY visit_month ASC) AS lead2_visit_month
FROM advanced.analytics_function_01
문제 2) user들의 다음 접속 월과 다다음 접속 월, 이전 접속 월을 구하는 쿼리를
작성해주세요.
SELECT *,
LEAD(visit_month) OVER (PARTITION BY user_id ORDER BY visit_month ASC) AS after_visit_month,
LEAD(visit_month,2) OVER (PARTITION BY user_id ORDER BY visit_month ASC) AS after2_visit_month,
LAG(visit_month) OVER (PARTITION BY user_id ORDER BY visit_month ASC) AS before_visit_month
ORDER BY user_id, visit_month
FROM advanced.analytics_function_01
문제 3) 유저가 접속 했을 때, 다음 접속까지의 간격
SELECT *,
after_visit_month - visit_month AS diff
FROM
(
SELECT *,
LEAD(visit_month, 1) OVER (PARTITION BY user_id ORDER BY visit_month) AS after_visit_month
FROM advanced.analytics_function_01
)
-- amount_total: 전체 SUM
-- cumulative_sum : row 시점에 누적 SUM
-- cumulative_sum_by_user : row 시점에 유저별 누적 SUM
-- last_5_orders_avg_amount : order_id 기준으로 정렬하고, 직전 5개 주문의 평균 amount
-- 집계분석함수() OVER(PARTITION BY ~~~ ORDER BY ~~~ ROWS BETWEEN A AND B)
SELECT *,
SUM(amount) OVER() AS amount_total,
-- OVER()에 아무것도 들어가지 않을 수 있음
SUM(amount) OVER(ORDER BY order_id) AS cumulative_sum,
SUM(amount) OVER(PARTITION BY user_id ORDER BY order_id) AS cumulative_sum_by_user,
AVG(amount) OVER(ORDER BY order_id ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING)
FROM advanced.orders
ORDER BY user_id, order_date
QUALIFY(조건 설정)
WHERE 대신 QUALIFY를 사용하면 윈도우 함수의 결과에 대해 필터링할 수 있음
WHERE과 같이 사용하는 경우엔 WHERE 아래에 작성하면 됨
SELECT
order_id,
order_date,
user_id,
amount,
SUM(amount) OVER (PARTITION BY user_id) AS amount_total
FROM advanced.orders
WHERE 1=1
QUALIFY amount_total >= 500
SELECT user,
team,
query_date,
COUNT(query_date) OVER (PARTITION BY user) AS total_query_cnt
FROM advanced.query_logs
ORDER BY user, query_date
문제 2
WITH base AS (
SELECT
EXTRACT(week FROM query_date) AS week_number,
user,
team,
COUNT(query_date) AS total_query_cnt
FROM advanced.query_logs
GROUP BY week_number, user, team
)
SELECT
week_number,
team,
user,
total_query_cnt,
RANK() OVER (PARTITION BY team ORDER BY total_query_cnt DESC) AS ranking_in_team
FROM base
QUALIFY ranking_in_team = 1
ORDER BY week_number, team
WITH base AS (
SELECT
EXTRACT(week FROM query_date) AS week_number,
user,
team,
COUNT(query_date) AS total_query_cnt
FROM advanced.query_logs
GROUP BY week_number, user, team
)
SELECT
week_number,
team,
user,
total_query_cnt,
RANK() OVER (PARTITION BY team ORDER BY total_query_cnt DESC) AS ranking_in_team
FROM base
QUALIFY ranking_in_team = 1
ORDER BY week_number, team
WITH base AS (
SELECT
EXTRACT(week FROM query_date) AS week_number,
user,
team,
COUNT(query_date) AS query_cnt
FROM advanced.query_logs
GROUP BY week_number, user, team
)
SELECT
user,
team,
week_number,
query_cnt,
LAG(query_cnt) OVER (PARTITION BY user ORDER BY week_number) AS prev_week_query_cnt
FROM base
ORDER BY user, week_number
문제 4
SELECT
user,
team,
query_date,
query_count,
SUM(query_count) OVER (PARTITION BY user ORDER BY query_date,
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cumulative_query_count
FROM (
SELECT
user,
team,
query_date,
COUNT(query_date) AS query_count
FROM advanced.query_logs
GROUP BY 1,2,3
)
ORDER BY user, query_date
문제 5
WITH raw_data AS (
SELECT DATE '2024-05-01' AS date, 15 AS number_of_orders UNION ALL
SELECT DATE '2024-05-02', 13 UNION ALL
SELECT DATE '2024-05-03', NULL UNION ALL
SELECT DATE '2024-05-04', 16 UNION ALL
SELECT DATE '2024-05-05', NULL UNION ALL
SELECT DATE '2024-05-06', 18 UNION ALL
SELECT DATE '2024-05-07', 20 UNION ALL
SELECT DATE '2024-05-08', NULL UNION ALL
SELECT DATE '2024-05-09', 13 UNION ALL
SELECT DATE '2024-05-10', 14 UNION ALL
SELECT DATE '2024-05-11', NULL UNION ALL
SELECT DATE '2024-05-12', NULL
)
SELECT
date,
LAST_VALUE(number_of_orders ignore nulls) OVER (ORDER BY date) AS number_of_orders
FROM raw_data
WITH raw_data AS (
SELECT DATE '2024-05-01' AS date, 15 AS number_of_orders UNION ALL
SELECT DATE '2024-05-02', 13 UNION ALL
SELECT DATE '2024-05-03', NULL UNION ALL
SELECT DATE '2024-05-04', 16 UNION ALL
SELECT DATE '2024-05-05', NULL UNION ALL
SELECT DATE '2024-05-06', 18 UNION ALL
SELECT DATE '2024-05-07', 20 UNION ALL
SELECT DATE '2024-05-08', NULL UNION ALL
SELECT DATE '2024-05-09', 13 UNION ALL
SELECT DATE '2024-05-10', 14 UNION ALL
SELECT DATE '2024-05-11', NULL UNION ALL
SELECT DATE '2024-05-12', NULL
)
SELECT
date,
number_of_orders,
AVG(number_of_orders) OVER (ORDER BY date ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS moving_avg
FROM (
SELECT
date,
LAST_VALUE(number_of_orders ignore nulls) OVER (ORDER BY date) AS number_of_orders
FROM raw_data
)
문제 7
WITH base AS (
SELECT
event_date,
event_timestamp,
DATETIME(TIMESTAMP_MICROS(event_timestamp),'Asia/Seoul') AS event_datetime,
event_name,
user_id,
user_pseudo_id,
LAG(DATETIME(TIMESTAMP_MICROS(event_timestamp),'Asia/Seoul')) OVER (PARTITION BY user_pseudo_id ORDER BY event_timestamp) AS before_event_datetime
FROM advanced.app_logs
WHERE 1=1
AND event_date = '2022-08-18'
)
SELECT
*,
DATETIME_DIFF(event_datetime, before_event_datetime, SECOND) AS second_diff,
CASE WHEN
DATETIME_DIFF(event_datetime, before_event_datetime, SECOND) IS NULL
OR DATETIME_DIFF(event_datetime, before_event_datetime, SECOND) >= 20 THEN 1
ELSE 0
END AS session_start,
SUM(CASE WHEN DATETIME_DIFF(event_datetime, before_event_datetime, SECOND) >= 20 THEN 1 ELSE 0 END)
OVER (PARTITION BY user_pseudo_id ORDER BY event_datetime) + 1 AS session_temp
FROM base
답변 1
0
안녕하세요, 인프런 AI 인턴입니다. 정용님.
BigQuery에서 윈도우 함수를 통해 데이터 분석을 수행하실 때, PARTITION BY와 WINDOW FUNCTION의 다양한 활용을 익히는 것은 매우 중요합니다. 다음은 비슷한 주제를 다룬 몇 가지 링크들이며, 이번 문제 해결에 참고가 될 수 있을 것입니다.
BigQuery 윈도우 함수 사용법과 주의사항
이 게시물에서는 다양한 윈도우 함수들의 사용법을 예시와 함께 자세히 설명하고 있습니다. 특히 RANK, LAG 같은 함수의 활용 방법에 대해 더 깊이 이해할 수 있을 것입니다.
데이터 분석 중 윈도우 함수 사용 예제
윈도우 함수를 통해 데이터를 분석할 때, 실제로 현업에서 어떻게 적용되는지에 대한 예시와 그것이 어떤 의미를 가지는지 설명합니다.
문제 3에서 LAG 함수를 사용하는데, LAG와 LEAD의 차이점에 대한 설명을 제공하는 게시물: LAG와 LEAD의 차이점 이해하기
이 자료들이 정용님께서 윈도우 함수를 더 잘 이해하고, 주어진 SQL 문제들을 해결하는 데 도움이 되기를 바랍니다.
저는 질문자님의 이해를 돕고자 지식공유자님의 기존 답변을 참고하여 유사한 답변 링크를 추천해드리고 있습니다. 현재 베타 기능으로 답변이 만족스럽지 않을 수 있는 점 양해 부탁드립니다. 🙏 추가적으로 궁금한 점이 있으시면, 이어서 질문해 주세요. 곧 지식공유자께서 답변해 주실 것입니다.