작성
·
17
0
문제 1
SELECT
*,
LEAD(visit_month) OVER(PARTITION BY user_id ORDER BY visit_month) AS lead_visit_month ,
LEAD(visit_month, 2) OVER(PARTITION BY user_id ORDER BY visit_month) AS lead2_visit_month
FROM advanced.analytics_function_01
ORDER BY user_id
문제 2
SELECT
*,
LEAD(visit_month, 1) OVER(PARTITION BY user_id ORDER BY visit_month) AS lead_visit_month,
LEAD(visit_month, 2) OVER(PARTITION BY user_id ORDER BY visit_month) AS lead2_visit_month, -- 다다음달
LAG(visit_month, 1) OVER (PARTITION BY user_id ORDER BY visit_month) AS lag_visit_month -- 이전 달 (LAG)
FROM `advanced.analytics_function_01`
ORDER BY user_id, visit_month
문제 3
SELECT
*,
LEAD(visit_month, 1) OVER(PARTITION BY user_id ORDER BY visit_month) AS after_visit_month,
LEAD(visit_month, 1) OVER(PARTITION BY user_id ORDER BY visit_month) - visit_month AS diff
# 윈도우 함수를 이렇게 쓰는게 좋을까? => 중복된 쿼리는 줄이는 것이 좋을 수 있음
FROM `advanced.analytics_function_01`
- 총 정리 문제
문제 1
SELECT
*,
COUNT(query_date) OVER(PARTITION BY user ORDER BY query_date) AS total_query_cnt # 38 row
FROM `advanced.query_logs`
문제 2
WITH base AS (
SELECT
EXTRACT(WEEK FROM query_date) AS week_number,
team,
user,
COUNT(user) AS query_cnt
FROM advanced.query_logs
GROUP BY ALL)
SELECT
*,
RANK() OVER(PARTITION BY team ORDER BY query_cnt) AS rk
FROM base
QUALIFY rk = 1
ORDER BY week_number, team, query_cnt DESC
문제 3
WITH base AS (
SELECT
EXTRACT(WEEK FROM query_date) AS week_number,
team,
user,
COUNT(user) AS query_cnt
FROM advanced.query_logs
GROUP BY ALL)
SELECT
*,
LAG(query_cnt, 1) OVER(PARTITION BY user ORDER BY week_number) AS prev_week_cnt
FROM base
문제 4
SELECT
*,
SUM(query_cnt) OVER(PARTITION BY user ORDER BY query_date) AS cumulative_sum,
SUM(query_cnt) OVER(PARTITION BY user ORDER BY query_date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cumulative_sum2
FROM(
SELECT
query_date,
team,
user,
COUNT(user) AS query_cnt
FROM advanced.q
문제 5
WITH raw_data AS (
SELECT DATE '2024-05-01' AS date, 15 AS number_of_orders UNION ALL SELECT DATE '2024-05-02', 13 UNION ALL
SELECT DATE '2024-05-03', NULL UNION ALL
SELECT DATE '2024-05-04', 16 UNION ALL
SELECT DATE '2024-05-05', NULL UNION ALL
SELECT DATE '2024-05-06', 18 UNION ALL
SELECT DATE '2024-05-07', 20 UNION ALL
SELECT DATE '2024-05-08', NULL UNION ALL
SELECT DATE '2024-05-09', 13 UNION ALL
SELECT DATE '2024-05-10', 14 UNION ALL
SELECT DATE '2024-05-11', NULL UNION ALL
SELECT DATE '2024-05-12', NULL
)
SELECT
*,
LAST_VALUE(number_of_orders IGNORE NULLS) OVER(ORDER BY date) AS last_value_orders
FROM raw_data
문제 6
WITH raw_data AS (
SELECT DATE '2024-05-01' AS date, 15 AS number_of_orders UNION ALL SELECT DATE '2024-05-02', 13 UNION ALL
SELECT DATE '2024-05-03', NULL UNION ALL
SELECT DATE '2024-05-04', 16 UNION ALL
SELECT DATE '2024-05-05', NULL UNION ALL
SELECT DATE '2024-05-06', 18 UNION ALL
SELECT DATE '2024-05-07', 20 UNION ALL
SELECT DATE '2024-05-08', NULL UNION ALL
SELECT DATE '2024-05-09', 13 UNION ALL
SELECT DATE '2024-05-10', 14 UNION ALL
SELECT DATE '2024-05-11', NULL UNION ALL
SELECT DATE '2024-05-12', NULL
)
, filled_date AS(
SELECT
* EXCEPT(number_of_orders),
LAST_VALUE(number_of_orders IGNORE NULLS) OVER(ORDER BY date) AS number_of_orders
FROM raw_data)
SELECT
*,
AVG(number_of_orders) OVER (ORDER BY date ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS moving_avg
FROM filled_date
문제 7
WITH base AS(
SELECT
EVENT_DATE,
DATETIME(TIMESTAMP_MICROS(event_timestamp), 'Asia/Seoul') AS event_datetime,
event_name,
user_id,
user_pseudo_id
FROM advanced.app_logs
WHERE event_date = '2022-08-18'
AND user_pseudo_id = "1997494153.8491999091"
)
, diff_date AS(SELECT
*,
DATETIME_DIFF(event_datetime, prev_event_datetime, SECOND) AS second_diff
FROM (
SELECT
*,
LAG(EVENT_DATETIME, 1) OVER(PARTITION BY user_pseudo_id ORDER BY event_datetime) AS prev_event_datetime
FROM base
ORDER BY base.event_datetime))
SELECT
*,
SUM(session_start) OVER(PARTITION BY user_pseudo_id ORDER BY event_datetime) AS session_num
FROM (
SELECT
*,
CASE
WHEN prev_event_datetime IS NULL THEN 1
WHEN second_diff >= 20 THEN 1
ELSE 0
END AS session_start
FROM diff_date)
답변 1
0
안녕하세요, 인프런 AI 인턴입니다. hayeonkimmie님.
질문해주신 윈도우 함수 연습 문제에 대한 힌트와 이해의 도움을 드릴 수 있는 링크를 제공드립니다.
윈도우 함수는 SQL에서 매우 강력한 기능으로, 데이터 분석에 큰 도움을 줍니다. 이 함수들은 동일한 데이터 집합 내에서 레코드를 순차적으로 처리하거나 특정 집계를 수행하는 데 유용합니다.
다음 링크는 비슷한 주제에 대한 답변을 제공하고 있습니다. 이 내용을 참고하셔서 문제를 해결하시는 데 도움이 되기를 바랍니다.
Bigquery 활용편(PDF) 코드 오류 및 윈도우 함수
퍼널별 유저들의 새로운 세션 시작 횟수의 총합 - 윈도우 함수 응용
이러한 자료들이 hayeonkimmie님의 문제 해결에 유익한 정보가 되길 바라며, 추가적인 질문이 있으시다면 언제든지 말씀해 주세요. 곧 지식공유자께서도 답변해 주실 것입니다.
저는 질문자님의 이해를 돕고자 지식공유자님의 기존 답변을 참고하여 유사한 답변 링크를 추천해드리고 있습니다. 현재 베타 기능으로 답변이 만족스럽지 않을 수 있는 점 양해 부탁드립니다. 🙏 추가적으로 궁금한 점이 있으시면, 이어서 질문해 주세요. 곧 지식공유자께서 답변해 주실 것입니다.