작성
·
24
·
수정됨
0
#1
SELECT
user_id,
visit_month,
LEAD(visit_month,1) OVER(PARTITION BY user_id ORDER BY visit_month) AS next_month,
LEAD(visit_month,2) OVER(PARTITION BY user_id ORDER BY visit_month) AS the_month_after_next
FROM
`avdanced.analytics_function_01`
#2
SELECT
user_id,
visit_month,
LEAD(visit_month,1) OVER(PARTITION BY user_id ORDER BY visit_month) AS next_month,
LEAD(visit_month,2) OVER(PARTITION BY user_id ORDER BY visit_month) AS the_month_after_next,
LAG(visit_month,1) OVER(PARTITION BY user_id ORDER BY visit_month) AS last_month
FROM
`avdanced.analytics_function_01`
Frame
SELECT
SUM(amount) OVER() AS amount_total,
#SUM(amount) OVER(partition by order_date) AS cumulative_sum,
SUM(amount) OVER (ORDER BY order_date) AS cumulative_sum,
#SUM(amount) OVER(partition by user_id) AS cumulative_sum_by_user,
SUM(amount) OVER(partition by user_id ORDER BY order_id) AS cumulative_sum_by_user,
AVG(amount) OVER(ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING) AS last_5_orders_avg_amount,
AVG(amount) OVER(ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS last_5_unbounded_orders_avg_amount,
AVG(amount) OVER(ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS all_orders_avg_amount
FROM `avdanced.orders`
2)
#1
SELECT
user,
team,
query_date,
count(user) over(PARTITION BY user) AS total_query_cnt
FROM
advanced.query_logs
ORDER BY
query_date,
user
#2
WITH query_cnt_by_team AS (
SELECT
EXTRACT(WEEK FROM query_date) AS week_number,
team,
user,
COUNT(user) AS query_cnt
FROM advanced.query_logs
GROUP BY ALL
)
SELECT
*,
RANK() OVER (PARTITION BY week_number, team ORDER BY query_cnt DESC) AS rk
FROM query_cnt_by_team
QUALIFY rk = 1
ORDER BY
week_number,
team,
query_cnt DESC
#3
WITH query_cnt_by_team AS (
SELECT
EXTRACT(WEEK FROM query_date) AS week_number,
team,
user,
COUNT(user) AS query_cnt
FROM advanced.query_logs
GROUP BY ALL
)
SELECT
*,
LAG(query_cnt, 1) OVER (PARTITION BY user ORDER BY week_number) AS prev_week_query_cnt
FROM query_cnt_by_team
ORDER BY
user,
week_number
#4
SELECT
query_date,
team,
user,
query_cnt,
SUM(query_cnt) OVER (PARTITION BY user ORDER BY query_date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cumulative_sum
FROM (
SELECT
query_date,
team,
user,
COUNT(user) AS query_cnt
FROM advanced.query_logs
GROUP BY ALL
)
ORDER BY
user,
query_date
#5
WITH raw_data AS(
SELECT DATE'2024-05-01'AS date,15 AS number_of_orders UNION ALL
SELECT DATE'2024-05-02', 13 UNION ALL
SELECT DATE'2024-05-03', NULL UNION ALL
SELECT DATE'2024-05-04', 16 UNION ALL
SELECT DATE'2024-05-05', NULL UNION ALL
SELECT DATE'2024-05-06', 18 UNION ALL
SELECT DATE'2024-05-07', 20 UNION ALL
SELECT DATE'2024-05-08', NULL UNION ALL
SELECT DATE'2024-05-09', 13 UNION ALL
SELECT DATE'2024-05-10', 14 UNION ALL
SELECT DATE'2024-05-11', NULL UNION ALL
SELECT DATE'2024-05-12', NULL
)
SELECT
date,
IF(number_of_orders is null , last_value(number_of_orders IGNORE NULLS) OVER(ORDER BY date asc), number_of_orders) AS number_of_orders_not_null
FROM
raw_data;
#6
WITH raw_data AS(
SELECT DATE'2024-05-01'AS date,15 AS number_of_orders UNION ALL
SELECT DATE'2024-05-02', 13 UNION ALL
SELECT DATE'2024-05-03', NULL UNION ALL
SELECT DATE'2024-05-04', 16 UNION ALL
SELECT DATE'2024-05-05', NULL UNION ALL
SELECT DATE'2024-05-06', 18 UNION ALL
SELECT DATE'2024-05-07', 20 UNION ALL
SELECT DATE'2024-05-08', NULL UNION ALL
SELECT DATE'2024-05-09', 13 UNION ALL
SELECT DATE'2024-05-10', 14 UNION ALL
SELECT DATE'2024-05-11', NULL UNION ALL
SELECT DATE'2024-05-12', NULL
),
filled_data AS (
SELECT
* EXCEPT(number_of_orders),
LAST_VALUE(number_of_orders IGNORE NULLS) OVER (ORDER BY date) AS number_of_orders
FROM raw_data
)
SELECT
*,
AVG(number_of_orders) OVER (ORDER BY date ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS moving_avg
FROM filled_data
#7
WITH base AS(
SELECT
event_date,
event_timestamp,
DATETIME(TIMESTAMP_MICROS(event_timestamp), 'Asia/Seoul') AS event_datetime,
event_name,
user_id,
user_pseudo_id,
DATETIME(TIMESTAMP_MICROS(LAG(event_timestamp) OVER (PARTITION BY user_pseudo_id ORDER BY event_timestamp)), 'Asia/Seoul') AS before_event_datetime
FROM advanced.app_logs
WHERE event_date = '2022-08-18'
AND user_pseudo_id = '1997494153.8491999091'
),
session_info AS(
SELECT
*,
TIMESTAMP_DIFF(event_datetime, before_event_datetime, SECOND) AS second_diff,
CASE
WHEN TIMESTAMP_DIFF(event_datetime, before_event_datetime, SECOND) >= 20
OR TIMESTAMP_DIFF(event_datetime, before_event_datetime, SECOND) IS NULL THEN 1
ELSE NULL
END AS session_start
FROM base
)
SELECT
*,
SUM(session_start) OVER (PARTITION BY user_pseudo_id ORDER BY event_datetime) AS session_id
FROM session_info
ORDER BY event_date, event_timestamp;
답변 1
0
안녕하세요, 인프런 AI 인턴입니다. 이조현님.
윈도우 함수를 활용한 SQL 쿼리 연습문제에 도전하신 것을 환영합니다! 윈도우 함수는 SQL에서 매우 중요한 개념으로, 데이터를 보다 효과적으로 집계하고 분석할 수 있게 도와줍니다. 제공해주신 여러 쿼리와 예제를 통해 다양한 방식으로 데이터를 다루고 계신 것을 볼 수 있어 고무적입니다.
특히, LEAD
, LAG
, SUM
, AVG
와 같은 윈도우 함수를 활용하여 데이터를 어떻게 확장하고 분석할 수 있는지를 잘 보여주셨습니다. 이번 프로젝트에서 주요하게 다룬 개념들을 간단히 정리해볼까요:
관련 링크로써 최근 많은 관심을 받고 있는 SQL 윈도우 함수의 응용 예시가 있는 포스트를 추천드립니다:
현재 제안한 방법이 도움이 되었기를 바라며, 추가적으로 궁금한 점이 있는 경우, 질문을 이어가 주세요. 곧 지식공유자께서 친절하게 답변해 주실 것입니다.
저는 질문자님의 이해를 돕고자 지식공유자님의 기존 답변을 참고하여 유사한 답변 링크를 추천해드리고 있습니다. 현재 베타 기능으로 답변이 만족스럽지 않을 수 있는 점 양해 부탁드립니다. 🙏 추가적으로 궁금한 점이 있으시면, 이어서 질문해 주세요. 곧 지식공유자께서 답변해 주실 것입니다.