작성
·
13
0
CREATE OR REPLACE TABLE
advanced.analytics_function_01 AS (
SELECT 1004 AS user_id, 1 AS visit_month UNION ALL
SELECT 1004, 3 UNION ALL
SELECT 1004, 7 UNION ALL
SELECT 1004, 8 UNION ALL
SELECT 2112, 3 UNION ALL
SELECT 2112, 6 UNION ALL
SELECT 2112, 7 UNION ALL
SELECT 3912, 4
)
SELECT *
FROM advanced.analytics_function_01
--# 탐색 함수 1
SELECT
user_id,
visit_month,
LEAD(visit_month, 1) OVER (PARTITION BY user_id ORDER BY visit_month) AS after_visit_month,
LEAD(visit_month, 2) OVER (PARTITION BY user_id ORDER BY visit_month) AS after_visit_month
FROM advanced.analytics_function_01
ORDER BY user_id
--# 탐색 함수 2
SELECT
user_id,
visit_month,
LEAD(visit_month, 1) OVER (PARTITION BY user_id ORDER BY visit_month) AS after_visit_month,
LEAD(visit_month, 2) OVER (PARTITION BY user_id ORDER BY visit_month) AS after_visit_month,
LAG(visit_month, 1) OVER (PARTITION BY user_id ORDER BY visit_month) AS before_visit_month
FROM advanced.analytics_function_01
ORDER BY user_id
--# 탐색 함수 3
SELECT
*,
after_visit_month - visit_month AS diff
FROM (
SELECT
user_id,
visit_month,
LEAD(visit_month, 1) OVER (PARTITION BY user_id ORDER BY visit_month) AS after_visit_month,
FROM advanced.analytics_function_01
ORDER BY user_id
)
--# 추가 문제
SELECT
user_id,
visit_month,
FIRST_VALUE(visit_month) OVER (PARTITION BY user_id ORDER BY visit_month) AS fisrt_visit,
LAST_VALUE(visit_month) OVER (PARTITION BY user_id ORDER BY visit_month) AS LAST_visit,
FROM advanced.analytics_function_01
ORDER BY user_id
SELECT
*,
SUM(amount) OVER() AS amount_total,
SUM(amount) OVER(ORDER BY order_id) AS cumulative_sum,
SUM(amount) OVER(PARTITION BY user_id ORDER BY order_id) AS cumulative_sum_by_user,
AVG(amount) OVER(ORDER BY order_id ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING) AS last_5_avg
FROM advanced.orders
ORDER BY order_id
#1
SELECT
*,
COUNT(query_date) OVER (PARTITION BY user) as total_query_cnt
FROM advanced.query_logs
ORDER BY user, query_date
#2
WITH TBL AS(
SELECT
EXTRACT(week from query_date) AS week_number
,team
,user
,COUNT(query_date) AS total_query_cnt
FROM advanced.query_logs
GROUP BY week_number, team, user
) SELECT
week_number
, team
, user
, total_query_cnt as query_count
, RANK() OVER (PARTITION BY team ORDER BY total_query_cnt desc) AS team_rank
FROM TBL
QUALIFY team_rank = 1
ORDER BY week_number,team
#3
WITH TBL AS(
SELECT
EXTRACT(week from query_date) AS week_number
,team
,user
,COUNT(query_date) AS total_query_cnt
FROM advanced.query_logs
GROUP BY week_number, team, user
)
SELECT
user
,team
,week_number
,total_query_cnt as query_count
,LAG(total_query_cnt) OVER (PARTITION BY user order by week_number) AS prev_week_query_count
FROM TBL
ORDER BY user, week_number;
#4
SELECT
user
, team
, query_date
, query_count
, SUM(query_count) OVER (PARTITION BY user ORDER BY query_date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cumulative_query_count
FROM
(SELECT
user
,team
,query_date
,COUNT(query_date) AS query_count
FROM advanced.query_logs
GROUP BY user,team,query_date
)
ORDER BY user, query_date
#5
WITH raw_data AS (
SELECT DATE '2024-05-01' AS date, 15 AS number_of_orders UNION ALL
SELECT DATE '2024-05-02', 13 UNION ALL
SELECT DATE '2024-05-03', NULL UNION ALL
SELECT DATE '2024-05-04', 16 UNION ALL
SELECT DATE '2024-05-05', NULL UNION ALL
SELECT DATE '2024-05-06', 18 UNION ALL
SELECT DATE '2024-05-07', 20 UNION ALL
SELECT DATE '2024-05-08', NULL UNION ALL
SELECT DATE '2024-05-09', 13 UNION ALL
SELECT DATE '2024-05-10', 14 UNION ALL
SELECT DATE '2024-05-11', NULL UNION ALL
SELECT DATE '2024-05-12', NULL
)
SELECT
date
, LAST_VALUE(number_of_orders ignore NULLS) OVER (ORDER BY date) AS number_of_orders
FROM raw_data
#6
WITH raw_data AS (
SELECT DATE '2024-05-01' AS date, 15 AS number_of_orders UNION ALL
SELECT DATE '2024-05-02', 13 UNION ALL
SELECT DATE '2024-05-03', NULL UNION ALL
SELECT DATE '2024-05-04', 16 UNION ALL
SELECT DATE '2024-05-05', NULL UNION ALL
SELECT DATE '2024-05-06', 18 UNION ALL
SELECT DATE '2024-05-07', 20 UNION ALL
SELECT DATE '2024-05-08', NULL UNION ALL
SELECT DATE '2024-05-09', 13 UNION ALL
SELECT DATE '2024-05-10', 14 UNION ALL
SELECT DATE '2024-05-11', NULL UNION ALL
SELECT DATE '2024-05-12', NULL
)
SELECT
date
, number_of_orders
, AVG(number_of_orders) OVER (ORDER BY date ROWS BETWEEN 2 PRECEDING AND CURRENT ROW)
FROM (
SELECT
date
,LAST_VALUE(number_of_orders ignore NULLS) OVER (ORDER BY date) AS number_of_orders
FROM raw_data
)
#7
WITH base AS (
SELECT
event_date
, event_timestamp
, DATETIME(TIMESTAMP_MICROS(event_timestamp),'Asia/Seoul') AS event_datetime
, user_id
,user_pseudo_id
FROM advanced.app_logs
),
diff_sessions AS (
SELECT
*
, DATETIME_DIFF(event_datetime,prev_event_datetime, SECOND) AS second_diff
FROM (
SELECT
*,
LAG(event_datetime) OVER (PARTITION BY user_pseudo_id ORDER BY base.event_datetime) AS prev_event_datetime
FROM base
)
)
SELECT
*
, SUM(session_start) OVER (PARTITION BY user_pseudo_id ORDER BY event_datetime) AS session_start
FROM (
SELECT *
, CASE WHEN second_diff IS NULL THEN 1
WHEN second_diff >= 20 THEN 1
ELSE 0
END as session_start
FROM diff_sessions
)
ORDER BY event_datetime
답변 1
0
안녕하세요, 인프런 AI 인턴입니다. Je Ho님.
BigQuery를 활용한 다양한 윈도우 함수의 사용 예제를 살펴보고 계신 것 같습니다. 윈도우 함수는 데이터 분석에서 특정 파티션 내에서 데이터를 연산하는 데 매우 유용한 도구입니다. 특히 데이터를 순서에 따라서 계산하거나 누적합을 구할 때 자주 사용되죠.
Je Ho님이 작성한 SQL 쿼리에서는 다양한 윈도우 함수 연습 문제를 풀어보는 과정이 잘 나타나 있어요. 각 단계에서 LEAD, LAG, FIRST_VALUE, LAST_VALUE, SUM, RANK와 같은 함수들을 활용하여 SQL 실력을 쌓는 연습을 하고 계신 것 같아요.
다음 링크를 참고하시면, 비슷한 주제의 질문과 답변을 보실 수 있습니다:
1. 윈도우 함수의 기본: OVER 사용법 및 예시
- 윈도우 함수의 사용과 관련하여 기본적인 예시와 설명이 포함되어 있습니다.
윈도우 함수는 특히 누적 데이터를 구하거나 특정 조건 하에서 데이터를 비교할 때 유용합니다. 예를 들어, 특정 유저의 누적 방문 횟수나, 이벤트별로 시간을 비교하는 등의 상황에서 쓸 수 있습니다.
질문 내용은 잘 해결하신 것 같은데, 추가적으로 궁금한 부분이 있거나 특정 문제에 대해 더 알고 싶으시면 언제든지 질문해 주세요!
저는 질문자님의 이해를 돕고자 지식공유자님의 기존 답변을 참고하여 유사한 답변 링크를 추천해드리고 있습니다. 현재 베타 기능으로 답변이 만족스럽지 않을 수 있는 점 양해 부탁드립니다. 🙏 추가적으로 궁금한 점이 있으시면, 이어서 질문해 주세요. 곧 지식공유자께서 답변해 주실 것입니다.