작성
·
26
0
CREATE OR REPLACE TABLE workspace.analytics_function_01 AS (
SELECT 1004 AS user_id, 1 AS visit_month UNION ALL
SELECT 1004, 3 UNION ALL
SELECT 1004, 7 UNION ALL
SELECT 1004, 8 UNION ALL
SELECT 2112, 3 UNION ALL
SELECT 2112, 6 UNION ALL
SELECT 2112, 7 UNION ALL
SELECT 3912, 4
)
;
#문제1
SELECT
user_id,
visit_month,
lead(visit_month, 1) over(partition by user_id order by visit_month asc) as next_visit,
lead(visit_month, 2) over(partition by user_id order by visit_month asc) as next_next_visit
FROM workspace.analytics_function_01
ORDER BY user_id, visit_month
;
#문제2
SELECT
user_id,
visit_month,
lead(visit_month, 1) over(partition by user_id order by visit_month asc) as next_visit,
lead(visit_month, 2) over(partition by user_id order by visit_month asc) as next_next_visit,
lag(visit_month, 1) over(partition by user_id order by visit_month asc) as prev_visit
FROM workspace.analytics_function_01
ORDER BY user_id, visit_month
;
#문제3
SELECT
user_id,
visit_month,
lead(visit_month, 1) over(partition by user_id order by visit_month asc) as next_visit_month,
lead(visit_month, 1) over(partition by user_id order by visit_month asc) - visit_month as next_visit_month_diff
FROM workspace.analytics_function_01
ORDER BY user_id, visit_month
;
#추가문제
SELECT DISTINCT
user_id,
first_value(visit_month) over(partition by user_id order by visit_month asc rows between unbounded preceding and unbounded following) as first_visit_month,
last_value(visit_month) over(partition by user_id order by visit_month asc rows between unbounded preceding and unbounded following) as last_visit_month
FROM workspace.analytics_function_01
ORDER BY user_id
;
#문제4
SELECT
*,
sum(amount) over() as total_amount,
sum(amount) over(order by order_id asc rows between unbounded preceding and current row) as cumulative_sum,
sum(amount) over(partition by user_id order by order_id asc rows between unbounded preceding and current row) as cumulative_sum_by_user,
avg(amount) over(order by order_id asc rows between 5 preceding and 1 preceding) as last_five_orders_avg_amount
FROM workspace.orders
ORDER BY order_id
;
#연습문제1
SELECT
*,
count(*) over(partition by user) as query_count_by_users
FROM workspace.query_logs
;
#연습문제2
SELECT
query_weeknum,
team,
user,
query_count,
rank() over(partition by query_weeknum, team order by query_count desc) as query_rank
FROM
(
SELECT
extract(week from query_date) as query_weeknum,
team,
user,
count(1) as query_count
FROM workspace.query_logs
GROUP BY ALL
)
QUALIFY query_rank = 1
ORDER BY query_weeknum
;
#연습문제3
SELECT
team,
user,
query_weeknum,
query_count,
lag(query_count, 1) over(partition by team, user order by query_weeknum asc) as prev_week_query_count
FROM
(
SELECT
team,
user,
extract(week from query_date) as query_weeknum,
count(1) as query_count
FROM workspace.query_logs
GROUP BY ALL
)
#연습문제4
SELECT
team,
user,
query_date,
query_count,
sum(query_count) over(partition by team, user order by query_date asc rows between unbounded preceding and current row) as cumulative_sum
FROM
(
SELECT
team,
user,
query_date,
count(1) as query_count
FROM workspace.query_logs
GROUP BY ALL
)
ORDER BY team, user, query_date
;
#연습문제5
WITH raw_data AS (
SELECT DATE '2024-05-01' AS date, 15 AS number_of_orders UNION ALL
SELECT DATE '2024-05-02', 13 UNION ALL
SELECT DATE '2024-05-03', NULL UNION ALL
SELECT DATE '2024-05-04', 16 UNION ALL
SELECT DATE '2024-05-05', NULL UNION ALL
SELECT DATE '2024-05-06', 18 UNION ALL
SELECT DATE '2024-05-07', 20 UNION ALL
SELECT DATE '2024-05-08', NULL UNION ALL
SELECT DATE '2024-05-09', 13 UNION ALL
SELECT DATE '2024-05-10', 14 UNION ALL
SELECT DATE '2024-05-11', NULL UNION ALL
SELECT DATE '2024-05-12', NULL
)
SELECT
date,
ifnull(number_of_orders, real_prev_number_of_orders) as number_of_orders
FROM
(
SELECT
date,
number_of_orders,
last_value(number_of_orders ignore nulls) over(order by date asc rows between unbounded preceding and 1 preceding) as real_prev_number_of_orders
FROM raw_data
)
ORDER BY date asc
;
#연습문제6
WITH raw_data AS (
SELECT DATE '2024-05-01' AS date, 15 AS number_of_orders UNION ALL
SELECT DATE '2024-05-02', 13 UNION ALL
SELECT DATE '2024-05-03', NULL UNION ALL
SELECT DATE '2024-05-04', 16 UNION ALL
SELECT DATE '2024-05-05', NULL UNION ALL
SELECT DATE '2024-05-06', 18 UNION ALL
SELECT DATE '2024-05-07', 20 UNION ALL
SELECT DATE '2024-05-08', NULL UNION ALL
SELECT DATE '2024-05-09', 13 UNION ALL
SELECT DATE '2024-05-10', 14 UNION ALL
SELECT DATE '2024-05-11', NULL UNION ALL
SELECT DATE '2024-05-12', NULL
)
SELECT
date,
number_of_orders,
avg(number_of_orders) over(order by date asc rows between 2 preceding and current row) as moving_avg
FROM
(
SELECT
date,
ifnull(number_of_orders, real_prev_number_of_orders) as number_of_orders
FROM
(
SELECT
date,
number_of_orders,
last_value(number_of_orders ignore nulls) over(order by date asc rows between unbounded preceding and 1 preceding) as real_prev_number_of_orders
FROM raw_data
)
)
ORDER BY date asc
;
#연습문제7
WITH total_logs AS (
SELECT
user_pseudo_id,
event_name,
timestamp_micros(event_timestamp) as event_datetime
FROM workspace.app_logs
)
SELECT
user_pseudo_id,
event_name,
event_datetime,
prev_event_datetime,
second_diff,
sum(session_change) over(partition by user_pseudo_id order by event_datetime asc) as session_id
FROM
(
SELECT
*,
case when event_datetime = first_event_datetime then 1 end as session_id,
case when second_diff is null or second_diff >= 20 then 1
else 0 end as session_change
FROM
(
SELECT
*,
datetime_diff(event_datetime, prev_event_datetime, second) as second_diff
FROM
(
SELECT
user_pseudo_id,
event_name,
event_datetime,
lag(event_datetime, 1) over(partition by user_pseudo_id order by event_datetime asc) as prev_event_datetime,
first_value(event_datetime) over(partition by user_pseudo_id order by event_datetime asc) as first_event_datetime
FROM total_logs
)
)
)
ORDER BY user_pseudo_id, event_datetime
;
답변 1
0
안녕하세요, 인프런 AI 인턴입니다. MS님.
BigQuery에서 윈도우 함수를 활용하여 여러 유형의 데이터 분석을 하신 것에 대해 잘 정리된 코드입니다. 이 연습 문제를 통해 데이터 분석에 유용한 기술을 배우게 되신 것 같아 다행입니다.
이러한 빅쿼리에서의 윈도우 함수들은 데이터 안에서 파티션 또는 순서에 따른 다양한 집계나 계산을 쉽게 할 수 있는 기능입니다. LEAD와 LAG 함수로 이전 및 다음 값을 얻어내고자 하시는 시도나, FIRST_VALUE와 LAST_VALUE를 통해 파티션별 처음 및 마지막 값을 계산하는 것은 매우 유용한 기법입니다. 이런 점을 잘 활용하셔 다양한 데이터 분석에 접목할 수 있을 것입니다.
혹시 동일한 주제와 관련한 기존 답변을 확인하시고 싶은 경우, 몇 가지 유사한 질문을 참조해 상담할 수 있습니다:
Bigquery 활용편(PDF)파일 P.229 페이지 코드 오류에서는 윈도우 함수
의 기본 설정 및 최적화에 대해 논의가 있습니다. 질문의 설명과 비교하며 학습해보시면 좋습니다.
3-7 리텐션 SQL 쿼리 작성하기 연습 (Weekly, Monthly)는 리텐션 분석을 위한 쿼리 작성 방법과 관련된 질문입니다. 실용적인 예제도 포함되어 있어 구조를 파악하는 데 도움을 받을 수 있습니다.
[과제] 퍼널 쿼리(피벗테이블 적용) 작성](https://www.inflearn.com/questions/1401859)에서는 다양한 방법으로 쿼리를 구조화하여 앱 사용 데이터를 분석하는 방법에 대한 설명이 포함되어 있습니다.
이 질문과 답변이 MS님의 과제에 도움이 될 수 있기를 바라며, 추가적으로 궁금한 점이 있으시면, 이어서 질문해 주세요. 곧 지식공유자께서 답변해 주실 것입니다.
저는 질문자님의 이해를 돕고자 지식공유자님의 기존 답변을 참고하여 유사한 답변 링크를 추천해드리고 있습니다. 현재 베타 기능으로 답변이 만족스럽지 않을 수 있는 점 양해 부탁드립니다. 🙏 추가적으로 궁금한 점이 있으시면, 이어서 질문해 주세요. 곧 지식공유자께서 답변해 주실 것입니다.