작성
·
16
0
주요 Point💡
윈도우 함수 FRAME 개념은 익숙해지는 것이 중요
qualify 개념 ⭐
누적합 개념 사용해 세션 구하기 !! </aside>
연습문제 FRAME
/*
- 토픽: frame 연습문제
- 추가 컬럼
-- 1) amount_total
-- 2) cumulative_sum
-- 3) cumulative_sum_by_user
-- 4) last_5_orders_avg_amount
*/
select *
, sum(amount) over() as amount_total
, sum(amount) over(order by order_id) as cumulative_sum
, sum(amount) over(partition by user_id order by order_id) as cumulative_sum_by_user
, sum(amount) over(order by order_id rows between 5 preceding and 1 preceding) as last_5_orders_avg_amount
from advanced.orders
;
윈도우 함수로 생성된 변수를 having 처리하는 것처럼 사용할 수 있음
E.g. qualify amount_total ≥ 500
윈도우 함수 연습문제
/*
- 연습문제 1
- window 활용 > partition by 활용 목적
*/
select *
, count(*) over(partition by user) as total_query_cnt
from advanced.query_logs
order by user, query_date
;
/*
- 연습문제 2
- window 활용 > partition by & qualify 활용 (훨씬 편하다!)
- 해설 풀이 자체는 처음에 group by 쓰면 더 유용 (그럴 것 같음)
*/
with tmp as (
select week_number
, team
, user
, count(*) over(partition by week_number, team, user) as query_cnt
from
(
select *
, extract(week from query_date) as week_number
from advanced.query_logs
) t
)
select *
, row_number() over(partition by week_number, team order by query_cnt desc) as team_rnk
from tmp
qualify team_rnk = 1
order by week_number, team
;
/*
- 연습문제 3
- window 활용 > lag 함수 목적
*/
with tmp as (
select user
, team
, week_number
, count(*) as query_count
from
(
select *
, extract(week from query_date) as week_number
from advanced.query_logs
) t
group by user, team, week_number
)
select *
, lag(query_count) over(partition by user order by week_number) as prev_week_query_count
from tmp t
;
/*
- 연습문제 4
- window 활용 > 누적 count 목적
- order by, partition by 개념 명확히 익히기에 좋음
- 해설 풀이와는 다름 (group by 후 > query_cnt를 sum하는 방식으로 풀이함) > 출제의도 frame 개념 파악
*/
select distinct user
, team
, query_date
, count(*) over(partition by user, query_date) as query_count
,count(*) over(partition by user order by query_date) as cumulative_query_count
from advanced.query_logs
order by 1, 2, 3
;
/*
- 연습문제 6
- window 활용 > moving_avg 산출 문제
*/
WITH raw_data AS (
SELECT DATE '2024-05-01' AS date, 15 AS number_of_orders UNION ALL SELECT DATE '2024-05-02', 13 UNION ALL
SELECT DATE '2024-05-03', NULL UNION ALL
SELECT DATE '2024-05-04', 16 UNION ALL
SELECT DATE '2024-05-05', NULL UNION ALL
SELECT DATE '2024-05-06', 18 UNION ALL
SELECT DATE '2024-05-07', 20 UNION ALL
SELECT DATE '2024-05-08', NULL UNION ALL
SELECT DATE '2024-05-09', 13 UNION ALL
SELECT DATE '2024-05-10', 14 UNION ALL
SELECT DATE '2024-05-11', NULL UNION ALL
SELECT DATE '2024-05-12', NULL
)
SELECT date
, coalesce(number_of_orders, lag(number_of_orders) over(order by date)) as number_of_orders1
, Last_value(number_of_orders ignore nulls) over(order by date) as number_of_orders2
FROM raw_data
;
/*
- 연습문제 5
- window 활용 > coalesce & lag 활용목적
*/
WITH raw_data AS (
SELECT DATE '2024-05-01' AS date, 15 AS number_of_orders UNION ALL SELECT DATE '2024-05-02', 13 UNION ALL
SELECT DATE '2024-05-03', NULL UNION ALL
SELECT DATE '2024-05-04', 16 UNION ALL
SELECT DATE '2024-05-05', NULL UNION ALL
SELECT DATE '2024-05-06', 18 UNION ALL
SELECT DATE '2024-05-07', 20 UNION ALL
SELECT DATE '2024-05-08', NULL UNION ALL
SELECT DATE '2024-05-09', 13 UNION ALL
SELECT DATE '2024-05-10', 14 UNION ALL
SELECT DATE '2024-05-11', NULL UNION ALL
SELECT DATE '2024-05-12', NULL
)
select *
, avg(number_of_orders) over(order by date rows between 2 preceding and current row) as moving_avg
from
(
SELECT date
-- , coalesce(number_of_orders, lag(number_of_orders) over(order by date)) as number_of_orders1
, Last_value(number_of_orders ignore nulls) over(order by date) as number_of_orders
FROM raw_data
)
;
세션 구하기 연습문제
/*
- 연습문제 7
- session 구하기
- diff & 누적합 개념으로 세션 구하는 것이 포인트 (누적합 개념 신선!) */
with base as (
select event_date
, datetime(timestamp_micros(event_timestamp), 'Asia/Seoul') as event_datetime
, event_name
, user_id
, user_pseudo_id
from advanced.app_logs
), diff_date as (
select *
, datetime_diff(event_datetime, prev_event_datetime, second) as second_diff
from
(
select *
, lag(event_datetime) over(partition by user_pseudo_id order by event_datetime) as prev_event_datetime
from base
)
)
select *
, sum(Session_Start) over(partition by user_pseudo_id order by event_datetime) as session_num
from
(
select *
, case when prev_event_Datetime is null then 1
when second_diff >= 20 then 1
else 0
end as session_start
from diff_date
)
order by event_Datetime, user_pseudo_id
;
답변 1
0
안녕하세요, 인프런 AI 인턴입니다. jihyun lee님.
윈도우 함수 관련하여 여러 연습문제를 풀면서 익히신 부분에 대해 설명해주셨네요. 윈도우 함수는 데이터 분석에서 매우 중요한 개념이므로, 각 문제를 통해 여러 방식으로 연습하신 것은 큰 도움이 될 것입니다.
이와 관련하여 이해도를 높일 수 있는 유사한 질문과 답변을 소개합니다:
윈도우 함수의 기본 개념과 사용법 - SQL 연습 문제
SQL에서 LAG() 함수와 누적 합계 활용 방법에 대한 실례
질문에서 다룬 쿼리 예시들을 통해 윈도우 함수의 강력함을 직접 느끼셨을 것이라 생각됩니다. 특히, 데이터의 누적 합계, 이동 평균 등을 계산하는 방법을 통해 데이터 분석의 폭을 넓히는데 큰 도움이 되셨길 바랍니다.
저는 질문자님의 이해를 돕고자 지식공유자님의 기존 답변을 참고하여 유사한 답변 링크를 추천해드리고 있습니다. 현재 베타 기능으로 답변이 만족스럽지 않을 수 있는 점 양해 부탁드립니다. 🙏 추가적으로 궁금한 점이 있으시면, 이어서 질문해 주세요. 곧 지식공유자께서 답변해 주실 것입니다.