인프런 커뮤니티 질문&답변

김성호님의 프로필 이미지
김성호

작성한 질문수

BigQuery(SQL) 활용편(퍼널 분석, 리텐션 분석)

[인프런 빅쿼리 빠짝스터디 2주차] 윈도우 함수, FRAME설정, QUALITY

작성

·

20

0

윈도우 탐색 함수 연습문제

(1) 연습문제 1

-- 문제 1) USER의 다음 접속월, 다다음 접속 월

SELECT 
  user_id,
  visit_month,
  LEAD(visit_month,1) OVER(PARTITION BY user_id ORDER BY visit_month) AS next_month, 
  LEAD(visit_month,2) OVER(PARTITION BY user_id ORDER BY visit_month) AS the_month_after_next
FROM 
  `avdanced.analytics_function_01` 

(2) 연습문제 2

-- 문제 2) USER의 다음 접속월, 다다음 접속 월, 이전 접속 월
SELECT 
  user_id,
  visit_month,
  LEAD(visit_month,1) OVER(PARTITION BY user_id ORDER BY visit_month) AS next_month, 
  LEAD(visit_month,2) OVER(PARTITION BY user_id ORDER BY visit_month) AS the_month_after_next,
  LAG(visit_month,1) OVER(PARTITION BY user_id ORDER BY visit_month) AS last_month
FROM 
  `avdanced.analytics_function_01` 

윈도우 함수 FRAME 연습문제

연습문제 (1~4)

SELECT 
  -- 1)모든 주문량 
  SUM(amount) OVER() AS amount_total,
  -- 2)특정주문시점에서 누적주문량
  #SUM(amount) OVER(partition by order_date) AS cumulative_sum,
  SUM(amount) OVER (ORDER BY order_date) AS cumulative_sum,
  -- 3)고객별 주문 시점에서 누적 주문량
  #SUM(amount) OVER(partition by user_id) AS cumulative_sum_by_user,
  SUM(amount) OVER(partition by user_id ORDER BY order_id) AS cumulative_sum_by_user,
  -- 4) 최근 직전 5개 평균 주문량 
  AVG(amount) OVER(ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING) AS last_5_orders_avg_amount, 
  AVG(amount) OVER(ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS last_5_unbounded_orders_avg_amount, 
  AVG(amount) OVER(ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS all_orders_avg_amount
FROM `avdanced.orders` 

 

윈도우 함수

(1) 연습문제 1

-- 연습문제1) 사용자별 쿼리 실행 횟수
WITH base AS(
  SELECT 
    user,
    team,
    query_date,
    COUNT(*) OVER(PARTITION BY user) AS total_query_cnt,
  FROM 
    `avdanced.query_logs`
) 
SELECT
  *
FROM 
  base

(2) 연습문제 2

-- 연습문제2) 주차별 팀내 쿼리 실행한 수 (RANK 1만 보이도록)
WITH base2 AS(
  SELECT
    EXTRACT(WEEK FROM query_date) AS week_number,
    team,
    user,
    COUNT(*) OVER(PARTITION BY EXTRACT(WEEK FROM query_date) ,user ORDER BY EXTRACT(WEEK FROM query_date) ) AS query_cnt,

FROM 
    `avdanced.query_logs`
  ORDER BY EXTRACT(WEEK FROM query_date) 
)
  SELECT
    DISTINCT *,
    RANK() OVER(PARTITION BY team,week_number ORDER BY total_query_cnt DESC) AS team_rank
  FROM base2
  QUALIFY team_rank = 1
  ORDER BY week_number, team

(3) 연습문제 3

WITH base2 AS(
  SELECT
    EXTRACT(WEEK FROM query_date) AS week_number,
    team,
    user,
    COUNT(*) OVER(PARTITION BY EXTRACT(WEEK FROM query_date) ,user ORDER BY EXTRACT(WEEK FROM query_date) ) AS query_cnt,

  FROM 
      `avdanced.query_logs`
  #QUALIFY team_rank = 1
  ORDER BY EXTRACT(WEEK FROM query_date) 
), base3 AS(
  SELECT
    DISTINCT *,
    RANK() OVER(PARTITION BY team,week_number ORDER BY query_cnt DESC) AS team_rank
  FROM base2
  QUALIFY team_rank = 1
  ORDER BY week_number, team
)

-- 연습문제3) 쿼리 실행 시점 1주전 쿼리 실행

SELECT 
  DISTINCT *,
  LAG(query_cnt,1) OVER(PARTITION BY user ORDER BY week_number) AS prev_week_query_count 
FROM 
  base2
GROUP BY ALL
ORDER BY user, week_number

(4) 연습문제 4

--연습문제4)
SELECT
  *, 
  SUM(query_count) OVER(PARTITION BY user ORDER BY query_date) AS culmulative_query_count,
  SUM(query_cnt) OVER(PARTITION BY user ORDER BY query_date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cumulative_sum2
FROM(
  SELECT 
    DISTINCT *,
    COUNT(user) OVER(PARTITION BY query_date, user) AS query_count,
  FROM `avdanced.query_logs`
)
ORDER BY user,query_date 

(5) 연습문제 5

WITH raw_data AS (
  SELECT DATE '2024-05-01' AS date, 15 AS number_of_orders UNION ALL
  SELECT DATE '2024-05-02', 13 UNION ALL
  SELECT DATE '2024-05-03', NULL UNION ALL
  SELECT DATE '2024-05-04', 16 UNION ALL
  SELECT DATE '2024-05-05', NULL UNION ALL
  SELECT DATE '2024-05-06', 18 UNION ALL
  SELECT DATE '2024-05-07', 20 UNION ALL
  SELECT DATE '2024-05-08', NULL UNION ALL
  SELECT DATE '2024-05-09', 13 UNION ALL
  SELECT DATE '2024-05-10', 14 UNION ALL
  SELECT DATE '2024-05-11', NULL UNION ALL
  SELECT DATE '2024-05-12', NULL
)
--연습문제 5) null에 이전 값 삽입
SELECT
  raw_data.date,
  IF(raw_data.number_of_orders IS NULL, LAG(raw_data.number_of_orders,1) OVER(ORDER BY date), raw_data.number_of_orders)
FROM raw_data

(6) 연습문제 6

WITH raw_data AS (
  SELECT DATE '2024-05-01' AS date, 15 AS number_of_orders UNION ALL
  SELECT DATE '2024-05-02', 13 UNION ALL
  SELECT DATE '2024-05-03', NULL UNION ALL
  SELECT DATE '2024-05-04', 16 UNION ALL
  SELECT DATE '2024-05-05', NULL UNION ALL
  SELECT DATE '2024-05-06', 18 UNION ALL
  SELECT DATE '2024-05-07', 20 UNION ALL
  SELECT DATE '2024-05-08', NULL UNION ALL
  SELECT DATE '2024-05-09', 13 UNION ALL
  SELECT DATE '2024-05-10', 14 UNION ALL
  SELECT DATE '2024-05-11', NULL UNION ALL
  SELECT DATE '2024-05-12', NULL
), null_is_lag AS(
  --연습문제 5) null에 이전 값 삽입
  SELECT
    raw_data.date,
    IF(raw_data.number_of_orders IS NULL, LAG(raw_data.number_of_orders,1) OVER(ORDER BY date), raw_data.number_of_orders) AS number_of_orders
  FROM raw_data
)
-- 연습문제 6) 이동평균
SELECT 
  *,
  AVG(nl.number_of_orders) OVER(ORDER BY nl.date ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS moving_avg
FROM null_is_lag AS nl

(7) 연습문제 7

-- 1. TIMESTAMP → DATETIME
-- 2. SECOND_DIFF 생성 : uSER로 묶어서 -
-- 3. SESSION_START생성 : USER로 묶어서 LAG(DATA,1)이 NULL이면 1, SECOND_DIFF가 20이상이면 +1
-- 4. SESSION_ID생성: SESSION_START가 1일 경우 SESSION_ID +1, NULL일 경우 LAG(DATA,1)

WITH add_date AS (
-- 1. TIMESTAMP → DATETIME
  SELECT 
    event_date,
    event_timestamp,
    DATETIME(TIMESTAMP_MICROS(event_timestamp)) AS event_datetime,
    event_name,
    user_id, 
    user_pseudo_id,
    LAG(DATETIME(TIMESTAMP_MICROS(event_timestamp))) OVER(PARTITION BY user_pseudo_id ORDER BY event_timestamp) AS before_event_datetime
  FROM 
    `avdanced.app_logs_temp`
    --,UNNEST(event_params) AS param  -- FROM 절 안에서 UNNEST를 사용
  WHERE event_date ="2022-08-18" AND user_pseudo_id = "1997494153.8491999091"
), add_diff AS (
-- 2. SECOND_DIFF 생성 : uSER로 묶어서 -
SELECT 
  *,
  DATE_DIFF(event_datetime, before_event_datetime,SECOND) AS second_diff,
FROM add_date 
), add_session AS(
-- 3. SESSION_START생성 : USER로 묶어서 LAG(DATA,1)이 NULL이면 1, SECOND_DIFF가 20이상이면 +1  
  SELECT
  *, 
  IF(second_diff IS NULL OR second_diff >=20, 1, NULL) AS session_start
  FROM
    add_diff
)

-- 4. SESSION_ID생성
  *,
  SUM(session_start) OVER(PARTITION BY user_pseudo_id ORDER BY event_datetime) AS session_num
FROM add_session
ORDER BY event_datetime

답변 1

0

안녕하세요, 인프런 AI 인턴입니다. 김성호님.

윈도우 함수와 관련된 다양한 연습 문제를 통해 SQL에서의 데이터 다루기에 능숙해지시는 과정에 계신 것 같습니다. 그러한 연습을 통해 데이터 분석 및 처리에서의 다양한 패턴과 기법을 배워가는 모습이 인상적입니다.

이와 관련하여 프레임 설정과 데이터의 흐름을 잘 이해하신 것처럼 보입니다. 아래에 윈도우 함수와 관련하여 유사한 질문과 그에 대한 답변을 담은 링크를 제공해 드리겠습니다. 김성호님께서 코드 작성 시 직면하는 궁금증을 해결하는 데 도움이 될 수 있을 것입니다.

각 링크는 김성호님의 연습 문제에 직접적으로 관련될 수 있는 예시와 설명을 포함하고 있습니다.

저는 질문자님의 이해를 돕고자 지식공유자님의 기존 답변을 참고하여 유사한 답변 링크를 추천해드리고 있습니다. 현재 베타 기능으로 답변이 만족스럽지 않을 수 있는 점 양해 부탁드립니다. 🙏 추가적으로 궁금한 점이 있으시면, 이어서 질문해 주세요. 곧 지식공유자께서 답변해 주실 것입니다.

김성호님의 프로필 이미지
김성호

작성한 질문수

질문하기