인프런 커뮤니티 질문&답변

권혁범님의 프로필 이미지
권혁범

작성한 질문수

BigQuery(SQL) 활용편(퍼널 분석, 리텐션 분석)

[빠짝스터디 2주차 과제] 윈도우함수 활용과 null 다루는 법

작성

·

20

·

수정됨

0

  1. 강의 문제 탐색 함수, Frame 연습 문제 (1~3)

  • 1번

SELECT 
  user_id,
  visit_month,
  LEAD(visit_month,1) OVER(PARTITION BY user_id ORDER BY visit_month) AS next_month, 
  LEAD(visit_month,2) OVER(PARTITION BY user_id ORDER BY visit_month) AS the_month_after_next
FROM 
  `avdanced.analytics_function_01`
  • 2번

SELECT 
  user_id,
  visit_month,
  LEAD(visit_month,1) OVER(PARTITION BY user_id ORDER BY visit_month) AS next_month, 
  LEAD(visit_month,2) OVER(PARTITION BY user_id ORDER BY visit_month) AS the_month_after_next,
  LAG(visit_month,1) OVER(PARTITION BY user_id ORDER BY visit_month) AS last_month
FROM 
  `avdanced.analytics_function_01` 
  • Frame

SELECT 
  -- 1번 모든 주문량 
  SUM(amount) OVER() AS amount_total,
  -- 2번 특정주문시점 누적주문량
  #SUM(amount) OVER(partition by order_date) AS cumulative_sum,
  SUM(amount) OVER (ORDER BY order_date) AS cumulative_sum,
  -- 3번 고객별 주문 시점에서 누적 주문량
  #SUM(amount) OVER(partition by user_id) AS cumulative_sum_by_user,
  SUM(amount) OVER(partition by user_id ORDER BY order_id) AS cumulative_sum_by_user,
  -- 4번 최근 직전 5개 평균 주문량 
  AVG(amount) OVER(ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING) AS last_5_orders_avg_amount, 
  AVG(amount) OVER(ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS last_5_unbounded_orders_avg_amount, 
  AVG(amount) OVER(ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS all_orders_avg_amount
FROM `avdanced.orders` 
  1. 윈도우함수 연습문제(1~7)

  • 1번

SELECT
  user,
  team,
  query_date,
  count(user) over(PARTITION BY user) AS total_query_cnt
FROM
  advanced.query_logs
ORDER BY
  query_date, 
  user
  • 2번

WITH query_cnt_by_team AS (
  SELECT 
    EXTRACT(WEEK FROM query_date) AS week_number, 
    team, 
    user, 
    COUNT(user) AS query_cnt
  FROM advanced.query_logs
  GROUP BY ALL

)

SELECT 
  *, 
  RANK() OVER (PARTITION BY week_number, team ORDER BY query_cnt DESC) AS rk
FROM query_cnt_by_team
QUALIFY rk = 1
ORDER BY 
  week_number, 
  team, 
  query_cnt DESC
  • 3번

WITH query_cnt_by_team AS (
  SELECT 
    EXTRACT(WEEK FROM query_date) AS week_number, 
    team, 
    user, 
    COUNT(user) AS query_cnt
  FROM advanced.query_logs
  GROUP BY ALL

)

SELECT 
  *, 
  LAG(query_cnt, 1) OVER (PARTITION BY user ORDER BY week_number) AS prev_week_query_cnt
FROM query_cnt_by_team
ORDER BY 
  user, 
  week_number
  • 4번

SELECT 
  query_date, 
  team, 
  user, 
  query_cnt, 
  SUM(query_cnt) OVER (PARTITION BY user ORDER BY query_date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cumulative_sum

FROM (
  SELECT 
    query_date, 
    team, 
    user, 
    COUNT(user) AS query_cnt  
  FROM advanced.query_logs  
  GROUP BY ALL
)

ORDER BY 
    user, 
    query_date
  • 5번

WITH  raw_data  AS(
  SELECT  DATE'2024-05-01'AS date,15 AS number_of_orders UNION ALL
  SELECT  DATE'2024-05-02', 13 UNION ALL
  SELECT  DATE'2024-05-03', NULL UNION ALL
  SELECT  DATE'2024-05-04', 16 UNION ALL
  SELECT  DATE'2024-05-05', NULL UNION ALL
  SELECT  DATE'2024-05-06', 18 UNION ALL
  SELECT  DATE'2024-05-07', 20 UNION ALL
  SELECT  DATE'2024-05-08', NULL UNION ALL
  SELECT  DATE'2024-05-09', 13 UNION ALL
  SELECT  DATE'2024-05-10', 14 UNION ALL
  SELECT  DATE'2024-05-11', NULL UNION ALL
  SELECT  DATE'2024-05-12', NULL
)

SELECT
  date,
  IF(number_of_orders is null , last_value(number_of_orders IGNORE NULLS) OVER(ORDER BY date asc), number_of_orders) AS number_of_orders_not_null
FROM
  raw_data;
  • 6번

WITH  raw_data  AS(
  SELECT  DATE'2024-05-01'AS date,15 AS number_of_orders UNION ALL
  SELECT  DATE'2024-05-02', 13 UNION ALL
  SELECT  DATE'2024-05-03', NULL UNION ALL
  SELECT  DATE'2024-05-04', 16 UNION ALL
  SELECT  DATE'2024-05-05', NULL UNION ALL
  SELECT  DATE'2024-05-06', 18 UNION ALL
  SELECT  DATE'2024-05-07', 20 UNION ALL
  SELECT  DATE'2024-05-08', NULL UNION ALL
  SELECT  DATE'2024-05-09', 13 UNION ALL
  SELECT  DATE'2024-05-10', 14 UNION ALL
  SELECT  DATE'2024-05-11', NULL UNION ALL
  SELECT  DATE'2024-05-12', NULL
),
filled_data AS (

  SELECT 
    * EXCEPT(number_of_orders), 
    LAST_VALUE(number_of_orders IGNORE NULLS) OVER (ORDER BY date) AS number_of_orders
  FROM raw_data

)

SELECT 
  *, 
  AVG(number_of_orders) OVER (ORDER BY date ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS moving_avg
FROM filled_data
  • 7번

WITH base AS(
  SELECT 
    event_date, 
    event_timestamp, 
    DATETIME(TIMESTAMP_MICROS(event_timestamp), 'Asia/Seoul') AS event_datetime, 
    event_name, 
    user_id, 
    user_pseudo_id, 
    DATETIME(TIMESTAMP_MICROS(LAG(event_timestamp) OVER (PARTITION BY user_pseudo_id ORDER BY event_timestamp)), 'Asia/Seoul') AS before_event_datetime
  FROM advanced.app_logs
  WHERE event_date = '2022-08-18'
    AND user_pseudo_id = '1997494153.8491999091'
),


session_info AS(
  SELECT 
    *, 
    TIMESTAMP_DIFF(event_datetime, before_event_datetime, SECOND) AS second_diff, 
    CASE 
      WHEN TIMESTAMP_DIFF(event_datetime, before_event_datetime, SECOND) >= 20 
      OR TIMESTAMP_DIFF(event_datetime, before_event_datetime, SECOND) IS NULL THEN 1 
      ELSE NULL 
    END AS session_start
  FROM base
)

SELECT 
  *, 
  SUM(session_start) OVER (PARTITION BY user_pseudo_id ORDER BY event_datetime) AS session_id
FROM session_info
ORDER BY event_date, event_timestamp;

답변 1

0

안녕하세요, 인프런 AI 인턴입니다. 권혁범님.

윈도우 함수와 NULL 값을 다루는 다양한 SQL 쿼리 연습 문제에 대해 작성해 주셨군요. 이러한 연습을 통해 데이터 분석과 SQL 쿼리 작성 능력을 강화하는 것은 매우 중요합니다.

비슷한 주제에 대한 질문들이 여러 개 있었습니다. 아래 링크를 통해 유사한 질문과 답변을 확인할 수 있습니다. 이 자료들이 권혁범님의 스터디에 도움이 되길 바랍니다.

이처럼 다양한 쿼리 문제를 풀어보며 실력을 쌓아가셔서, 데이터 분석 과정에서의 윈도우 함수 활용 능력을 더욱 강화하실 수 있을 것입니다.

저는 질문자님의 이해를 돕고자 지식공유자님의 기존 답변을 참고하여 유사한 답변 링크를 추천해드리고 있습니다. 현재 베타 기능으로 답변이 만족스럽지 않을 수 있는 점 양해 부탁드립니다. 🙏 추가적으로 궁금한 점이 있으시면, 이어서 질문해 주세요. 곧 지식공유자께서 답변해 주실 것입니다.

권혁범님의 프로필 이미지
권혁범

작성한 질문수

질문하기