인프런 커뮤니티 질문&답변

Je Ho님의 프로필 이미지
Je Ho

작성한 질문수

BigQuery(SQL) 활용편(퍼널 분석, 리텐션 분석)

[ 빠짝 스터디 2주차 ] 윈도우 함수 연습 문제

작성

·

13

0

탐색함수 연습 문제

CREATE OR REPLACE TABLE
  advanced.analytics_function_01 AS (
    SELECT 1004 AS user_id, 1 AS visit_month UNION ALL
    SELECT 1004, 3 UNION ALL
    SELECT 1004, 7 UNION ALL
    SELECT 1004, 8 UNION ALL
    SELECT 2112, 3 UNION ALL
    SELECT 2112, 6 UNION ALL
    SELECT 2112, 7 UNION ALL
    SELECT 3912, 4
  )

  SELECT *
  FROM advanced.analytics_function_01

  --# 탐색 함수 1
  SELECT
    user_id,
    visit_month,
    LEAD(visit_month, 1) OVER (PARTITION BY user_id ORDER BY visit_month) AS after_visit_month,
    LEAD(visit_month, 2) OVER (PARTITION BY user_id ORDER BY visit_month) AS after_visit_month
  FROM advanced.analytics_function_01
  ORDER BY user_id

  --# 탐색 함수 2
  SELECT
    user_id,
    visit_month,
    LEAD(visit_month, 1) OVER (PARTITION BY user_id ORDER BY visit_month) AS after_visit_month,
    LEAD(visit_month, 2) OVER (PARTITION BY user_id ORDER BY visit_month) AS after_visit_month,
    LAG(visit_month, 1) OVER (PARTITION BY user_id ORDER BY visit_month) AS before_visit_month
  FROM advanced.analytics_function_01
  ORDER BY user_id

  --# 탐색 함수 3
  SELECT
    *,
    after_visit_month - visit_month AS diff
    FROM (
  SELECT
    user_id,
    visit_month,
    LEAD(visit_month, 1) OVER (PARTITION BY user_id ORDER BY visit_month) AS after_visit_month,
  FROM advanced.analytics_function_01
  ORDER BY user_id
    )

  --# 추가 문제
    SELECT
    user_id,
    visit_month,
    FIRST_VALUE(visit_month) OVER (PARTITION BY user_id ORDER BY visit_month) AS fisrt_visit,
    LAST_VALUE(visit_month) OVER (PARTITION BY user_id ORDER BY visit_month) AS LAST_visit,
  FROM advanced.analytics_function_01
  ORDER BY user_id

프레임 연습 문제

SELECT
 *,
 SUM(amount) OVER() AS amount_total,
 SUM(amount) OVER(ORDER BY order_id) AS cumulative_sum,
 SUM(amount) OVER(PARTITION BY user_id ORDER BY order_id) AS cumulative_sum_by_user,
 AVG(amount) OVER(ORDER BY order_id ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING) AS last_5_avg
 FROM advanced.orders
 ORDER BY order_id

윈도우함수 연습 문제

#1
SELECT 
  *,
  COUNT(query_date) OVER (PARTITION BY user) as total_query_cnt
FROM advanced.query_logs
ORDER BY user, query_date

#2
WITH TBL AS(
  SELECT
  EXTRACT(week from query_date) AS week_number
  ,team
  ,user
  ,COUNT(query_date) AS total_query_cnt
FROM advanced.query_logs
GROUP BY week_number, team, user
) SELECT 
    week_number
    , team
    , user
    , total_query_cnt as query_count
    , RANK() OVER (PARTITION BY team ORDER BY total_query_cnt desc) AS team_rank
  FROM TBL
  QUALIFY team_rank = 1
  ORDER BY week_number,team

#3 
WITH TBL AS(
  SELECT
  EXTRACT(week from query_date) AS week_number
  ,team
  ,user
  ,COUNT(query_date) AS total_query_cnt
FROM advanced.query_logs
GROUP BY week_number, team, user
)
SELECT
  user
  ,team
  ,week_number
  ,total_query_cnt as query_count
  ,LAG(total_query_cnt) OVER (PARTITION BY user order by week_number) AS prev_week_query_count
FROM TBL
ORDER BY user, week_number;

#4
SELECT
  user
  , team
  , query_date
  , query_count
  , SUM(query_count) OVER (PARTITION BY user ORDER BY query_date 
    ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cumulative_query_count
FROM
 (SELECT
  user
  ,team
  ,query_date
  ,COUNT(query_date) AS query_count
  FROM advanced.query_logs
  GROUP BY user,team,query_date
 )
 ORDER BY user, query_date

 #5
 WITH raw_data AS (
  SELECT DATE '2024-05-01' AS date, 15 AS number_of_orders UNION ALL
  SELECT DATE '2024-05-02', 13 UNION ALL
  SELECT DATE '2024-05-03', NULL UNION ALL
  SELECT DATE '2024-05-04', 16 UNION ALL
  SELECT DATE '2024-05-05', NULL UNION ALL
  SELECT DATE '2024-05-06', 18 UNION ALL
  SELECT DATE '2024-05-07', 20 UNION ALL
  SELECT DATE '2024-05-08', NULL UNION ALL
  SELECT DATE '2024-05-09', 13 UNION ALL
  SELECT DATE '2024-05-10', 14 UNION ALL
  SELECT DATE '2024-05-11', NULL UNION ALL
  SELECT DATE '2024-05-12', NULL
)
SELECT
 date 
 , LAST_VALUE(number_of_orders ignore NULLS) OVER (ORDER BY date) AS number_of_orders
 FROM raw_data

#6
 WITH raw_data AS (
  SELECT DATE '2024-05-01' AS date, 15 AS number_of_orders UNION ALL
  SELECT DATE '2024-05-02', 13 UNION ALL
  SELECT DATE '2024-05-03', NULL UNION ALL
  SELECT DATE '2024-05-04', 16 UNION ALL
  SELECT DATE '2024-05-05', NULL UNION ALL
  SELECT DATE '2024-05-06', 18 UNION ALL
  SELECT DATE '2024-05-07', 20 UNION ALL
  SELECT DATE '2024-05-08', NULL UNION ALL
  SELECT DATE '2024-05-09', 13 UNION ALL
  SELECT DATE '2024-05-10', 14 UNION ALL
  SELECT DATE '2024-05-11', NULL UNION ALL
  SELECT DATE '2024-05-12', NULL
)
SELECT
 date 
 , number_of_orders
 , AVG(number_of_orders) OVER (ORDER BY date ROWS BETWEEN 2 PRECEDING AND CURRENT ROW)
 FROM (
  SELECT
  date
  ,LAST_VALUE(number_of_orders ignore NULLS) OVER (ORDER BY date) AS number_of_orders
 FROM raw_data
 )

 #7
WITH base AS (
  SELECT
  event_date
  , event_timestamp
  , DATETIME(TIMESTAMP_MICROS(event_timestamp),'Asia/Seoul') AS event_datetime
  , user_id
  ,user_pseudo_id
  FROM advanced.app_logs
),
diff_sessions AS (
  SELECT
   *
  , DATETIME_DIFF(event_datetime,prev_event_datetime, SECOND) AS second_diff
  FROM (
    SELECT
      *,
      LAG(event_datetime) OVER (PARTITION BY user_pseudo_id ORDER BY base.event_datetime) AS prev_event_datetime
    FROM base
  )
)

SELECT 
  *
  , SUM(session_start) OVER (PARTITION BY user_pseudo_id ORDER BY event_datetime) AS session_start
  FROM (
    SELECT *
    , CASE WHEN second_diff IS NULL THEN 1
      WHEN second_diff >= 20 THEN 1
      ELSE 0
    END as session_start
    FROM diff_sessions
  )
  ORDER BY event_datetime

답변 1

0

안녕하세요, 인프런 AI 인턴입니다. Je Ho님.

BigQuery를 활용한 다양한 윈도우 함수의 사용 예제를 살펴보고 계신 것 같습니다. 윈도우 함수는 데이터 분석에서 특정 파티션 내에서 데이터를 연산하는 데 매우 유용한 도구입니다. 특히 데이터를 순서에 따라서 계산하거나 누적합을 구할 때 자주 사용되죠.

Je Ho님이 작성한 SQL 쿼리에서는 다양한 윈도우 함수 연습 문제를 풀어보는 과정이 잘 나타나 있어요. 각 단계에서 LEAD, LAG, FIRST_VALUE, LAST_VALUE, SUM, RANK와 같은 함수들을 활용하여 SQL 실력을 쌓는 연습을 하고 계신 것 같아요.

다음 링크를 참고하시면, 비슷한 주제의 질문과 답변을 보실 수 있습니다:
1. 윈도우 함수의 기본: OVER 사용법 및 예시
- 윈도우 함수의 사용과 관련하여 기본적인 예시와 설명이 포함되어 있습니다.

  1. BigQuery의 다양한 윈도우 함수 활용 사례 및 오류 해결
    • 쿼리 작성 시 발생할 수 있는 일반적인 오류 및 해결 방법에 대해 아실 수 있습니다.

윈도우 함수는 특히 누적 데이터를 구하거나 특정 조건 하에서 데이터를 비교할 때 유용합니다. 예를 들어, 특정 유저의 누적 방문 횟수나, 이벤트별로 시간을 비교하는 등의 상황에서 쓸 수 있습니다.

질문 내용은 잘 해결하신 것 같은데, 추가적으로 궁금한 부분이 있거나 특정 문제에 대해 더 알고 싶으시면 언제든지 질문해 주세요!

저는 질문자님의 이해를 돕고자 지식공유자님의 기존 답변을 참고하여 유사한 답변 링크를 추천해드리고 있습니다. 현재 베타 기능으로 답변이 만족스럽지 않을 수 있는 점 양해 부탁드립니다. 🙏 추가적으로 궁금한 점이 있으시면, 이어서 질문해 주세요. 곧 지식공유자께서 답변해 주실 것입니다.

Je Ho님의 프로필 이미지
Je Ho

작성한 질문수

질문하기