인프런 커뮤니티 질문&답변

song님의 프로필 이미지
song

작성한 질문수

BigQuery(SQL) 활용편(퍼널 분석, 리텐션 분석)

[바짝스터디 1주차 과제] ARRAY, STRUCT 연습 문제 / PIVOT 연습 문제 / 퍼널 쿼리 연습 문제

작성

·

33

0

# ARRAY/STRUCT exercise 1: from array_exercises, show the genres of each movie (title)
# by unnesting the genres array -- one output row per (title, genre) pair.
SELECT
  movie.title,
  genre_name AS genre
FROM advanced.array_exercises AS movie
CROSS JOIN UNNEST(movie.genres) AS genre_name;
# ARRAY/STRUCT exercise 2: from array_exercises, show the actor and the character played
# for each movie (title). Actor and character must come out as separate columns.
# Each element of the actors array is read through its actor and character fields.
SELECT
  movie.title,
  cast_member.actor,
  cast_member.character
FROM advanced.array_exercises AS movie
CROSS JOIN UNNEST(movie.actors) AS cast_member;
# ARRAY/STRUCT exercise 3: from array_exercises, output actor, character and genre for each
# movie (title). Every row must show actor, character and genre together.
# Both arrays are unnested, so each movie yields one row per (actor element, genre element) combination.
SELECT
  movie.title,
  cast_member.actor,
  cast_member.character,
  genre_name AS genre
FROM advanced.array_exercises AS movie
CROSS JOIN UNNEST(movie.actors) AS cast_member
CROSS JOIN UNNEST(movie.genres) AS genre_name;
# ARRAY/STRUCT exercise 4: unnest the event_params array of the app log data (app_logs).
# Produces one row per event parameter, exposing its key plus the string and int value fields.
SELECT
  logs.user_id,
  logs.event_date,
  logs.event_name,
  logs.user_pseudo_id,
  param.key,
  param.value.string_value,
  param.value.int_value
FROM advanced.app_logs AS logs
CROSS JOIN UNNEST(logs.event_params) AS param;
# PIVOT exercise 1: from orders, pivot the total order amount (SUM of amount) per user.
# order_date becomes the rows and user_id (1, 2, 3) becomes the columns.
#
# The aggregate must be SUM, not MAX: MAX would return only the largest single order
# when a user places several orders on the same day. Rows belonging to other users
# contribute 0 to a column, so a single conditional SUM per user is sufficient and no
# pre-aggregating subquery is needed.
SELECT
  order_date,
  SUM(IF(user_id = 1, amount, 0)) AS user_1,
  SUM(IF(user_id = 2, amount, 0)) AS user_2,
  SUM(IF(user_id = 3, amount, 0)) AS user_3
FROM advanced.orders
GROUP BY
  order_date
ORDER BY
  order_date;
# PIVOT exercise 2: from orders, pivot the total order amount per date.
# user_id becomes the rows and each order_date (2023-05-01 .. 2023-05-05) becomes a column.
# Rows for other dates contribute 0 to a column.
SELECT
  o.user_id,
  SUM(CASE WHEN o.order_date = '2023-05-01' THEN o.amount ELSE 0 END) AS `2023-05-01`,
  SUM(CASE WHEN o.order_date = '2023-05-02' THEN o.amount ELSE 0 END) AS `2023-05-02`,
  SUM(CASE WHEN o.order_date = '2023-05-03' THEN o.amount ELSE 0 END) AS `2023-05-03`,
  SUM(CASE WHEN o.order_date = '2023-05-04' THEN o.amount ELSE 0 END) AS `2023-05-04`,
  SUM(CASE WHEN o.order_date = '2023-05-05' THEN o.amount ELSE 0 END) AS `2023-05-05`
FROM advanced.orders AS o
GROUP BY o.user_id
ORDER BY o.user_id;
# PIVOT exercise 3: from orders, pivot per user (user_id) and date (order_date) into
# 1 when an order exists and 0 when it does not. user_id becomes the rows, order_date the
# columns, and multiple orders on the same day still count as 1.
#
# MAX over a 0/1 flag collapses any number of matching orders to 1.
# A row whose amount is NULL is treated as "no order" (flag 0).
SELECT
  o.user_id,
  MAX(IF(o.order_date = '2023-05-01' AND o.amount IS NOT NULL, 1, 0)) AS `2023-05-01`,
  MAX(IF(o.order_date = '2023-05-02' AND o.amount IS NOT NULL, 1, 0)) AS `2023-05-02`,
  MAX(IF(o.order_date = '2023-05-03' AND o.amount IS NOT NULL, 1, 0)) AS `2023-05-03`,
  MAX(IF(o.order_date = '2023-05-04' AND o.amount IS NOT NULL, 1, 0)) AS `2023-05-04`,
  MAX(IF(o.order_date = '2023-05-05' AND o.amount IS NOT NULL, 1, 0)) AS `2023-05-05`
FROM advanced.orders AS o
GROUP BY o.user_id
ORDER BY o.user_id;
# PIVOT 연습 문제 version 1
-- WITH sub AS (
-- SELECT
--   user_id,
--   event_date,
--   event_name,
--   event_timestamp,
--   user_pseudo_id,
--   event_param.key AS key,
--   event_param.value.string_value AS string_value,
--   event_param.value.int_value AS int_value
-- FROM (
--   SELECT
--     *
--   FROM advanced.app_logs
--   WHERE
--     event_date = '2022-08-01')
-- CROSS JOIN UNNEST(event_params) AS event_param
-- )

-- SELECT
--   event_date,
--   COUNT(user_id) AS user_cnt
-- FROM (
--   SELECT
--     user_id,
--     event_date,
--     event_name,
--     event_timestamp,
--     user_pseudo_id,
--     MAX(IF(sub.key = 'firebase_screen', sub.string_value, NULL)) AS firebase_screen,
--     MAX(IF(sub.key = 'food_id', sub.int_value, NULL)) AS food_id,
--     MAX(IF(sub.key = 'session_id', sub.string_value, NULL)) AS session_id
--   FROM sub
--   GROUP BY
--     user_id,
--     event_date,
--     event_name,
--     event_timestamp,
--     user_pseudo_id
-- )
-- WHERE
--   event_name = 'click_cart' # AND food_id = 1544
-- GROUP BY
--   event_date

# PIVOT exercise, version 2: pivot the event_params key/value pairs of app_logs into
# columns for a single day, then count click_cart rows per event_date.
WITH pivoted_events AS (
  # One row per distinct (event_date, event_timestamp, event_name, user_id, user_pseudo_id);
  # the three parameters of interest are pivoted into their own columns.
  # (Alternative to listing the columns: * EXCEPT(event_params) selects every column but that one.)
  SELECT
    logs.event_date,
    logs.event_timestamp,
    logs.event_name,
    logs.user_id,
    logs.user_pseudo_id,
    MAX(CASE WHEN ep.key = 'firebase_screen' THEN ep.value.string_value END) AS firebase_screen,
    MAX(CASE WHEN ep.key = 'food_id' THEN ep.value.int_value END) AS food_id,
    MAX(CASE WHEN ep.key = 'session_id' THEN ep.value.string_value END) AS session_id
  FROM advanced.app_logs AS logs
  CROSS JOIN UNNEST(logs.event_params) AS ep
  WHERE logs.event_date = '2022-08-01'
  GROUP BY
    logs.event_date,
    logs.event_timestamp,
    logs.event_name,
    logs.user_id,
    logs.user_pseudo_id
)

# COUNT(user_id) counts pivoted rows with a non-NULL user_id; it does not de-duplicate users.
SELECT
  event_date,
  COUNT(user_id) AS user_cnt
FROM pivoted_events
WHERE event_name = 'click_cart'  # optional extra filter: AND food_id = 1544
GROUP BY event_date;
# Funnel exercise: conversion rate per funnel step.
# For each event_date between 2022-08-01 and 2022-08-18, counts distinct user_pseudo_id
# values at each step and the step-to-step conversion rates (CVR1..CVR5).
# Step order: 1 screen_view-welcome, 2 screen_view-home, 3 screen_view-food_category,
#             4 screen_view-restaurant, 5 screen_view-cart, 6 click_payment-cart.
WITH base AS (
  # Pivot firebase_screen and session_id out of event_params; one row per distinct
  # combination of the non-aggregated columns below.
  SELECT
    event_date,
    event_timestamp,
    event_name,
    user_id,
    user_pseudo_id,
    platform,
    MAX(IF(event_param.key = 'firebase_screen', event_param.value.string_value, NULL)) AS firebase_screen,
    MAX(IF(event_param.key = 'session_id', event_param.value.string_value, NULL)) AS session_id
  FROM advanced.app_logs
  CROSS JOIN UNNEST(event_params) AS event_param
  WHERE
    event_date BETWEEN '2022-08-01' AND '2022-08-18'
  GROUP BY ALL
), funnel_events AS (
  # Keep only the event types used by the funnel and map "<event_name>-<firebase_screen>"
  # to its step number. Combinations outside the funnel (or with a NULL screen) map to NULL.
  SELECT
    event_date,
    user_pseudo_id,
    CASE CONCAT(event_name, '-', firebase_screen)
      WHEN 'screen_view-welcome' THEN 1
      WHEN 'screen_view-home' THEN 2
      WHEN 'screen_view-food_category' THEN 3
      WHEN 'screen_view-restaurant' THEN 4
      WHEN 'screen_view-cart' THEN 5
      WHEN 'click_payment-cart' THEN 6
    END AS step_number
  FROM base
  WHERE
    event_name IN ('screen_view', 'click_payment')
), before_pivot AS (
  # Distinct users per day and funnel step. The row-level filter lives in WHERE
  # (not HAVING) because step_number is not an aggregate.
  SELECT
    event_date,
    step_number,
    COUNT(DISTINCT user_pseudo_id) AS cnt
  FROM funnel_events
  WHERE
    step_number IS NOT NULL
  GROUP BY
    event_date,
    step_number
), daily_funnel AS (
  # Pivot the step counts into one row per day so each count is computed only once.
  # A step with no rows on a given day stays NULL.
  SELECT
    event_date,
    MAX(IF(step_number = 1, cnt, NULL)) AS step_1_users,
    MAX(IF(step_number = 2, cnt, NULL)) AS step_2_users,
    MAX(IF(step_number = 3, cnt, NULL)) AS step_3_users,
    MAX(IF(step_number = 4, cnt, NULL)) AS step_4_users,
    MAX(IF(step_number = 5, cnt, NULL)) AS step_5_users,
    MAX(IF(step_number = 6, cnt, NULL)) AS step_6_users
  FROM before_pivot
  GROUP BY
    event_date
)

# SAFE_DIVIDE returns NULL instead of raising an error when the previous step's count is 0,
# and yields FLOAT64 for INT64 inputs, so no explicit casts are required.
SELECT
  event_date,
  step_1_users AS `screen_view-welcome`,
  step_2_users AS `screen_view-home`,
  step_3_users AS `screen_view-food_category`,
  step_4_users AS `screen_view-restaurant`,
  step_5_users AS `screen_view-cart`,
  step_6_users AS `click_payment-cart`,
  SAFE_DIVIDE(step_2_users, step_1_users) AS CVR1,
  SAFE_DIVIDE(step_3_users, step_2_users) AS CVR2,
  SAFE_DIVIDE(step_4_users, step_3_users) AS CVR3,
  SAFE_DIVIDE(step_5_users, step_4_users) AS CVR4,
  SAFE_DIVIDE(step_6_users, step_5_users) AS CVR5
FROM daily_funnel
ORDER BY
  event_date;

 

답변 1

0

안녕하세요, 인프런 AI 인턴입니다. song님.

BigQuery SQL을 활용하여 다양한 데이터 변환 및 분석 작업을 수행해 보는 것은 매우 유익한 경험입니다. ARRAY, STRUCT, PIVOT 방법을 이용하여 데이터 구조를 조작하고, 퍼널 쿼리를 학습하여 비즈니스 인사이트를 얻는 과정을 잘 진행하고 계신 것 같습니다. 관련하여 유사한 질문과 답변을 찾았으니, 학습에 도움이 되길 바랍니다.

  1. ARRAY와 STRUCT 다루기

  2. PIVOT을 이용한 데이터 집계

  3. 퍼널 SQL 쿼리 작성

위의 링크들이 song님의 학습에 도움이 되기를 바랍니다. 유사한 문제들을 풀어보면서 SQL에 대한 이해를 더욱 깊게 할 수 있을 것입니다.

저는 질문자님의 이해를 돕고자 지식공유자님의 기존 답변을 참고하여 유사한 답변 링크를 추천해드리고 있습니다. 현재 베타 기능으로 답변이 만족스럽지 않을 수 있는 점 양해 부탁드립니다. 🙏 추가적으로 궁금한 점이 있으시면, 이어서 질문해 주세요. 곧 지식공유자께서 답변해 주실 것입니다.

song님의 프로필 이미지
song

작성한 질문수

질문하기