작성
·
29
0
다른 문제들은 미리 다 혼자 풀면 해결 가능했는데, 맨 마지막 문제는 혼자 시도해서 답은 맞게 나왔으나 쿼리 접근 방식이 상이했습니다.
WITH 구문으로 2~3개의 데이터를 미리 구축해두고 거기서 퍼널 분석을 하시던데, 해당 쿼리의 경우 좀 더 익숙해지면 정말 실무에서도 바로바로 써먹을 수 있겠다는 생각이 들더군요!
피벗은 case when으로 작성했었는데, max와 if로 작성하는 방법을 알아가서 좋았습니다.
다음주 강의도 열심히 들어보겠습니다! 감사합니다.
SELECT title, genre
FROM advanced.array_exercises
CROSS JOIN UNNEST(genres) as genre;
SELECT title, act.actor as actor, act.character as character
FROM advanced.array_exercises
CROSS JOIN UNNEST(actors) as act
SELECT title, act.actor as actor, act.character as character, genre
FROM advanced.array_exercises
CROSS JOIN UNNEST(actors) as act
CROSS JOIN UNNEST(genres) as genre;
WITH base AS (
SELECT user_id, event_date, event_name, user_pseudo_id, events.key, events.value
FROM advanced.app_logs
CROSS JOIN UNNEST(event_params) AS events
)
SELECT event_date, event_name, COUNT(DISTINCT user_id) AS cnt
FROM base
GROUP BY ALL
ORDER BY cnt DESC
SELECT
order_date,
MAX(IF(user_id=1, amount, 0)) as user_1,
MAX(IF(user_id=2, amount, 0)) as user_2,
MAX(IF(user_id=3, amount, 0)) as user_3
FROM advanced.orders
GROUP BY order_date
ORDER BY order_date;
SELECT
user_id,
SUM(IF(order_date='2023-05-01', amount, 0)) as 2023-05-01
,
SUM(IF(order_date='2023-05-02', amount, 0)) as 2023-05-02
,
SUM(IF(order_date='2023-05-03', amount, 0)) as 2023-05-03
,
SUM(IF(order_date='2023-05-04', amount, 0)) as 2023-05-04
,
SUM(IF(order_date='2023-05-05', amount, 0)) as 2023-05-05
FROM advanced.orders
GROUP BY user_id
ORDER BY user_id;
SELECT
user_id,
MAX(IF(order_date = '2023-05-01', 1, 0)) AS 2023-05-01
,
MAX(IF(order_date = '2023-05-02', 1, 0)) AS 2023-05-02
,
MAX(IF(order_date = '2023-05-03', 1, 0)) AS 2023-05-03
,
MAX(IF(order_date = '2023-05-04', 1, 0)) AS 2023-05-04
,
MAX(IF(order_date = '2023-05-05', 1, 0)) AS 2023-05-05
,
FROM advanced.orders
GROUP BY user_id
ORDER BY user_id;
SELECT user_id
, event_date
, event_timestamp
, event_name
, user_pseudo_id
, MAX(IF(events.key = 'firebase_screen', events.value.string_value, NULL)) AS firebase_screen
, MAX(IF(events.key = 'food_id', events.value.int_value, NULL)) AS food_id
, MAX(IF(events.key = 'session_id', events.value.int_value, NULL)) AS session_id
FROM advanced.app_logs
CROSS JOIN UNNEST(event_params) AS events
WHERE user_id = 32888 AND event_name = 'click_cart'
GROUP BY ALL
#정석 방법
WITH base AS (
SELECT event_date
, event_timestamp
, event_name
, user_id
, user_pseudo_id
, platform
, MAX(IF(event_param.key = 'firebase_screen', event_param.value.string_value, NULL)) AS firebase_screen
, MAX(IF(event_param.key = 'session_id', event_param.value.string_value, NULL)) AS session_id
FROM advanced.app_logs
CROSS JOIN UNNEST(event_params) AS event_param
WHERE event_date BETWEEN '2022-08-01' AND '2022-08-18'
GROUP BY ALL
)
, filter_event_and_concat_event_and_screen AS (
SELECT * EXCEPT(event_name, firebase_screen, event_timestamp)
, DATETIME(TIMESTAMP_MICROS(event_timestamp), 'Asia/Seoul') AS event_datetime
, CONCAT(event_name, '-', firebase_screen) AS event_name_with_screen
FROM base
WHERE event_name IN ('screen_view', 'click_payment')
)
SELECT event_name_with_screen
, CASE
WHEN event_name_with_screen = 'screen_view-welcome' THEN 1
WHEN event_name_with_screen = 'screen_view-home' THEN 2
WHEN event_name_with_screen = 'screen_view-food_category' THEN 3
WHEN event_name_with_screen = 'screen_view-restaurant' THEN 4
WHEN event_name_with_screen = 'screen_view-cart' THEN 5
WHEN event_name_with_screen = 'click_payment-cart' THEN 6
ELSE NULL END AS STEP_NUMBER
, COUNT(DISTINCT user_pseudo_id) AS cnt
FROM filter_event_and_concat_event_and_screen
GROUP BY ALL
HAVING step_number IS NOT NULL
#내가 시도한 방법
SELECT CONCAT(event_name, param.value.string_value)
, COUNT(DISTINCT user_pseudo_id)
FROM advanced.app_logs
CROSS JOIN UNNEST(event_params) AS param
WHERE event_name IN ('screen_view', 'click_payment')
AND param.value.string_value IN ('welcome', 'home', 'food_category', 'restaurant', 'cart')
AND event_date BETWEEN '2022-08-01' AND '2022-08-18'
GROUP BY event_name, param.value.string_value
답변 1
0
안녕하세요, 인프런 AI 인턴입니다. Min Choi님.
BigQuery SQL에서 ARRAY, STRUCT, PIVOT, 퍼널 쿼리 수행에 대해 고민을 하시고, 특히 마지막 과제를 해결하는 데 있어 접근 방식의 차이를 느끼신 것 같습니다. 각 문제를 해결하려고 하시는 노력은 데이터 분석 실무에서 매우 중요한 부분입니다.
아래 관련 질문들을 통해 퍼널 쿼리와 피벗 테이블에 대한 다양한 접근 방법을 이해할 수 있을 것입니다:
이 자료들은 Min Choi님의 학습에 큰 도움이 될 것입니다. 다양한 쿼리 작성 방법을 통해 실무에서도 바로 적용 가능한 기술을 습득하실 수 있기를 응원합니다.
저는 질문자님의 이해를 돕고자 지식공유자님의 기존 답변을 참고하여 유사한 답변 링크를 추천해드리고 있습니다. 현재 베타 기능으로 답변이 만족스럽지 않을 수 있는 점 양해 부탁드립니다. 🙏 추가적으로 궁금한 점이 있으시면, 이어서 질문해 주세요. 곧 지식공유자께서 답변해 주실 것입니다.