작성
·
25
·
수정됨
0
1번
WITH base AS (
SELECT
DISTINCT
user_id,
user_pseudo_id,
event_name,
DATE(DATETIME(TIMESTAMP_MICROS(event_timestamp), 'Asia/Seoul')) AS event_date,
DATETIME(TIMESTAMP_MICROS(event_timestamp), 'Asia/Seoul') AS event_datetime
FROM advanced.app_logs
WHERE
event_date BETWEEN "2022-08-01" AND "2022-11-03"
), first_week_and_diff AS (
SELECT
*,
-- DATE_DIFF(event_date, first_date, DAY) AS diff_of_day
DATE_DIFF(event_week, first_week, WEEK) AS diff_of_week
FROM (
SELECT
DISTINCT
user_pseudo_id,
-- DATE_TRUNC
DATE_TRUNC(MIN(event_date) OVER(PARTITION BY user_pseudo_id), WEEK(MONDAY)) AS first_week,
DATE_TRUNC(event_date, WEEK(MONDAY)) AS event_week
FROM base
)
), user_counts AS (
SELECT
diff_of_week,
COUNT(DISTINCT user_pseudo_id) AS user_cnt
FROM first_week_and_diff
GROUP BY diff_of_week
)
SELECT
*,
ROUND(SAFE_DIVIDE(user_cnt, first_week_user_cnt), 2) AS retention_rate
FROM (
SELECT
diff_of_week,
user_cnt,
FIRST_VALUE(user_cnt) OVER(ORDER BY diff_of_week ASC) AS first_week_user_cnt
FROM user_counts
)
2번
WITH base AS (
SELECT
DISTINCT
user_id,
user_pseudo_id,
event_name,
DATETIME(TIMESTAMP_MICROS(event_timestamp),'Asia/Seoul') AS event_datetime,
DATE(DATETIME(TIMESTAMP_MICROS(event_timestamp),'Asia/Seoul')) AS event_date,
FROM advanced.app_logs)
, weekly_user_active AS (
SELECT
user_pseudo_id,
DATE_TRUNC(event_date, WEEK) AS event_week,
MIN(DATE_TRUNC(event_date, WEEK)) OVER (PARTITION BY user_pseudo_id) AS first_active_week,
LAG(DATE_TRUNC(event_date, WEEK)) OVER (PARTITION BY user_pseudo_id ORDER BY DATE_TRUNC(event_date, WEEK)) AS pre_active_week
FROM base
)
, user_group AS (
SELECT
user_pseudo_id,
event_week,
DATE_DIFF(event_week, pre_active_week, WEEK(MONDAY)) AS diff_prior_week,
DATE_DIFF(event_week, first_active_week, WEEK(MONDAY)) AS diff_first_week,
CASE
WHEN event_week = first_active_week THEN 'NEW'
WHEN DATE_DIFF(event_week, pre_active_week, WEEK) = 1 THEN 'Current'
WHEN DATE_DIFF(event_week, pre_active_week, WEEK) > 1 THEN 'Resurrected'
ELSE 'Dormant '
END AS user_seg
FROM weekly_user_active
)
SELECT
event_week,
user_seg,
COUNT(DISTINCT user_pseudo_id) AS user_cnt,
FROM user_group
GROUP BY ALL
ORDER BY 1 ;
4번
WITH base AS (
SELECT
DISTINCT
user_id,
user_pseudo_id,
event_name,
DATE(DATETIME(TIMESTAMP_MICROS(event_timestamp), 'Asia/Seoul')) AS event_date,
DATETIME(TIMESTAMP_MICROS(event_timestamp), 'Asia/Seoul') AS event_datetime
FROM advanced.app_logs
WHERE
event_date BETWEEN "2022-08-01" AND "2022-11-03"
AND event_name = 'click_payment'
),
first_week_and_diff AS (
SELECT
*,
DATE_DIFF(event_week, first_week, WEEK) AS diff_of_week
FROM (
SELECT
DISTINCT
user_pseudo_id,
DATE_TRUNC(MIN(event_date) OVER(PARTITION BY user_pseudo_id), WEEK(MONDAY)) AS first_week,
DATE_TRUNC(event_date, WEEK(MONDAY)) AS event_week
FROM base
)
),
user_counts AS (
SELECT
diff_of_week,
COUNT(DISTINCT user_pseudo_id) AS user_cnt
FROM first_week_and_diff
GROUP BY diff_of_week
)
SELECT
diff_of_week AS week,
user_cnt,
FIRST_VALUE(user_cnt) OVER(ORDER BY diff_of_week ASC) AS first_week_user_cnt,
ROUND(SAFE_DIVIDE(user_cnt, FIRST_VALUE(user_cnt) OVER(ORDER BY diff_of_week ASC)) * 100, 2) AS retention_rate
FROM user_counts
ORDER BY diff_of_week
답변 1
0
안녕하세요, 인프런 AI 인턴입니다. 병호르님께서 올려주신 SQL 리텐션 과제에 대한 다양한 접근법과 유용한 리소스를 제공해 드리겠습니다. 리텐션 분석은 주어지신 데이터에서 특정 주기마다 사용자 유지율을 계산하는 것이 핵심입니다. 병호르님께서 작성하신 SQL 쿼리들에서 중요한 부분들을 살펴보겠습니다.
리텐션 쿼리의 기본 구조
유저 그룹화 (Segmentation) 쿼리
특정 이벤트 기반 리텐션 (예: click_payment)
첨부된 SQL 쿼리에 대한 조언과 함께 참고할 만한 내용을 다음 URL에서 확인할 수 있습니다:
- 3-7 리텐션 SQL 쿼리 작성하기 연습 (Weekly, Monthly)
이 외에도 리텐션 분석을 위한 다양한 유사 질문들이 있으니 병호르님께서 작성하신 SQL의 각 부분을 본 게시물답변에 참고하여 발전시켜 나가시면 좋겠습니다.
저는 질문자님의 이해를 돕고자 지식공유자님의 기존 답변을 참고하여 유사한 답변 링크를 추천해드리고 있습니다.
현재 베타 기능으로 답변이 만족스럽지 않을 수 있는 점 양해 부탁드립니다. 🙏
추가적으로 궁금한 점이 있으시면, 이어서 질문해 주세요. 곧 지식공유자께서 답변해 주실 것입니다.