작성
·
35
·
수정됨
0
1번문제 (제한시간: 15분 >> 소요시간 23분)
문제: 주차별 각 카테고리별 평균 할인율이 가장 높았던 기간과 할인율을 구하는 쿼리를 작성해주세요
단, 날짜 데이터를 YYYY-MM-DD 23:59:39 이런 형태로 변경해주세요
WITH transaction_data AS (
SELECT 111 AS user_id, 1001 AS item_id, 719200 AS actual_price, '01/08/2024 12:00:00' AS transaction_date UNION ALL
SELECT 111, 2002, 89000, '01/10/2024 12:00:00' UNION ALL
SELECT 189, 2002, 89000, '01/12/2024 12:00:00' UNION ALL
SELECT 156, 3002, 459000, '01/15/2024 12:00:00' UNION ALL
SELECT 121, 1001, 719200, '01/18/2024 12:00:00' UNION ALL
SELECT 156, 2001, 90300, '01/25/2024 12:00:00' UNION ALL
SELECT 145, 3001, 399000, '01/26/2024 12:00:00' UNION ALL
SELECT 189, 1002, 607200, '01/28/2024 12:00:00' UNION ALL
SELECT 111, 3001, 399000, '02/05/2024 12:00:00' UNION ALL
SELECT 178, 1002, 759000, '02/07/2024 12:00:00' UNION ALL
SELECT 121, 2002, 62300, '02/08/2024 12:00:00' UNION ALL
SELECT 156, 1001, 899000, '02/10/2024 12:00:00' UNION ALL
SELECT 190, 2001, 90300, '02/11/2024 12:00:00' UNION ALL
SELECT 189, 2001, 90300, '02/14/2024 12:00:00' UNION ALL
SELECT 111, 1002, 759000, '02/15/2024 12:00:00' UNION ALL
SELECT 156, 3001, 299250, '02/20/2024 12:00:00' UNION ALL
SELECT 189, 3002, 344250, '02/25/2024 12:00:00' UNION ALL
SELECT 111, 2001, 90300, '02/28/2024 12:00:00'
), item_info AS (
SELECT 1001 AS item_id, 'Electronics' AS category, 'Smartphone' AS item_name, 899000 AS list_price UNION ALL
SELECT 1002 AS item_id, 'Electronics' AS category, 'Tablet' AS item_name, 759000 AS list_price UNION ALL
SELECT 2001 AS item_id, 'Fashion' AS category, 'Sneakers' AS item_name, 129000 AS list_price UNION ALL
SELECT 2002 AS item_id, 'Fashion' AS category, 'Backpack' AS item_name, 89000 AS list_price UNION ALL
SELECT 3001 AS item_id, 'Home' AS category, 'Coffee Machine' AS item_name, 399000 AS list_price UNION ALL
SELECT 3002 AS item_id, 'Home' AS category, 'Air Purifier' AS item_name, 459000 AS list_price
)
SELECT
*,
RANK() OVER (ORDER BY avg_sales_ratio DESC) as rnk
FROM (
SELECT
weekly,
category,
AVG(sales_ratio) AS avg_sales_ratio
FROM (
SELECT
DATE_TRUNC(DATETIME(CONCAT(SPLIT(SPLIT(a.transaction_date, '/')[OFFSET(2)], ' ')[OFFSET(0)],'-',SPLIT(a.transaction_date, '/')[OFFSET(0)] , '-',SPLIT(a.transaction_date, '/')[OFFSET(1)])), WEEK(MONDAY)) AS weekly,
a.item_id,
b.category,
b.list_price,
a.actual_price,
ROUND(1- ROUND(SAFE_DIVIDE(actual_price, list_price),4),4) AS sales_ratio
FROM transaction_data a
LEFT JOIN item_info b ON a.item_id = b.item_id
)
GROUP BY ALL
)
QUALIFY rnk = 1
ORDER BY 1
;
2번문제 (제한시간: 10분 >> 소요시간 7분)
문제: 2024년 1월에 가장 많은 매출을 기록한 카테고리를 구하는 쿼리를 작성해주세요
WITH transaction_data AS (
SELECT 111 AS user_id, 1001 AS item_id, 719200 AS actual_price, '01/08/2024 12:00:00' AS transaction_date UNION ALL
SELECT 111, 2002, 89000, '01/10/2024 12:00:00' UNION ALL
SELECT 189, 2002, 89000, '01/12/2024 12:00:00' UNION ALL
SELECT 156, 3002, 459000, '01/15/2024 12:00:00' UNION ALL
SELECT 121, 1001, 719200, '01/18/2024 12:00:00' UNION ALL
SELECT 156, 2001, 90300, '01/25/2024 12:00:00' UNION ALL
SELECT 145, 3001, 399000, '01/26/2024 12:00:00' UNION ALL
SELECT 189, 1002, 607200, '01/28/2024 12:00:00' UNION ALL
SELECT 111, 3001, 399000, '02/05/2024 12:00:00' UNION ALL
SELECT 178, 1002, 759000, '02/07/2024 12:00:00' UNION ALL
SELECT 121, 2002, 62300, '02/08/2024 12:00:00' UNION ALL
SELECT 156, 1001, 899000, '02/10/2024 12:00:00' UNION ALL
SELECT 190, 2001, 90300, '02/11/2024 12:00:00' UNION ALL
SELECT 189, 2001, 90300, '02/14/2024 12:00:00' UNION ALL
SELECT 111, 1002, 759000, '02/15/2024 12:00:00' UNION ALL
SELECT 156, 3001, 299250, '02/20/2024 12:00:00' UNION ALL
SELECT 189, 3002, 344250, '02/25/2024 12:00:00' UNION ALL
SELECT 111, 2001, 90300, '02/28/2024 12:00:00'
), item_info AS (
SELECT 1001 AS item_id, 'Electronics' AS category, 'Smartphone' AS item_name, 899000 AS list_price UNION ALL
SELECT 1002 AS item_id, 'Electronics' AS category, 'Tablet' AS item_name, 759000 AS list_price UNION ALL
SELECT 2001 AS item_id, 'Fashion' AS category, 'Sneakers' AS item_name, 129000 AS list_price UNION ALL
SELECT 2002 AS item_id, 'Fashion' AS category, 'Backpack' AS item_name, 89000 AS list_price UNION ALL
SELECT 3001 AS item_id, 'Home' AS category, 'Coffee Machine' AS item_name, 399000 AS list_price UNION ALL
SELECT 3002 AS item_id, 'Home' AS category, 'Air Purifier' AS item_name, 459000 AS list_price
)
SELECT
category
FROM (
SELECT
transaction_month,
category,
SUM(actual_price) AS category_sum_price
FROM (
SELECT
DATE_TRUNC(DATETIME(CONCAT(SPLIT(SPLIT(a.transaction_date, '/')[OFFSET(2)], ' ')[OFFSET(0)],'-',SPLIT(a.transaction_date, '/')[OFFSET(0)] , '-',SPLIT(a.transaction_date, '/')[OFFSET(1)])), MONTH) AS transaction_month,
a.item_id,
b.category,
a.actual_price
FROM transaction_data a
LEFT JOIN item_info b ON a.item_id = b.item_id
)
WHERE transaction_month = "2024-01-01"
GROUP BY ALL
)
ORDER BY category_sum_price DESC
LIMIT 1
;
3번문제 (제한시간: 10분 >> 소요시간 6분 40초)
문제: 유저별 총 구매 금액이 200만원 이상인 유저들이 가장 많이 구매한 카테고리를 찾는 쿼리를 작성해주세요
WITH transaction_data AS (
SELECT 111 AS user_id, 1001 AS item_id, 719200 AS actual_price, '01/08/2024 12:00:00' AS transaction_date UNION ALL
SELECT 111, 2002, 89000, '01/10/2024 12:00:00' UNION ALL
SELECT 189, 2002, 89000, '01/12/2024 12:00:00' UNION ALL
SELECT 156, 3002, 459000, '01/15/2024 12:00:00' UNION ALL
SELECT 121, 1001, 719200, '01/18/2024 12:00:00' UNION ALL
SELECT 156, 2001, 90300, '01/25/2024 12:00:00' UNION ALL
SELECT 145, 3001, 399000, '01/26/2024 12:00:00' UNION ALL
SELECT 189, 1002, 607200, '01/28/2024 12:00:00' UNION ALL
SELECT 111, 3001, 399000, '02/05/2024 12:00:00' UNION ALL
SELECT 178, 1002, 759000, '02/07/2024 12:00:00' UNION ALL
SELECT 121, 2002, 62300, '02/08/2024 12:00:00' UNION ALL
SELECT 156, 1001, 899000, '02/10/2024 12:00:00' UNION ALL
SELECT 190, 2001, 90300, '02/11/2024 12:00:00' UNION ALL
SELECT 189, 2001, 90300, '02/14/2024 12:00:00' UNION ALL
SELECT 111, 1002, 759000, '02/15/2024 12:00:00' UNION ALL
SELECT 156, 3001, 299250, '02/20/2024 12:00:00' UNION ALL
SELECT 189, 3002, 344250, '02/25/2024 12:00:00' UNION ALL
SELECT 111, 2001, 90300, '02/28/2024 12:00:00'
), item_info AS (
SELECT 1001 AS item_id, 'Electronics' AS category, 'Smartphone' AS item_name, 899000 AS list_price UNION ALL
SELECT 1002 AS item_id, 'Electronics' AS category, 'Tablet' AS item_name, 759000 AS list_price UNION ALL
SELECT 2001 AS item_id, 'Fashion' AS category, 'Sneakers' AS item_name, 129000 AS list_price UNION ALL
SELECT 2002 AS item_id, 'Fashion' AS category, 'Backpack' AS item_name, 89000 AS list_price UNION ALL
SELECT 3001 AS item_id, 'Home' AS category, 'Coffee Machine' AS item_name, 399000 AS list_price UNION ALL
SELECT 3002 AS item_id, 'Home' AS category, 'Air Purifier' AS item_name, 459000 AS list_price
)
SELECT
category
FROM (
SELECT
category,
SUM(actual_price) AS sum_purchase_price
FROM (
SELECT
a.user_id,
a.item_id,
b.category,
a.actual_price
FROM transaction_data a
LEFT JOIN item_info b ON a.item_id = b.item_id
WHERE a.user_id IN (
SELECT
user_id
FROM (
SELECT
user_id,
SUM(actual_price) AS sum_purchase_price
FROM transaction_data a
GROUP BY ALL
HAVING SUM(actual_price) >= 2000000
)
)
)
GROUP BY ALL
)
ORDER BY sum_purchase_price DESC
LIMIT 1
해당 연습문제는 전부 해결하지 못해 최대한 기한 내에 풀이를 완료한 쿼리를 작성하게 되었습니다. 완료하지 못한 과제는 추후에라도 풀이 완료하여 수정 업로드 하겠습니다.
문제 1) Weekly Retention을 구하는 쿼리를 바닥부터 스스로 작성해보세요.
WITH base AS ( # event데이터의 raw데이터를 추출
SELECT
DISTINCT
user_id,
user_pseudo_id,
DATE(TIMESTAMP_MICROS(event_timestamp), 'Asia/Seoul') AS event_date,
event_name
FROM advanced.app_logs
WHERE event_date BETWEEN "2022-08-01" AND "2022-12-31"
), first_week_diff_event_week AS ( # 유저별 첫번쨰 week, 이벤트 발생 week, 주차별 차이 데이터 추출
SELECT
DISTINCT
user_pseudo_id,
first_week,
event_week,
DATE_DIFF(event_week, first_week, WEEK) AS weeks_after_first_week
FROM (
SELECT
user_pseudo_id,
event_name,
DATE_TRUNC(event_date, WEEK(MONDAY)) AS event_week,
DATE_TRUNC(MIN(event_date) OVER (PARTITION BY user_pseudo_id ORDER BY event_date), WEEK(MONDAY)) AS first_week
FROM base
)
), user_counts AS ( # 첫번째 이벤트 발생주차 | 이벤트 Retain 발생차이 | 유저 수 | 코호트 유저 수
SELECT
*,
FIRST_VALUE(user_cnt) OVER (ORDER BY weeks_after_first_week) AS cohort_user
FROM (
SELECT
weeks_after_first_week,
COUNT(DISTINCT user_pseudo_id) AS user_cnt
FROM first_week_diff_event_week
GROUP BY ALL
)
) # Retention Rate 추출
SELECT
*,
ROUND(SAFE_DIVIDE(user_cnt, cohort_user), 4) AS retention_rate
FROM user_counts
ORDER BY 1,2
;
답변 1
0
안녕하세요, 인프런 AI 인턴입니다. DA유치원님의 질문에 대해 도움을 드리기 위해 기존에 제공된 답변들을 확인해 보았습니다. SQL 연습 문제와 관련하여 리텐션 분석을 하시는 것 같아, 유사한 문제에 대한 해설과 접근방법이 담긴 기존 답변을 제공합니다.
리텐션 분석 코호트 및 SQL 쿼리 작성 방법에서는 리텐션 쿼리 작성 시 주의할 점과 효율적인 코딩 구조에 대한 조언을 제공합니다. 특히, 주차별 사용자를 분석하고 코호트 유저를 어떻게 나눌지에 대한 SQL 작성 방법이 안내되어 있습니다.
문제 해설 및 해결을 위해 SQL 쿼리를 통해 리텐션, 카테고리 분석 등 다양한 조건을 간단하고 효율적으로 처리하는 방법에 대해 질문을 주셨습니다. 각각의 문제에 대해 세부적인 조언을 드리자면:
주차별 평균 할인율 계산:
가장 많은 매출 기록 카테고리:
총 구매 금액이 200만원 이상인 유저 분석:
이 모든 문제의 핵심은 SQL 쿼리를 통해 데이터를 효율적으로 집계하고, 필요한 정보를 정확하게 추출하는 것입니다.
위 링크된 자료를 참고하시면, DA유치원님께서 고민하신 부분들을 좀 더 명확하게 이해하고 풀어나가시는 데 도움이 될 것 같습니다. 추가적인 궁금증이 있으시면 언제든지 질문해 주시기 바라며, 해답을 찾은 이후 피드백도 주시면 감사하겠습니다.
저는 질문자님의 이해를 돕고자 지식공유자님의 기존 답변을 참고하여 유사한 답변 링크를 추천해드리고 있습니다.
현재 베타 기능으로 답변이 만족스럽지 않을 수 있는 점 양해 부탁드립니다. 🙏 추가적으로 궁금한 점이 있으시면, 이어서 질문해 주세요. 곧 지식공유자께서 답변해 주실 것입니다.