작성
·
22
0
문제 1.
SELECT
*,
LEAD(visit_month) OVER(PARTITION BY user_id ORDER BY visit_month) AS next_visit_month,
LEAD(visit_month, 2) OVER (PARTITION BY user_id ORDER BY visit_month) AS nnext_visit_month
FROM advanced.analytics_function_01
문제2.
SELECT
*,
LEAD(visit_month) OVER(PARTITION BY user_id ORDER BY visit_month) AS next_visit_month,
LEAD(visit_month, 2) OVER (PARTITION BY user_id ORDER BY visit_month) AS nnext_visit_month,
LAG(visit_month) OVER(PARTITION BY user_id ORDER BY visit_month) AS previous_visit_month
FROM advanced.analytics_function_01
문제3.
SELECT
*,
SUM(amount) OVER (PARTITION BY user_id ORDER BY order_date, order_id
) AS cumulative_sum_by_user
문제 4.
SELECT
*,
AVG(amount) OVER (ORDER BY order_date, order_id
ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING) AS last_5_orders_avg_amout
문제 1.
SELECT
*,
COUNT(query_date) OVER (PARTITION BY user ORDER BY user) AS total_query_cnt
FROM advanced.query_logs
문제2.
WITH table AS (
SELECT
EXTRACT(WEEK FROM query_date) AS week_number,
team,
user,
COUNT(user) AS query_cnt
FROM advanced.query_logs
GROUP BY ALL
)
SELECT
*,
RANK() OVER(PARTITION BY week_number ORDER BY query_cnt) AS team_rank
FROM table
QUALIFY team_rank = 1
문제3.
WITH table AS (
SELECT
EXTRACT(WEEK FROM query_date) AS week_number,
team,
user,
COUNT(user) AS query_cnt
FROM advanced.query_logs
GROUP BY ALL
)
SELECT
*,
LAG(query_cnt) OVER (PARTITION BY user ORDER BY week_number) AS prev_week_query_cnt
FROM table
문제4.
WITH query_count_table AS (
SELECT
*,
COUNT(*) AS query_count
FROM advanced.query_logs
GROUP BY ALL
)
SELECT
*,
SUM(query_count) OVER (PARTITION BY user ORDER BY query_date) AS cululative_query_count
FROM query_count_table
문제5.
WITH raw_data AS (
SELECT DATE '2024-05-01' AS date, 15 AS number_of_orders UNION ALL
SELECT DATE '2024-05-02', 13 UNION ALL
SELECT DATE '2024-05-03', NULL UNION ALL
SELECT DATE '2024-05-04', 16 UNION ALL
SELECT DATE '2024-05-05', NULL UNION ALL
SELECT DATE '2024-05-06', 18 UNION ALL
SELECT DATE '2024-05-07' , 20 UNION ALL
SELECT DATE '2024-05-08', NULL UNION ALL
SELECT DATE '2024-05-09', 13 UNION ALL
SELECT DATE '2024-05-10', 14 UNION ALL
SELECT DATE '2024-05-11', NULL UNION ALL
SELECT DATE '2024-05-12', NULL UNION ALL
)
SELECT
*,
LAST_VALUE(number_of_order IGNORE NULLS) OVER (ORDER BY date) AS last_value_orders
FROM raw_data
문제6.
WITH raw_data AS (
SELECT DATE '2024-05-01' AS date, 15 AS number_of_orders UNION ALL
SELECT DATE '2024-05-02', 13 UNION ALL
SELECT DATE '2024-05-03', NULL UNION ALL
SELECT DATE '2024-05-04', 16 UNION ALL
SELECT DATE '2024-05-05', NULL UNION ALL
SELECT DATE '2024-05-06', 18 UNION ALL
SELECT DATE '2024-05-07' , 20 UNION ALL
SELECT DATE '2024-05-08', NULL UNION ALL
SELECT DATE '2024-05-09', 13 UNION ALL
SELECT DATE '2024-05-10', 14 UNION ALL
SELECT DATE '2024-05-11', NULL UNION ALL
SELECT DATE '2024-05-12', NULL
), filled_data AS (
SELECT
* EXCEPT(number_of_orders),
**LAST_VALUE**(number_of_orders **IGNORE NULLS**) OVER (ORDER BY date) AS number_of_orders
FROM raw_data
)
SELECT
*,
AVG(number_of_orders) OVER (ORDER BY date ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS moving_avg
FROM filled_data
문제 7.
WITH base AS(
SELECT
event_date,
DATETIME(TIMESTAMP_MICROS(event_timestamp), 'Asia/Seoul') AS event_datetime,
event_name,
user_id,
user_pseudo_id
FROM advanced.app_logs
WHERE
event_date = '2022-08-18'
AND user_pseudo_id = "1997494153.8491999091"
), diff_data AS(
SELECT
*,
#이전 이벤트 시간과 현재 이벤트시간의 간격을 SECOND 초단위로 구하기 / second_diff를 기반으로 새로운 세션의 시작일지 아닐지 판단
DATETIME_DIFF(event_datetime, prev_event_datetime, SECOND) AS second_diff
FROM
(
SELECT
*,
# 직전 이벤트 시간을 prev_event_datetime으로 만들기
LAG(event_datetime, 1) OVER(PARTITION BY user_pseudo_id ORDER BY event_datetime) AS prev_event_datetime
FROM base
)
)
SELECT
*,
SUM(session_start) OVER(PARTITION BY user_pseudo_id ORDER BY event_datetime) AS session_num
FROM(
SELECT
*,
CASE
WHEN prev_event_datetime IS NULL THEN 1
WHEN second_diff >= 20 THEN 1
ELSE NULL
END AS session_start
FROM diff_data
)
ORDER BY event_datetime
답변 1
0
안녕하세요, 인프런 AI 인턴입니다. goo님.
제출해주신 SQL 과제 코드들에 대해 몇 가지 개선 및 유용한 정보를 제공하기 위해 관련 자료를 함께 안내드리겠습니다.
LEAD 및 LAG 함수 사용:
누적 합계 및 이동 평균:
NULL 값 처리:
세션 시작 및 지속 시간 계산:
위의 링크들은 유사한 질문과 사례를 다룬 게시물들로, 필요한 내용이 있다면 참고하셔서 추가적인 인사이트를 얻으시면 좋겠습니다. 요점은, 데이터 분석에는 다양한 방법과 접근 방식이 있으므로, 다양한 예시와 접근 방식을 학습하는 것이 언제나 유익합니다.
저는 질문자님의 이해를 돕고자 지식공유자님의 기존 답변을 참고하여 유사한 답변 링크를 추천해드리고 있습니다. 현재 베타 기능으로 답변이 만족스럽지 않을 수 있는 점 양해 부탁드립니다. 🙏 추가적으로 궁금한 점이 있으시면, 이어서 질문해 주세요. 곧 지식공유자께서 답변해 주실 것입니다.