Skip to content

Instantly share code, notes, and snippets.

@wooparadog
Created December 11, 2025 03:55
Show Gist options
  • Select an option

  • Save wooparadog/c1e7190dfe78b8632ad0edf4c8793dba to your computer and use it in GitHub Desktop.

Select an option

Save wooparadog/c1e7190dfe78b8632ad0edf4c8793dba to your computer and use it in GitHub Desktop.
--- original_query.sql 2025-12-11 11:15:25.108047457 +0800
+++ refresh-by-day.sql 2025-12-11 11:33:17.670129089 +0800
@@ -1,6 +1,102 @@
+insert into behavior_syft_user_details_1d_di (
+ "user_id",
+ "register_date",
+ "if_tutorial_input_flag",
+ "if_tutorial_end_flag",
+ "if_channel_select_flag",
+ "if_end_flag",
+ "event_date",
+ "interval_days",
+ "engage_min",
+
+ "onboard_subs",
+ "creation_subs",
+ "search_subs",
+ "explore_subs",
+ "hotfy_subs",
+ "migrated_subs",
+ "other_subs",
+ "ttl_subs",
+
+ "followingtop_pv",
+ "followinglatest_pv",
+ "explore_pv",
+ "channel_pv",
+ "dailybrief_pv",
+
+ "followingtop_click",
+ "followinglatest_click",
+ "explore_click",
+ "channel_click",
+ "dailybrief_click",
+
+ "followingtop_read",
+ "followinglatest_read",
+ "explore_read",
+ "channel_read",
+ "dailybrief_read",
+
+ "followingtop_like",
+ "followinglatest_like",
+ "explore_like",
+ "channel_like",
+ "dailybrief_like",
+
+ "followingtop_dislike",
+ "followinglatest_dislike",
+ "explore_dislike",
+ "channel_dislike",
+ "dailybrief_dislike",
+
+ "dw_created_at"
+)
+
+-- ------------------- query start ----------------------
+
-- onboarding 阶段创/订行为的完成情况,以及后续内容浏览和留存时长的关系(具体的浏览内容好坏,需要另外走浏览明细看,这边只能统计量级)
--- from: https://metabase.orionarm.ai/question/2274
with
+ TimezoneNamesFixed as (
+ select
+ tz_name as timezone_name,
+ max(COALESCE(adb_user_timezone_alias.pg_tz, tz_name)) as timezone_name_fixed,
+ -- for Debug
+ -- now() at time zone max(COALESCE(adb_user_timezone_alias.pg_tz, tz_name)) as now_local
+ -- for DMS
+ '${biz_now}'::timestamptz at time zone max(COALESCE(adb_user_timezone_alias.pg_tz, tz_name)) as now_local
+ from rt_mv_syft_users
+ left join adb_user_timezone_alias on adb_user_timezone_alias.alias = tz_name
+ where tz_name is not null and tz_name != ''
+ group by 1
+ ),
+
+ TimezoneFiltered as (
+ select
+ timezone_name,
+ timezone_name_fixed,
+
+ -- 当地时间
+ now_local,
+ ( now_local - interval '1 day')::date as day_date_local,
+ ((now_local - interval '1 day')::date::text || ' 00:00:00 ' || timezone_name_fixed)::timestamptz as day_start_tz,
+ ((now_local )::date::text || ' 00:00:00 ' || timezone_name_fixed)::timestamptz as day_end_tz
+ from TimezoneNamesFixed
+ -- DIFF: 注释了这里
+ -- where EXTRACT(hour FROM now_local) = 0
+ order by now_local
+ ),
+ TimezoneFinal as (
+ select
+ TimezoneFiltered.*,
+
+ (day_start_tz at time zone 'Asia/Shanghai') as day_start_sh, -- shanghai local time, no tz, same as orion db
+ (day_end_tz at time zone 'Asia/Shanghai') as day_end_sh, -- shanghai local time, no tz, same as orion db
+
+ extract(epoch from day_start_tz)::bigint * 1e6 as day_start_ms,
+ extract(epoch from day_end_tz)::bigint * 1e6 as day_end_ms
+
+ from TimezoneFiltered
+ ),
+
real_user as (
select
oauth.user_id,
@@ -35,6 +131,27 @@
3,
4
),
+ ga_scope AS (
+ SELECT DISTINCT
+ event_name,
+ event_timestamp,
+ (
+ to_timestamp(app.event_timestamp / 1000000)::timestamptz AT time zone real_user.tz_name
+ )::date AS user_event_date,
+ event_params,
+ app.user_id,
+ app_info ->> 'version' AS app_version
+ FROM
+ public.v_ga_app_syft as app
+ INNER JOIN real_user ON real_user.user_id = app.user_id
+ WHERE
+ event_date
+ between (select min(day_start_sh) - interval '1 day' from TimezoneFinal)::date
+ and (select max(day_end_sh) + interval '1 day' from TimezoneFinal)::date
+ AND event_timestamp
+ between (select min(day_start_ms) from TimezoneFinal)
+ and (select max(day_end_ms) from TimezoneFinal)
+ ),
obd_status as (
select
app.user_id,
@@ -73,11 +190,10 @@
end
) as if_end_flag
from
- public.v_ga_app_syft as app
+ ga_scope AS app
inner join real_user on app.user_id = real_user.user_id
- and (
- to_timestamp(event_timestamp / 1000000)::timestamptz at time zone real_user.tz_name
- )::date <= (real_user.register_date + '1 day'::interval) -- 注册完两天内走完流程的才算,隔离太久的不算
+ AND
+ user_event_date <= (real_user.register_date + '1 day'::interval) -- 注册完两天内走完流程的才算,隔离太久的不算
where
1 = 1
and app.event_params ->> 'ga_session_id' is not null
@@ -87,19 +203,16 @@
),
active_base as (
select
- real_user.user_id,
- (
- to_timestamp(app.event_timestamp / 1000000)::timestamptz at time zone real_user.tz_name
- )::date as event_date,
+ app.user_id,
+ app.user_event_date AS event_date,
sum(
(event_params ->> 'engagement_time_msec')::int / 1000.0
) as engage_sec
from
- v_ga_app_syft as app
- inner join real_user on real_user.user_id = app.user_id
+ ga_scope AS app
where
app.user_id is not null
- and substring(app.app_info ->> 'version', 1, 1) = '2'
+ AND substring(app.app_version, 1, 1) = '2'
and event_params ->> 'ga_session_id' is not null
and event_name not in (
'notification_receive',
@@ -188,9 +301,7 @@
app_batch as (
select distinct
app.user_id,
- (
- to_timestamp(app.event_timestamp / 1000000)::timestamptz at time zone real_user.tz_name
- )::date as event_date,
+ user_event_date AS event_date,
case
when app.event_name in (
'tsc_2_post_pv_n_channel_pv_batch',
@@ -206,11 +317,10 @@
string_to_array(event_params ->> 'postid_batch', ',')
) as uniq_id
from
- v_ga_app_syft as app
- inner join real_user on app.user_id = real_user.user_id
+ ga_scope AS app
where
1 = 1
- and substring(app.app_info ->> 'version', 1, 1) = '2'
+ AND substring(app.app_version, 1, 1) = '2'
and app.event_params ->> 'ga_session_id' is not null
and (
(
@@ -237,9 +347,7 @@
app_single as (
select distinct
app.user_id,
- (
- to_timestamp(app.event_timestamp / 1000000)::timestamptz at time zone real_user.tz_name
- )::date as event_date,
+ user_event_date AS event_date,
case
when app.event_name in (
'top_stories_card_2_post_pv_n_channel_pv',
@@ -305,11 +413,10 @@
else null
end as uniq_id
from
- v_ga_app_syft as app
- inner join real_user on app.user_id = real_user.user_id
+ ga_scope AS app
where
1 = 1
- and substring(app.app_info ->> 'version', 1, 1) = '2'
+ AND substring(app.app_version, 1, 1) = '2'
and app.event_params ->> 'ga_session_id' is not null
and (
(
@@ -379,7 +486,7 @@
fdbk.user_id,
(
to_timestamp(event_timestamp / 1000000)::timestamptz at time zone real_user.tz_name
- )::date as event_date,
+ )::date AS event_date,
case
when event_name = 'dislike_story_explore' then 'dislike'
when event_params ->> 'reaction' in ('😐', '🙁') then 'dislike'
@@ -400,7 +507,7 @@
end as uniq_id
from
ga_syft_system_events as fdbk
- inner join real_user on fdbk.user_id = real_user.user_id
+ inner join real_user on fdbk.user_id = real_user.user_id
where
1 = 1
and coalesce(
@@ -472,13 +579,13 @@
)
select
active_base.user_id,
- obd_status.register_date,
+ real_user.register_date,
obd_status.if_tutorial_input_flag,
obd_status.if_tutorial_end_flag,
obd_status.if_channel_select_flag,
obd_status.if_end_flag,
active_base.event_date,
- (active_base.event_date - obd_status.register_date)::int as interval_days,
+ (active_base.event_date - real_user.register_date)::int AS interval_days,
active_base.engage_sec / 60.0 as engage_min,
subscription.onboard_subs,
subscription.creation_subs,
@@ -512,9 +619,11 @@
consumption.followinglatest_dislike,
consumption.explore_dislike,
consumption.channel_dislike,
- consumption.dailybrief_dislike
+ consumption.dailybrief_dislike,
+ now() as dw_created_at
from
active_base
+ LEFT JOIN real_user ON active_base.user_id = real_user.user_id
left join obd_status on obd_status.user_id = active_base.user_id
left join subscription on active_base.user_id = subscription.user_id
and active_base.event_date = subscription.created_dt
@@ -679,5 +788,55 @@
2
) as consumption on consumption.user_id = active_base.user_id
and consumption.event_date = active_base.event_date
-where
- 1 = 1
+
+-- ------------------- query end ----------------------
+
+ON CONFLICT (event_date, user_id) DO UPDATE SET
+ register_date = EXCLUDED.register_date,
+ if_tutorial_input_flag = EXCLUDED.if_tutorial_input_flag,
+ if_tutorial_end_flag = EXCLUDED.if_tutorial_end_flag,
+ if_channel_select_flag = EXCLUDED.if_channel_select_flag,
+ if_end_flag = EXCLUDED.if_end_flag,
+ interval_days = EXCLUDED.interval_days,
+ engage_min = EXCLUDED.engage_min,
+
+ onboard_subs = EXCLUDED.onboard_subs,
+ creation_subs = EXCLUDED.creation_subs,
+ search_subs = EXCLUDED.search_subs,
+ explore_subs = EXCLUDED.explore_subs,
+ hotfy_subs = EXCLUDED.hotfy_subs,
+ migrated_subs = EXCLUDED.migrated_subs,
+ other_subs = EXCLUDED.other_subs,
+ ttl_subs = EXCLUDED.ttl_subs,
+
+ followingtop_pv = EXCLUDED.followingtop_pv,
+ followinglatest_pv = EXCLUDED.followinglatest_pv,
+ explore_pv = EXCLUDED.explore_pv,
+ channel_pv = EXCLUDED.channel_pv,
+ dailybrief_pv = EXCLUDED.dailybrief_pv,
+
+ followingtop_click = EXCLUDED.followingtop_click,
+ followinglatest_click = EXCLUDED.followinglatest_click,
+ explore_click = EXCLUDED.explore_click,
+ channel_click = EXCLUDED.channel_click,
+ dailybrief_click = EXCLUDED.dailybrief_click,
+
+ followingtop_read = EXCLUDED.followingtop_read,
+ followinglatest_read = EXCLUDED.followinglatest_read,
+ explore_read = EXCLUDED.explore_read,
+ channel_read = EXCLUDED.channel_read,
+ dailybrief_read = EXCLUDED.dailybrief_read,
+
+ followingtop_like = EXCLUDED.followingtop_like,
+ followinglatest_like = EXCLUDED.followinglatest_like,
+ explore_like = EXCLUDED.explore_like,
+ channel_like = EXCLUDED.channel_like,
+ dailybrief_like = EXCLUDED.dailybrief_like,
+
+ followingtop_dislike = EXCLUDED.followingtop_dislike,
+ followinglatest_dislike = EXCLUDED.followinglatest_dislike,
+ explore_dislike = EXCLUDED.explore_dislike,
+ channel_dislike = EXCLUDED.channel_dislike,
+ dailybrief_dislike = EXCLUDED.dailybrief_dislike,
+
+ dw_created_at = EXCLUDED.dw_created_at
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment