From 9f01456246147a471e16441a1ca9b433215c3039 Mon Sep 17 00:00:00 2001 From: Hannah Wolfe Date: Thu, 31 Oct 2024 13:15:41 +0000 Subject: [PATCH 01/14] WIP: Adding post type column - This is genuinely something we need to do :D --- .../datasources/analytics_pages_mv.datasource | 3 ++- .../datasources/analytics_sessions_mv.datasource | 3 ++- ghost/tinybird/pipes/analytics_pages.pipe | 11 +++++++---- ghost/tinybird/pipes/analytics_sessions.pipe | 3 ++- ghost/tinybird/pipes/analytics_sources.pipe | 3 ++- ghost/tinybird/pipes/kpis.pipe | 2 +- ghost/tinybird/pipes/top_browsers.pipe | 2 +- ghost/tinybird/pipes/top_devices.pipe | 2 +- ghost/tinybird/pipes/top_locations.pipe | 2 +- ghost/tinybird/pipes/top_pages.pipe | 2 +- ghost/tinybird/pipes/top_sources.pipe | 2 +- ghost/tinybird/pipes/trend.pipe | 3 ++- 12 files changed, 23 insertions(+), 15 deletions(-) diff --git a/ghost/tinybird/datasources/analytics_pages_mv.datasource b/ghost/tinybird/datasources/analytics_pages_mv.datasource index 75c5c9b8707a..10fb4e143a6c 100644 --- a/ghost/tinybird/datasources/analytics_pages_mv.datasource +++ b/ghost/tinybird/datasources/analytics_pages_mv.datasource @@ -1,8 +1,9 @@ -VERSION 0 +VERSION 1 SCHEMA > `site_uuid` String, `post_uuid` String, + `post_type` String, `date` Date, `device` String, `browser` String, diff --git a/ghost/tinybird/datasources/analytics_sessions_mv.datasource b/ghost/tinybird/datasources/analytics_sessions_mv.datasource index 6bd7786783bf..b2f8bd43d8c3 100644 --- a/ghost/tinybird/datasources/analytics_sessions_mv.datasource +++ b/ghost/tinybird/datasources/analytics_sessions_mv.datasource @@ -1,10 +1,11 @@ -VERSION 0 +VERSION 1 SCHEMA > `site_uuid` String, `date` Date, `session_id` String, `member_status` SimpleAggregateFunction(any, String), `post_uuid` SimpleAggregateFunction(any, String), + `post_type` SimpleAggregateFunction(any, String), `device` SimpleAggregateFunction(any, String), `browser` SimpleAggregateFunction(any, String), `location` 
SimpleAggregateFunction(any, String), diff --git a/ghost/tinybird/pipes/analytics_pages.pipe b/ghost/tinybird/pipes/analytics_pages.pipe index 8ecdc089bc46..0804c86b756f 100644 --- a/ghost/tinybird/pipes/analytics_pages.pipe +++ b/ghost/tinybird/pipes/analytics_pages.pipe @@ -1,4 +1,4 @@ -VERSION 0 +VERSION 1 NODE parsed_hits DESCRIPTION > @@ -19,6 +19,7 @@ SQL > JSONExtractString(payload, 'member_uuid') as member_uuid, JSONExtractString(payload, 'member_status') as member_status, JSONExtractString(payload, 'post_uuid') as post_uuid, + JSONExtractString(payload, 'post_type') as post_type, lower(JSONExtractString(payload, 'user-agent')) as user_agent FROM analytics_events where action = 'page_hit' @@ -34,6 +35,7 @@ SQL > member_uuid, member_status, post_uuid, + post_type, location, domainWithoutWWW(referrer) as source, pathname, @@ -71,6 +73,7 @@ SQL > site_uuid, toDate(timestamp) AS date, post_uuid, + post_type, device, browser, location, @@ -82,8 +85,8 @@ SQL > ) AS member_status, uniqState(session_id) AS visits, countState() AS pageviews - FROM analytics_hits_data - GROUP BY date, device, browser, location, source, pathname, post_uuid,site_uuid + FROM analytics_hits + GROUP BY date, device, browser, location, source, pathname, post_uuid, post_type, site_uuid TYPE MATERIALIZED -DATASOURCE analytics_pages_mv__v0 +DATASOURCE analytics_pages_mv__v1 diff --git a/ghost/tinybird/pipes/analytics_sessions.pipe b/ghost/tinybird/pipes/analytics_sessions.pipe index 0ef44093fc43..6d9020ff17da 100644 --- a/ghost/tinybird/pipes/analytics_sessions.pipe +++ b/ghost/tinybird/pipes/analytics_sessions.pipe @@ -1,4 +1,4 @@ -VERSION 0 +VERSION 1 NODE parsed_hits DESCRIPTION > @@ -19,6 +19,7 @@ SQL > JSONExtractString(payload, 'member_uuid') as member_uuid, JSONExtractString(payload, 'member_status') as member_status, JSONExtractString(payload, 'post_uuid') as post_uuid, + JSONExtractString(payload, 'post_type') as post_type, lower(JSONExtractString(payload, 'user-agent')) as 
user_agent FROM analytics_events where action = 'page_hit' diff --git a/ghost/tinybird/pipes/analytics_sources.pipe b/ghost/tinybird/pipes/analytics_sources.pipe index 482b73ad4fdd..3a49c1ed6717 100644 --- a/ghost/tinybird/pipes/analytics_sources.pipe +++ b/ghost/tinybird/pipes/analytics_sources.pipe @@ -1,4 +1,4 @@ -VERSION 0 +VERSION 1 NODE parsed_hits DESCRIPTION > @@ -19,6 +19,7 @@ SQL > JSONExtractString(payload, 'member_uuid') as member_uuid, JSONExtractString(payload, 'member_status') as member_status, JSONExtractString(payload, 'post_uuid') as post_uuid, + JSONExtractString(payload, 'post_type') as post_type, lower(JSONExtractString(payload, 'user-agent')) as user_agent FROM analytics_events where action = 'page_hit' diff --git a/ghost/tinybird/pipes/kpis.pipe b/ghost/tinybird/pipes/kpis.pipe index fa969be31cda..166562c476cf 100644 --- a/ghost/tinybird/pipes/kpis.pipe +++ b/ghost/tinybird/pipes/kpis.pipe @@ -1,4 +1,4 @@ -VERSION 0 +VERSION 1 DESCRIPTION > Summary with general KPIs per date, including visits, page views, bounce rate and average session duration. diff --git a/ghost/tinybird/pipes/top_browsers.pipe b/ghost/tinybird/pipes/top_browsers.pipe index 00625ec48865..4676c18fc506 100644 --- a/ghost/tinybird/pipes/top_browsers.pipe +++ b/ghost/tinybird/pipes/top_browsers.pipe @@ -1,4 +1,4 @@ -VERSION 0 +VERSION 1 DESCRIPTION > Top Browsers ordered by most visits. diff --git a/ghost/tinybird/pipes/top_devices.pipe b/ghost/tinybird/pipes/top_devices.pipe index 3e77b510e0e5..9b13ba1634ac 100644 --- a/ghost/tinybird/pipes/top_devices.pipe +++ b/ghost/tinybird/pipes/top_devices.pipe @@ -1,4 +1,4 @@ -VERSION 0 +VERSION 1 DESCRIPTION > Top Device Types ordered by most visits. 
diff --git a/ghost/tinybird/pipes/top_locations.pipe b/ghost/tinybird/pipes/top_locations.pipe index beee3d92dbfd..edf0eb21fd9f 100644 --- a/ghost/tinybird/pipes/top_locations.pipe +++ b/ghost/tinybird/pipes/top_locations.pipe @@ -1,4 +1,4 @@ -VERSION 0 +VERSION 1 DESCRIPTION > Top visiting Countries ordered by most visits. diff --git a/ghost/tinybird/pipes/top_pages.pipe b/ghost/tinybird/pipes/top_pages.pipe index 3ba741d8b2bb..7fa25c5f160e 100644 --- a/ghost/tinybird/pipes/top_pages.pipe +++ b/ghost/tinybird/pipes/top_pages.pipe @@ -1,4 +1,4 @@ -VERSION 0 +VERSION 1 DESCRIPTION > Most visited pages for a given period. diff --git a/ghost/tinybird/pipes/top_sources.pipe b/ghost/tinybird/pipes/top_sources.pipe index 3235b0b70845..92dd427af295 100644 --- a/ghost/tinybird/pipes/top_sources.pipe +++ b/ghost/tinybird/pipes/top_sources.pipe @@ -1,4 +1,4 @@ -VERSION 0 +VERSION 1 DESCRIPTION > Top traffic sources (domains), ordered by most visits. Accepts `date_from` and `date_to` date filter. Defaults to last 7 days. diff --git a/ghost/tinybird/pipes/trend.pipe b/ghost/tinybird/pipes/trend.pipe index 06faa5b26884..707108a8903a 100644 --- a/ghost/tinybird/pipes/trend.pipe +++ b/ghost/tinybird/pipes/trend.pipe @@ -1,4 +1,4 @@ -VERSION 0 +VERSION 1 DESCRIPTION > Visits trend over time for the last 30 minutes, filling the blanks. Works great for the realtime chart. 
@@ -25,6 +25,7 @@ SQL > JSONExtractString(payload, 'member_uuid') as member_uuid, JSONExtractString(payload, 'member_status') as member_status, JSONExtractString(payload, 'post_uuid') as post_uuid, + JSONExtractString(payload, 'post_type') as post_type, lower(JSONExtractString(payload, 'user-agent')) as user_agent FROM analytics_events where action = 'page_hit' From 305c19b6c7151ea8297bca5981e8f50a862aa476 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Paco=20Gonz=C3=A1lez=20L=C3=B3pez?= Date: Fri, 13 Dec 2024 16:53:45 +0000 Subject: [PATCH 02/14] Remove reference to deprecated analytics_hits --- ghost/tinybird/pipes/analytics_pages.pipe | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ghost/tinybird/pipes/analytics_pages.pipe b/ghost/tinybird/pipes/analytics_pages.pipe index 0804c86b756f..83afeaca4d3b 100644 --- a/ghost/tinybird/pipes/analytics_pages.pipe +++ b/ghost/tinybird/pipes/analytics_pages.pipe @@ -85,7 +85,7 @@ SQL > ) AS member_status, uniqState(session_id) AS visits, countState() AS pageviews - FROM analytics_hits + FROM analytics_hits_data GROUP BY date, device, browser, location, source, pathname, post_uuid, post_type, site_uuid TYPE MATERIALIZED From 41029a44764a3a843070bb45bc2c3363078fc577 Mon Sep 17 00:00:00 2001 From: Paco Gonzalez Date: Fri, 13 Dec 2024 17:06:26 +0000 Subject: [PATCH 03/14] Remove remaining references to analytics_hits --- ghost/tinybird/pipes/kpis.pipe | 65 +++++++++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) diff --git a/ghost/tinybird/pipes/kpis.pipe b/ghost/tinybird/pipes/kpis.pipe index 166562c476cf..9961c4c50dfe 100644 --- a/ghost/tinybird/pipes/kpis.pipe +++ b/ghost/tinybird/pipes/kpis.pipe @@ -65,6 +65,69 @@ SQL > ) as date {% end %} +NODE parsed_hits +DESCRIPTION > + Parse raw page_hit events + +SQL > + SELECT + timestamp, + action, + version, + coalesce(session_id, '0') as session_id, + JSONExtractString(payload, 'locale') as locale, + JSONExtractString(payload, 'location') as 
location, + JSONExtractString(payload, 'referrer') as referrer, + JSONExtractString(payload, 'pathname') as pathname, + JSONExtractString(payload, 'href') as href, + JSONExtractString(payload, 'site_uuid') as site_uuid, + JSONExtractString(payload, 'member_uuid') as member_uuid, + JSONExtractString(payload, 'member_status') as member_status, + JSONExtractString(payload, 'post_uuid') as post_uuid, + JSONExtractString(payload, 'post_type') as post_type, + lower(JSONExtractString(payload, 'user-agent')) as user_agent + FROM analytics_events + where action = 'page_hit' + +NODE analytics_hits_data +SQL > + SELECT + site_uuid, + timestamp, + action, + version, + session_id, + member_uuid, + member_status, + post_uuid, + location, + domainWithoutWWW(referrer) as source, + pathname, + href, + case + when match(user_agent, 'wget|ahrefsbot|curl|urllib|bitdiscovery|\+https://|googlebot') + then 'bot' + when match(user_agent, 'android') + then 'mobile-android' + when match(user_agent, 'ipad|iphone|ipod') + then 'mobile-ios' + else 'desktop' + END as device, + case + when match(user_agent, 'firefox') + then 'firefox' + when match(user_agent, 'chrome|crios') + then 'chrome' + when match(user_agent, 'opera') + then 'opera' + when match(user_agent, 'msie|trident') + then 'ie' + when match(user_agent, 'iphone|ipad|safari') + then 'safari' + else 'Unknown' + END as browser + FROM parsed_hits + NODE pageviews DESCRIPTION > Group by sessions and calculate metrics at that level @@ -87,7 +150,7 @@ SQL > case when min(timestamp) = max(timestamp) then 1 else 0 end as is_bounce, max(timestamp) as latest_view_aux, min(timestamp) as first_view_aux - from analytics_hits + from analytics_hits_data where toDate(timestamp) = {{ Date(date_from) }} group by toStartOfHour(timestamp), session_id, site_uuid, member_status, device, browser, location, source, pathname {% else %} From a9dd9f5bcc3455ec3c0bb02141a749a2dfef4df8 Mon Sep 17 00:00:00 2001 From: Paco Gonzalez Date: Fri, 13 Dec 2024 17:08:56 
+0000 Subject: [PATCH 04/14] Fix target DS versions for MVs --- ghost/tinybird/pipes/analytics_sessions.pipe | 2 +- ghost/tinybird/pipes/analytics_sources.pipe | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ghost/tinybird/pipes/analytics_sessions.pipe b/ghost/tinybird/pipes/analytics_sessions.pipe index 6d9020ff17da..2fc6546ff221 100644 --- a/ghost/tinybird/pipes/analytics_sessions.pipe +++ b/ghost/tinybird/pipes/analytics_sessions.pipe @@ -89,4 +89,4 @@ SQL > GROUP BY date, session_id, site_uuid TYPE MATERIALIZED -DATASOURCE analytics_sessions_mv__v0 +DATASOURCE analytics_sessions_mv__v1 diff --git a/ghost/tinybird/pipes/analytics_sources.pipe b/ghost/tinybird/pipes/analytics_sources.pipe index 3a49c1ed6717..16f3525be601 100644 --- a/ghost/tinybird/pipes/analytics_sources.pipe +++ b/ghost/tinybird/pipes/analytics_sources.pipe @@ -93,4 +93,4 @@ SQL > HAVING b.source != current_domain TYPE MATERIALIZED -DATASOURCE analytics_sources_mv__v0 +DATASOURCE analytics_sources_mv__v1 From ff0117f70078e8a93bd211062097c321a1c536e4 Mon Sep 17 00:00:00 2001 From: Paco Gonzalez Date: Fri, 13 Dec 2024 17:10:19 +0000 Subject: [PATCH 05/14] Fix sorting key of analytics_pages_mv to match the group by fields in analytics_pages MV --- ghost/tinybird/datasources/analytics_pages_mv.datasource | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ghost/tinybird/datasources/analytics_pages_mv.datasource b/ghost/tinybird/datasources/analytics_pages_mv.datasource index 10fb4e143a6c..0bbedcf9a250 100644 --- a/ghost/tinybird/datasources/analytics_pages_mv.datasource +++ b/ghost/tinybird/datasources/analytics_pages_mv.datasource @@ -16,4 +16,4 @@ SCHEMA > ENGINE AggregatingMergeTree ENGINE_PARTITION_KEY toYYYYMM(date) -ENGINE_SORTING_KEY date, device, browser, location, source, pathname, post_uuid, site_uuid +ENGINE_SORTING_KEY date, device, browser, location, source, pathname, post_type, post_uuid, site_uuid From bf509678a6d26d99f9d881da2e6a4d7314af8b5c Mon 
Sep 17 00:00:00 2001 From: Paco Gonzalez Date: Fri, 13 Dec 2024 17:25:33 +0000 Subject: [PATCH 06/14] Remove typo in DS reference --- ghost/tinybird/pipes/analytics_sources.pipe | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ghost/tinybird/pipes/analytics_sources.pipe b/ghost/tinybird/pipes/analytics_sources.pipe index 16f3525be601..3a49c1ed6717 100644 --- a/ghost/tinybird/pipes/analytics_sources.pipe +++ b/ghost/tinybird/pipes/analytics_sources.pipe @@ -93,4 +93,4 @@ SQL > HAVING b.source != current_domain TYPE MATERIALIZED -DATASOURCE analytics_sources_mv__v1 +DATASOURCE analytics_sources_mv__v0 From ddc4f756b43633a0ed75adcd28dcc348a87d8bdb Mon Sep 17 00:00:00 2001 From: Paco Gonzalez Date: Fri, 13 Dec 2024 17:43:19 +0000 Subject: [PATCH 07/14] Add first version status DS --- ghost/tinybird/datasources/version_log.datasource | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 ghost/tinybird/datasources/version_log.datasource diff --git a/ghost/tinybird/datasources/version_log.datasource b/ghost/tinybird/datasources/version_log.datasource new file mode 100644 index 000000000000..ec14c7995d81 --- /dev/null +++ b/ghost/tinybird/datasources/version_log.datasource @@ -0,0 +1,8 @@ +SCHEMA > + `version` LowCardinality(String) `json:$.version`, + `timestamp` DateTime64(3) `json:$.timestamp` DEFAULT now64(3), + `step_id` Int64 `json:$.step_id`, + `message` LowCardinality(String) `json:$.message` + +ENGINE "MergeTree" +ENGINE_SORTING_KEY "version, timestamp" From e0a44001f362ecfd005268e384b15c849a071192 Mon Sep 17 00:00:00 2001 From: Paco Gonzalez Date: Fri, 13 Dec 2024 17:58:18 +0000 Subject: [PATCH 08/14] Add first version of upgrade script --- ghost/tinybird/scripts/version_upgrade.sh | 92 +++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 ghost/tinybird/scripts/version_upgrade.sh diff --git a/ghost/tinybird/scripts/version_upgrade.sh b/ghost/tinybird/scripts/version_upgrade.sh new file mode 100644 index 
000000000000..d2adabd79c0e --- /dev/null +++ b/ghost/tinybird/scripts/version_upgrade.sh @@ -0,0 +1,92 @@ +#!/bin/bash + +ver_from="0.0.0" +ver_to="1.0.0" + + + +current_ver=$(tb sql --format JSON "SELECT argMax(version, timestamp) version FROM version_log" | jq -r '.data[0].version') + +echo "Current version: $current_ver" + +# TODO: If current_ver = '', the DS is empty, we need to initialize +# I am going to leave this command as a placeholder initializer for now: +# tb datasource truncate version_log --yes +# curl -X POST 'https://api.tinybird.co/v0/events?name=version_log' -H "Authorization: Bearer $TB_TOKEN" -d '{"version":"0.0.0","step_id":-1,"retry":-1,"message":"Initial version statement"}' + + +if [ "$ver_from" != "$current_ver" ]; +then + echo "This script is valid only for version $ver_from" + exit 1 +fi + +echo "Upgrading from: \"$ver_from\" to: \"$ver_to\"" + + +# Get the highest step done: +query_result=$(tb sql --format JSON "SELECT max(step_id) last_step, max(retry) last_retry FROM version_log WHERE version = '$current_ver'") + +max_step=$(echo "$query_result" | jq -r '.data[0].last_step') +current_retry=$(echo "$query_result" | jq -r '.data[0].last_retry') + +current_retry=$((current_retry+1)) + +if [ $max_step -lt 0 ] +then + # Start at -1 + current_step=-1 +else + current_step=$max_step +fi + +# The idea is that a logged step means that it is done already, so we go to the next +current_step=$((current_step+1)) + +echo "Running from step id $current_step" + + +max_steps=5 +while [ $current_step -le $max_steps ]; do + echo + echo "Running step $current_step" + if [ "$current_step" -le 0 ];then + # Do stuff... + # Log the stuff you've done + curl -X POST 'https://api.tinybird.co/v0/events?name=version_log' -H "Authorization: Bearer $TB_TOKEN" -d "{\"version\":\"0.0.0\",\"step_id\":$current_step,\"retry\":$current_retry,\"message\":\"Start update to $ver_to\"}" + elif [ "$current_step" -le 1 ]; + then + # Do stuff... 
+ # Deploy changes + output=$(tb push --push-deps --only-changes --no-check --yes | tee /dev/tty) + + # If a step fails, the script should stop without logging so it can be retried + if [ $? -ne 0 ]; then + echo "Error in step $current_step" + exit 1 + fi + + # Log the stuff you've done + curl -X POST 'https://api.tinybird.co/v0/events?name=version_log' -H "Authorization: Bearer $TB_TOKEN" -d "{\"version\":\"0.0.0\",\"step_id\":1,\"retry\":$current_retry,\"message\":\"Deploy changes\"}" + elif [ "$current_step" -le 2 ]; + then + # Do stuff... + # Migrate the data + let + + if [ $? -ne 0 ]; then + echo "Error in step $current_step" + exit 1 + fi + # Log the stuff you've done + curl -X POST 'https://api.tinybird.co/v0/events?name=version_log' -H "Authorization: Bearer $TB_TOKEN" -d "{\"version\":\"0.0.0\",\"step_id\":1,\"retry\":$current_retry,\"message\":\"Deploy changes\"}" + else + sleep 1 + fi + # Go to the next step + current_step=$((current_step+1)) +done + + +# When all runs ok, finish + From 570b21dd33784786732018d2eda7ec86fb3c2684 Mon Sep 17 00:00:00 2001 From: Paco Gonzalez Date: Fri, 13 Dec 2024 18:03:14 +0000 Subject: [PATCH 09/14] Remove unused field in version_log --- ghost/tinybird/scripts/version_upgrade.sh | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/ghost/tinybird/scripts/version_upgrade.sh b/ghost/tinybird/scripts/version_upgrade.sh index d2adabd79c0e..14071a0225e2 100644 --- a/ghost/tinybird/scripts/version_upgrade.sh +++ b/ghost/tinybird/scripts/version_upgrade.sh @@ -12,7 +12,7 @@ echo "Current version: $current_ver" # TODO: If current_ver = '', the DS is empty, we need to initialize # I am going to leave this command as a placeholder initializer for now: # tb datasource truncate version_log --yes -# curl -X POST 'https://api.tinybird.co/v0/events?name=version_log' -H "Authorization: Bearer $TB_TOKEN" -d '{"version":"0.0.0","step_id":-1,"retry":-1,"message":"Initial version statement"}' +# curl -X POST 
'https://api.tinybird.co/v0/events?name=version_log' -H "Authorization: Bearer $TB_TOKEN" -d '{"version":"0.0.0","step_id":-1,"message":"Initial version statement"}' if [ "$ver_from" != "$current_ver" ]; @@ -25,12 +25,9 @@ echo "Upgrading from: \"$ver_from\" to: \"$ver_to\"" # Get the highest step done: -query_result=$(tb sql --format JSON "SELECT max(step_id) last_step, max(retry) last_retry FROM version_log WHERE version = '$current_ver'") +query_result=$(tb sql --format JSON "SELECT max(step_id) last_step FROM version_log WHERE version = '$current_ver'") max_step=$(echo "$query_result" | jq -r '.data[0].last_step') -current_retry=$(echo "$query_result" | jq -r '.data[0].last_retry') - -current_retry=$((current_retry+1)) if [ $max_step -lt 0 ] then @@ -53,7 +50,7 @@ while [ $current_step -le $max_steps ]; do if [ "$current_step" -le 0 ];then # Do stuff... # Log the stuff you've done - curl -X POST 'https://api.tinybird.co/v0/events?name=version_log' -H "Authorization: Bearer $TB_TOKEN" -d "{\"version\":\"0.0.0\",\"step_id\":$current_step,\"retry\":$current_retry,\"message\":\"Start update to $ver_to\"}" + curl -X POST 'https://api.tinybird.co/v0/events?name=version_log' -H "Authorization: Bearer $TB_TOKEN" -d "{\"version\":\"0.0.0\",\"step_id\":$current_step,\"message\":\"Start update to $ver_to\"}" elif [ "$current_step" -le 1 ]; then # Do stuff... @@ -67,7 +64,7 @@ while [ $current_step -le $max_steps ]; do fi # Log the stuff you've done - curl -X POST 'https://api.tinybird.co/v0/events?name=version_log' -H "Authorization: Bearer $TB_TOKEN" -d "{\"version\":\"0.0.0\",\"step_id\":1,\"retry\":$current_retry,\"message\":\"Deploy changes\"}" + curl -X POST 'https://api.tinybird.co/v0/events?name=version_log' -H "Authorization: Bearer $TB_TOKEN" -d "{\"version\":\"0.0.0\",\"step_id\":$current_step,\"message\":\"Deploy changes\"}" elif [ "$current_step" -le 2 ]; then # Do stuff... 
@@ -79,7 +76,7 @@ while [ $current_step -le $max_steps ]; do exit 1 fi # Log the stuff you've done - curl -X POST 'https://api.tinybird.co/v0/events?name=version_log' -H "Authorization: Bearer $TB_TOKEN" -d "{\"version\":\"0.0.0\",\"step_id\":1,\"retry\":$current_retry,\"message\":\"Deploy changes\"}" + curl -X POST 'https://api.tinybird.co/v0/events?name=version_log' -H "Authorization: Bearer $TB_TOKEN" -d "{\"version\":\"0.0.0\",\"step_id\":$current_step,\"message\":\"Deploy changes\"}" else sleep 1 fi From 5eec1e3ca0ee07c1308644c2dc9bb936cab2457f Mon Sep 17 00:00:00 2001 From: Paco Gonzalez Date: Mon, 16 Dec 2024 12:22:30 +0000 Subject: [PATCH 10/14] Remove version upgrade, there are no changes to the output --- ghost/tinybird/pipes/analytics_sources.pipe | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ghost/tinybird/pipes/analytics_sources.pipe b/ghost/tinybird/pipes/analytics_sources.pipe index 3a49c1ed6717..5734295004d7 100644 --- a/ghost/tinybird/pipes/analytics_sources.pipe +++ b/ghost/tinybird/pipes/analytics_sources.pipe @@ -1,4 +1,4 @@ -VERSION 1 +VERSION 0 NODE parsed_hits DESCRIPTION > From abeea144b14070c10d9bf9a6cb564ab7f74613b1 Mon Sep 17 00:00:00 2001 From: Paco Gonzalez Date: Mon, 16 Dec 2024 12:23:07 +0000 Subject: [PATCH 11/14] Remove deployment step to focus the script on data migration --- ghost/tinybird/scripts/version_upgrade.sh | 28 +++++++++++++++-------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/ghost/tinybird/scripts/version_upgrade.sh b/ghost/tinybird/scripts/version_upgrade.sh index 14071a0225e2..a225f9de797e 100644 --- a/ghost/tinybird/scripts/version_upgrade.sh +++ b/ghost/tinybird/scripts/version_upgrade.sh @@ -4,7 +4,6 @@ ver_from="0.0.0" ver_to="1.0.0" - current_ver=$(tb sql --format JSON "SELECT argMax(version, timestamp) version FROM version_log" | jq -r '.data[0].version') echo "Current version: $current_ver" @@ -12,7 +11,7 @@ echo "Current version: $current_ver" # TODO: If current_ver = 
'', the DS is empty, we need to initialize # I am going to leave this command as a placeholder initializer for now: # tb datasource truncate version_log --yes -# curl -X POST 'https://api.tinybird.co/v0/events?name=version_log' -H "Authorization: Bearer $TB_TOKEN" -d '{"version":"0.0.0","step_id":-1,"message":"Initial version statement"}' +# curl -X POST 'https://api.tinybird.co/v0/events?name=version_log' -H "Authorization: Bearer $TB_TOKEN" -d '{"version":"$ver_from","step_id":-1,"message":"Initial version statement"}' if [ "$ver_from" != "$current_ver" ]; @@ -42,6 +41,12 @@ current_step=$((current_step+1)) echo "Running from step id $current_step" +# Migration plan: +# analytics_sources.pipe should not really be iterated, as the output +# does not change and thus does not require a migration +# 1. Start (can be removed) +# 2. Populate analytics_sessions_mv__v1 with analytics_sessions_v1 +# 3. Populate analytics_pages_mv__v1 with analytics_pages_v1 max_steps=5 while [ $current_step -le $max_steps ]; do @@ -49,34 +54,37 @@ while [ $current_step -le $max_steps ]; do echo "Running step $current_step" if [ "$current_step" -le 0 ];then # Do stuff... + # Log the stuff you've done - curl -X POST 'https://api.tinybird.co/v0/events?name=version_log' -H "Authorization: Bearer $TB_TOKEN" -d "{\"version\":\"0.0.0\",\"step_id\":$current_step,\"message\":\"Start update to $ver_to\"}" + curl -X POST 'https://api.tinybird.co/v0/events?name=version_log' -H "Authorization: Bearer $TB_TOKEN" -d "{\"version\":\"$ver_from\",\"step_id\":$current_step,\"message\":\"Start update to $ver_to\"}" elif [ "$current_step" -le 1 ]; then # Do stuff... 
- # Deploy changes - output=$(tb push --push-deps --only-changes --no-check --yes | tee /dev/tty) + step_message="Populate analytics_sessions_mv__v1 with analytics_sessions_v1" + # Migrate the data + output=$(tb pipe populate --truncate --wait analytics_sessions_v1 | tee /dev/tty) - # If a step fails, the script should stop without logging so it can be retried + # Check that it ran ok if [ $? -ne 0 ]; then echo "Error in step $current_step" exit 1 fi - # Log the stuff you've done - curl -X POST 'https://api.tinybird.co/v0/events?name=version_log' -H "Authorization: Bearer $TB_TOKEN" -d "{\"version\":\"0.0.0\",\"step_id\":$current_step,\"message\":\"Deploy changes\"}" + curl -X POST 'https://api.tinybird.co/v0/events?name=version_log' -H "Authorization: Bearer $TB_TOKEN" -d "{\"version\":\"$ver_from\",\"step_id\":$current_step,\"message\":\"$step_message\"}" elif [ "$current_step" -le 2 ]; then # Do stuff... + step_message="Populate analytics_pages_mv__v1 with analytics_pages_v1" # Migrate the data - let + output=$(tb pipe populate --truncate --wait analytics_pages_v1 | tee /dev/tty) + # Check that it ran ok if [ $? 
-ne 0 ]; then echo "Error in step $current_step" exit 1 fi # Log the stuff you've done - curl -X POST 'https://api.tinybird.co/v0/events?name=version_log' -H "Authorization: Bearer $TB_TOKEN" -d "{\"version\":\"0.0.0\",\"step_id\":$current_step,\"message\":\"Deploy changes\"}" + curl -X POST 'https://api.tinybird.co/v0/events?name=version_log' -H "Authorization: Bearer $TB_TOKEN" -d "{\"version\":\"$ver_from\",\"step_id\":$current_step,\"message\":\"$step_message\"}" else sleep 1 fi From aebe98dd15ce585d7fad1c24cc3b7513401a1860 Mon Sep 17 00:00:00 2001 From: Paco Gonzalez Date: Mon, 16 Dec 2024 12:27:28 +0000 Subject: [PATCH 12/14] Fix migration script --- ghost/tinybird/scripts/version_upgrade.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/ghost/tinybird/scripts/version_upgrade.sh b/ghost/tinybird/scripts/version_upgrade.sh index a225f9de797e..bcfed7bf7486 100644 --- a/ghost/tinybird/scripts/version_upgrade.sh +++ b/ghost/tinybird/scripts/version_upgrade.sh @@ -86,6 +86,7 @@ while [ $current_step -le $max_steps ]; do # Log the stuff you've done curl -X POST 'https://api.tinybird.co/v0/events?name=version_log' -H "Authorization: Bearer $TB_TOKEN" -d "{\"version\":\"$ver_from\",\"step_id\":$current_step,\"message\":\"$step_message\"}" else + # Empty step for testing sleep 1 fi # Go to the next step From 087e81d803eff39194f76838053dfdf2f8a65be9 Mon Sep 17 00:00:00 2001 From: Paco Gonzalez Date: Mon, 16 Dec 2024 13:03:33 +0000 Subject: [PATCH 13/14] Finalize first draft of an upgrade script --- ghost/tinybird/scripts/version_upgrade.sh | 40 ++++++++++++++++------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/ghost/tinybird/scripts/version_upgrade.sh b/ghost/tinybird/scripts/version_upgrade.sh index bcfed7bf7486..e59a74ead6b6 100644 --- a/ghost/tinybird/scripts/version_upgrade.sh +++ b/ghost/tinybird/scripts/version_upgrade.sh @@ -4,6 +4,8 @@ ver_from="0.0.0" ver_to="1.0.0" + + current_ver=$(tb sql --format JSON "SELECT argMax(version, 
timestamp) version FROM version_log" | jq -r '.data[0].version') echo "Current version: $current_ver" @@ -11,8 +13,8 @@ echo "Current version: $current_ver" # TODO: If current_ver = '', the DS is empty, we need to initialize # I am going to leave this command as a placeholder initializer for now: # tb datasource truncate version_log --yes -# curl -X POST 'https://api.tinybird.co/v0/events?name=version_log' -H "Authorization: Bearer $TB_TOKEN" -d '{"version":"$ver_from","step_id":-1,"message":"Initial version statement"}' - +# echo "{\"version\":\"0.0.0\",\"step_id\":-1,\"message\":\"Current version statement\"}" > /tmp/msg.ndjson +# tb datasource append version_log /tmp/msg.ndjson if [ "$ver_from" != "$current_ver" ]; then @@ -48,35 +50,46 @@ echo "Running from step id $current_step" # 2. Populate analytics_sessions_mv__v1 with analytics_sessions_v1 # 3. Populate analytics_pages_mv__v1 with analytics_pages_v1 -max_steps=5 +max_steps=3 while [ $current_step -le $max_steps ]; do echo echo "Running step $current_step" if [ "$current_step" -le 0 ];then # Do stuff... + step_message="Start update to $ver_to" # Log the stuff you've done - curl -X POST 'https://api.tinybird.co/v0/events?name=version_log' -H "Authorization: Bearer $TB_TOKEN" -d "{\"version\":\"$ver_from\",\"step_id\":$current_step,\"message\":\"Start update to $ver_to\"}" + echo "{\"version\":\"$ver_from\",\"step_id\":$current_step,\"message\":\"$step_message\"}" > /tmp/msg.ndjson + tb datasource append version_log /tmp/msg.ndjson + elif [ "$current_step" -le 1 ]; then # Do stuff... - step_message="Populate analytics_sessions_mv__v1 with analytics_sessions_v1" + step_message="Populate analytics_sessions_mv__v1 with analytics_sessions__v1" # Migrate the data - output=$(tb pipe populate --truncate --wait analytics_sessions_v1 | tee /dev/tty) + output=$(tb pipe populate --truncate --wait analytics_sessions__v1 | tee /dev/tty) # Check that it ran ok if [ $? 
-ne 0 ]; then echo "Error in step $current_step" exit 1 fi + # ^ Right now this is broken, on error it does not stop + # Suggested approach: parse the output of the job. Output: + # ** Populating job url https://api.tinybird.co/v0/jobs/b19bebf2-6d21-435b-ac63-2ab1703fa563 + # tb job details + + # Log the stuff you've done - curl -X POST 'https://api.tinybird.co/v0/events?name=version_log' -H "Authorization: Bearer $TB_TOKEN" -d "{\"version\":\"$ver_from\",\"step_id\":$current_step,\"message\":\"$step_message\"}" + # curl -X POST 'https://api.tinybird.co/v0/events?name=version_log' -H "Authorization: Bearer $TB_TOKEN" -d "{\"version\":\"$ver_from\",\"step_id\":$current_step,\"message\":\"$step_message\"}" + echo "{\"version\":\"$ver_from\",\"step_id\":$current_step,\"message\":\"$step_message\"}" > /tmp/msg.ndjson + tb datasource append version_log /tmp/msg.ndjson elif [ "$current_step" -le 2 ]; then # Do stuff... - step_message="Populate analytics_pages_mv__v1 with analytics_pages_v1" + step_message="Populate analytics_pages_mv__v1 with analytics_pages__v1" # Migrate the data - output=$(tb pipe populate --truncate --wait analytics_pages_v1 | tee /dev/tty) + output=$(tb pipe populate --truncate --wait analytics_pages__v1 | tee /dev/tty) # Check that it ran ok if [ $? 
-ne 0 ]; then @@ -84,7 +97,8 @@ while [ $current_step -le $max_steps ]; do exit 1 fi # Log the stuff you've done - curl -X POST 'https://api.tinybird.co/v0/events?name=version_log' -H "Authorization: Bearer $TB_TOKEN" -d "{\"version\":\"$ver_from\",\"step_id\":$current_step,\"message\":\"$step_message\"}" + echo "{\"version\":\"$ver_from\",\"step_id\":$current_step,\"message\":\"$step_message\"}" > /tmp/msg.ndjson + tb datasource append version_log /tmp/msg.ndjson else # Empty step for testing sleep 1 @@ -94,5 +108,9 @@ while [ $current_step -le $max_steps ]; do done -# When all runs ok, finish +# When all runs ok, finish logging that the new version is up and running +step_message="Current version statement" +current_step=-1 +echo "{\"version\":\"$ver_to\",\"step_id\":$current_step,\"message\":\"$step_message\"}" > /tmp/msg.ndjson +tb datasource append version_log /tmp/msg.ndjson From 21e0560578cc19b378528caf9799dd32ba11c3e8 Mon Sep 17 00:00:00 2001 From: Paco Gonzalez Date: Mon, 16 Dec 2024 13:10:49 +0000 Subject: [PATCH 14/14] Fix bug in script not reporting errors --- ghost/tinybird/scripts/version_upgrade.sh | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/ghost/tinybird/scripts/version_upgrade.sh b/ghost/tinybird/scripts/version_upgrade.sh index e59a74ead6b6..3ec445d8c4d5 100644 --- a/ghost/tinybird/scripts/version_upgrade.sh +++ b/ghost/tinybird/scripts/version_upgrade.sh @@ -67,18 +67,14 @@ while [ $current_step -le $max_steps ]; do # Do stuff... step_message="Populate analytics_sessions_mv__v1 with analytics_sessions__v1" # Migrate the data - output=$(tb pipe populate --truncate --wait analytics_sessions__v1 | tee /dev/tty) + output=$(tb pipe populate --truncate --wait analytics_sessions__v1) # Check that it ran ok if [ $? -ne 0 ]; then echo "Error in step $current_step" + echo "$output" exit 1 fi - # ^ Right now this is broken, on error it does not stop - # Suggested approach: parse the output of the job. 
Output: - # ** Populating job url https://api.tinybird.co/v0/jobs/b19bebf2-6d21-435b-ac63-2ab1703fa563 - # tb job details - # Log the stuff you've done # curl -X POST 'https://api.tinybird.co/v0/events?name=version_log' -H "Authorization: Bearer $TB_TOKEN" -d "{\"version\":\"$ver_from\",\"step_id\":$current_step,\"message\":\"$step_message\"}" @@ -89,13 +85,15 @@ while [ $current_step -le $max_steps ]; do # Do stuff... step_message="Populate analytics_pages_mv__v1 with analytics_pages__v1" # Migrate the data - output=$(tb pipe populate --truncate --wait analytics_pages__v1 | tee /dev/tty) + output=$(tb pipe populate --truncate --wait analytics_pages__v1) # Check that it ran ok if [ $? -ne 0 ]; then echo "Error in step $current_step" + echo "$output" exit 1 fi + # Log the stuff you've done echo "{\"version\":\"$ver_from\",\"step_id\":$current_step,\"message\":\"$step_message\"}" > /tmp/msg.ndjson tb datasource append version_log /tmp/msg.ndjson