diff --git a/ghost/tinybird/datasources/analytics_pages_mv.datasource b/ghost/tinybird/datasources/analytics_pages_mv.datasource
index 75c5c9b8707..0bbedcf9a25 100644
--- a/ghost/tinybird/datasources/analytics_pages_mv.datasource
+++ b/ghost/tinybird/datasources/analytics_pages_mv.datasource
@@ -1,8 +1,9 @@
-VERSION 0
+VERSION 1
 
 SCHEMA >
     `site_uuid` String,
     `post_uuid` String,
+    `post_type` String,
     `date` Date,
     `device` String,
     `browser` String,
@@ -15,4 +16,4 @@ SCHEMA >
 
 ENGINE AggregatingMergeTree
 ENGINE_PARTITION_KEY toYYYYMM(date)
-ENGINE_SORTING_KEY date, device, browser, location, source, pathname, post_uuid, site_uuid
+ENGINE_SORTING_KEY date, device, browser, location, source, pathname, post_type, post_uuid, site_uuid
diff --git a/ghost/tinybird/datasources/analytics_sessions_mv.datasource b/ghost/tinybird/datasources/analytics_sessions_mv.datasource
index 6bd7786783b..b2f8bd43d8c 100644
--- a/ghost/tinybird/datasources/analytics_sessions_mv.datasource
+++ b/ghost/tinybird/datasources/analytics_sessions_mv.datasource
@@ -1,10 +1,11 @@
-VERSION 0
+VERSION 1
 SCHEMA >
     `site_uuid` String,
     `date` Date,
     `session_id` String,
     `member_status` SimpleAggregateFunction(any, String),
     `post_uuid` SimpleAggregateFunction(any, String),
+    `post_type` SimpleAggregateFunction(any, String),
     `device` SimpleAggregateFunction(any, String),
     `browser` SimpleAggregateFunction(any, String),
     `location` SimpleAggregateFunction(any, String),
diff --git a/ghost/tinybird/datasources/version_log.datasource b/ghost/tinybird/datasources/version_log.datasource
new file mode 100644
index 00000000000..ec14c7995d8
--- /dev/null
+++ b/ghost/tinybird/datasources/version_log.datasource
@@ -0,0 +1,8 @@
+SCHEMA >
+    `version` LowCardinality(String) `json:$.version`,
+    `timestamp` DateTime64(3) `json:$.timestamp` DEFAULT now64(3),
+    `step_id` Int64 `json:$.step_id`,
+    `message` LowCardinality(String) `json:$.message`
+
+ENGINE "MergeTree"
+ENGINE_SORTING_KEY "version, timestamp"
diff --git a/ghost/tinybird/pipes/analytics_pages.pipe b/ghost/tinybird/pipes/analytics_pages.pipe
index 8ecdc089bc4..83afeaca4d3 100644
--- a/ghost/tinybird/pipes/analytics_pages.pipe
+++ b/ghost/tinybird/pipes/analytics_pages.pipe
@@ -1,4 +1,4 @@
-VERSION 0
+VERSION 1
 
 NODE parsed_hits
 DESCRIPTION >
@@ -19,6 +19,7 @@ SQL >
         JSONExtractString(payload, 'member_uuid') as member_uuid,
         JSONExtractString(payload, 'member_status') as member_status,
         JSONExtractString(payload, 'post_uuid') as post_uuid,
+        JSONExtractString(payload, 'post_type') as post_type,
         lower(JSONExtractString(payload, 'user-agent')) as user_agent
     FROM analytics_events
     where action = 'page_hit'
@@ -34,6 +35,7 @@ SQL >
         member_uuid,
         member_status,
         post_uuid,
+        post_type,
         location,
         domainWithoutWWW(referrer) as source,
         pathname,
@@ -71,6 +73,7 @@ SQL >
         site_uuid,
         toDate(timestamp) AS date,
         post_uuid,
+        post_type,
         device,
         browser,
         location,
@@ -83,7 +86,7 @@ SQL >
         uniqState(session_id) AS visits,
         countState() AS pageviews
     FROM analytics_hits_data
-    GROUP BY date, device, browser, location, source, pathname, post_uuid,site_uuid
+    GROUP BY date, device, browser, location, source, pathname, post_uuid, post_type, site_uuid
 
 TYPE MATERIALIZED
-DATASOURCE analytics_pages_mv__v0
+DATASOURCE analytics_pages_mv__v1
diff --git a/ghost/tinybird/pipes/analytics_sessions.pipe b/ghost/tinybird/pipes/analytics_sessions.pipe
index 0ef44093fc4..2fc6546ff22 100644
--- a/ghost/tinybird/pipes/analytics_sessions.pipe
+++ b/ghost/tinybird/pipes/analytics_sessions.pipe
@@ -1,4 +1,4 @@
-VERSION 0
+VERSION 1
 
 NODE parsed_hits
 DESCRIPTION >
@@ -19,6 +19,7 @@ SQL >
         JSONExtractString(payload, 'member_uuid') as member_uuid,
         JSONExtractString(payload, 'member_status') as member_status,
         JSONExtractString(payload, 'post_uuid') as post_uuid,
+        JSONExtractString(payload, 'post_type') as post_type,
         lower(JSONExtractString(payload, 'user-agent')) as user_agent
     FROM analytics_events
     where action = 'page_hit'
@@ -88,4 +89,4 @@ SQL >
     GROUP BY date, session_id, site_uuid
 
 TYPE MATERIALIZED
-DATASOURCE analytics_sessions_mv__v0
+DATASOURCE analytics_sessions_mv__v1
diff --git a/ghost/tinybird/pipes/analytics_sources.pipe b/ghost/tinybird/pipes/analytics_sources.pipe
index 482b73ad4fd..5734295004d 100644
--- a/ghost/tinybird/pipes/analytics_sources.pipe
+++ b/ghost/tinybird/pipes/analytics_sources.pipe
@@ -19,6 +19,7 @@ SQL >
         JSONExtractString(payload, 'member_uuid') as member_uuid,
         JSONExtractString(payload, 'member_status') as member_status,
         JSONExtractString(payload, 'post_uuid') as post_uuid,
+        JSONExtractString(payload, 'post_type') as post_type,
         lower(JSONExtractString(payload, 'user-agent')) as user_agent
     FROM analytics_events
     where action = 'page_hit'
diff --git a/ghost/tinybird/pipes/kpis.pipe b/ghost/tinybird/pipes/kpis.pipe
index fa969be31cd..9961c4c50df 100644
--- a/ghost/tinybird/pipes/kpis.pipe
+++ b/ghost/tinybird/pipes/kpis.pipe
@@ -1,4 +1,4 @@
-VERSION 0
+VERSION 1
 
 DESCRIPTION >
     Summary with general KPIs per date, including visits, page views, bounce rate and average session duration.
@@ -65,6 +65,73 @@ SQL >
         ) as date
     {% end %}
 
+NODE parsed_hits
+DESCRIPTION >
+    Parse raw page_hit events
+
+SQL >
+    SELECT
+        timestamp,
+        action,
+        version,
+        coalesce(session_id, '0') as session_id,
+        JSONExtractString(payload, 'locale') as locale,
+        JSONExtractString(payload, 'location') as location,
+        JSONExtractString(payload, 'referrer') as referrer,
+        JSONExtractString(payload, 'pathname') as pathname,
+        JSONExtractString(payload, 'href') as href,
+        JSONExtractString(payload, 'site_uuid') as site_uuid,
+        JSONExtractString(payload, 'member_uuid') as member_uuid,
+        JSONExtractString(payload, 'member_status') as member_status,
+        JSONExtractString(payload, 'post_uuid') as post_uuid,
+        JSONExtractString(payload, 'post_type') as post_type,
+        lower(JSONExtractString(payload, 'user-agent')) as user_agent
+    FROM analytics_events
+    where action = 'page_hit'
+
+NODE analytics_hits_data
+DESCRIPTION >
+    Derive source, device and browser for each parsed hit
+
+SQL >
+    SELECT
+        site_uuid,
+        timestamp,
+        action,
+        version,
+        session_id,
+        member_uuid,
+        member_status,
+        post_uuid,
+        post_type,
+        location,
+        domainWithoutWWW(referrer) as source,
+        pathname,
+        href,
+        case
+            when match(user_agent, 'wget|ahrefsbot|curl|urllib|bitdiscovery|\+https://|googlebot')
+            then 'bot'
+            when match(user_agent, 'android')
+            then 'mobile-android'
+            when match(user_agent, 'ipad|iphone|ipod')
+            then 'mobile-ios'
+            else 'desktop'
+        END as device,
+        case
+            when match(user_agent, 'firefox')
+            then 'firefox'
+            when match(user_agent, 'chrome|crios')
+            then 'chrome'
+            when match(user_agent, 'opera')
+            then 'opera'
+            when match(user_agent, 'msie|trident')
+            then 'ie'
+            when match(user_agent, 'iphone|ipad|safari')
+            then 'safari'
+            else 'Unknown'
+        END as browser
+    FROM parsed_hits
+
 NODE pageviews
 DESCRIPTION >
     Group by sessions and calculate metrics at that level
@@ -87,7 +154,7 @@ SQL >
             case when min(timestamp) = max(timestamp) then 1 else 0 end as is_bounce,
             max(timestamp) as latest_view_aux,
             min(timestamp) as first_view_aux
-        from analytics_hits
+        from analytics_hits_data
         where toDate(timestamp) = {{ Date(date_from) }}
         group by toStartOfHour(timestamp), session_id, site_uuid, member_status, device, browser, location, source, pathname
     {% else %}
diff --git a/ghost/tinybird/pipes/top_browsers.pipe b/ghost/tinybird/pipes/top_browsers.pipe
index 00625ec4886..4676c18fc50 100644
--- a/ghost/tinybird/pipes/top_browsers.pipe
+++ b/ghost/tinybird/pipes/top_browsers.pipe
@@ -1,4 +1,4 @@
-VERSION 0
+VERSION 1
 
 DESCRIPTION >
     Top Browsers ordered by most visits.
diff --git a/ghost/tinybird/pipes/top_devices.pipe b/ghost/tinybird/pipes/top_devices.pipe
index 3e77b510e0e..9b13ba1634a 100644
--- a/ghost/tinybird/pipes/top_devices.pipe
+++ b/ghost/tinybird/pipes/top_devices.pipe
@@ -1,4 +1,4 @@
-VERSION 0
+VERSION 1
 
 DESCRIPTION >
     Top Device Types ordered by most visits.
diff --git a/ghost/tinybird/pipes/top_locations.pipe b/ghost/tinybird/pipes/top_locations.pipe
index beee3d92dbf..edf0eb21fd9 100644
--- a/ghost/tinybird/pipes/top_locations.pipe
+++ b/ghost/tinybird/pipes/top_locations.pipe
@@ -1,4 +1,4 @@
-VERSION 0
+VERSION 1
 
 DESCRIPTION >
     Top visiting Countries ordered by most visits.
diff --git a/ghost/tinybird/pipes/top_pages.pipe b/ghost/tinybird/pipes/top_pages.pipe
index 3ba741d8b2b..7fa25c5f160 100644
--- a/ghost/tinybird/pipes/top_pages.pipe
+++ b/ghost/tinybird/pipes/top_pages.pipe
@@ -1,4 +1,4 @@
-VERSION 0
+VERSION 1
 
 DESCRIPTION >
     Most visited pages for a given period.
diff --git a/ghost/tinybird/pipes/top_sources.pipe b/ghost/tinybird/pipes/top_sources.pipe
index 3235b0b7084..92dd427af29 100644
--- a/ghost/tinybird/pipes/top_sources.pipe
+++ b/ghost/tinybird/pipes/top_sources.pipe
@@ -1,4 +1,4 @@
-VERSION 0
+VERSION 1
 
 DESCRIPTION >
     Top traffic sources (domains), ordered by most visits. Accepts `date_from` and `date_to` date filter. Defaults to last 7 days.
diff --git a/ghost/tinybird/pipes/trend.pipe b/ghost/tinybird/pipes/trend.pipe
index 06faa5b2688..707108a8903 100644
--- a/ghost/tinybird/pipes/trend.pipe
+++ b/ghost/tinybird/pipes/trend.pipe
@@ -1,4 +1,4 @@
-VERSION 0
+VERSION 1
 
 DESCRIPTION >
     Visits trend over time for the last 30 minutes, filling the blanks. Works great for the realtime chart.
@@ -25,6 +25,7 @@ SQL >
         JSONExtractString(payload, 'member_uuid') as member_uuid,
         JSONExtractString(payload, 'member_status') as member_status,
         JSONExtractString(payload, 'post_uuid') as post_uuid,
+        JSONExtractString(payload, 'post_type') as post_type,
         lower(JSONExtractString(payload, 'user-agent')) as user_agent
     FROM analytics_events
     where action = 'page_hit'
diff --git a/ghost/tinybird/scripts/version_upgrade.sh b/ghost/tinybird/scripts/version_upgrade.sh
new file mode 100644
index 00000000000..3ec445d8c4d
--- /dev/null
+++ b/ghost/tinybird/scripts/version_upgrade.sh
@@ -0,0 +1,116 @@
+#!/bin/bash
+#
+# Upgrades the Tinybird workspace from version $ver_from to $ver_to.
+#
+# Each completed step is appended to the version_log datasource, and the
+# script starts after the highest step already logged for $ver_from, so an
+# interrupted run can be resumed by running the script again.
+
+ver_from="0.0.0"
+ver_to="1.0.0"
+
+# Private scratch directory for the NDJSON row appended to version_log;
+# removed when the script exits.
+tmp_dir=$(mktemp -d) || exit 1
+trap 'rm -rf "$tmp_dir"' EXIT
+msg_file="$tmp_dir/msg.ndjson"
+
+# Append one row to version_log. Aborts the script if the append fails, so
+# the upgrade never continues past a step that was not recorded.
+# Usage: log_step <version> <step_id> <message>
+log_step() {
+    echo "{\"version\":\"$1\",\"step_id\":$2,\"message\":\"$3\"}" > "$msg_file"
+    if ! tb datasource append version_log "$msg_file"; then
+        echo "Error logging step $2 for version $1" >&2
+        exit 1
+    fi
+}
+
+# Run "tb pipe populate --truncate --wait" for the given pipe. Aborts the
+# script, printing the captured command output, if the command fails.
+# Usage: populate_pipe <pipe_name>
+populate_pipe() {
+    local output
+    if ! output=$(tb pipe populate --truncate --wait "$1"); then
+        echo "Error in step $current_step" >&2
+        echo "$output" >&2
+        exit 1
+    fi
+}
+
+current_ver=$(tb sql --format JSON "SELECT argMax(version, timestamp) version FROM version_log" | jq -r '.data[0].version')
+
+echo "Current version: $current_ver"
+
+# TODO: If current_ver = '', the DS is empty, we need to initialize
+# I am going to leave this command as a placeholder initializer for now:
+# tb datasource truncate version_log --yes
+# echo "{\"version\":\"0.0.0\",\"step_id\":-1,\"message\":\"Current version statement\"}" > /tmp/msg.ndjson
+# tb datasource append version_log /tmp/msg.ndjson
+
+if [ "$ver_from" != "$current_ver" ]
+then
+    echo "This script is valid only for version $ver_from"
+    exit 1
+fi
+
+echo "Upgrading from: \"$ver_from\" to: \"$ver_to\""
+
+# Get the highest step done:
+query_result=$(tb sql --format JSON "SELECT max(step_id) last_step FROM version_log WHERE version = '$current_ver'")
+
+max_step=$(echo "$query_result" | jq -r '.data[0].last_step')
+
+# jq prints "null" (or nothing) when the query fails or returns no rows.
+# Refuse to guess a starting step rather than feed a non-number to "[".
+if ! [[ "$max_step" =~ ^-?[0-9]+$ ]]; then
+    echo "Could not determine the last completed step (got: \"$max_step\")" >&2
+    exit 1
+fi
+
+if [ "$max_step" -lt 0 ]
+then
+    # Start at -1
+    current_step=-1
+else
+    current_step=$max_step
+fi
+
+# The idea is that a logged step means that it is done already, so we go to the next
+current_step=$((current_step+1))
+
+echo "Running from step id $current_step"
+
+# Migration plan (numbered by the step_id logged in version_log):
+# analytics_sources.pipe should not really be iterated, as the output
+# does not change and thus does not require a migration
+# 0. Start (can be removed)
+# 1. Populate analytics_sessions_mv__v1 with analytics_sessions__v1
+# 2. Populate analytics_pages_mv__v1 with analytics_pages__v1
+# 3. Empty step for testing (not logged)
+
+max_steps=3
+while [ "$current_step" -le "$max_steps" ]; do
+    echo
+    echo "Running step $current_step"
+    if [ "$current_step" -le 0 ]; then
+        step_message="Start update to $ver_to"
+        log_step "$ver_from" "$current_step" "$step_message"
+    elif [ "$current_step" -le 1 ]; then
+        step_message="Populate analytics_sessions_mv__v1 with analytics_sessions__v1"
+        populate_pipe analytics_sessions__v1
+        log_step "$ver_from" "$current_step" "$step_message"
+    elif [ "$current_step" -le 2 ]; then
+        step_message="Populate analytics_pages_mv__v1 with analytics_pages__v1"
+        populate_pipe analytics_pages__v1
+        log_step "$ver_from" "$current_step" "$step_message"
+    else
+        # Empty step for testing
+        sleep 1
+    fi
+    # Go to the next step
+    current_step=$((current_step+1))
+done
+
+# When all runs ok, finish logging that the new version is up and running
+log_step "$ver_to" -1 "Current version statement"