Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix problems with Tinybird deployment #21884

Draft
wants to merge 13 commits into
base: tinybird-add-column-test
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ghost/tinybird/datasources/analytics_pages_mv.datasource
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@ SCHEMA >

ENGINE AggregatingMergeTree
ENGINE_PARTITION_KEY toYYYYMM(date)
ENGINE_SORTING_KEY date, device, browser, location, source, pathname, post_uuid, site_uuid
ENGINE_SORTING_KEY date, device, browser, location, source, pathname, post_type, post_uuid, site_uuid
8 changes: 8 additions & 0 deletions ghost/tinybird/datasources/version_log.datasource
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
SCHEMA >
    `version` LowCardinality(String) `json:$.version`,
    `timestamp` DateTime64(3) `json:$.timestamp` DEFAULT now64(3),
    `step_id` Int64 `json:$.step_id`,
    `message` LowCardinality(String) `json:$.message`

ENGINE "MergeTree"
ENGINE_SORTING_KEY "version, timestamp"
2 changes: 1 addition & 1 deletion ghost/tinybird/pipes/analytics_pages.pipe
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ SQL >
) AS member_status,
uniqState(session_id) AS visits,
countState() AS pageviews
FROM analytics_hits
FROM analytics_hits_data
GROUP BY date, device, browser, location, source, pathname, post_uuid, post_type, site_uuid

TYPE MATERIALIZED
Expand Down
2 changes: 1 addition & 1 deletion ghost/tinybird/pipes/analytics_sessions.pipe
Original file line number Diff line number Diff line change
Expand Up @@ -89,4 +89,4 @@ SQL >
GROUP BY date, session_id, site_uuid

TYPE MATERIALIZED
DATASOURCE analytics_sessions_mv__v0
DATASOURCE analytics_sessions_mv__v1
2 changes: 1 addition & 1 deletion ghost/tinybird/pipes/analytics_sources.pipe
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
VERSION 1
VERSION 0

NODE parsed_hits
DESCRIPTION >
Expand Down
65 changes: 64 additions & 1 deletion ghost/tinybird/pipes/kpis.pipe
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,69 @@ SQL >
) as date
{% end %}

NODE parsed_hits
DESCRIPTION >
    Parse raw page_hit events

SQL >
    SELECT
        timestamp,
        action,
        version,
        -- Rows without a session id are bucketed under the literal '0'.
        coalesce(session_id, '0') AS session_id,
        -- Every field below is read as a string from the JSON `payload` column.
        JSONExtractString(payload, 'locale') AS locale,
        JSONExtractString(payload, 'location') AS location,
        JSONExtractString(payload, 'referrer') AS referrer,
        JSONExtractString(payload, 'pathname') AS pathname,
        JSONExtractString(payload, 'href') AS href,
        JSONExtractString(payload, 'site_uuid') AS site_uuid,
        JSONExtractString(payload, 'member_uuid') AS member_uuid,
        JSONExtractString(payload, 'member_status') AS member_status,
        JSONExtractString(payload, 'post_uuid') AS post_uuid,
        JSONExtractString(payload, 'post_type') AS post_type,
        -- Lower-cased so downstream pattern matching can use lower-case patterns.
        lower(JSONExtractString(payload, 'user-agent')) AS user_agent
    FROM analytics_events
    WHERE action = 'page_hit'

NODE analytics_hits_data
SQL >
    SELECT
        site_uuid,
        timestamp,
        action,
        version,
        session_id,
        member_uuid,
        member_status,
        post_uuid,
        location,
        domainWithoutWWW(referrer) AS source,
        pathname,
        href,
        -- Device class derived from the lower-cased user agent.
        -- CASE yields the first matching branch, so the branch order is significant.
        CASE
            WHEN match(user_agent, 'wget|ahrefsbot|curl|urllib|bitdiscovery|\+https://|googlebot')
                THEN 'bot'
            WHEN match(user_agent, 'android')
                THEN 'mobile-android'
            WHEN match(user_agent, 'ipad|iphone|ipod')
                THEN 'mobile-ios'
            ELSE 'desktop'
        END AS device,
        -- Browser family, resolved the same first-match-wins way.
        CASE
            WHEN match(user_agent, 'firefox')
                THEN 'firefox'
            WHEN match(user_agent, 'chrome|crios')
                THEN 'chrome'
            WHEN match(user_agent, 'opera')
                THEN 'opera'
            WHEN match(user_agent, 'msie|trident')
                THEN 'ie'
            WHEN match(user_agent, 'iphone|ipad|safari')
                THEN 'safari'
            ELSE 'Unknown'
        END AS browser
    FROM parsed_hits

NODE pageviews
DESCRIPTION >
Group by sessions and calculate metrics at that level
Expand All @@ -87,7 +150,7 @@ SQL >
case when min(timestamp) = max(timestamp) then 1 else 0 end as is_bounce,
max(timestamp) as latest_view_aux,
min(timestamp) as first_view_aux
from analytics_hits
from analytics_hits_data
where toDate(timestamp) = {{ Date(date_from) }}
group by toStartOfHour(timestamp), session_id, site_uuid, member_status, device, browser, location, source, pathname
{% else %}
Expand Down
114 changes: 114 additions & 0 deletions ghost/tinybird/scripts/version_upgrade.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
#!/bin/bash
#
# Upgrade the Tinybird data project from version $ver_from to $ver_to.
#
# Progress is tracked in the version_log datasource: each completed step
# appends a row (version, step_id, message). On start the script reads the
# highest step_id logged for the current version and resumes with the next
# one, so an interrupted run can simply be re-launched. A row with
# step_id -1 is the "current version statement" for a version.
#
# Requires the `tb` CLI and `jq` on PATH.
#
# Exit status: 0 when every remaining step ran and was logged, 1 otherwise.

# Fail on unset variables, and let a failing `tb` inside a pipeline surface
# as a failure of the whole pipeline instead of being masked by `jq`.
set -u -o pipefail

ver_from="0.0.0"
ver_to="1.0.0"

# Private scratch directory for the NDJSON message file. The file keeps its
# .ndjson name inside a unique directory instead of using a shared, fixed
# path under /tmp; the directory is removed on exit.
tmp_dir=$(mktemp -d "${TMPDIR:-/tmp}/version_upgrade.XXXXXX") || {
    echo "Could not create a temporary directory" >&2
    exit 1
}
trap 'rm -rf "$tmp_dir"' EXIT
msg_file="$tmp_dir/msg.ndjson"

# Append one row to version_log.
#   $1 version, $2 step_id (integer), $3 message
# Returns non-zero when the append fails, so callers can stop instead of
# continuing with an unrecorded step.
log_step() {
    local version=$1 step_id=$2 message=$3

    printf '{"version":"%s","step_id":%s,"message":"%s"}\n' \
        "$version" "$step_id" "$message" > "$msg_file"

    if ! tb datasource append version_log "$msg_file"; then
        echo "Could not log step $step_id to version_log" >&2
        return 1
    fi
}

# Truncate and re-populate the target of a materialized pipe, waiting for
# the job to finish.
#   $1 pipe name
# On failure prints the step number and the CLI output, and returns non-zero.
populate_pipe() {
    local pipe=$1 output

    if ! output=$(tb pipe populate --truncate --wait "$pipe"); then
        echo "Error in step $current_step"
        # Quoted so the multi-line CLI output keeps its line breaks.
        echo "$output"
        return 1
    fi
}

current_ver=$(tb sql --format JSON "SELECT argMax(version, timestamp) version FROM version_log" | jq -r '.data[0].version') || {
    echo "Could not read the current version from version_log" >&2
    exit 1
}

echo "Current version: $current_ver"

# TODO: If current_ver = '', the DS is empty, we need to initialize
# I am going to leave this command as a placeholder initializer for now:
# tb datasource truncate version_log --yes
# echo "{\"version\":\"0.0.0\",\"step_id\":-1,\"message\":\"Current version statement\"}" > /tmp/msg.ndjson
# tb datasource append version_log /tmp/msg.ndjson
if [ -z "$current_ver" ] || [ "$current_ver" = "null" ]; then
    echo "version_log has no version recorded; it must be initialized first" >&2
fi

if [ "$ver_from" != "$current_ver" ]; then
    echo "This script is valid only for version $ver_from"
    exit 1
fi

echo "Upgrading from: \"$ver_from\" to: \"$ver_to\""

# Get the highest step done:
query_result=$(tb sql --format JSON "SELECT max(step_id) last_step FROM version_log WHERE version = '$current_ver'") || {
    echo "Could not read the last completed step from version_log" >&2
    exit 1
}

max_step=$(echo "$query_result" | jq -r '.data[0].last_step') || {
    echo "Could not parse the last completed step: $query_result" >&2
    exit 1
}

# Refuse to continue on anything that is not an integer. An empty or "null"
# value would otherwise fall through the numeric test below and make the
# arithmetic silently resume from step 1.
if ! [[ "$max_step" =~ ^-?[0-9]+$ ]]; then
    echo "Unexpected last step value: \"$max_step\"" >&2
    exit 1
fi

if [ "$max_step" -lt 0 ]; then
    # Start at -1
    current_step=-1
else
    current_step=$max_step
fi

# The idea is that a logged step means that it is done already, so we go to the next
current_step=$((current_step + 1))

echo "Running from step id $current_step"

# Migration plan:
# analytics_sources.pipe should not really be iterated, as the output
# does not change and thus does not require a migration
# 0. Start (can be removed)
# 1. Populate analytics_sessions_mv__v1 with analytics_sessions__v1
# 2. Populate analytics_pages_mv__v1 with analytics_pages__v1
# 3. Empty step for testing (nothing is logged)

max_steps=3
while [ "$current_step" -le "$max_steps" ]; do
    echo
    echo "Running step $current_step"

    case "$current_step" in
        0)
            log_step "$ver_from" "$current_step" "Start update to $ver_to" || exit 1
            ;;
        1)
            # Migrate the data, then record the step only if it succeeded.
            populate_pipe analytics_sessions__v1 || exit 1
            log_step "$ver_from" "$current_step" \
                "Populate analytics_sessions_mv__v1 with analytics_sessions__v1" || exit 1
            ;;
        2)
            populate_pipe analytics_pages__v1 || exit 1
            log_step "$ver_from" "$current_step" \
                "Populate analytics_pages_mv__v1 with analytics_pages__v1" || exit 1
            ;;
        *)
            # Empty step for testing
            sleep 1
            ;;
    esac

    # Go to the next step
    current_step=$((current_step + 1))
done

# When all runs ok, finish logging that the new version is up and running
log_step "$ver_to" -1 "Current version statement" || exit 1

Loading