diff --git a/ops-catalog/data-plane-template.flow.yaml b/ops-catalog/data-plane-template.flow.yaml index 18e0384251c..246875ba327 100644 --- a/ops-catalog/data-plane-template.flow.yaml +++ b/ops-catalog/data-plane-template.flow.yaml @@ -43,7 +43,7 @@ collections: select json($flow_document) where json_type($fields, '$.eventType') = 'text' and json_type($fields, '$.eventTarget') = 'text' - and json_type($fields, '$.error') in ('null', 'text'); + and coalesce(json_type($fields, '$.error'), 'null') in ('null', 'text'); shards: minTxnDuration: 5s diff --git a/ops-catalog/tests.flow.yaml b/ops-catalog/tests.flow.yaml index 3b160e8ef85..a79ad35661b 100644 --- a/ops-catalog/tests.flow.yaml +++ b/ops-catalog/tests.flow.yaml @@ -5,6 +5,80 @@ import: - reporting-L2-template.flow.yaml tests: + doNotPublish/tests/events: + steps: + - ingest: + collection: ops/tasks/BASE_NAME/logs + documents: + - shard: &shard + name: a/b/c + kind: capture + keyBegin: "00000000" + rClockBegin: "00000000" + build: "0000000000000000" + ts: "2025-01-02T03:04:05.01Z" + level: info + message: "not an event: missing eventTarget" + fields: + eventType: "missingEventTarget" + - shard: *shard + ts: "2025-01-02T03:04:05.02Z" + level: info + message: "not an event: no fields" + - shard: *shard + ts: "2025-01-02T03:04:05.03Z" + level: info + message: "not an event: wrong eventType" + fields: + eventType: { "wrong": "type" } + eventTarget: "a/b/c" + - shard: *shard + ts: "2025-01-02T03:04:05.04Z" + level: info + message: "not an event: wrong error type" + fields: + eventType: "eventA" + eventTarget: "a/b/c" + error: { "wrong": "type" } + - shard: *shard + ts: "2025-01-02T03:04:05.05Z" + level: info + message: "an eventA happened" + fields: + eventType: "eventA" + eventTarget: "a/b/c" + - shard: *shard + ts: "2025-01-02T03:04:05.06Z" + level: info + message: "an eventB happened" + fields: + eventType: "eventB" + eventTarget: "a/b/bb" + error: "oh no" + - verify: + collection: ops/rollups/L1/BASE_NAME/events + documents: + - shard: &shard + name: a/b/c + kind: capture + keyBegin: "00000000" + rClockBegin: "00000000" + build: "0000000000000000" + ts: "2025-01-02T03:04:05.06Z" + level: info + message: "an eventB happened" + fields: + eventType: "eventB" + eventTarget: "a/b/bb" + error: "oh no" + - shard: *shard + ts: "2025-01-02T03:04:05.05Z" + level: info + message: "an eventA happened" + fields: + eventType: "eventA" + eventTarget: "a/b/c" + doNotPublish/tests/catalog-stats: steps: - ingest: diff --git a/supabase/migrations/20250204230547_ops_catalog_status.sql b/supabase/migrations/20250204230547_ops_catalog_status.sql index 915ff1c4108..33e588bdc3c 100644 --- a/supabase/migrations/20250204230547_ops_catalog_status.sql +++ b/supabase/migrations/20250204230547_ops_catalog_status.sql @@ -115,9 +115,6 @@ set where ops_l1_events_name is null and ops_l2_events_transform is null; -alter table public.data_planes alter column ops_l1_events_name set not null; -alter table public.data_planes alter column ops_l2_events_transform set not null; - -- We need to update the `catalog_stats*` tables in order to allow a trailing -- slash in the `catalog_name` and add `txn_count`. This is a gross and annoying -- process because there's views that depend on that column, which all need to diff --git a/supabase/migrations/20250312121737_data-planes-events-column-constraints.sql b/supabase/migrations/20250312121737_data-planes-events-column-constraints.sql new file mode 100644 index 00000000000..dc33e74be22 --- /dev/null +++ b/supabase/migrations/20250312121737_data-planes-events-column-constraints.sql @@ -0,0 +1,19 @@ +begin; + +-- Populate these new columns. Note that the legacy data plane needs some +-- special handling because it doesn't follow the normal naming convention. +-- The substr call is to strip off the 'ops/dp/' prefix. +update public.data_planes +set + ops_l1_events_name = case when data_plane_name = 'ops/dp/public/gcp-us-central1-c1' + then 'ops/rollups/L1/public/gcp-us-central1-c1/events' + else concat('ops/rollups/L1/', substr(data_plane_name, 8), '/events') + end, + ops_l2_events_transform = concat('from.', data_plane_fqdn, '.events') +where + ops_l1_events_name is null and ops_l2_events_transform is null; + +alter table public.data_planes alter column ops_l1_events_name set not null; +alter table public.data_planes alter column ops_l2_events_transform set not null; + +commit; diff --git a/supabase/migrations/20250314113757_fix-free-trial-start.sql b/supabase/migrations/20250314113757_fix-free-trial-start.sql new file mode 100644 index 00000000000..9345ab45cb2 --- /dev/null +++ b/supabase/migrations/20250314113757_fix-free-trial-start.sql @@ -0,0 +1,77 @@ +begin; + + +CREATE or replace VIEW internal.new_free_trial_tenants AS WITH hours_by_day AS ( + SELECT + t.tenant, + cs.ts, + ( + cs.usage_seconds :: numeric / 3600.0 + ) as daily_usage_hours + FROM + public.tenants as t + join public.catalog_stats_daily cs on t.tenant :: text = cs.catalog_name + WHERE + -- We run set_new_free_trials daily, so don't bother looking at old data. 7 days is so + -- we can tolerate up to 7 days of failures. + cs.ts >= (now() - '7 days'::interval) + and t.trial_start IS NULL -- Where the tenant has used more than 52.8 hours of task time in a given day. + and ( + cs.usage_seconds :: numeric / 3600.0 + ) > 52.8 +), +hours_by_month AS ( + SELECT + t.tenant, + cs.ts, + cs.usage_seconds :: numeric / 3600.0 as monthly_usage_hours + FROM + public.tenants t + join public.catalog_stats_monthly cs on t.tenant :: text = cs.catalog_name + WHERE + cs.ts >= date_trunc('month', now() AT TIME ZONE 'UTC') + and t.trial_start IS NULL + and ( + cs.usage_seconds :: numeric / 3600.0 + ) > (24 * 31 * 2):: numeric * 1.1 +), +gbs_by_month AS ( + SELECT + t.tenant, + cs.ts, + ceil( + ( + cs.bytes_written_by_me + cs.bytes_read_by_me + ):: numeric / (10.0 ^ 9.0) + ) AS monthly_usage_gbs + FROM + public.tenants t + join public.catalog_stats_monthly cs on t.tenant = cs.catalog_name + WHERE + cs.ts >= date_trunc('month', now() AT TIME ZONE 'UTC') + and t.trial_start IS NULL + and ceil( + ( + cs.bytes_written_by_me + cs.bytes_read_by_me + ):: numeric / (10.0 ^ 9.0) + ) > 10.0 +) +SELECT + t.tenant, + max(hours_by_day.daily_usage_hours) AS max_daily_usage_hours, + max( + hours_by_month.monthly_usage_hours + ) AS max_monthly_usage_hours, + max(gbs_by_month.monthly_usage_gbs) AS max_monthly_gb +FROM + public.tenants t + left join hours_by_day on t.tenant = hours_by_day.tenant + left join hours_by_month on t.tenant = hours_by_month.tenant + left join gbs_by_month on t.tenant = gbs_by_month.tenant +where t.trial_start is null +group by t.tenant +HAVING ((count(hours_by_month.*) > 0) OR (count(hours_by_day.*) > 0) OR (count(gbs_by_month.*) > 0)); + +ALTER VIEW internal.new_free_trial_tenants OWNER TO postgres; + +commit; diff --git a/supabase/tests/free-trial.test.sql b/supabase/tests/free-trial.test.sql new file mode 100644 index 00000000000..db9ad7b4128 --- /dev/null +++ b/supabase/tests/free-trial.test.sql @@ -0,0 +1,39 @@ + +create function tests.test_new_free_trial_tenants() +returns setof text as $$ +begin + + delete from tenants; + insert into tenants (tenant, trial_start) values + ('freebie/', null), -- stays just under the limits + ('ghost/', null), -- no usage at all + ('alreadyPay/', now() - '45 days'::interval), + ('overHoursByDay/', null), + ('overHoursByMonth/', null), + ('overGBs/', null), + ('overAll/', null); + + insert into catalog_stats(grain, catalog_name, ts, usage_seconds, bytes_written_by_me, bytes_read_by_me, flow_document) + values + -- freebie used 48 hours per day, and just under 10GBs in a month + ('daily', 'freebie/', date_trunc('day', now() - '24 h'::interval), 48 * 3600, 0, 0, '{}'), + ('monthly', 'freebie/', date_trunc('month', now() at time zone 'UTC'), 24 * 31 * 2 * 3600, 4000000000, 5000000000, '{}'), + -- alreadyPay is using way above the free tier limits, but already has a trial_start + ('daily', 'alreadyPay/', date_trunc('day', now() - '24 h'::interval), 300 * 3600, 0, 0, '{}'), + ('monthly', 'alreadyPay/', date_trunc('month', now() at time zone 'UTC'), 24 * 31 * 6 * 3600, 99000000000, 99000000000, '{}'), + ('daily', 'overHoursByDay/', date_trunc('day', now() - '24 h'::interval), 60 * 3600, 0, 0, '{}'), + ('monthly', 'overHoursByMonth/', date_trunc('month', now()), 24 * 31 * 4 * 3600, 0, 0, '{}'), + ('monthly', 'overGBs/', date_trunc('month', now()), 55, 6000000000, 6000000000, '{}'), + ('daily', 'overAll/', date_trunc('day', now() - '24 h'::interval), 60 * 3600, 0, 0, '{}'), + ('monthly', 'overAll/', date_trunc('month', now()), 24 * 31 * 6 * 3600, 6000000000, 6000000000, '{}'); + + + + return query select results_eq( + $i$ select tenant::text from internal.new_free_trial_tenants order by tenant $i$, + $i$ values ('overAll/'), ('overGBs/'), ('overHoursByDay/'), ('overHoursByMonth/') $i$, + 'expect correct tenants returned' + ); + +end; +$$ language plpgsql;