diff --git a/app/common/openmeter.go b/app/common/openmeter.go index 6b05467eb..bf60d27ad 100644 --- a/app/common/openmeter.go +++ b/app/common/openmeter.go @@ -21,7 +21,9 @@ import ( "github.com/openmeterio/openmeter/openmeter/namespace" "github.com/openmeterio/openmeter/openmeter/sink/flushhandler" "github.com/openmeterio/openmeter/openmeter/sink/flushhandler/ingestnotification" - "github.com/openmeterio/openmeter/openmeter/streaming/clickhouse_connector" + "github.com/openmeterio/openmeter/openmeter/streaming" + "github.com/openmeterio/openmeter/openmeter/streaming/clickhouse/materialized_view" + "github.com/openmeterio/openmeter/openmeter/streaming/clickhouse/raw_events" watermillkafka "github.com/openmeterio/openmeter/openmeter/watermill/driver/kafka" "github.com/openmeterio/openmeter/openmeter/watermill/driver/noop" "github.com/openmeterio/openmeter/openmeter/watermill/eventbus" @@ -34,25 +36,56 @@ func NewMeterRepository(meters []*models.Meter) *meter.InMemoryRepository { return meter.NewInMemoryRepository(slicesx.Map(meters, lo.FromPtr[models.Meter])) } -func NewClickHouseStreamingConnector( +func NewStreamingConnector( + ctx context.Context, conf config.AggregationConfiguration, clickHouse clickhouse.Conn, meterRepository meter.Repository, logger *slog.Logger, -) (*clickhouse_connector.ClickhouseConnector, error) { - streamingConnector, err := clickhouse_connector.NewClickhouseConnector(clickhouse_connector.ClickhouseConnectorConfig{ - ClickHouse: clickHouse, - Database: conf.ClickHouse.Database, - Meters: meterRepository, - CreateOrReplaceMeter: conf.CreateOrReplaceMeter, - PopulateMeter: conf.PopulateMeter, - Logger: logger, - }) - if err != nil { - return nil, fmt.Errorf("init clickhouse streaming: %w", err) +) (streaming.Connector, error) { + var ( + connector streaming.Connector + err error + ) + + switch conf.Engine { + case config.AggregationEngineClickHouseRaw: + connector, err = raw_events.NewConnector(ctx, raw_events.ConnectorConfig{ + 
ClickHouse: clickHouse, + Database: conf.ClickHouse.Database, + EventsTableName: conf.EventsTableName, + Logger: logger, + AsyncInsert: conf.AsyncInsert, + AsyncInsertWait: conf.AsyncInsertWait, + InsertQuerySettings: conf.InsertQuerySettings, + }) + if err != nil { + return nil, fmt.Errorf("init clickhouse raw engine: %w", err) + } + + case config.AggregationEngineClickHouseMV: + connector, err = materialized_view.NewConnector(ctx, materialized_view.ConnectorConfig{ + ClickHouse: clickHouse, + Database: conf.ClickHouse.Database, + EventsTableName: conf.EventsTableName, + Logger: logger, + AsyncInsert: conf.AsyncInsert, + AsyncInsertWait: conf.AsyncInsertWait, + InsertQuerySettings: conf.InsertQuerySettings, + + Meters: meterRepository, + PopulateMeter: conf.PopulateMeter, + CreateOrReplaceMeter: conf.CreateOrReplaceMeter, + QueryRawEvents: conf.QueryRawEvents, + }) + if err != nil { + return nil, fmt.Errorf("init clickhouse mv engine: %w", err) + } + default: + return nil, fmt.Errorf("invalid aggregation engine: %s", conf.Engine) } - return streamingConnector, nil + return connector, nil } func NewNamespacedTopicResolver(config config.Configuration) (*topicresolver.NamespacedTopicResolver, error) { @@ -135,7 +168,7 @@ func NewKafkaNamespaceHandler( func NewNamespaceHandlers( kafkaHandler *kafkaingest.NamespaceHandler, - clickHouseHandler *clickhouse_connector.ClickhouseConnector, + clickHouseHandler streaming.Connector, ) []namespace.Handler { return []namespace.Handler{ kafkaHandler, diff --git a/app/common/wire.go b/app/common/wire.go index 9c4e79f1e..b12fdad29 100644 --- a/app/common/wire.go +++ b/app/common/wire.go @@ -15,8 +15,6 @@ import ( "github.com/openmeterio/openmeter/openmeter/ingest/kafkaingest/topicresolver" "github.com/openmeterio/openmeter/openmeter/meter" registrybuilder "github.com/openmeterio/openmeter/openmeter/registry/builder" - "github.com/openmeterio/openmeter/openmeter/streaming" - 
"github.com/openmeterio/openmeter/openmeter/streaming/clickhouse_connector" watermillkafka "github.com/openmeterio/openmeter/openmeter/watermill/driver/kafka" "github.com/openmeterio/openmeter/openmeter/watermill/router" ) @@ -102,8 +100,7 @@ var OpenMeter = wire.NewSet( NewMeterRepository, wire.Bind(new(meter.Repository), new(*meter.InMemoryRepository)), - NewClickHouseStreamingConnector, - wire.Bind(new(streaming.Connector), new(*clickhouse_connector.ClickhouseConnector)), + NewStreamingConnector, NewNamespacedTopicResolver, wire.Bind(new(topicresolver.Resolver), new(*topicresolver.NamespacedTopicResolver)), diff --git a/app/config/aggregation.go b/app/config/aggregation.go index dca483983..2c8d17cdb 100644 --- a/app/config/aggregation.go +++ b/app/config/aggregation.go @@ -4,20 +4,64 @@ import ( "crypto/tls" "errors" "fmt" + "slices" "time" "github.com/ClickHouse/clickhouse-go/v2" "github.com/spf13/viper" ) +type AggregationEngine string + +const ( + // Raw engine queries the raw events table + AggregationEngineClickHouseRaw AggregationEngine = "clickhouse_raw" + // MV engine maintains and queries materialized views + AggregationEngineClickHouseMV AggregationEngine = "clickhouse_mv" +) + +func (e AggregationEngine) Values() []AggregationEngine { + return []AggregationEngine{AggregationEngineClickHouseRaw, AggregationEngineClickHouseMV} +} + +func (e AggregationEngine) Validate() error { + if !slices.Contains(e.Values(), e) { + return fmt.Errorf("invalid value") + } + return nil +} + type AggregationConfiguration struct { + // Engine is the aggregation engine to use + Engine AggregationEngine ClickHouse ClickHouseAggregationConfiguration + + EventsTableName string + + // Set true for ClickHouse first store the incoming inserts into an in-memory buffer + // before flushing them regularly to disk. 
+ // See https://clickhouse.com/docs/en/cloud/bestpractices/asynchronous-inserts + AsyncInsert bool + // Set true if you want an insert statement to return with an acknowledgment immediately + // without waiting for the data to be inserted into the buffer. + // Setting true can cause silent errors that you need to monitor separately. + AsyncInsertWait bool + + // See https://clickhouse.com/docs/en/operations/settings/settings + // For example, you can set the `max_insert_threads` setting to control the number of threads + // or the `parallel_view_processing` setting to enable pushing to attached views concurrently. + InsertQuerySettings map[string]string + + // Engine specific options + + // Populate creates the materialized view with data from the events table + // This is not safe to use in production as it requires stopping ingestion + PopulateMeter bool + // CreateOrReplace is used to force the recreation of the materialized view + // This is not safe to use in production as it will drop the existing views + CreateOrReplaceMeter bool + // QueryRawEvents is used to query the raw events table instead of the materialized view + QueryRawEvents bool } // Validate validates the configuration. 
@@ -26,6 +70,37 @@ func (c AggregationConfiguration) Validate() error { return fmt.Errorf("clickhouse: %w", err) } + if c.Engine == "" { + return errors.New("engine is required") + } + + if err := c.Engine.Validate(); err != nil { + return fmt.Errorf("engine: %w", err) + } + + if c.EventsTableName == "" { + return errors.New("events table is required") + } + + if c.AsyncInsertWait && !c.AsyncInsert { + return errors.New("async insert wait is set but async insert is not") + } + + // Validate engine specific options + if c.Engine != AggregationEngineClickHouseMV { + if c.PopulateMeter { + return errors.New("populate meter is only supported with materialized view engine") + } + + if c.CreateOrReplaceMeter { + return errors.New("create or replace meter is only supported with materialized view engine") + } + + if c.QueryRawEvents { + return errors.New("query raw events is only supported with materialized view engine") + } + } + return nil } @@ -100,6 +175,11 @@ func (c ClickHouseAggregationConfiguration) GetClientOptions() *clickhouse.Optio // ConfigureAggregation configures some defaults in the Viper instance. 
func ConfigureAggregation(v *viper.Viper) { + v.SetDefault("aggregation.engine", AggregationEngineClickHouseMV) + v.SetDefault("aggregation.eventsTableName", "om_events") + v.SetDefault("aggregation.asyncInsert", false) + v.SetDefault("aggregation.asyncInsertWait", false) + v.SetDefault("aggregation.clickhouse.address", "127.0.0.1:9000") v.SetDefault("aggregation.clickhouse.tls", false) v.SetDefault("aggregation.clickhouse.database", "openmeter") diff --git a/app/config/config_test.go b/app/config/config_test.go index f7138841e..4909c5a42 100644 --- a/app/config/config_test.go +++ b/app/config/config_test.go @@ -124,6 +124,10 @@ func TestComplete(t *testing.T) { ConnMaxLifetime: 10 * time.Minute, BlockBufferSize: 10, }, + Engine: AggregationEngineClickHouseMV, + EventsTableName: "om_events", + AsyncInsert: false, + AsyncInsertWait: false, }, Sink: SinkConfiguration{ GroupId: "openmeter-sink-worker", diff --git a/app/config/sink.go b/app/config/sink.go index ff0e29c72..0a89f08c0 100644 --- a/app/config/sink.go +++ b/app/config/sink.go @@ -22,6 +22,7 @@ type SinkConfiguration struct { IngestNotifications IngestNotificationsConfiguration // Kafka client/Consumer configuration Kafka KafkaConfig + // TODO: remove, config moved to aggregation config // Storage configuration Storage StorageConfiguration @@ -102,7 +103,7 @@ type StorageConfiguration struct { // before flushing them regularly to disk. // See https://clickhouse.com/docs/en/cloud/bestpractices/asynchronous-inserts AsyncInsert bool - // Set true if you want an insert statement to return with an acknowledgment immediatelyy + // Set true if you want an insert statement to return with an acknowledgment immediately // without waiting for the data got inserted into the buffer. // Setting true can cause silent errors that you need to monitor separately. 
AsyncInsertWait bool @@ -154,6 +155,7 @@ func ConfigureSink(v *viper.Viper) { v.SetDefault("sink.namespaceRefetchTimeout", "10s") v.SetDefault("sink.namespaceTopicRegexp", "^om_([A-Za-z0-9]+(?:_[A-Za-z0-9]+)*)_events$") + // TODO: remove, config moved to aggregation config // Sink Storage v.SetDefault("sink.storage.asyncInsert", false) v.SetDefault("sink.storage.asyncInsertWait", false) diff --git a/cmd/balance-worker/wire_gen.go b/cmd/balance-worker/wire_gen.go index c572f76e3..e5238a13b 100644 --- a/cmd/balance-worker/wire_gen.go +++ b/cmd/balance-worker/wire_gen.go @@ -134,7 +134,7 @@ func initializeApplication(ctx context.Context, conf config.Configuration) (Appl } v3 := conf.Meters inMemoryRepository := common.NewMeterRepository(v3) - clickhouseConnector, err := common.NewClickHouseStreamingConnector(aggregationConfiguration, v2, inMemoryRepository, logger) + connector, err := common.NewStreamingConnector(ctx, aggregationConfiguration, v2, inMemoryRepository, logger) if err != nil { cleanup5() cleanup4() @@ -145,7 +145,7 @@ func initializeApplication(ctx context.Context, conf config.Configuration) (Appl } entitlementOptions := registrybuilder.EntitlementOptions{ DatabaseClient: client, - StreamingConnector: clickhouseConnector, + StreamingConnector: connector, Logger: logger, MeterRepository: inMemoryRepository, Publisher: eventbusPublisher, diff --git a/cmd/jobs/entitlement/init.go b/cmd/jobs/entitlement/init.go index 89c2ee12e..2a77c375f 100644 --- a/cmd/jobs/entitlement/init.go +++ b/cmd/jobs/entitlement/init.go @@ -8,11 +8,11 @@ import ( "github.com/ClickHouse/clickhouse-go/v2" "go.opentelemetry.io/otel/metric" + "github.com/openmeterio/openmeter/app/common" "github.com/openmeterio/openmeter/app/config" "github.com/openmeterio/openmeter/openmeter/meter" "github.com/openmeterio/openmeter/openmeter/registry" registrybuilder "github.com/openmeterio/openmeter/openmeter/registry/builder" - 
"github.com/openmeterio/openmeter/openmeter/streaming/clickhouse_connector" watermillkafka "github.com/openmeterio/openmeter/openmeter/watermill/driver/kafka" "github.com/openmeterio/openmeter/openmeter/watermill/eventbus" entdriver "github.com/openmeterio/openmeter/pkg/framework/entutils/entdriver" @@ -50,14 +50,7 @@ func initEntitlements(ctx context.Context, conf config.Configuration, logger *sl return nil, fmt.Errorf("failed to initialize clickhouse client: %w", err) } - streamingConnector, err := clickhouse_connector.NewClickhouseConnector(clickhouse_connector.ClickhouseConnectorConfig{ - Logger: logger, - ClickHouse: clickHouseClient, - Database: conf.Aggregation.ClickHouse.Database, - Meters: meterRepository, - CreateOrReplaceMeter: conf.Aggregation.CreateOrReplaceMeter, - PopulateMeter: conf.Aggregation.PopulateMeter, - }) + streamingConnector, err := common.NewStreamingConnector(ctx, conf.Aggregation, clickHouseClient, meterRepository, logger) if err != nil { return nil, fmt.Errorf("init clickhouse streaming: %w", err) } diff --git a/cmd/notification-service/wire_gen.go b/cmd/notification-service/wire_gen.go index d96175fee..7551d8a92 100644 --- a/cmd/notification-service/wire_gen.go +++ b/cmd/notification-service/wire_gen.go @@ -74,7 +74,7 @@ func initializeApplication(ctx context.Context, conf config.Configuration) (Appl } v2 := conf.Meters inMemoryRepository := common.NewMeterRepository(v2) - clickhouseConnector, err := common.NewClickHouseStreamingConnector(aggregationConfiguration, v, inMemoryRepository, logger) + connector, err := common.NewStreamingConnector(ctx, aggregationConfiguration, v, inMemoryRepository, logger) if err != nil { cleanup4() cleanup3() @@ -140,7 +140,7 @@ func initializeApplication(ctx context.Context, conf config.Configuration) (Appl GlobalInitializer: globalInitializer, Migrator: migrator, Metadata: commonMetadata, - StreamingConnector: clickhouseConnector, + StreamingConnector: connector, MeterRepository: inMemoryRepository, 
EntClient: client, TelemetryServer: v3, diff --git a/cmd/server/main.go b/cmd/server/main.go index 26479b4ea..ea66b1497 100644 --- a/cmd/server/main.go +++ b/cmd/server/main.go @@ -336,7 +336,7 @@ func main() { }) for _, meter := range conf.Meters { - err := app.StreamingConnector.CreateMeter(ctx, app.NamespaceManager.GetDefaultNamespace(), meter) + err := app.StreamingConnector.CreateMeter(ctx, app.NamespaceManager.GetDefaultNamespace(), *meter) if err != nil { slog.Warn("failed to initialize meter", "error", err) os.Exit(1) diff --git a/cmd/server/wire_gen.go b/cmd/server/wire_gen.go index cc4c32719..2c4b5f045 100644 --- a/cmd/server/wire_gen.go +++ b/cmd/server/wire_gen.go @@ -78,7 +78,7 @@ func initializeApplication(ctx context.Context, conf config.Configuration) (Appl } v2 := conf.Meters inMemoryRepository := common.NewMeterRepository(v2) - clickhouseConnector, err := common.NewClickHouseStreamingConnector(aggregationConfiguration, v, inMemoryRepository, logger) + connector, err := common.NewStreamingConnector(ctx, aggregationConfiguration, v, inMemoryRepository, logger) if err != nil { cleanup4() cleanup3() @@ -198,7 +198,7 @@ func initializeApplication(ctx context.Context, conf config.Configuration) (Appl cleanup() return Application{}, nil, err } - v5 := common.NewNamespaceHandlers(namespaceHandler, clickhouseConnector) + v5 := common.NewNamespaceHandlers(namespaceHandler, connector) namespaceConfiguration := conf.Namespace manager, err := common.NewNamespaceManager(v5, namespaceConfiguration) if err != nil { @@ -215,7 +215,7 @@ func initializeApplication(ctx context.Context, conf config.Configuration) (Appl application := Application{ GlobalInitializer: globalInitializer, Migrator: migrator, - StreamingConnector: clickhouseConnector, + StreamingConnector: connector, MeterRepository: inMemoryRepository, EntClient: client, TelemetryServer: v3, diff --git a/cmd/sink-worker/main.go b/cmd/sink-worker/main.go index 792f1d27a..e7c346a19 100644 --- 
a/cmd/sink-worker/main.go +++ b/cmd/sink-worker/main.go @@ -17,6 +17,7 @@ import ( "go.opentelemetry.io/otel/metric" "go.opentelemetry.io/otel/trace" + "github.com/openmeterio/openmeter/app/common" "github.com/openmeterio/openmeter/app/config" "github.com/openmeterio/openmeter/openmeter/dedupe" "github.com/openmeterio/openmeter/openmeter/ingest/kafkaingest/topicresolver" @@ -96,7 +97,7 @@ func main() { var group run.Group // Initialize sink worker - sink, err := initSink(conf, logger, app.Meter, app.Tracer, app.MeterRepository, app.FlushHandler) + sink, err := initSink(ctx, conf, logger, app.Meter, app.Tracer, app.MeterRepository, app.FlushHandler) if err != nil { logger.Error("failed to initialize sink worker", "error", err) os.Exit(1) @@ -135,37 +136,51 @@ func main() { } } -func initSink(config config.Configuration, logger *slog.Logger, metricMeter metric.Meter, tracer trace.Tracer, meterRepository meter.Repository, flushHandler flushhandler.FlushEventHandler) (*sink.Sink, error) { - clickhouseClient, err := clickhouse.Open(config.Aggregation.ClickHouse.GetClientOptions()) +func initSink(ctx context.Context, conf config.Configuration, logger *slog.Logger, metricMeter metric.Meter, tracer trace.Tracer, meterRepository meter.Repository, flushHandler flushhandler.FlushEventHandler) (*sink.Sink, error) { + // Initialize ClickHouse client + clickhouseClient, err := clickhouse.Open(conf.Aggregation.ClickHouse.GetClientOptions()) if err != nil { return nil, fmt.Errorf("init clickhouse client: %w", err) } + // Temporary: copy over sink storage settings + // TODO: remove after config migration is over + if conf.Sink.Storage.AsyncInsert { + conf.Aggregation.AsyncInsert = conf.Sink.Storage.AsyncInsert + } + if conf.Sink.Storage.AsyncInsertWait { + conf.Aggregation.AsyncInsertWait = conf.Sink.Storage.AsyncInsertWait + } + if conf.Sink.Storage.QuerySettings != nil { + conf.Aggregation.InsertQuerySettings = conf.Sink.Storage.QuerySettings + } + + // Initialize streaming 
connector + streaming, err := common.NewStreamingConnector(ctx, conf.Aggregation, clickhouseClient, meterRepository, logger) + if err != nil { + return nil, fmt.Errorf("init clickhouse streaming connector: %w", err) + } + + // Initialize deduplicator if enabled var deduplicator dedupe.Deduplicator - if config.Sink.Dedupe.Enabled { - deduplicator, err = config.Sink.Dedupe.NewDeduplicator() + if conf.Sink.Dedupe.Enabled { + deduplicator, err = conf.Sink.Dedupe.NewDeduplicator() if err != nil { return nil, fmt.Errorf("failed to initialize deduplicator: %w", err) } } // Initialize storage - storage, err := sink.NewClickhouseStorage( - sink.ClickHouseStorageConfig{ - ClickHouse: clickhouseClient, - Database: config.Aggregation.ClickHouse.Database, - AsyncInsert: config.Sink.Storage.AsyncInsert, - AsyncInsertWait: config.Sink.Storage.AsyncInsertWait, - QuerySettings: config.Sink.Storage.QuerySettings, - }, - ) + storage, err := sink.NewClickhouseStorage(sink.ClickHouseStorageConfig{ + Streaming: streaming, + }) if err != nil { return nil, fmt.Errorf("failed to initialize storage: %w", err) } // Initialize Kafka consumer - consumerConfig := config.Sink.Kafka.AsConsumerConfig() + consumerConfig := conf.Sink.Kafka.AsConsumerConfig() // Override following Kafka consumer configuration parameters with hardcoded values as the Sink implementation relies on // these to be set to a specific value. 
@@ -200,7 +215,7 @@ func initSink(config config.Configuration, logger *slog.Logger, metricMeter metr // Enable Kafka client logging go pkgkafka.ConsumeLogChannel(consumer, logger.WithGroup("kafka").WithGroup("consumer")) - topicResolver, err := topicresolver.NewNamespacedTopicResolver(config.Ingest.Kafka.EventsTopicTemplate) + topicResolver, err := topicresolver.NewNamespacedTopicResolver(conf.Ingest.Kafka.EventsTopicTemplate) if err != nil { return nil, fmt.Errorf("failed to create topic name resolver: %w", err) } @@ -213,16 +228,16 @@ func initSink(config config.Configuration, logger *slog.Logger, metricMeter metr Storage: storage, Deduplicator: deduplicator, Consumer: consumer, - MinCommitCount: config.Sink.MinCommitCount, - MaxCommitWait: config.Sink.MaxCommitWait, - MaxPollTimeout: config.Sink.MaxPollTimeout, - FlushSuccessTimeout: config.Sink.FlushSuccessTimeout, - DrainTimeout: config.Sink.DrainTimeout, - NamespaceRefetch: config.Sink.NamespaceRefetch, + MinCommitCount: conf.Sink.MinCommitCount, + MaxCommitWait: conf.Sink.MaxCommitWait, + MaxPollTimeout: conf.Sink.MaxPollTimeout, + FlushSuccessTimeout: conf.Sink.FlushSuccessTimeout, + DrainTimeout: conf.Sink.DrainTimeout, + NamespaceRefetch: conf.Sink.NamespaceRefetch, FlushEventHandler: flushHandler, TopicResolver: topicResolver, - NamespaceRefetchTimeout: config.Sink.NamespaceRefetchTimeout, - NamespaceTopicRegexp: config.Sink.NamespaceTopicRegexp, + NamespaceRefetchTimeout: conf.Sink.NamespaceRefetchTimeout, + NamespaceTopicRegexp: conf.Sink.NamespaceTopicRegexp, } return sink.NewSink(sinkConfig) diff --git a/openmeter/credit/balance.go b/openmeter/credit/balance.go index cda8bd19b..b1ba7e87b 100644 --- a/openmeter/credit/balance.go +++ b/openmeter/credit/balance.go @@ -229,7 +229,7 @@ func (m *connector) ResetUsageForOwner(ctx context.Context, owner grant.Namespac return nil, fmt.Errorf("failed to get owner query params for owner %s: %w", owner.ID, err) } - at := 
params.At.Truncate(ownerMeter.WindowSize.Duration()) + at := params.At.Truncate(ownerMeter.Meter.WindowSize.Duration()) // check if reset is possible (after last reset) periodStart, err := m.ownerConnector.GetUsagePeriodStartAt(ctx, owner, clock.Now()) @@ -425,9 +425,9 @@ func (m *connector) getQueryUsageFn(ctx context.Context, owner grant.NamespacedO params.From = &from params.To = &to params.FilterSubject = []string{subjectKey} - rows, err := m.streamingConnector.QueryMeter(ctx, owner.Namespace, ownerMeter.MeterSlug, params) + rows, err := m.streamingConnector.QueryMeter(ctx, owner.Namespace, ownerMeter.Meter, params) if err != nil { - return 0.0, fmt.Errorf("failed to query meter %s: %w", ownerMeter.MeterSlug, err) + return 0.0, fmt.Errorf("failed to query meter %s: %w", ownerMeter.Meter.Slug, err) } if len(rows) > 1 { return 0.0, fmt.Errorf("expected 1 row, got %d", len(rows)) @@ -437,7 +437,7 @@ func (m *connector) getQueryUsageFn(ctx context.Context, owner grant.NamespacedO } return rows[0].Value, nil }, - Grantuality: ownerMeter.WindowSize, + Grantuality: ownerMeter.Meter.WindowSize, }, nil } diff --git a/openmeter/credit/engine/engine_test.go b/openmeter/credit/engine/engine_test.go index fb7a046e9..25c9bfb53 100644 --- a/openmeter/credit/engine/engine_test.go +++ b/openmeter/credit/engine/engine_test.go @@ -26,6 +26,10 @@ func TestEngine(t *testing.T) { assert.NoError(t, err) meterSlug := "meter-1" + meter := models.Meter{ + Slug: meterSlug, + } + grant1 := makeGrant(grant.Grant{ ID: "grant-1", Amount: 100.0, @@ -795,7 +799,7 @@ func TestEngine(t *testing.T) { streamingConnector := testutils.NewMockStreamingConnector(t) queryFeatureUsage := func(ctx context.Context, from, to time.Time) (float64, error) { - rows, err := streamingConnector.QueryMeter(ctx, "default", meterSlug, &streaming.QueryParams{ + rows, err := streamingConnector.QueryMeter(ctx, "default", meter, streaming.QueryParams{ From: &from, To: &to, }) @@ -1091,7 +1095,7 @@ func TestEngine(t 
*testing.T) { streamingConnector := testutils.NewMockStreamingConnector(t) queryFeatureUsage := func(ctx context.Context, from, to time.Time) (float64, error) { - rows, err := streamingConnector.QueryMeter(ctx, "default", meterSlug, &streaming.QueryParams{ + rows, err := streamingConnector.QueryMeter(ctx, "default", meter, streaming.QueryParams{ From: &from, To: &to, }) diff --git a/openmeter/credit/grant.go b/openmeter/credit/grant.go index 3eeec0aea..2afa0aeb3 100644 --- a/openmeter/credit/grant.go +++ b/openmeter/credit/grant.go @@ -41,11 +41,11 @@ func (m *connector) CreateGrant(ctx context.Context, owner grant.NamespacedOwner // All metering information is stored in windowSize chunks, // so we cannot do accurate calculations unless we follow that same windowing. - meter, err := m.ownerConnector.GetMeter(ctx, owner) + ownerMeter, err := m.ownerConnector.GetMeter(ctx, owner) if err != nil { return nil, err } - granularity := meter.WindowSize.Duration() + granularity := ownerMeter.Meter.WindowSize.Duration() input.EffectiveAt = input.EffectiveAt.Truncate(granularity) if input.Recurrence != nil { input.Recurrence.Anchor = input.Recurrence.Anchor.Truncate(granularity) diff --git a/openmeter/credit/grant/owner_connector.go b/openmeter/credit/grant/owner_connector.go index 50cea5629..ce7ad14e9 100644 --- a/openmeter/credit/grant/owner_connector.go +++ b/openmeter/credit/grant/owner_connector.go @@ -15,9 +15,8 @@ type EndCurrentUsagePeriodParams struct { } type OwnerMeter struct { - MeterSlug string - DefaultParams *streaming.QueryParams - WindowSize models.WindowSize + Meter models.Meter + DefaultParams streaming.QueryParams SubjectKey string } diff --git a/openmeter/entitlement/metered/balance.go b/openmeter/entitlement/metered/balance.go index 29b4d5353..af8c8f807 100644 --- a/openmeter/entitlement/metered/balance.go +++ b/openmeter/entitlement/metered/balance.go @@ -81,7 +81,7 @@ func (e *connector) GetEntitlementBalance(ctx context.Context, entitlementID mod 
meterQuery.To = convert.ToPointer(trunc.Add(time.Minute)) } - rows, err := e.streamingConnector.QueryMeter(ctx, entitlementID.Namespace, ownerMeter.MeterSlug, meterQuery) + rows, err := e.streamingConnector.QueryMeter(ctx, entitlementID.Namespace, ownerMeter.Meter, meterQuery) if err != nil { return nil, fmt.Errorf("failed to query meter: %w", err) } @@ -144,8 +144,8 @@ func (e *connector) GetEntitlementBalanceHistory(ctx context.Context, entitlemen // 1. we get the burndown history burndownHistory, err := e.balanceConnector.GetBalanceHistoryOfOwner(ctx, owner, credit.BalanceHistoryParams{ - From: params.From.Truncate(ownerMeter.WindowSize.Duration()), - To: params.To.Truncate(ownerMeter.WindowSize.Duration()), + From: params.From.Truncate(ownerMeter.Meter.WindowSize.Duration()), + To: params.To.Truncate(ownerMeter.Meter.WindowSize.Duration()), }) if err != nil { return nil, engine.GrantBurnDownHistory{}, fmt.Errorf("failed to get balance history: %w", err) @@ -159,7 +159,7 @@ func (e *connector) GetEntitlementBalanceHistory(ctx context.Context, entitlemen meterQuery.WindowSize = convert.ToPointer(models.WindowSize(params.WindowSize)) meterQuery.WindowTimeZone = ¶ms.WindowTimeZone - meterRows, err := e.streamingConnector.QueryMeter(ctx, owner.Namespace, ownerMeter.MeterSlug, meterQuery) + meterRows, err := e.streamingConnector.QueryMeter(ctx, owner.Namespace, ownerMeter.Meter, meterQuery) if err != nil { return nil, engine.GrantBurnDownHistory{}, fmt.Errorf("failed to query meter: %w", err) } @@ -167,11 +167,11 @@ func (e *connector) GetEntitlementBalanceHistory(ctx context.Context, entitlemen // If we get 0 rows that means the windowsize is larger than the queried period. // In this case we simply query for the entire period. 
if len(meterRows) == 0 { - nonWindowedParams := *meterQuery + nonWindowedParams := meterQuery nonWindowedParams.FilterSubject = []string{ownerMeter.SubjectKey} nonWindowedParams.WindowSize = nil nonWindowedParams.WindowTimeZone = nil - meterRows, err = e.streamingConnector.QueryMeter(ctx, owner.Namespace, ownerMeter.MeterSlug, &nonWindowedParams) + meterRows, err = e.streamingConnector.QueryMeter(ctx, owner.Namespace, ownerMeter.Meter, nonWindowedParams) if err != nil { return nil, engine.GrantBurnDownHistory{}, fmt.Errorf("failed to query meter: %w", err) } diff --git a/openmeter/entitlement/metered/grant_owner_adapter.go b/openmeter/entitlement/metered/grant_owner_adapter.go index b14861489..8ee5fb7f4 100644 --- a/openmeter/entitlement/metered/grant_owner_adapter.go +++ b/openmeter/entitlement/metered/grant_owner_adapter.go @@ -65,9 +65,7 @@ func (e *entitlementGrantOwner) GetMeter(ctx context.Context, owner grant.Namesp return nil, fmt.Errorf("failed to get meter: %w", err) } - queryParams := &streaming.QueryParams{ - Aggregation: meter.Aggregation, - } + queryParams := streaming.QueryParams{} if feature.MeterGroupByFilters != nil { queryParams.FilterGroupBy = map[string][]string{} @@ -77,9 +75,8 @@ func (e *entitlementGrantOwner) GetMeter(ctx context.Context, owner grant.Namesp } return &grant.OwnerMeter{ - MeterSlug: meter.Slug, + Meter: meter, DefaultParams: queryParams, - WindowSize: meter.WindowSize, SubjectKey: entitlement.SubjectKey, }, nil } diff --git a/openmeter/ingest/kafkaingest/serializer/serializer.go b/openmeter/ingest/kafkaingest/serializer/serializer.go index e486ca31f..d99a9f585 100644 --- a/openmeter/ingest/kafkaingest/serializer/serializer.go +++ b/openmeter/ingest/kafkaingest/serializer/serializer.go @@ -3,6 +3,7 @@ package serializer import ( _ "embed" "encoding/json" + "time" "github.com/cloudevents/sdk-go/v2/event" ) @@ -16,13 +17,12 @@ type Serializer interface { } type CloudEventsKafkaPayload struct { - Id string `json:"id"` - Type 
string `json:"type"` - Source string `json:"source"` - Subject string `json:"subject"` - // Note: By converting to unix timestamp we loose timezone information. - Time int64 `json:"time"` - Data string `json:"data"` + Id string `json:"id"` + Type string `json:"type"` + Source string `json:"source"` + Subject string `json:"subject"` + Time time.Time `json:"time"` + Data string `json:"data"` } func toCloudEventsKafkaPayload(ev event.Event) (CloudEventsKafkaPayload, error) { @@ -31,7 +31,7 @@ func toCloudEventsKafkaPayload(ev event.Event) (CloudEventsKafkaPayload, error) Type: ev.Type(), Source: ev.Source(), Subject: ev.Subject(), - Time: ev.Time().Unix(), + Time: ev.Time(), } // We try to parse data as JSON. diff --git a/openmeter/meter/parse.go b/openmeter/meter/parse.go new file mode 100644 index 000000000..59b3b3eb7 --- /dev/null +++ b/openmeter/meter/parse.go @@ -0,0 +1,93 @@ +package meter + +import ( + "errors" + "fmt" + "strconv" + + "github.com/cloudevents/sdk-go/v2/event" + "github.com/oliveagle/jsonpath" + + "github.com/openmeterio/openmeter/pkg/models" +) + +// ParseEvent validates and parses an event against a meter. 
+func ParseEvent(meter Meter, ev event.Event) (*float64, *string, map[string]string, error) { + // Parse CloudEvents data + var data interface{} + + err := ev.DataAs(&data) + if err != nil { + return nil, nil, map[string]string{}, errors.New("cannot unmarshal event data") + } + + // Parse group by fields + groupBy := parseGroupBy(meter, data) + + // We can skip count events as they don't have value property + if meter.Aggregation == MeterAggregationCount { + value := 1.0 + return &value, nil, groupBy, nil + } + + // Get value from event data by value property + rawValue, err := jsonpath.JsonPathLookup(data, meter.ValueProperty) + if err != nil { + return nil, nil, groupBy, fmt.Errorf("event data is missing value property at %q", meter.ValueProperty) + } + + if rawValue == nil { + return nil, nil, groupBy, errors.New("event data value cannot be null") + } + + // Aggregation specific value validation + switch meter.Aggregation { + // UNIQUE_COUNT aggregation requires string property value + case MeterAggregationUniqueCount: + // We convert the value to string + val := fmt.Sprintf("%v", rawValue) + return nil, &val, groupBy, nil + + // SUM, AVG, MIN, MAX aggregations require float64 parsable value property value + case MeterAggregationSum, MeterAggregationAvg, MeterAggregationMin, MeterAggregationMax: + switch value := rawValue.(type) { + case string: + val, err := strconv.ParseFloat(value, 64) + if err != nil { + // TODO: omit value or make sure it's length is not too long + return nil, nil, groupBy, fmt.Errorf("event data value cannot be parsed as float64: %s", value) + } + + return &val, nil, groupBy, nil + + case float64: + return &value, nil, groupBy, nil + + default: + return nil, nil, groupBy, errors.New("event data value property cannot be parsed") + } + } + + return nil, nil, groupBy, fmt.Errorf("unknown meter aggregation: %s", meter.Aggregation) +} + +// parseGroupBy parses the group by fields from the event data +func parseGroupBy(meter models.Meter, data 
interface{}) map[string]string { + groupBy := map[string]string{} + + // Group by fields + for groupByKey, groupByPath := range meter.GroupBy { + var groupByValue string + + rawGroupBy, err := jsonpath.JsonPathLookup(data, groupByPath) + if err != nil { + groupByValue = "" + } else { + groupByValue = fmt.Sprintf("%v", rawGroupBy) + } + + groupBy[groupByKey] = groupByValue + } + + return groupBy +} diff --git a/openmeter/meter/parse_test.go b/openmeter/meter/parse_test.go new file mode 100644 index 000000000..dbc1c3293 --- /dev/null +++ b/openmeter/meter/parse_test.go @@ -0,0 +1,220 @@ +package meter_test + +import ( + "errors" + "testing" + + "github.com/cloudevents/sdk-go/v2/event" + "github.com/samber/lo" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/openmeterio/openmeter/openmeter/meter" + "github.com/openmeterio/openmeter/pkg/models" +) + +func TestParseEvent(t *testing.T) { + meterSum := meter.Meter{ + Namespace: "default", + Slug: "m1", + Aggregation: models.MeterAggregationSum, + EventType: "api-calls", + ValueProperty: "$.duration_ms", + GroupBy: map[string]string{ + "method": "$.method", + "path": "$.path", + }, + WindowSize: meter.WindowSizeMinute, + } + + meterCount := meter.Meter{ + Namespace: "default", + Slug: "m2", + Aggregation: models.MeterAggregationCount, + EventType: "api-calls", + WindowSize: meter.WindowSizeMinute, + } + + meterUniqueCount := meter.Meter{ + Namespace: "default", + Slug: "m3", + Aggregation: models.MeterAggregationUniqueCount, + EventType: "spans", + ValueProperty: "$.trace_id", + WindowSize: meter.WindowSizeMinute, + } + + tests := []struct { + description string + meter meter.Meter + event func(t *testing.T) event.Event + err error + value *float64 + valueStr *string + groupBy map[string]string + }{ + { + description: "should parse event", + meter: meterSum, + event: func(t *testing.T) event.Event { + ev := event.New() + ev.SetType("api-calls") + + err := 
ev.SetData(event.ApplicationJSON, []byte(`{"duration_ms": 100, "method": "GET", "path": "/api/v1"}`)) + require.NoError(t, err) + + return ev + }, + value: lo.ToPtr(100.0), + groupBy: map[string]string{ + "method": "GET", + "path": "/api/v1", + }, + }, + { + description: "should parse event with numeric string value", + meter: meterSum, + event: func(t *testing.T) event.Event { + ev := event.New() + ev.SetType("api-calls") + + err := ev.SetData(event.ApplicationJSON, []byte(`{"duration_ms": "100", "method": "GET", "path": "/api/v1"}`)) + require.NoError(t, err) + + return ev + }, + value: lo.ToPtr(100.0), + groupBy: map[string]string{ + "method": "GET", + "path": "/api/v1", + }, + }, + { + description: "should parse count as value one", + meter: meterCount, + event: func(t *testing.T) event.Event { + ev := event.New() + ev.SetType("api-calls") + + return ev + }, + value: lo.ToPtr(1.0), + groupBy: map[string]string{}, + }, + { + description: "should parse unique count as string", + meter: meterUniqueCount, + event: func(t *testing.T) event.Event { + ev := event.New() + ev.SetType("spans") + + err := ev.SetData(event.ApplicationJSON, []byte(`{"trace_id": "test_trace_id"}`)) + require.NoError(t, err) + + return ev + }, + valueStr: lo.ToPtr("test_trace_id"), + groupBy: map[string]string{}, + }, + { + description: "should parse event with missing group by properties", + meter: meterSum, + event: func(t *testing.T) event.Event { + ev := event.New() + ev.SetType("api-calls") + + err := ev.SetData(event.ApplicationJSON, []byte(`{"duration_ms": 100}`)) + require.NoError(t, err) + + return ev + }, + value: lo.ToPtr(100.0), + groupBy: map[string]string{ + "method": "", + "path": "", + }, + }, + { + description: "should return error with invalid json", + meter: meterSum, + event: func(t *testing.T) event.Event { + ev := event.New() + ev.SetType("api-calls") + + err := ev.SetData(event.ApplicationJSON, []byte(`{`)) + require.NoError(t, err) + + return ev + }, + err: 
errors.New("cannot unmarshal event data"), + groupBy: map[string]string{}, + }, + { + description: "should return error with value property not found", + meter: meterSum, + event: func(t *testing.T) event.Event { + ev := event.New() + ev.SetType("api-calls") + + err := ev.SetData(event.ApplicationJSON, []byte(`{"method": "GET", "path": "/api/v1"}`)) + require.NoError(t, err) + + return ev + }, + err: errors.New("event data is missing value property at \"$.duration_ms\""), + groupBy: map[string]string{ + "method": "GET", + "path": "/api/v1", + }, + }, + { + description: "should return error when value property is null", + meter: meterSum, + event: func(t *testing.T) event.Event { + ev := event.New() + ev.SetType("api-calls") + + err := ev.SetData(event.ApplicationJSON, []byte(`{"duration_ms": null, "method": "GET", "path": "/api/v1"}`)) + require.NoError(t, err) + + return ev + }, + err: errors.New("event data value cannot be null"), + groupBy: map[string]string{ + "method": "GET", + "path": "/api/v1", + }, + }, + { + description: "should return error when value property cannot be parsed as number", + meter: meterSum, + event: func(t *testing.T) event.Event { + ev := event.New() + ev.SetType("api-calls") + + err := ev.SetData(event.ApplicationJSON, []byte(`{"duration_ms": "not a number", "method": "GET", "path": "/api/v1"}`)) + require.NoError(t, err) + + return ev + }, + err: errors.New("event data value cannot be parsed as float64: not a number"), + groupBy: map[string]string{ + "method": "GET", + "path": "/api/v1", + }, + }, + } + + for _, test := range tests { + test := test + + t.Run(test.description, func(t *testing.T) { + value, valueStr, groupBy, err := meter.ParseEvent(test.meter, test.event(t)) + + assert.Equal(t, test.err, err) + assert.Equal(t, test.value, value) + assert.Equal(t, test.valueStr, valueStr) + assert.Equal(t, test.groupBy, groupBy) + }) + } +} diff --git a/openmeter/meter/validate.go b/openmeter/meter/validate.go deleted file mode 100644 
index dfe186503..000000000 --- a/openmeter/meter/validate.go +++ /dev/null @@ -1,68 +0,0 @@ -package meter - -import ( - "errors" - "fmt" - "strconv" - - "github.com/cloudevents/sdk-go/v2/event" - "github.com/oliveagle/jsonpath" -) - -// ValidateEvent validates an event against a meter. -func ValidateEvent(meter Meter, ev event.Event) error { - // Parse CloudEvents data - var data interface{} - - err := ev.DataAs(&data) - if err != nil { - return errors.New("cannot unmarshal event data") - } - - // We can skip count events as they don't have value property - if meter.Aggregation == MeterAggregationCount { - return nil - } - - // Get value from event data by value property - rawValue, err := jsonpath.JsonPathLookup(data, meter.ValueProperty) - if err != nil { - return fmt.Errorf("event data is missing value property at %q", meter.ValueProperty) - } - - if rawValue == nil { - return errors.New("event data value cannot be null") - } - - // Aggregation specific value validation - switch meter.Aggregation { - // UNIQUE_COUNT aggregation requires string property value - case MeterAggregationUniqueCount: - switch rawValue.(type) { - case string, float64: - // No need to do anything - - default: - return errors.New("event data value property must be string for unique count aggregation") - } - - // SUM, AVG, MIN, MAX aggregations require float64 parsable value property value - case MeterAggregationSum, MeterAggregationAvg, MeterAggregationMin, MeterAggregationMax: - switch value := rawValue.(type) { - case string: - _, err = strconv.ParseFloat(value, 64) - if err != nil { - // TODO: omit value or make sure it's length is not too long - return fmt.Errorf("event data value cannot be parsed as float64: %s", value) - } - - case float64: - // No need to do anything - - default: - return errors.New("event data value property cannot be parsed") - } - } - - return nil -} diff --git a/openmeter/meter/validate_test.go b/openmeter/meter/validate_test.go deleted file mode 100644 index 
0c485677a..000000000 --- a/openmeter/meter/validate_test.go +++ /dev/null @@ -1,102 +0,0 @@ -package meter_test - -import ( - "errors" - "testing" - - "github.com/cloudevents/sdk-go/v2/event" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - - "github.com/openmeterio/openmeter/openmeter/meter" -) - -func TestValidateEvent(t *testing.T) { - m := meter.Meter{ - Namespace: "default", - Slug: "m1", - Description: "", - Aggregation: "SUM", - EventType: "api-calls", - ValueProperty: "$.duration_ms", - GroupBy: map[string]string{ - "method": "$.method", - "path": "$.path", - }, - WindowSize: meter.WindowSizeMinute, - } - - tests := []struct { - description string - event func(t *testing.T) event.Event - want error - }{ - { - description: "should return error with invalid json", - event: func(t *testing.T) event.Event { - ev := event.New() - ev.SetType("api-calls") - - err := ev.SetData(event.ApplicationJSON, []byte(`{`)) - require.NoError(t, err) - - return ev - }, - want: errors.New("cannot unmarshal event data"), - }, - { - description: "should return error with value property not found", - event: func(t *testing.T) event.Event { - ev := event.New() - ev.SetType("api-calls") - - err := ev.SetData(event.ApplicationJSON, []byte(`{"method": "GET", "path": "/api/v1"}`)) - require.NoError(t, err) - - return ev - }, - want: errors.New("event data is missing value property at \"$.duration_ms\""), - }, - { - description: "should return error when value property is null", - event: func(t *testing.T) event.Event { - ev := event.New() - ev.SetType("api-calls") - - err := ev.SetData(event.ApplicationJSON, []byte(`{"duration_ms": null, "method": "GET", "path": "/api/v1"}`)) - require.NoError(t, err) - - return ev - }, - want: errors.New("event data value cannot be null"), - }, - { - description: "should return error when value property cannot be parsed as number", - event: func(t *testing.T) event.Event { - ev := event.New() - ev.SetType("api-calls") - 
- err := ev.SetData(event.ApplicationJSON, []byte(`{"duration_ms": "not a number", "method": "GET", "path": "/api/v1"}`)) - require.NoError(t, err) - - return ev - }, - want: errors.New("event data value cannot be parsed as float64: not a number"), - }, - } - - for _, test := range tests { - test := test - - t.Run(test.description, func(t *testing.T) { - err := meter.ValidateEvent(m, test.event(t)) - if test.want == nil { - assert.Nil(t, err) - - return - } - - assert.Equal(t, test.want, err) - }) - } -} diff --git a/openmeter/server/router/meter_query.go b/openmeter/server/router/meter_query.go index 3bc4cf7b1..53d6963f3 100644 --- a/openmeter/server/router/meter_query.go +++ b/openmeter/server/router/meter_query.go @@ -51,11 +51,10 @@ func (a *Router) QueryMeter(w http.ResponseWriter, r *http.Request, meterIDOrSlu // QueryMeter queries the values stored for a meter. func (a *Router) QueryMeterWithMeter(ctx context.Context, w http.ResponseWriter, r *http.Request, logger *slog.Logger, meter models.Meter, params api.QueryMeterParams) { // Query Params - queryParams := &streaming.QueryParams{ - From: params.From, - To: params.To, - WindowSize: params.WindowSize, - Aggregation: meter.Aggregation, + queryParams := streaming.QueryParams{ + From: params.From, + To: params.To, + WindowSize: params.WindowSize, } if params.GroupBy != nil { @@ -122,7 +121,7 @@ func (a *Router) QueryMeterWithMeter(ctx context.Context, w http.ResponseWriter, } // Query connector - data, err := a.config.StreamingConnector.QueryMeter(ctx, meter.Namespace, meter.Slug, queryParams) + data, err := a.config.StreamingConnector.QueryMeter(ctx, meter.Namespace, meter, queryParams) if err != nil { err := fmt.Errorf("query meter: %w", err) diff --git a/openmeter/server/router/meter_subject.go b/openmeter/server/router/meter_subject.go index 14a2c1949..6738e7ed2 100644 --- a/openmeter/server/router/meter_subject.go +++ b/openmeter/server/router/meter_subject.go @@ -6,6 +6,7 @@ import ( 
"github.com/go-chi/render" + "github.com/openmeterio/openmeter/openmeter/streaming" "github.com/openmeterio/openmeter/pkg/contextx" "github.com/openmeterio/openmeter/pkg/models" ) @@ -17,7 +18,26 @@ func (a *Router) ListMeterSubjects(w http.ResponseWriter, r *http.Request, meter namespace := a.config.NamespaceManager.GetDefaultNamespace() - subjects, err := a.config.StreamingConnector.ListMeterSubjects(ctx, namespace, meterIDOrSlug, nil, nil) + // Get meter + meter, err := a.config.Meters.GetMeterByIDOrSlug(ctx, namespace, meterIDOrSlug) + if err != nil { + if _, ok := err.(*models.MeterNotFoundError); ok { + err := fmt.Errorf("meter not found: %w", err) + + models.NewStatusProblem(ctx, err, http.StatusNotFound).Respond(w) + + return + } + + err := fmt.Errorf("get meter: %w", err) + + a.config.ErrorHandler.HandleContext(ctx, err) + models.NewStatusProblem(ctx, err, http.StatusInternalServerError).Respond(w) + + return + } + + subjects, err := a.config.StreamingConnector.ListMeterSubjects(ctx, namespace, meter, streaming.ListMeterSubjectsParams{}) if err != nil { if _, ok := err.(*models.MeterNotFoundError); ok { err := fmt.Errorf("meter not found: %w", err) diff --git a/openmeter/server/server_test.go b/openmeter/server/server_test.go index 8cbcb4f53..6c29cd5eb 100644 --- a/openmeter/server/server_test.go +++ b/openmeter/server/server_test.go @@ -75,6 +75,14 @@ var ( type MockStreamingConnector struct{} +func (c *MockStreamingConnector) CreateNamespace(ctx context.Context, namespace string) error { + return nil +} + +func (c *MockStreamingConnector) DeleteNamespace(ctx context.Context, namespace string) error { + return nil +} + func (c *MockStreamingConnector) CountEvents(ctx context.Context, namespace string, params streaming.CountEventsParams) ([]streaming.CountEventRow, error) { return []streaming.CountEventRow{}, nil } @@ -88,15 +96,15 @@ func (c *MockStreamingConnector) ListEvents(ctx context.Context, namespace strin return events, nil } -func (c 
*MockStreamingConnector) CreateMeter(ctx context.Context, namespace string, meter *models.Meter) error { +func (c *MockStreamingConnector) CreateMeter(ctx context.Context, namespace string, meter models.Meter) error { return nil } -func (c *MockStreamingConnector) DeleteMeter(ctx context.Context, namespace string, meterSlug string) error { +func (c *MockStreamingConnector) DeleteMeter(ctx context.Context, namespace string, meter models.Meter) error { return nil } -func (c *MockStreamingConnector) QueryMeter(ctx context.Context, namespace string, meterSlug string, params *streaming.QueryParams) ([]models.MeterQueryRow, error) { +func (c *MockStreamingConnector) QueryMeter(ctx context.Context, namespace string, meter models.Meter, params streaming.QueryParams) ([]models.MeterQueryRow, error) { value := mockQueryValue if params.FilterSubject == nil { @@ -106,10 +114,14 @@ func (c *MockStreamingConnector) QueryMeter(ctx context.Context, namespace strin return []models.MeterQueryRow{value}, nil } -func (c *MockStreamingConnector) ListMeterSubjects(ctx context.Context, namespace string, meterSlug string, from *time.Time, to *time.Time) ([]string, error) { +func (c *MockStreamingConnector) ListMeterSubjects(ctx context.Context, namespace string, meter models.Meter, params streaming.ListMeterSubjectsParams) ([]string, error) { return []string{"s1"}, nil } +func (c *MockStreamingConnector) BatchInsert(ctx context.Context, events []streaming.RawEvent) error { + return nil +} + type MockDebugHandler struct{} func (h MockDebugHandler) GetDebugMetrics(ctx context.Context, namespace string) (string, error) { diff --git a/openmeter/sink/namespaces.go b/openmeter/sink/namespaces.go index 16a81749c..5e1c78bbc 100644 --- a/openmeter/sink/namespaces.go +++ b/openmeter/sink/namespaces.go @@ -4,12 +4,11 @@ import ( "context" "errors" "fmt" - "time" "github.com/cloudevents/sdk-go/v2/event" "github.com/openmeterio/openmeter/openmeter/ingest/kafkaingest/serializer" - 
"github.com/openmeterio/openmeter/openmeter/meter" + ommeter "github.com/openmeterio/openmeter/openmeter/meter" sinkmodels "github.com/openmeterio/openmeter/openmeter/sink/models" "github.com/openmeterio/openmeter/pkg/models" ) @@ -60,8 +59,28 @@ func (n *NamespaceStore) ValidateEvent(_ context.Context, m *sinkmodels.SinkMess // // On the other hand we still want to collect the list of affected meters // for the FlushEventHandler. - if m.Status.Error == nil { - validateEventWithMeter(meter, m) + if m.Status.Error != nil { + return + } + + // Parse kafka event + event, err := kafkaPayloadToCloudEvents(*m.Serialized) + if err != nil { + m.Status = sinkmodels.ProcessingStatus{ + State: sinkmodels.INVALID, + Error: errors.New("cannot parse event"), + } + } + + // Parse event with meter + _, _, _, err = ommeter.ParseEvent(meter, event) + if err != nil { + m.Status = sinkmodels.ProcessingStatus{ + State: sinkmodels.INVALID, + Error: err, + } + + return } } } @@ -82,7 +101,7 @@ func kafkaPayloadToCloudEvents(payload serializer.CloudEventsKafkaPayload) (even ev.SetType(payload.Type) ev.SetSource(payload.Source) ev.SetSubject(payload.Subject) - ev.SetTime(time.Unix(payload.Time, 0)) + ev.SetTime(payload.Time) err := ev.SetData(event.ApplicationJSON, []byte(payload.Data)) if err != nil { @@ -91,26 +110,3 @@ func kafkaPayloadToCloudEvents(payload serializer.CloudEventsKafkaPayload) (even return ev, nil } - -// validateEventWithMeter validates a single event against a single meter -func validateEventWithMeter(m models.Meter, sm *sinkmodels.SinkMessage) { - ev, err := kafkaPayloadToCloudEvents(*sm.Serialized) - if err != nil { - sm.Status = sinkmodels.ProcessingStatus{ - State: sinkmodels.INVALID, - Error: errors.New("cannot parse event"), - } - - return - } - - err = meter.ValidateEvent(m, ev) - if err != nil { - sm.Status = sinkmodels.ProcessingStatus{ - State: sinkmodels.INVALID, - Error: err, - } - - return - } -} diff --git a/openmeter/sink/storage.go 
b/openmeter/sink/storage.go index bd7109532..a33650a9d 100644 --- a/openmeter/sink/storage.go +++ b/openmeter/sink/storage.go @@ -3,14 +3,10 @@ package sink import ( "context" "fmt" - "strings" "time" - "github.com/ClickHouse/clickhouse-go/v2" - "github.com/huandu/go-sqlbuilder" - sinkmodels "github.com/openmeterio/openmeter/openmeter/sink/models" - "github.com/openmeterio/openmeter/openmeter/streaming/clickhouse_connector" + "github.com/openmeterio/openmeter/openmeter/streaming" ) type Storage interface { @@ -18,20 +14,12 @@ type Storage interface { } type ClickHouseStorageConfig struct { - ClickHouse clickhouse.Conn - Database string - AsyncInsert bool - AsyncInsertWait bool - QuerySettings map[string]string + Streaming streaming.Connector } func (c ClickHouseStorageConfig) Validate() error { - if c.ClickHouse == nil { - return fmt.Errorf("clickhouse connection is required") - } - - if c.Database == "" { - return fmt.Errorf("database is required") + if c.Streaming == nil { + return fmt.Errorf("streaming connection is required") } return nil @@ -51,67 +39,17 @@ type ClickHouseStorage struct { config ClickHouseStorageConfig } +// BatchInsert inserts multiple messages into ClickHouse. func (c *ClickHouseStorage) BatchInsert(ctx context.Context, messages []sinkmodels.SinkMessage) error { - query := InsertEventsQuery{ - Clock: realClock{}, - Database: c.config.Database, - Messages: messages, - QuerySettings: c.config.QuerySettings, - } - sql, args, err := query.ToSQL() - if err != nil { - return err - } - - // By default, ClickHouse is writing data synchronously. - // See https://clickhouse.com/docs/en/cloud/bestpractices/asynchronous-inserts - if c.config.AsyncInsert { - // With the `wait_for_async_insert` setting, you can configure - // if you want an insert statement to return with an acknowledgment - // either immediately after the data got inserted into the buffer. - err = c.config.ClickHouse.AsyncInsert(ctx, sql, c.config.AsyncInsertWait, args...) 
- } else { - err = c.config.ClickHouse.Exec(ctx, sql, args...) - } - - if err != nil { - return fmt.Errorf("failed to batch insert events: %w", err) - } - - return nil -} - -type InsertEventsQuery struct { - Clock Clock - Database string - Messages []sinkmodels.SinkMessage - QuerySettings map[string]string -} + var rawEvents []streaming.RawEvent -func (q InsertEventsQuery) ToSQL() (string, []interface{}, error) { - tableName := clickhouse_connector.GetEventsTableName(q.Database) - - query := sqlbuilder.ClickHouse.NewInsertBuilder() - query.InsertInto(tableName) - query.Cols("namespace", "validation_error", "id", "type", "source", "subject", "time", "data", "ingested_at", "stored_at") - - // Add settings - var settings []string - for key, value := range q.QuerySettings { - settings = append(settings, fmt.Sprintf("%s = %s", key, value)) - } - - if len(settings) > 0 { - query.SQL(fmt.Sprintf("SETTINGS %s", strings.Join(settings, ", "))) - } - - for _, message := range q.Messages { + for _, message := range messages { var eventErr string if message.Status.Error != nil { eventErr = message.Status.Error.Error() } - storedAt := q.Clock.Now() + storedAt := time.Now() ingestedAt := storedAt if message.KafkaMessage != nil { @@ -128,33 +66,25 @@ func (q InsertEventsQuery) ToSQL() (string, []interface{}, error) { } } - query.Values( - message.Namespace, - eventErr, - message.Serialized.Id, - message.Serialized.Type, - message.Serialized.Source, - message.Serialized.Subject, - message.Serialized.Time, - message.Serialized.Data, - ingestedAt, - storedAt, - ) - } - - sql, args := query.Build() - return sql, args, nil -} + rawEvent := streaming.RawEvent{ + Namespace: message.Namespace, + ValidationError: eventErr, + ID: message.Serialized.Id, + Type: message.Serialized.Type, + Source: message.Serialized.Source, + Subject: message.Serialized.Subject, + Time: message.Serialized.Time, + Data: message.Serialized.Data, + IngestedAt: ingestedAt, + StoredAt: storedAt, + } -// Clock is an 
interface for getting the current time. -// It is used to make the code testable. -type Clock interface { - Now() time.Time -} + rawEvents = append(rawEvents, rawEvent) + } -// realClock implements Clock using the system clock. -type realClock struct{} + if err := c.config.Streaming.BatchInsert(ctx, rawEvents); err != nil { + return fmt.Errorf("failed to store events: %w", err) + } -func (realClock) Now() time.Time { - return time.Now() + return nil } diff --git a/openmeter/sink/storage_test.go b/openmeter/sink/storage_test.go deleted file mode 100644 index 16aacd5bf..000000000 --- a/openmeter/sink/storage_test.go +++ /dev/null @@ -1,79 +0,0 @@ -package sink_test - -import ( - "errors" - "testing" - "time" - - "github.com/stretchr/testify/assert" - - "github.com/openmeterio/openmeter/openmeter/ingest/kafkaingest/serializer" - "github.com/openmeterio/openmeter/openmeter/sink" - sinkmodels "github.com/openmeterio/openmeter/openmeter/sink/models" -) - -func TestInsertEventsQuery(t *testing.T) { - now := time.Now() - - query := sink.InsertEventsQuery{ - Clock: mockClock{now: now}, - Database: "database", - Messages: []sinkmodels.SinkMessage{ - { - Namespace: "my_namespace", - Serialized: &serializer.CloudEventsKafkaPayload{ - Id: "1", - Source: "source", - Subject: "subject-1", - Time: now.UnixMilli(), - Type: "api-calls", - Data: `{"duration_ms": 100, "method": "GET", "path": "/api/v1"}`, - }, - }, - { - Namespace: "my_namespace", - Serialized: &serializer.CloudEventsKafkaPayload{ - Id: "2", - Source: "source", - Subject: "subject-2", - Time: now.UnixMilli(), - Type: "api-calls", - Data: `{"duration_ms": 80, "method": "GET", "path": "/api/v1"}`, - }, - }, - { - Namespace: "my_namespace", - Status: sinkmodels.ProcessingStatus{ - State: sinkmodels.INVALID, - Error: errors.New("event data value cannot be parsed as float64: not a number"), - }, - Serialized: &serializer.CloudEventsKafkaPayload{ - Id: "3", - Source: "source", - Subject: "subject-2", - Time: 
now.UnixMilli(), - Type: "api-calls", - Data: `{"duration_ms": "foo", "method": "GET", "path": "/api/v1"}`, - }, - }, - }, - } - - sql, args, err := query.ToSQL() - assert.NoError(t, err) - - assert.Equal(t, []interface{}{ - "my_namespace", "", "1", "api-calls", "source", "subject-1", now.UnixMilli(), `{"duration_ms": 100, "method": "GET", "path": "/api/v1"}`, now, now, - "my_namespace", "", "2", "api-calls", "source", "subject-2", now.UnixMilli(), `{"duration_ms": 80, "method": "GET", "path": "/api/v1"}`, now, now, - "my_namespace", "event data value cannot be parsed as float64: not a number", "3", "api-calls", "source", "subject-2", now.UnixMilli(), `{"duration_ms": "foo", "method": "GET", "path": "/api/v1"}`, now, now, - }, args) - assert.Equal(t, `INSERT INTO database.om_events (namespace, validation_error, id, type, source, subject, time, data, ingested_at, stored_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, sql) -} - -type mockClock struct { - now time.Time -} - -func (m mockClock) Now() time.Time { - return m.now -} diff --git a/openmeter/streaming/clickhouse/materialized_view/connector.go b/openmeter/streaming/clickhouse/materialized_view/connector.go new file mode 100644 index 000000000..9bf3be3d7 --- /dev/null +++ b/openmeter/streaming/clickhouse/materialized_view/connector.go @@ -0,0 +1,407 @@ +package materialized_view + +import ( + "context" + "fmt" + "log/slog" + "strings" + "time" + + "github.com/ClickHouse/clickhouse-go/v2" + + "github.com/openmeterio/openmeter/api" + "github.com/openmeterio/openmeter/openmeter/meter" + "github.com/openmeterio/openmeter/openmeter/streaming" + raw_events "github.com/openmeterio/openmeter/openmeter/streaming/clickhouse/raw_events" + "github.com/openmeterio/openmeter/pkg/models" +) + +var _ streaming.Connector = (*Connector)(nil) + +// Connector implements `ingest.Connector“ and `namespace.Handler interfaces. 
+type Connector struct { + config ConnectorConfig + rawEventConnector *raw_events.Connector +} + +type ConnectorConfig struct { + Logger *slog.Logger + ClickHouse clickhouse.Conn + Database string + EventsTableName string + Meters meter.Repository + CreateOrReplaceMeter bool + PopulateMeter bool + AsyncInsert bool + AsyncInsertWait bool + InsertQuerySettings map[string]string + QueryRawEvents bool +} + +func (c ConnectorConfig) Validate() error { + if c.Logger == nil { + return fmt.Errorf("logger is required") + } + + if c.ClickHouse == nil { + return fmt.Errorf("clickhouse connection is required") + } + + if c.Database == "" { + return fmt.Errorf("database is required") + } + + if c.EventsTableName == "" { + return fmt.Errorf("events table name is required") + } + + if c.Meters == nil { + return fmt.Errorf("meters repository is required") + } + + return nil +} + +func NewConnector(ctx context.Context, config ConnectorConfig) (*Connector, error) { + if err := config.Validate(); err != nil { + return nil, fmt.Errorf("validate config: %w", err) + } + + rawEventConnector, err := raw_events.NewConnector(ctx, raw_events.ConnectorConfig{ + Logger: config.Logger, + ClickHouse: config.ClickHouse, + Database: config.Database, + EventsTableName: config.EventsTableName, + AsyncInsert: config.AsyncInsert, + AsyncInsertWait: config.AsyncInsertWait, + InsertQuerySettings: config.InsertQuerySettings, + }) + if err != nil { + return nil, fmt.Errorf("create raw event connector: %w", err) + } + + connector := &Connector{ + config: config, + rawEventConnector: rawEventConnector, + } + + return connector, nil +} + +func (c *Connector) CreateNamespace(ctx context.Context, namespace string) error { + return nil +} + +func (c *Connector) DeleteNamespace(ctx context.Context, namespace string) error { + err := c.deleteNamespace(ctx, namespace) + if err != nil { + return fmt.Errorf("delete namespace in clickhouse: %w", err) + } + return nil +} + +func (c *Connector) BatchInsert(ctx 
context.Context, rawEvents []streaming.RawEvent) error { + return c.rawEventConnector.BatchInsert(ctx, rawEvents) +} + +func (c *Connector) CountEvents(ctx context.Context, namespace string, params streaming.CountEventsParams) ([]streaming.CountEventRow, error) { + return c.rawEventConnector.CountEvents(ctx, namespace, params) +} + +func (c *Connector) ListEvents(ctx context.Context, namespace string, params streaming.ListEventsParams) ([]api.IngestedEvent, error) { + return c.rawEventConnector.ListEvents(ctx, namespace, params) +} + +func (c *Connector) CreateMeter(ctx context.Context, namespace string, meter models.Meter) error { + if namespace == "" { + return fmt.Errorf("namespace is required") + } + if meter.Slug == "" { + return fmt.Errorf("meter is required") + } + + err := c.createMeterView(ctx, namespace, meter) + if err != nil { + return fmt.Errorf("init: %w", err) + } + + return nil +} + +func (c *Connector) DeleteMeter(ctx context.Context, namespace string, meter models.Meter) error { + if namespace == "" { + return fmt.Errorf("namespace is required") + } + if meter.Slug == "" { + return fmt.Errorf("meter is required") + } + + err := c.deleteMeterView(ctx, namespace, meter) + if err != nil { + if _, ok := err.(*models.MeterNotFoundError); ok { + return err + } + + return fmt.Errorf("delete meter view: %w", err) + } + + return nil +} + +func (c *Connector) QueryMeter(ctx context.Context, namespace string, meter models.Meter, params streaming.QueryParams) ([]models.MeterQueryRow, error) { + if namespace == "" { + return nil, fmt.Errorf("namespace is required") + } + if meter.Slug == "" { + return nil, fmt.Errorf("meter is required") + } + + // Query raw events if the flag is set + if c.config.QueryRawEvents { + return c.rawEventConnector.QueryMeter(ctx, namespace, meter, params) + } + + values, err := c.queryMeterView(ctx, namespace, meter, params) + if err != nil { + if _, ok := err.(*models.MeterNotFoundError); ok { + return nil, err + } + + return nil, 
fmt.Errorf("get values: %w", err) + } + + // If the total usage is queried for a single period (no window size), + // replace the window start and end with the period for each row. + // We can still have multiple rows for a single period due to group bys. + if params.WindowSize == nil { + for i := range values { + if params.From != nil { + values[i].WindowStart = *params.From + } + if params.To != nil { + values[i].WindowEnd = *params.To + } + } + } + + return values, nil +} + +func (c *Connector) ListMeterSubjects(ctx context.Context, namespace string, meter models.Meter, params streaming.ListMeterSubjectsParams) ([]string, error) { + if namespace == "" { + return nil, fmt.Errorf("namespace is required") + } + if meter.Slug == "" { + return nil, fmt.Errorf("meter is required") + } + + // Query raw events if the flag is set + if c.config.QueryRawEvents { + return c.rawEventConnector.ListMeterSubjects(ctx, namespace, meter, params) + } + + subjects, err := c.listMeterViewSubjects(ctx, namespace, meter, params) + if err != nil { + if _, ok := err.(*models.MeterNotFoundError); ok { + return nil, err + } + + return nil, fmt.Errorf("list meter subjects: %w", err) + } + + return subjects, nil +} + +// DeleteNamespace deletes the namespace related resources from Clickhouse +// We don't delete the events table as it it reused between namespaces +// We only delete the materialized views for the meters +func (c *Connector) deleteNamespace(ctx context.Context, namespace string) error { + // Retrieve meters belonging to the namespace + meters, err := c.config.Meters.ListMeters(ctx, namespace) + if err != nil { + return fmt.Errorf("failed to list meters: %w", err) + } + + for _, meter := range meters { + err := c.deleteMeterView(ctx, namespace, meter) + if err != nil { + // If the meter view does not exist, we ignore the error + if _, ok := err.(*models.MeterNotFoundError); ok { + return nil + } + return fmt.Errorf("delete meter view: %w", err) + } + } + + return nil +} + +func 
(c *Connector) createMeterView(ctx context.Context, namespace string, meter models.Meter) error { + // CreateOrReplace is used to force the recreation of the materialized view + // This is not safe to use in production as it will drop the existing views + if c.config.CreateOrReplaceMeter { + err := c.deleteMeterView(ctx, namespace, meter) + if err != nil { + return fmt.Errorf("drop meter view: %w", err) + } + } + + view := createMeterView{ + Populate: c.config.PopulateMeter, + Database: c.config.Database, + EventsTableName: c.config.EventsTableName, + Namespace: namespace, + MeterSlug: meter.Slug, + Aggregation: meter.Aggregation, + EventType: meter.EventType, + ValueProperty: meter.ValueProperty, + GroupBy: meter.GroupBy, + } + sql, args, err := view.toSQL() + if err != nil { + return fmt.Errorf("create meter view: %w", err) + } + err = c.config.ClickHouse.Exec(ctx, sql, args...) + if err != nil { + return fmt.Errorf("create meter view: %w", err) + } + + return nil +} + +func (c *Connector) deleteMeterView(ctx context.Context, namespace string, meter models.Meter) error { + query := deleteMeterView{ + Database: c.config.Database, + Namespace: namespace, + MeterSlug: meter.Slug, + } + + sql := query.toSQL() + + err := c.config.ClickHouse.Exec(ctx, sql) + if err != nil { + if strings.Contains(err.Error(), "code: 60") { + return &models.MeterNotFoundError{MeterSlug: meter.Slug} + } + + return fmt.Errorf("delete meter view: %w", err) + } + + return nil +} + +func (c *Connector) queryMeterView(ctx context.Context, namespace string, meter models.Meter, params streaming.QueryParams) ([]models.MeterQueryRow, error) { + queryMeter := queryMeterView{ + Database: c.config.Database, + Namespace: namespace, + MeterSlug: meter.Slug, + Aggregation: meter.Aggregation, + From: params.From, + To: params.To, + Subject: params.FilterSubject, + FilterGroupBy: params.FilterGroupBy, + GroupBy: params.GroupBy, + WindowSize: params.WindowSize, + WindowTimeZone: params.WindowTimeZone, + } 
+ + values := []models.MeterQueryRow{} + + sql, args, err := queryMeter.toSQL() + if err != nil { + return values, fmt.Errorf("query meter view: %w", err) + } + + start := time.Now() + rows, err := c.config.ClickHouse.Query(ctx, sql, args...) + if err != nil { + if strings.Contains(err.Error(), "code: 60") { + return nil, &models.MeterNotFoundError{MeterSlug: meter.Slug} + } + + return values, fmt.Errorf("query meter view query: %w", err) + } + elapsed := time.Since(start) + slog.Debug("query meter view", "elapsed", elapsed.String(), "sql", sql, "args", args) + + for rows.Next() { + value := models.MeterQueryRow{ + GroupBy: map[string]*string{}, + } + + args := []interface{}{&value.WindowStart, &value.WindowEnd, &value.Value} + argCount := len(args) + + for range queryMeter.GroupBy { + tmp := "" + args = append(args, &tmp) + } + + if err := rows.Scan(args...); err != nil { + return values, fmt.Errorf("query meter view row scan: %w", err) + } + + for i, key := range queryMeter.GroupBy { + if s, ok := args[i+argCount].(*string); ok { + if key == "subject" { + value.Subject = s + continue + } + + // We treat empty string as nil + if s != nil && *s == "" { + value.GroupBy[key] = nil + } else { + value.GroupBy[key] = s + } + } + } + + // an empty row is returned when there are no values for the meter + if value.WindowStart.IsZero() && value.WindowEnd.IsZero() && value.Value == 0 { + continue + } + + values = append(values, value) + } + rows.Close() + err = rows.Err() + if err != nil { + return values, fmt.Errorf("query meter rows error: %w", err) + } + + return values, nil +} + +func (c *Connector) listMeterViewSubjects(ctx context.Context, namespace string, meter models.Meter, params streaming.ListMeterSubjectsParams) ([]string, error) { + query := listMeterViewSubjects{ + Database: c.config.Database, + Namespace: namespace, + MeterSlug: meter.Slug, + From: params.From, + To: params.To, + } + + sql, args := query.toSQL() + + rows, err := c.config.ClickHouse.Query(ctx, 
sql, args...) + if err != nil { + if strings.Contains(err.Error(), "code: 60") { + return nil, &models.MeterNotFoundError{MeterSlug: meter.Slug} + } + + return nil, fmt.Errorf("list meter view subjects: %w", err) + } + + subjects := []string{} + for rows.Next() { + var subject string + if err = rows.Scan(&subject); err != nil { + return nil, err + } + + subjects = append(subjects, subject) + } + + return subjects, nil +} diff --git a/openmeter/streaming/clickhouse_connector/query.go b/openmeter/streaming/clickhouse/materialized_view/meter_query.go similarity index 75% rename from openmeter/streaming/clickhouse_connector/query.go rename to openmeter/streaming/clickhouse/materialized_view/meter_query.go index 2dbc4f8ee..1618ed73a 100644 --- a/openmeter/streaming/clickhouse_connector/query.go +++ b/openmeter/streaming/clickhouse/materialized_view/meter_query.go @@ -1,4 +1,4 @@ -package clickhouse_connector +package materialized_view import ( _ "embed" @@ -18,120 +18,15 @@ type column struct { Type string } -// Create Events Table -type createEventsTable struct { - Database string -} - -func (d createEventsTable) toSQL() string { - tableName := GetEventsTableName(d.Database) - - sb := sqlbuilder.ClickHouse.NewCreateTableBuilder() - sb.CreateTable(tableName) - sb.IfNotExists() - sb.Define("namespace", "String") - sb.Define("validation_error", "String") - sb.Define("id", "String") - sb.Define("type", "LowCardinality(String)") - sb.Define("subject", "String") - sb.Define("source", "String") - sb.Define("time", "DateTime") - sb.Define("data", "String") - sb.Define("ingested_at", "DateTime") - sb.Define("stored_at", "DateTime") - sb.SQL("ENGINE = MergeTree") - sb.SQL("PARTITION BY toYYYYMM(time)") - sb.SQL("ORDER BY (namespace, time, type, subject)") - - sql, _ := sb.Build() - return sql -} - -type queryEventsTable struct { - Database string - Namespace string - From *time.Time - To *time.Time - IngestedAtFrom *time.Time - IngestedAtTo *time.Time - ID *string - Subject 
*string - HasError *bool - Limit int -} - -func (d queryEventsTable) toSQL() (string, []interface{}) { - tableName := GetEventsTableName(d.Database) - where := []string{} - - query := sqlbuilder.ClickHouse.NewSelectBuilder() - query.Select("id", "type", "subject", "source", "time", "data", "validation_error", "ingested_at", "stored_at") - query.From(tableName) - - where = append(where, query.Equal("namespace", d.Namespace)) - if d.From != nil { - where = append(where, query.GreaterEqualThan("time", d.From.Unix())) - } - if d.To != nil { - where = append(where, query.LessEqualThan("time", d.To.Unix())) - } - if d.IngestedAtFrom != nil { - where = append(where, query.GreaterEqualThan("ingested_at", d.IngestedAtFrom.Unix())) - } - if d.IngestedAtTo != nil { - where = append(where, query.LessEqualThan("ingested_at", d.IngestedAtTo.Unix())) - } - if d.ID != nil { - where = append(where, query.Like("id", fmt.Sprintf("%%%s%%", *d.ID))) - } - if d.Subject != nil { - where = append(where, query.Equal("subject", *d.Subject)) - } - if d.HasError != nil { - if *d.HasError { - where = append(where, "notEmpty(validation_error) = 1") - } else { - where = append(where, "empty(validation_error) = 1") - } - } - query.Where(where...) 
- - query.Desc().OrderBy("time") - query.Limit(d.Limit) - - sql, args := query.Build() - return sql, args -} - -type queryCountEvents struct { - Database string - Namespace string - From time.Time -} - -func (d queryCountEvents) toSQL() (string, []interface{}) { - tableName := GetEventsTableName(d.Database) - - query := sqlbuilder.ClickHouse.NewSelectBuilder() - query.Select("count() as count", "subject", "notEmpty(validation_error) as is_error") - query.From(tableName) - - query.Where(query.Equal("namespace", d.Namespace)) - query.Where(query.GreaterEqualThan("time", d.From.Unix())) - query.GroupBy("subject", "is_error") - - sql, args := query.Build() - return sql, args -} - type createMeterView struct { - Database string - Aggregation models.MeterAggregation - Namespace string - MeterSlug string - EventType string - ValueProperty string - GroupBy map[string]string + Database string + EventsTableName string + Aggregation models.MeterAggregation + Namespace string + MeterSlug string + EventType string + ValueProperty string + GroupBy map[string]string // Populate creates the materialized view with data from the events table // This is not safe to use in production as requires to stop ingestion Populate bool @@ -208,7 +103,7 @@ func (d createMeterView) toSQL() (string, []interface{}, error) { } func (d createMeterView) toSelectSQL() (string, error) { - eventsTableName := GetEventsTableName(d.Database) + eventsTableName := getTableName(d.Database, d.EventsTableName) aggStateFn := "" switch d.Aggregation { @@ -459,12 +354,8 @@ func (d listMeterViewSubjects) toSQL() (string, []interface{}) { return sql, args } -func GetEventsTableName(database string) string { - return fmt.Sprintf("%s.%s%s", sqlbuilder.Escape(database), tablePrefix, EventsTableName) -} - func GetMeterViewName(database string, namespace string, meterSlug string) string { - meterViewName := fmt.Sprintf("%s%s_%s", tablePrefix, namespace, meterSlug) + meterViewName := fmt.Sprintf("om_%s_%s", namespace, 
meterSlug) return fmt.Sprintf("%s.%s", sqlbuilder.Escape(database), sqlbuilder.Escape(meterViewName)) } @@ -473,3 +364,7 @@ func columnFactory(alias string) func(string) string { return fmt.Sprintf("%s.%s", alias, column) } } + +func getTableName(database string, tableName string) string { + return fmt.Sprintf("%s.%s", database, tableName) +} diff --git a/openmeter/streaming/clickhouse_connector/query_test.go b/openmeter/streaming/clickhouse/materialized_view/meter_query_test.go similarity index 67% rename from openmeter/streaming/clickhouse_connector/query_test.go rename to openmeter/streaming/clickhouse/materialized_view/meter_query_test.go index b9cb17743..573fb6f37 100644 --- a/openmeter/streaming/clickhouse_connector/query_test.go +++ b/openmeter/streaming/clickhouse/materialized_view/meter_query_test.go @@ -1,4 +1,4 @@ -package clickhouse_connector +package materialized_view import ( "testing" @@ -9,130 +9,6 @@ import ( "github.com/openmeterio/openmeter/pkg/models" ) -func TestCreateEventsTable(t *testing.T) { - tests := []struct { - data createEventsTable - want string - }{ - { - data: createEventsTable{ - Database: "openmeter", - }, - want: "CREATE TABLE IF NOT EXISTS openmeter.om_events (namespace String, validation_error String, id String, type LowCardinality(String), subject String, source String, time DateTime, data String, ingested_at DateTime, stored_at DateTime) ENGINE = MergeTree PARTITION BY toYYYYMM(time) ORDER BY (namespace, time, type, subject)", - }, - } - - for _, tt := range tests { - tt := tt - t.Run("", func(t *testing.T) { - got := tt.data.toSQL() - assert.Equal(t, tt.want, got) - }) - } -} - -func TestQueryEventsTable(t *testing.T) { - subjectFilter := "customer-1" - idFilter := "event-id-1" - hasErrorTrue := true - hasErrorFalse := false - - tests := []struct { - query queryEventsTable - wantSQL string - wantArgs []interface{} - }{ - { - query: queryEventsTable{ - Database: "openmeter", - Namespace: "my_namespace", - Limit: 100, - }, - 
wantSQL: "SELECT id, type, subject, source, time, data, validation_error, ingested_at, stored_at FROM openmeter.om_events WHERE namespace = ? ORDER BY time DESC LIMIT 100", - wantArgs: []interface{}{"my_namespace"}, - }, - { - query: queryEventsTable{ - Database: "openmeter", - Namespace: "my_namespace", - Limit: 100, - Subject: &subjectFilter, - }, - wantSQL: "SELECT id, type, subject, source, time, data, validation_error, ingested_at, stored_at FROM openmeter.om_events WHERE namespace = ? AND subject = ? ORDER BY time DESC LIMIT 100", - wantArgs: []interface{}{"my_namespace", subjectFilter}, - }, - { - query: queryEventsTable{ - Database: "openmeter", - Namespace: "my_namespace", - Limit: 100, - ID: &idFilter, - }, - wantSQL: "SELECT id, type, subject, source, time, data, validation_error, ingested_at, stored_at FROM openmeter.om_events WHERE namespace = ? AND id LIKE ? ORDER BY time DESC LIMIT 100", - wantArgs: []interface{}{"my_namespace", "%event-id-1%"}, - }, - { - query: queryEventsTable{ - Database: "openmeter", - Namespace: "my_namespace", - Limit: 100, - HasError: &hasErrorTrue, - }, - wantSQL: "SELECT id, type, subject, source, time, data, validation_error, ingested_at, stored_at FROM openmeter.om_events WHERE namespace = ? AND notEmpty(validation_error) = 1 ORDER BY time DESC LIMIT 100", - wantArgs: []interface{}{"my_namespace"}, - }, - { - query: queryEventsTable{ - Database: "openmeter", - Namespace: "my_namespace", - Limit: 100, - HasError: &hasErrorFalse, - }, - wantSQL: "SELECT id, type, subject, source, time, data, validation_error, ingested_at, stored_at FROM openmeter.om_events WHERE namespace = ? 
AND empty(validation_error) = 1 ORDER BY time DESC LIMIT 100", - wantArgs: []interface{}{"my_namespace"}, - }, - } - - for _, tt := range tests { - tt := tt - t.Run("", func(t *testing.T) { - gotSql, gotArgs := tt.query.toSQL() - - assert.Equal(t, tt.wantArgs, gotArgs) - assert.Equal(t, tt.wantSQL, gotSql) - }) - } -} - -func TestQueryEventsCount(t *testing.T) { - from, _ := time.Parse(time.RFC3339, "2023-01-01T00:00:00.001Z") - tests := []struct { - query queryCountEvents - wantSQL string - wantArgs []interface{} - }{ - { - query: queryCountEvents{ - Database: "openmeter", - Namespace: "my_namespace", - From: from, - }, - wantSQL: "SELECT count() as count, subject, notEmpty(validation_error) as is_error FROM openmeter.om_events WHERE namespace = ? AND time >= ? GROUP BY subject, is_error", - wantArgs: []interface{}{"my_namespace", from.Unix()}, - }, - } - - for _, tt := range tests { - tt := tt - t.Run("", func(t *testing.T) { - gotSql, gotArgs := tt.query.toSQL() - - assert.Equal(t, tt.wantArgs, gotArgs) - assert.Equal(t, tt.wantSQL, gotSql) - }) - } -} - func TestCreateMeterView(t *testing.T) { tests := []struct { query createMeterView @@ -141,65 +17,70 @@ func TestCreateMeterView(t *testing.T) { }{ { query: createMeterView{ - Database: "openmeter", - Namespace: "my_namespace", - MeterSlug: "meter1", - Aggregation: models.MeterAggregationSum, - EventType: "myevent", - ValueProperty: "$.duration_ms", - GroupBy: map[string]string{"group1": "$.group1", "group2": "$.group2"}, + Database: "openmeter", + EventsTableName: "om_events", + Namespace: "my_namespace", + MeterSlug: "meter1", + Aggregation: models.MeterAggregationSum, + EventType: "myevent", + ValueProperty: "$.duration_ms", + GroupBy: map[string]string{"group1": "$.group1", "group2": "$.group2"}, }, wantSQL: "CREATE MATERIALIZED VIEW IF NOT EXISTS openmeter.om_my_namespace_meter1 (subject String, windowstart DateTime, windowend DateTime, value AggregateFunction(sum, Float64), group1 String, group2 String) 
ENGINE = AggregatingMergeTree() ORDER BY (windowstart, windowend, subject, group1, group2) AS SELECT subject, tumbleStart(time, toIntervalMinute(1)) AS windowstart, tumbleEnd(time, toIntervalMinute(1)) AS windowend, sumState(cast(JSON_VALUE(data, '$.duration_ms'), 'Float64')) AS value, JSON_VALUE(data, '$.group1') as group1, JSON_VALUE(data, '$.group2') as group2 FROM openmeter.om_events WHERE openmeter.om_events.namespace = 'my_namespace' AND empty(openmeter.om_events.validation_error) = 1 AND openmeter.om_events.type = 'myevent' GROUP BY windowstart, windowend, subject, group1, group2", wantArgs: nil, }, { query: createMeterView{ - Database: "openmeter", - Namespace: "my_namespace", - MeterSlug: "meter1", - Aggregation: models.MeterAggregationAvg, - EventType: "myevent", - ValueProperty: "$.token_count", - GroupBy: map[string]string{}, + Database: "openmeter", + EventsTableName: "om_events", + Namespace: "my_namespace", + MeterSlug: "meter1", + Aggregation: models.MeterAggregationAvg, + EventType: "myevent", + ValueProperty: "$.token_count", + GroupBy: map[string]string{}, }, wantSQL: "CREATE MATERIALIZED VIEW IF NOT EXISTS openmeter.om_my_namespace_meter1 (subject String, windowstart DateTime, windowend DateTime, value AggregateFunction(avg, Float64)) ENGINE = AggregatingMergeTree() ORDER BY (windowstart, windowend, subject) AS SELECT subject, tumbleStart(time, toIntervalMinute(1)) AS windowstart, tumbleEnd(time, toIntervalMinute(1)) AS windowend, avgState(cast(JSON_VALUE(data, '$.token_count'), 'Float64')) AS value FROM openmeter.om_events WHERE openmeter.om_events.namespace = 'my_namespace' AND empty(openmeter.om_events.validation_error) = 1 AND openmeter.om_events.type = 'myevent' GROUP BY windowstart, windowend, subject", wantArgs: nil, }, { query: createMeterView{ - Database: "openmeter", - Namespace: "my_namespace", - MeterSlug: "meter1", - Aggregation: models.MeterAggregationCount, - EventType: "myevent", - ValueProperty: "", - GroupBy: 
map[string]string{}, + Database: "openmeter", + EventsTableName: "om_events", + Namespace: "my_namespace", + MeterSlug: "meter1", + Aggregation: models.MeterAggregationCount, + EventType: "myevent", + ValueProperty: "", + GroupBy: map[string]string{}, }, wantSQL: "CREATE MATERIALIZED VIEW IF NOT EXISTS openmeter.om_my_namespace_meter1 (subject String, windowstart DateTime, windowend DateTime, value AggregateFunction(count, Float64)) ENGINE = AggregatingMergeTree() ORDER BY (windowstart, windowend, subject) AS SELECT subject, tumbleStart(time, toIntervalMinute(1)) AS windowstart, tumbleEnd(time, toIntervalMinute(1)) AS windowend, countState(*) AS value FROM openmeter.om_events WHERE openmeter.om_events.namespace = 'my_namespace' AND empty(openmeter.om_events.validation_error) = 1 AND openmeter.om_events.type = 'myevent' GROUP BY windowstart, windowend, subject", wantArgs: nil, }, { query: createMeterView{ - Database: "openmeter", - Namespace: "my_namespace", - MeterSlug: "meter1", - Aggregation: models.MeterAggregationCount, - EventType: "myevent", - ValueProperty: "", - GroupBy: map[string]string{}, + Database: "openmeter", + EventsTableName: "om_events", + Namespace: "my_namespace", + MeterSlug: "meter1", + Aggregation: models.MeterAggregationCount, + EventType: "myevent", + ValueProperty: "", + GroupBy: map[string]string{}, }, wantSQL: "CREATE MATERIALIZED VIEW IF NOT EXISTS openmeter.om_my_namespace_meter1 (subject String, windowstart DateTime, windowend DateTime, value AggregateFunction(count, Float64)) ENGINE = AggregatingMergeTree() ORDER BY (windowstart, windowend, subject) AS SELECT subject, tumbleStart(time, toIntervalMinute(1)) AS windowstart, tumbleEnd(time, toIntervalMinute(1)) AS windowend, countState(*) AS value FROM openmeter.om_events WHERE openmeter.om_events.namespace = 'my_namespace' AND empty(openmeter.om_events.validation_error) = 1 AND openmeter.om_events.type = 'myevent' GROUP BY windowstart, windowend, subject", wantArgs: nil, }, { query: 
createMeterView{ - Database: "openmeter", - Namespace: "my_namespace", - MeterSlug: "meter1", - Aggregation: models.MeterAggregationUniqueCount, - EventType: "myevent", - ValueProperty: "$.trace_id", - GroupBy: map[string]string{}, + Database: "openmeter", + EventsTableName: "om_events", + Namespace: "my_namespace", + MeterSlug: "meter1", + Aggregation: models.MeterAggregationUniqueCount, + EventType: "myevent", + ValueProperty: "$.trace_id", + GroupBy: map[string]string{}, }, wantSQL: "CREATE MATERIALIZED VIEW IF NOT EXISTS openmeter.om_my_namespace_meter1 (subject String, windowstart DateTime, windowend DateTime, value AggregateFunction(uniq, String)) ENGINE = AggregatingMergeTree() ORDER BY (windowstart, windowend, subject) AS SELECT subject, tumbleStart(time, toIntervalMinute(1)) AS windowstart, tumbleEnd(time, toIntervalMinute(1)) AS windowend, uniqState(JSON_VALUE(data, '$.trace_id')) AS value FROM openmeter.om_events WHERE openmeter.om_events.namespace = 'my_namespace' AND empty(openmeter.om_events.validation_error) = 1 AND openmeter.om_events.type = 'myevent' GROUP BY windowstart, windowend, subject", wantArgs: nil, @@ -481,53 +362,3 @@ func TestListMeterViewSubjects(t *testing.T) { }) } } - -func TestQueryEvents(t *testing.T) { - fromTime, _ := time.Parse(time.RFC3339, "2023-01-01T00:00:00Z") - toTime, _ := time.Parse(time.RFC3339, "2023-01-02T00:00:00Z") - - tests := []struct { - query queryEventsTable - wantSQL string - wantArgs []interface{} - }{ - { - query: queryEventsTable{ - Database: "openmeter", - Namespace: "my_namespace", - From: &fromTime, - To: &toTime, - Limit: 10, - }, - wantSQL: "SELECT id, type, subject, source, time, data, validation_error, ingested_at, stored_at FROM openmeter.om_events WHERE namespace = ? AND time >= ? AND time <= ? 
ORDER BY time DESC LIMIT 10", - wantArgs: []interface{}{"my_namespace", fromTime.Unix(), toTime.Unix()}, - }, - { - query: queryEventsTable{ - Database: "openmeter", - Namespace: "my_namespace", - From: &fromTime, - Limit: 10, - }, - wantSQL: "SELECT id, type, subject, source, time, data, validation_error, ingested_at, stored_at FROM openmeter.om_events WHERE namespace = ? AND time >= ? ORDER BY time DESC LIMIT 10", - wantArgs: []interface{}{"my_namespace", fromTime.Unix()}, - }, - { - query: queryEventsTable{ - Database: "openmeter", - Namespace: "my_namespace", - To: &toTime, - Limit: 10, - }, - wantSQL: "SELECT id, type, subject, source, time, data, validation_error, ingested_at, stored_at FROM openmeter.om_events WHERE namespace = ? AND time <= ? ORDER BY time DESC LIMIT 10", - wantArgs: []interface{}{"my_namespace", toTime.Unix()}, - }, - } - - for _, tt := range tests { - gotSql, gotArgs := tt.query.toSQL() - - assert.Equal(t, tt.wantSQL, gotSql) - assert.Equal(t, tt.wantArgs, gotArgs) - } -} diff --git a/openmeter/streaming/clickhouse/raw_events/connector.go b/openmeter/streaming/clickhouse/raw_events/connector.go new file mode 100644 index 000000000..0127cb601 --- /dev/null +++ b/openmeter/streaming/clickhouse/raw_events/connector.go @@ -0,0 +1,453 @@ +package raw_events + +import ( + "context" + "encoding/json" + "fmt" + "log/slog" + "strings" + "time" + + "github.com/ClickHouse/clickhouse-go/v2" + "github.com/cloudevents/sdk-go/v2/event" + "github.com/shopspring/decimal" + + "github.com/openmeterio/openmeter/api" + "github.com/openmeterio/openmeter/openmeter/streaming" + "github.com/openmeterio/openmeter/pkg/models" +) + +var _ streaming.Connector = (*Connector)(nil) + +// Connector implements `ingest.Connector“ and `namespace.Handler interfaces. 
+type Connector struct { + config ConnectorConfig +} + +type ConnectorConfig struct { + Logger *slog.Logger + ClickHouse clickhouse.Conn + Database string + EventsTableName string + AsyncInsert bool + AsyncInsertWait bool + InsertQuerySettings map[string]string +} + +func (c ConnectorConfig) Validate() error { + if c.Logger == nil { + return fmt.Errorf("logger is required") + } + + if c.ClickHouse == nil { + return fmt.Errorf("clickhouse connection is required") + } + + if c.Database == "" { + return fmt.Errorf("database is required") + } + + if c.EventsTableName == "" { + return fmt.Errorf("events table is required") + } + + return nil +} + +func NewConnector(ctx context.Context, config ConnectorConfig) (*Connector, error) { + if err := config.Validate(); err != nil { + return nil, fmt.Errorf("validate config: %w", err) + } + + connector := &Connector{ + config: config, + } + + err := connector.createEventsTable(ctx) + if err != nil { + return nil, fmt.Errorf("create events table in clickhouse: %w", err) + } + + return connector, nil +} + +func (c *Connector) ListEvents(ctx context.Context, namespace string, params streaming.ListEventsParams) ([]api.IngestedEvent, error) { + if namespace == "" { + return nil, fmt.Errorf("namespace is required") + } + + events, err := c.queryEventsTable(ctx, namespace, params) + if err != nil { + if _, ok := err.(*models.NamespaceNotFoundError); ok { + return nil, err + } + + return nil, fmt.Errorf("query events: %w", err) + } + + return events, nil +} + +func (c *Connector) CreateMeter(ctx context.Context, namespace string, meter models.Meter) error { + // Do nothing + return nil +} + +func (c *Connector) DeleteMeter(ctx context.Context, namespace string, meter models.Meter) error { + // Do nothing + return nil +} + +func (c *Connector) QueryMeter(ctx context.Context, namespace string, meter models.Meter, params streaming.QueryParams) ([]models.MeterQueryRow, error) { + if namespace == "" { + return nil, fmt.Errorf("namespace is 
required") + } + + values, err := c.queryMeter(ctx, namespace, meter, params) + if err != nil { + if _, ok := err.(*models.MeterNotFoundError); ok { + return nil, err + } + + return nil, fmt.Errorf("get values: %w", err) + } + + // If the total usage is queried for a single period (no window size), + // replace the window start and end with the period for each row. + // We can still have multiple rows for a single period due to group bys. + if params.WindowSize == nil { + for i := range values { + if params.From != nil { + values[i].WindowStart = *params.From + } + if params.To != nil { + values[i].WindowEnd = *params.To + } + } + } + + return values, nil +} + +func (c *Connector) ListMeterSubjects(ctx context.Context, namespace string, meter models.Meter, params streaming.ListMeterSubjectsParams) ([]string, error) { + if namespace == "" { + return nil, fmt.Errorf("namespace is required") + } + if meter.Slug == "" { + return nil, fmt.Errorf("meter is required") + } + + subjects, err := c.listMeterViewSubjects(ctx, namespace, meter, params.From, params.To) + if err != nil { + if _, ok := err.(*models.MeterNotFoundError); ok { + return nil, err + } + + return nil, fmt.Errorf("list meter subjects: %w", err) + } + + return subjects, nil +} + +func (c *Connector) CreateNamespace(ctx context.Context, namespace string) error { + return nil +} + +func (c *Connector) DeleteNamespace(ctx context.Context, namespace string) error { + // We don't delete the event tables as it it reused between namespaces + return nil +} + +func (c *Connector) CountEvents(ctx context.Context, namespace string, params streaming.CountEventsParams) ([]streaming.CountEventRow, error) { + if namespace == "" { + return nil, fmt.Errorf("namespace is required") + } + + rows, err := c.queryCountEvents(ctx, namespace, params) + if err != nil { + if _, ok := err.(*models.NamespaceNotFoundError); ok { + return nil, err + } + + return nil, fmt.Errorf("query count events: %w", err) + } + + return rows, nil +} 
+ +func (c *Connector) BatchInsert(ctx context.Context, rawEvents []streaming.RawEvent) error { + var err error + + // Insert raw events + query := InsertEventsQuery{ + Database: c.config.Database, + EventsTableName: c.config.EventsTableName, + Events: rawEvents, + QuerySettings: c.config.InsertQuerySettings, + } + sql, args := query.ToSQL() + + // By default, ClickHouse is writing data synchronously. + // See https://clickhouse.com/docs/en/cloud/bestpractices/asynchronous-inserts + if c.config.AsyncInsert { + // With the `wait_for_async_insert` setting, you can configure + // if you want an insert statement to return with an acknowledgment + // either immediately after the data got inserted into the buffer. + err = c.config.ClickHouse.AsyncInsert(ctx, sql, c.config.AsyncInsertWait, args...) + } else { + err = c.config.ClickHouse.Exec(ctx, sql, args...) + } + + if err != nil { + return fmt.Errorf("failed to batch insert raw events: %w", err) + } + + return nil +} + +func (c *Connector) createEventsTable(ctx context.Context) error { + table := createEventsTable{ + Database: c.config.Database, + EventsTableName: c.config.EventsTableName, + } + + err := c.config.ClickHouse.Exec(ctx, table.toSQL()) + if err != nil { + return fmt.Errorf("create events table: %w", err) + } + + return nil +} + +func (c *Connector) queryEventsTable(ctx context.Context, namespace string, params streaming.ListEventsParams) ([]api.IngestedEvent, error) { + table := queryEventsTable{ + Database: c.config.Database, + EventsTableName: c.config.EventsTableName, + Namespace: namespace, + From: params.From, + To: params.To, + IngestedAtFrom: params.IngestedAtFrom, + IngestedAtTo: params.IngestedAtTo, + ID: params.ID, + Subject: params.Subject, + HasError: params.HasError, + Limit: params.Limit, + } + + sql, args := table.toSQL() + + rows, err := c.config.ClickHouse.Query(ctx, sql, args...) 
+ if err != nil { + if strings.Contains(err.Error(), "code: 60") { + return nil, &models.NamespaceNotFoundError{Namespace: namespace} + } + + return nil, fmt.Errorf("query events table query: %w", err) + } + + events := []api.IngestedEvent{} + + for rows.Next() { + var id string + var eventType string + var subject string + var source string + var eventTime time.Time + var dataStr string + var validationError string + var ingestedAt time.Time + var storedAt time.Time + + if err = rows.Scan(&id, &eventType, &subject, &source, &eventTime, &dataStr, &validationError, &ingestedAt, &storedAt); err != nil { + return nil, err + } + + // Parse data + var data interface{} + err := json.Unmarshal([]byte(dataStr), &data) + if err != nil { + return nil, fmt.Errorf("query events parse data: %w", err) + } + + event := event.New() + event.SetID(id) + event.SetType(eventType) + event.SetSubject(subject) + event.SetSource(source) + event.SetTime(eventTime) + err = event.SetData("application/json", data) + if err != nil { + return nil, fmt.Errorf("query events set data: %w", err) + } + + ingestedEvent := api.IngestedEvent{ + Event: event, + } + + if validationError != "" { + ingestedEvent.ValidationError = &validationError + } + + ingestedEvent.IngestedAt = ingestedAt + ingestedEvent.StoredAt = storedAt + + events = append(events, ingestedEvent) + } + + return events, nil +} + +func (c *Connector) queryCountEvents(ctx context.Context, namespace string, params streaming.CountEventsParams) ([]streaming.CountEventRow, error) { + table := queryCountEvents{ + Database: c.config.Database, + EventsTableName: c.config.EventsTableName, + Namespace: namespace, + From: params.From, + } + + sql, args := table.toSQL() + + rows, err := c.config.ClickHouse.Query(ctx, sql, args...) 
+ if err != nil { + if strings.Contains(err.Error(), "code: 60") { + return nil, &models.NamespaceNotFoundError{Namespace: namespace} + } + + return nil, fmt.Errorf("query events count query: %w", err) + } + + results := []streaming.CountEventRow{} + + for rows.Next() { + result := streaming.CountEventRow{} + + if err = rows.Scan(&result.Count, &result.Subject, &result.IsError); err != nil { + return nil, err + } + + results = append(results, result) + } + + return results, nil +} + +func (c *Connector) queryMeter(ctx context.Context, namespace string, meter models.Meter, params streaming.QueryParams) ([]models.MeterQueryRow, error) { + queryMeter := queryMeter{ + Database: c.config.Database, + EventsTableName: c.config.EventsTableName, + Namespace: namespace, + Meter: meter, + From: params.From, + To: params.To, + Subject: params.FilterSubject, + FilterGroupBy: params.FilterGroupBy, + GroupBy: params.GroupBy, + WindowSize: params.WindowSize, + WindowTimeZone: params.WindowTimeZone, + } + + values := []models.MeterQueryRow{} + + sql, args, err := queryMeter.toSQL() + if err != nil { + return values, fmt.Errorf("query meter view: %w", err) + } + + start := time.Now() + rows, err := c.config.ClickHouse.Query(ctx, sql, args...) 
+ if err != nil { + if strings.Contains(err.Error(), "code: 60") { + return nil, &models.MeterNotFoundError{MeterSlug: meter.Slug} + } + + return values, fmt.Errorf("query meter view query: %w", err) + } + elapsed := time.Since(start) + slog.Debug("query meter view", "elapsed", elapsed.String(), "sql", sql, "args", args) + + for rows.Next() { + row := models.MeterQueryRow{ + GroupBy: map[string]*string{}, + } + + var value decimal.Decimal + args := []interface{}{&row.WindowStart, &row.WindowEnd, &value} + argCount := len(args) + + for range queryMeter.GroupBy { + tmp := "" + args = append(args, &tmp) + } + + if err := rows.Scan(args...); err != nil { + return values, fmt.Errorf("query meter view row scan: %w", err) + } + + // TODO: should we use decima all the way? + row.Value, _ = value.Float64() + + for i, key := range queryMeter.GroupBy { + if s, ok := args[i+argCount].(*string); ok { + if key == "subject" { + row.Subject = s + continue + } + + // We treat empty string as nil + if s != nil && *s == "" { + row.GroupBy[key] = nil + } else { + row.GroupBy[key] = s + } + } + } + + // an empty row is returned when there are no values for the meter + if row.WindowStart.IsZero() && row.WindowEnd.IsZero() && row.Value == 0 { + continue + } + + values = append(values, row) + } + rows.Close() + err = rows.Err() + if err != nil { + return values, fmt.Errorf("query meter rows error: %w", err) + } + + return values, nil +} + +func (c *Connector) listMeterViewSubjects(ctx context.Context, namespace string, meter models.Meter, from *time.Time, to *time.Time) ([]string, error) { + query := listMeterSubjectsQuery{ + Database: c.config.Database, + EventsTableName: c.config.EventsTableName, + Namespace: namespace, + Meter: meter, + From: from, + To: to, + } + + sql, args := query.toSQL() + + rows, err := c.config.ClickHouse.Query(ctx, sql, args...) 
+ if err != nil { + if strings.Contains(err.Error(), "code: 60") { + return nil, &models.MeterNotFoundError{MeterSlug: meter.Slug} + } + + return nil, fmt.Errorf("list meter view subjects: %w", err) + } + + subjects := []string{} + for rows.Next() { + var subject string + if err = rows.Scan(&subject); err != nil { + return nil, err + } + + subjects = append(subjects, subject) + } + + return subjects, nil +} diff --git a/openmeter/streaming/clickhouse/raw_events/event_query.go b/openmeter/streaming/clickhouse/raw_events/event_query.go new file mode 100644 index 000000000..afde09936 --- /dev/null +++ b/openmeter/streaming/clickhouse/raw_events/event_query.go @@ -0,0 +1,176 @@ +package raw_events + +import ( + _ "embed" + "fmt" + "strings" + "time" + + "github.com/huandu/go-sqlbuilder" + + "github.com/openmeterio/openmeter/openmeter/streaming" +) + +// Create Events Table +type createEventsTable struct { + Database string + EventsTableName string +} + +func (d createEventsTable) toSQL() string { + tableName := getTableName(d.Database, d.EventsTableName) + + sb := sqlbuilder.ClickHouse.NewCreateTableBuilder() + sb.CreateTable(tableName) + sb.IfNotExists() + sb.Define("namespace", "String") + sb.Define("validation_error", "String") + sb.Define("id", "String") + sb.Define("type", "LowCardinality(String)") + sb.Define("subject", "String") + sb.Define("source", "String") + sb.Define("time", "DateTime") + sb.Define("data", "String") + sb.Define("ingested_at", "DateTime") + sb.Define("stored_at", "DateTime") + sb.SQL("ENGINE = MergeTree") + sb.SQL("PARTITION BY toYYYYMM(time)") + // Lowest cardinality columns we always filter on goes to the most left. + // ClickHouse always picks partition first so we always filter time by month. + // Theoretically we could add toStartOfHour(time) to the order sooner than subject + // but we bet on that a typical namespace has more subjects than hours in a month. 
+ // Subject is an optional filter so it won't always help to reduce number of rows scanned. + // Finally we add time not just to speed up queries but also to keep data on the disk together. + sb.SQL("ORDER BY (namespace, type, subject, toStartOfHour(time))") + + sql, _ := sb.Build() + return sql +} + +// Query Events Table +type queryEventsTable struct { + Database string + EventsTableName string + Namespace string + From *time.Time + To *time.Time + IngestedAtFrom *time.Time + IngestedAtTo *time.Time + ID *string + Subject *string + HasError *bool + Limit int +} + +func (d queryEventsTable) toSQL() (string, []interface{}) { + tableName := getTableName(d.Database, d.EventsTableName) + where := []string{} + + query := sqlbuilder.ClickHouse.NewSelectBuilder() + query.Select("id", "type", "subject", "source", "time", "data", "validation_error", "ingested_at", "stored_at") + query.From(tableName) + + where = append(where, query.Equal("namespace", d.Namespace)) + if d.From != nil { + where = append(where, query.GreaterEqualThan("time", d.From.Unix())) + } + if d.To != nil { + where = append(where, query.LessEqualThan("time", d.To.Unix())) + } + if d.IngestedAtFrom != nil { + where = append(where, query.GreaterEqualThan("ingested_at", d.IngestedAtFrom.Unix())) + } + if d.IngestedAtTo != nil { + where = append(where, query.LessEqualThan("ingested_at", d.IngestedAtTo.Unix())) + } + if d.ID != nil { + where = append(where, query.Like("id", fmt.Sprintf("%%%s%%", *d.ID))) + } + if d.Subject != nil { + where = append(where, query.Equal("subject", *d.Subject)) + } + if d.HasError != nil { + if *d.HasError { + where = append(where, "notEmpty(validation_error) = 1") + } else { + where = append(where, "empty(validation_error) = 1") + } + } + query.Where(where...) 
+ + query.Desc().OrderBy("time") + query.Limit(d.Limit) + + sql, args := query.Build() + return sql, args +} + +type queryCountEvents struct { + Database string + EventsTableName string + Namespace string + From time.Time +} + +func (d queryCountEvents) toSQL() (string, []interface{}) { + tableName := getTableName(d.Database, d.EventsTableName) + + query := sqlbuilder.ClickHouse.NewSelectBuilder() + query.Select("count() as count", "subject", "notEmpty(validation_error) as is_error") + query.From(tableName) + + query.Where(query.Equal("namespace", d.Namespace)) + query.Where(query.GreaterEqualThan("time", d.From.Unix())) + query.GroupBy("subject", "is_error") + + sql, args := query.Build() + return sql, args +} + +// Insert Events Query +type InsertEventsQuery struct { + Database string + EventsTableName string + Events []streaming.RawEvent + QuerySettings map[string]string +} + +func (q InsertEventsQuery) ToSQL() (string, []interface{}) { + tableName := getTableName(q.Database, q.EventsTableName) + + query := sqlbuilder.ClickHouse.NewInsertBuilder() + query.InsertInto(tableName) + query.Cols("namespace", "validation_error", "id", "type", "source", "subject", "time", "data", "ingested_at", "stored_at") + + // Add settings + var settings []string + for key, value := range q.QuerySettings { + settings = append(settings, fmt.Sprintf("%s = %s", key, value)) + } + + if len(settings) > 0 { + query.SQL(fmt.Sprintf("SETTINGS %s", strings.Join(settings, ", "))) + } + + for _, event := range q.Events { + query.Values( + event.Namespace, + event.ValidationError, + event.ID, + event.Type, + event.Source, + event.Subject, + event.Time, + event.Data, + event.IngestedAt, + event.StoredAt, + ) + } + + sql, args := query.Build() + return sql, args +} + +func getTableName(database string, tableName string) string { + return fmt.Sprintf("%s.%s", database, tableName) +} diff --git a/openmeter/streaming/clickhouse/raw_events/event_query_test.go 
b/openmeter/streaming/clickhouse/raw_events/event_query_test.go new file mode 100644 index 000000000..278593f63 --- /dev/null +++ b/openmeter/streaming/clickhouse/raw_events/event_query_test.go @@ -0,0 +1,195 @@ +package raw_events + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" + + "github.com/openmeterio/openmeter/openmeter/streaming" +) + +func TestCreateEventsTable(t *testing.T) { + tests := []struct { + data createEventsTable + want string + }{ + { + data: createEventsTable{ + Database: "openmeter", + EventsTableName: "om_events", + }, + want: "CREATE TABLE IF NOT EXISTS openmeter.om_events (namespace String, validation_error String, id String, type LowCardinality(String), subject String, source String, time DateTime, data String, ingested_at DateTime, stored_at DateTime) ENGINE = MergeTree PARTITION BY toYYYYMM(time) ORDER BY (namespace, type, subject, toStartOfHour(time))", + }, + } + + for _, tt := range tests { + tt := tt + t.Run("", func(t *testing.T) { + got := tt.data.toSQL() + assert.Equal(t, tt.want, got) + }) + } +} + +func TestQueryEventsTable(t *testing.T) { + subjectFilter := "customer-1" + idFilter := "event-id-1" + hasErrorTrue := true + hasErrorFalse := false + + tests := []struct { + query queryEventsTable + wantSQL string + wantArgs []interface{} + }{ + { + query: queryEventsTable{ + Database: "openmeter", + EventsTableName: "om_events", + Namespace: "my_namespace", + Limit: 100, + }, + wantSQL: "SELECT id, type, subject, source, time, data, validation_error, ingested_at, stored_at FROM openmeter.om_events WHERE namespace = ? ORDER BY time DESC LIMIT 100", + wantArgs: []interface{}{"my_namespace"}, + }, + { + query: queryEventsTable{ + Database: "openmeter", + EventsTableName: "om_events", + Namespace: "my_namespace", + Limit: 100, + Subject: &subjectFilter, + }, + wantSQL: "SELECT id, type, subject, source, time, data, validation_error, ingested_at, stored_at FROM openmeter.om_events WHERE namespace = ? 
AND subject = ? ORDER BY time DESC LIMIT 100", + wantArgs: []interface{}{"my_namespace", subjectFilter}, + }, + { + query: queryEventsTable{ + Database: "openmeter", + EventsTableName: "om_events", + Namespace: "my_namespace", + Limit: 100, + ID: &idFilter, + }, + wantSQL: "SELECT id, type, subject, source, time, data, validation_error, ingested_at, stored_at FROM openmeter.om_events WHERE namespace = ? AND id LIKE ? ORDER BY time DESC LIMIT 100", + wantArgs: []interface{}{"my_namespace", "%event-id-1%"}, + }, + { + query: queryEventsTable{ + Database: "openmeter", + EventsTableName: "om_events", + Namespace: "my_namespace", + Limit: 100, + HasError: &hasErrorTrue, + }, + wantSQL: "SELECT id, type, subject, source, time, data, validation_error, ingested_at, stored_at FROM openmeter.om_events WHERE namespace = ? AND notEmpty(validation_error) = 1 ORDER BY time DESC LIMIT 100", + wantArgs: []interface{}{"my_namespace"}, + }, + { + query: queryEventsTable{ + Database: "openmeter", + EventsTableName: "om_events", + Namespace: "my_namespace", + Limit: 100, + HasError: &hasErrorFalse, + }, + wantSQL: "SELECT id, type, subject, source, time, data, validation_error, ingested_at, stored_at FROM openmeter.om_events WHERE namespace = ? 
AND empty(validation_error) = 1 ORDER BY time DESC LIMIT 100", + wantArgs: []interface{}{"my_namespace"}, + }, + } + + for _, tt := range tests { + tt := tt + t.Run("", func(t *testing.T) { + gotSql, gotArgs := tt.query.toSQL() + + assert.Equal(t, tt.wantArgs, gotArgs) + assert.Equal(t, tt.wantSQL, gotSql) + }) + } +} + +func TestQueryEventsCount(t *testing.T) { + from, _ := time.Parse(time.RFC3339, "2023-01-01T00:00:00.001Z") + tests := []struct { + query queryCountEvents + wantSQL string + wantArgs []interface{} + }{ + { + query: queryCountEvents{ + Database: "openmeter", + EventsTableName: "om_events", + Namespace: "my_namespace", + From: from, + }, + wantSQL: "SELECT count() as count, subject, notEmpty(validation_error) as is_error FROM openmeter.om_events WHERE namespace = ? AND time >= ? GROUP BY subject, is_error", + wantArgs: []interface{}{"my_namespace", from.Unix()}, + }, + } + + for _, tt := range tests { + tt := tt + t.Run("", func(t *testing.T) { + gotSql, gotArgs := tt.query.toSQL() + + assert.Equal(t, tt.wantArgs, gotArgs) + assert.Equal(t, tt.wantSQL, gotSql) + }) + } +} + +func TestInsertEventsQuery(t *testing.T) { + now := time.Now() + + query := InsertEventsQuery{ + Database: "database", + EventsTableName: "om_events", + Events: []streaming.RawEvent{ + { + Namespace: "my_namespace", + ID: "1", + Source: "source", + Subject: "subject-1", + Time: now, + StoredAt: now, + IngestedAt: now, + Type: "api-calls", + Data: `{"duration_ms": 100, "method": "GET", "path": "/api/v1"}`, + }, + { + Namespace: "my_namespace", + ID: "2", + Source: "source", + Subject: "subject-2", + Time: now, + StoredAt: now, + IngestedAt: now, + Type: "api-calls", + Data: `{"duration_ms": 80, "method": "GET", "path": "/api/v1"}`, + }, + { + Namespace: "my_namespace", + ValidationError: "event data value cannot be parsed as float64: not a number", + ID: "3", + Source: "source", + Subject: "subject-2", + Time: now, + StoredAt: now, + IngestedAt: now, + Type: "api-calls", + Data: 
`{"duration_ms": "foo", "method": "GET", "path": "/api/v1"}`, + }, + }, + } + + sql, args := query.ToSQL() + + assert.Equal(t, []interface{}{ + "my_namespace", "", "1", "api-calls", "source", "subject-1", now, `{"duration_ms": 100, "method": "GET", "path": "/api/v1"}`, now, now, + "my_namespace", "", "2", "api-calls", "source", "subject-2", now, `{"duration_ms": 80, "method": "GET", "path": "/api/v1"}`, now, now, + "my_namespace", "event data value cannot be parsed as float64: not a number", "3", "api-calls", "source", "subject-2", now, `{"duration_ms": "foo", "method": "GET", "path": "/api/v1"}`, now, now, + }, args) + assert.Equal(t, `INSERT INTO database.om_events (namespace, validation_error, id, type, source, subject, time, data, ingested_at, stored_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?), (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, sql) +} diff --git a/openmeter/streaming/clickhouse/raw_events/meter_query.go b/openmeter/streaming/clickhouse/raw_events/meter_query.go new file mode 100644 index 000000000..8ba046530 --- /dev/null +++ b/openmeter/streaming/clickhouse/raw_events/meter_query.go @@ -0,0 +1,237 @@ +package raw_events + +import ( + _ "embed" + "fmt" + "slices" + "sort" + "time" + + "github.com/huandu/go-sqlbuilder" + + "github.com/openmeterio/openmeter/pkg/models" + "github.com/openmeterio/openmeter/pkg/slicesx" +) + +type queryMeter struct { + Database string + EventsTableName string + Namespace string + Meter models.Meter + Subject []string + FilterGroupBy map[string][]string + From *time.Time + To *time.Time + GroupBy []string + WindowSize *models.WindowSize + WindowTimeZone *time.Location +} + +func (d queryMeter) toSQL() (string, []interface{}, error) { + tableName := getTableName(d.Database, d.EventsTableName) + getColumn := columnFactory(d.EventsTableName) + timeColumn := getColumn("time") + + var selectColumns, groupByColumns, where []string + + // Select windows if any + groupByWindowSize := d.WindowSize != nil + + tz 
:= "UTC" + if d.WindowTimeZone != nil { + tz = d.WindowTimeZone.String() + } + + if groupByWindowSize { + switch *d.WindowSize { + case models.WindowSizeMinute: + selectColumns = append( + selectColumns, + fmt.Sprintf("tumbleStart(%s, toIntervalMinute(1), '%s') AS windowstart", timeColumn, tz), + fmt.Sprintf("tumbleEnd(%s, toIntervalMinute(1), '%s') AS windowend", timeColumn, tz), + ) + + case models.WindowSizeHour: + selectColumns = append( + selectColumns, + fmt.Sprintf("tumbleStart(%s, toIntervalHour(1), '%s') AS windowstart", timeColumn, tz), + fmt.Sprintf("tumbleEnd(%s, toIntervalHour(1), '%s') AS windowend", timeColumn, tz), + ) + + case models.WindowSizeDay: + selectColumns = append( + selectColumns, + fmt.Sprintf("tumbleStart(%s, toIntervalDay(1), '%s') AS windowstart", timeColumn, tz), + fmt.Sprintf("tumbleEnd(%s, toIntervalDay(1), '%s') AS windowend", timeColumn, tz), + ) + + default: + return "", nil, fmt.Errorf("invalid window size type: %s", *d.WindowSize) + } + + groupByColumns = append(groupByColumns, "windowstart", "windowend") + } else { + // TODO: remove this when we don't round to the nearest minute anymore + // We round them to the nearest minute to ensure the result is the same as with + // streaming connector using materialized views with per minute windows + selectColumn := fmt.Sprintf("tumbleStart(min(%s), toIntervalMinute(1)) AS windowstart, tumbleEnd(max(%s), toIntervalMinute(1)) AS windowend", timeColumn, timeColumn) + selectColumns = append(selectColumns, selectColumn) + } + + // Select Value + sqlAggregation := "" + switch d.Meter.Aggregation { + case models.MeterAggregationSum: + sqlAggregation = "sum" + case models.MeterAggregationAvg: + sqlAggregation = "avg" + case models.MeterAggregationMin: + sqlAggregation = "min" + case models.MeterAggregationMax: + sqlAggregation = "max" + case models.MeterAggregationUniqueCount: + sqlAggregation = "uniq" + case models.MeterAggregationCount: + sqlAggregation = "count" + default: + return "", 
[]interface{}{}, fmt.Errorf("invalid aggregation type: %s", d.Meter.Aggregation) + } + + if d.Meter.ValueProperty == "" && d.Meter.Aggregation == models.MeterAggregationCount { + selectColumns = append(selectColumns, fmt.Sprintf("%s(*) AS value", sqlAggregation)) + } else if d.Meter.Aggregation == models.MeterAggregationUniqueCount { + selectColumns = append(selectColumns, fmt.Sprintf("%s(JSON_VALUE(%s, '%s')) AS value", sqlAggregation, getColumn("data"), sqlbuilder.Escape(d.Meter.ValueProperty))) + } else { + selectColumns = append(selectColumns, fmt.Sprintf("%s(cast(JSON_VALUE(%s, '%s'), 'Float64')) AS value", sqlAggregation, getColumn("data"), sqlbuilder.Escape(d.Meter.ValueProperty))) + } + + groupBys := make([]string, 0, len(d.GroupBy)) + + for _, groupBy := range d.GroupBy { + if groupBy == "subject" { + selectColumns = append(selectColumns, getColumn("subject")) + groupByColumns = append(groupByColumns, "subject") + continue + } + + groupBys = append(groupBys, groupBy) + } + + // Select Group By + slices.Sort(groupBys) + + for _, groupByKey := range groupBys { + groupByColumn := sqlbuilder.Escape(groupByKey) + groupByJSONPath := sqlbuilder.Escape(d.Meter.GroupBy[groupByKey]) + selectColumn := fmt.Sprintf("JSON_VALUE(%s, '%s') as %s", getColumn("data"), groupByJSONPath, groupByColumn) + + selectColumns = append(selectColumns, selectColumn) + groupByColumns = append(groupByColumns, groupByColumn) + } + + query := sqlbuilder.ClickHouse.NewSelectBuilder() + query.Select(selectColumns...) 
+ query.From(tableName) + query.Where(query.Equal(getColumn("namespace"), d.Namespace)) + query.Where(query.Equal(getColumn("type"), d.Meter.EventType)) + + if len(d.Subject) > 0 { + mapFunc := func(subject string) string { + return query.Equal(getColumn("subject"), subject) + } + + where = append(where, query.Or(slicesx.Map(d.Subject, mapFunc)...)) + } + + if len(d.FilterGroupBy) > 0 { + // We sort the group by s to ensure the query is deterministic + groupByKeys := make([]string, 0, len(d.FilterGroupBy)) + for k := range d.FilterGroupBy { + groupByKeys = append(groupByKeys, k) + } + sort.Strings(groupByKeys) + + for _, groupByKey := range groupByKeys { + if _, ok := d.Meter.GroupBy[groupByKey]; !ok { + return "", nil, fmt.Errorf("meter does not have group by: %s", groupByKey) + } + + groupByJSONPath := sqlbuilder.Escape(d.Meter.GroupBy[groupByKey]) + + values := d.FilterGroupBy[groupByKey] + if len(values) == 0 { + return "", nil, fmt.Errorf("empty filter for group by: %s", groupByKey) + } + mapFunc := func(value string) string { + column := fmt.Sprintf("JSON_VALUE(%s, '%s')", getColumn("data"), groupByJSONPath) + + // Subject is a special case + if groupByKey == "subject" { + column = "subject" + } + + return fmt.Sprintf("%s = '%s'", column, sqlbuilder.Escape((value))) + } + + where = append(where, query.Or(slicesx.Map(values, mapFunc)...)) + } + } + + if d.From != nil { + where = append(where, query.GreaterEqualThan(getColumn("time"), d.From.Unix())) + } + + if d.To != nil { + where = append(where, query.LessEqualThan(getColumn("time"), d.To.Unix())) + } + + if len(where) > 0 { + query.Where(where...) + } + + query.GroupBy(groupByColumns...) 
+ + if groupByWindowSize { + query.OrderBy("windowstart") + } + + sql, args := query.Build() + return sql, args, nil +} + +type listMeterSubjectsQuery struct { + Database string + EventsTableName string + Namespace string + Meter models.Meter + From *time.Time + To *time.Time +} + +func (d listMeterSubjectsQuery) toSQL() (string, []interface{}) { + tableName := getTableName(d.Database, d.EventsTableName) + + sb := sqlbuilder.ClickHouse.NewSelectBuilder() + sb.Select("DISTINCT subject") + sb.Where(sb.Equal("namespace", d.Namespace)) + sb.Where(sb.Equal("type", d.Meter.EventType)) + sb.From(tableName) + sb.OrderBy("subject") + + if d.From != nil { + sb.Where(sb.GreaterEqualThan("time", d.From.Unix())) + } + + if d.To != nil { + sb.Where(sb.LessEqualThan("time", d.To.Unix())) + } + + sql, args := sb.Build() + return sql, args +} + +func columnFactory(alias string) func(string) string { + return func(column string) string { + return fmt.Sprintf("%s.%s", alias, column) + } +} diff --git a/openmeter/streaming/clickhouse/raw_events/meter_query_test.go b/openmeter/streaming/clickhouse/raw_events/meter_query_test.go new file mode 100644 index 000000000..3fedc7db5 --- /dev/null +++ b/openmeter/streaming/clickhouse/raw_events/meter_query_test.go @@ -0,0 +1,377 @@ +package raw_events + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" + + "github.com/openmeterio/openmeter/pkg/models" +) + +func TestQueryMeter(t *testing.T) { + subject := "subject1" + from, _ := time.Parse(time.RFC3339, "2023-01-01T00:00:00.001Z") + to, _ := time.Parse(time.RFC3339, "2023-01-02T00:00:00Z") + tz, _ := time.LoadLocation("Asia/Shanghai") + windowSize := models.WindowSizeHour + + tests := []struct { + query queryMeter + wantSQL string + wantArgs []interface{} + }{ + { + query: queryMeter{ + Database: "openmeter", + EventsTableName: "om_events", + Namespace: "my_namespace", + Meter: models.Meter{ + Slug: "meter1", + EventType: "event1", + Aggregation: 
models.MeterAggregationSum, + ValueProperty: "$.value", + GroupBy: map[string]string{ + "group1": "$.group1", + "group2": "$.group2", + }, + }, + Subject: []string{subject}, + From: &from, + To: &to, + GroupBy: []string{"subject", "group1", "group2"}, + WindowSize: &windowSize, + }, + wantSQL: "SELECT tumbleStart(om_events.time, toIntervalHour(1), 'UTC') AS windowstart, tumbleEnd(om_events.time, toIntervalHour(1), 'UTC') AS windowend, sum(cast(JSON_VALUE(om_events.data, '$.value'), 'Float64')) AS value, om_events.subject, JSON_VALUE(om_events.data, '$.group1') as group1, JSON_VALUE(om_events.data, '$.group2') as group2 FROM openmeter.om_events WHERE om_events.namespace = ? AND om_events.type = ? AND (om_events.subject = ?) AND om_events.time >= ? AND om_events.time <= ? GROUP BY windowstart, windowend, subject, group1, group2 ORDER BY windowstart", + wantArgs: []interface{}{"my_namespace", "event1", "subject1", from.Unix(), to.Unix()}, + }, + { // Aggregate all available data + query: queryMeter{ + Database: "openmeter", + EventsTableName: "om_events", + Namespace: "my_namespace", + Meter: models.Meter{ + Slug: "meter1", + EventType: "event1", + Aggregation: models.MeterAggregationSum, + ValueProperty: "$.value", + GroupBy: map[string]string{ + "group1": "$.group1", + "group2": "$.group2", + }, + }, + }, + wantSQL: "SELECT tumbleStart(min(om_events.time), toIntervalMinute(1)) AS windowstart, tumbleEnd(max(om_events.time), toIntervalMinute(1)) AS windowend, sum(cast(JSON_VALUE(om_events.data, '$.value'), 'Float64')) AS value FROM openmeter.om_events WHERE om_events.namespace = ? 
AND om_events.type = ?", + wantArgs: []interface{}{"my_namespace", "event1"}, + }, + { // Aggregate with count aggregation + query: queryMeter{ + Database: "openmeter", + EventsTableName: "om_events", + Namespace: "my_namespace", + Meter: models.Meter{ + Slug: "meter1", + EventType: "event1", + Aggregation: models.MeterAggregationCount, + ValueProperty: "$.value", + GroupBy: map[string]string{ + "group1": "$.group1", + "group2": "$.group2", + }, + }, + }, + wantSQL: "SELECT tumbleStart(min(om_events.time), toIntervalMinute(1)) AS windowstart, tumbleEnd(max(om_events.time), toIntervalMinute(1)) AS windowend, count(cast(JSON_VALUE(om_events.data, '$.value'), 'Float64')) AS value FROM openmeter.om_events WHERE om_events.namespace = ? AND om_events.type = ?", + wantArgs: []interface{}{"my_namespace", "event1"}, + }, + { // Aggregate data from start + query: queryMeter{ + Database: "openmeter", + EventsTableName: "om_events", + Namespace: "my_namespace", + Meter: models.Meter{ + Slug: "meter1", + EventType: "event1", + Aggregation: models.MeterAggregationSum, + ValueProperty: "$.value", + GroupBy: map[string]string{ + "group1": "$.group1", + "group2": "$.group2", + }, + }, + From: &from, + }, + wantSQL: "SELECT tumbleStart(min(om_events.time), toIntervalMinute(1)) AS windowstart, tumbleEnd(max(om_events.time), toIntervalMinute(1)) AS windowend, sum(cast(JSON_VALUE(om_events.data, '$.value'), 'Float64')) AS value FROM openmeter.om_events WHERE om_events.namespace = ? AND om_events.type = ? 
AND om_events.time >= ?", + wantArgs: []interface{}{"my_namespace", "event1", from.Unix()}, + }, + { // Aggregate data between period + query: queryMeter{ + Database: "openmeter", + EventsTableName: "om_events", + Namespace: "my_namespace", + Meter: models.Meter{ + Slug: "meter1", + EventType: "event1", + Aggregation: models.MeterAggregationSum, + ValueProperty: "$.value", + GroupBy: map[string]string{ + "group1": "$.group1", + "group2": "$.group2", + }, + }, + From: &from, + To: &to, + }, + wantSQL: "SELECT tumbleStart(min(om_events.time), toIntervalMinute(1)) AS windowstart, tumbleEnd(max(om_events.time), toIntervalMinute(1)) AS windowend, sum(cast(JSON_VALUE(om_events.data, '$.value'), 'Float64')) AS value FROM openmeter.om_events WHERE om_events.namespace = ? AND om_events.type = ? AND om_events.time >= ? AND om_events.time <= ?", + wantArgs: []interface{}{"my_namespace", "event1", from.Unix(), to.Unix()}, + }, + { // Aggregate data between period, groupped by window size + query: queryMeter{ + Database: "openmeter", + EventsTableName: "om_events", + Namespace: "my_namespace", + Meter: models.Meter{ + Slug: "meter1", + EventType: "event1", + Aggregation: models.MeterAggregationSum, + ValueProperty: "$.value", + GroupBy: map[string]string{ + "group1": "$.group1", + "group2": "$.group2", + }, + }, + From: &from, + To: &to, + WindowSize: &windowSize, + }, + wantSQL: "SELECT tumbleStart(om_events.time, toIntervalHour(1), 'UTC') AS windowstart, tumbleEnd(om_events.time, toIntervalHour(1), 'UTC') AS windowend, sum(cast(JSON_VALUE(om_events.data, '$.value'), 'Float64')) AS value FROM openmeter.om_events WHERE om_events.namespace = ? AND om_events.type = ? AND om_events.time >= ? AND om_events.time <= ? 
GROUP BY windowstart, windowend ORDER BY windowstart", + wantArgs: []interface{}{"my_namespace", "event1", from.Unix(), to.Unix()}, + }, + { // Aggregate data between period in a different timezone, groupped by window size + query: queryMeter{ + Database: "openmeter", + EventsTableName: "om_events", + Namespace: "my_namespace", + Meter: models.Meter{ + Slug: "meter1", + EventType: "event1", + Aggregation: models.MeterAggregationSum, + ValueProperty: "$.value", + GroupBy: map[string]string{ + "group1": "$.group1", + "group2": "$.group2", + }, + }, + From: &from, + To: &to, + WindowSize: &windowSize, + WindowTimeZone: tz, + }, + wantSQL: "SELECT tumbleStart(om_events.time, toIntervalHour(1), 'Asia/Shanghai') AS windowstart, tumbleEnd(om_events.time, toIntervalHour(1), 'Asia/Shanghai') AS windowend, sum(cast(JSON_VALUE(om_events.data, '$.value'), 'Float64')) AS value FROM openmeter.om_events WHERE om_events.namespace = ? AND om_events.type = ? AND om_events.time >= ? AND om_events.time <= ? GROUP BY windowstart, windowend ORDER BY windowstart", + wantArgs: []interface{}{"my_namespace", "event1", from.Unix(), to.Unix()}, + }, + { // Aggregate data for a single subject + query: queryMeter{ + Database: "openmeter", + EventsTableName: "om_events", + Namespace: "my_namespace", + Meter: models.Meter{ + Slug: "meter1", + EventType: "event1", + Aggregation: models.MeterAggregationSum, + ValueProperty: "$.value", + GroupBy: map[string]string{ + "group1": "$.group1", + "group2": "$.group2", + }, + }, + Subject: []string{subject}, + GroupBy: []string{"subject"}, + }, + wantSQL: "SELECT tumbleStart(min(om_events.time), toIntervalMinute(1)) AS windowstart, tumbleEnd(max(om_events.time), toIntervalMinute(1)) AS windowend, sum(cast(JSON_VALUE(om_events.data, '$.value'), 'Float64')) AS value, om_events.subject FROM openmeter.om_events WHERE om_events.namespace = ? AND om_events.type = ? AND (om_events.subject = ?) 
GROUP BY subject", + wantArgs: []interface{}{"my_namespace", "event1", "subject1"}, + }, + { // Aggregate data for a single subject and group by additional fields + query: queryMeter{ + Database: "openmeter", + EventsTableName: "om_events", + Namespace: "my_namespace", + Meter: models.Meter{ + Slug: "meter1", + EventType: "event1", + Aggregation: models.MeterAggregationSum, + ValueProperty: "$.value", + GroupBy: map[string]string{ + "group1": "$.group1", + "group2": "$.group2", + }, + }, + Subject: []string{subject}, + GroupBy: []string{"subject", "group1", "group2"}, + }, + wantSQL: "SELECT tumbleStart(min(om_events.time), toIntervalMinute(1)) AS windowstart, tumbleEnd(max(om_events.time), toIntervalMinute(1)) AS windowend, sum(cast(JSON_VALUE(om_events.data, '$.value'), 'Float64')) AS value, om_events.subject, JSON_VALUE(om_events.data, '$.group1') as group1, JSON_VALUE(om_events.data, '$.group2') as group2 FROM openmeter.om_events WHERE om_events.namespace = ? AND om_events.type = ? AND (om_events.subject = ?) GROUP BY subject, group1, group2", + wantArgs: []interface{}{"my_namespace", "event1", "subject1"}, + }, + { // Aggregate data for a multiple subjects + query: queryMeter{ + Database: "openmeter", + EventsTableName: "om_events", + Namespace: "my_namespace", + Meter: models.Meter{ + Slug: "meter1", + EventType: "event1", + Aggregation: models.MeterAggregationSum, + ValueProperty: "$.value", + GroupBy: map[string]string{ + "group1": "$.group1", + "group2": "$.group2", + }, + }, + Subject: []string{subject, "subject2"}, + GroupBy: []string{"subject"}, + }, + wantSQL: "SELECT tumbleStart(min(om_events.time), toIntervalMinute(1)) AS windowstart, tumbleEnd(max(om_events.time), toIntervalMinute(1)) AS windowend, sum(cast(JSON_VALUE(om_events.data, '$.value'), 'Float64')) AS value, om_events.subject FROM openmeter.om_events WHERE om_events.namespace = ? AND om_events.type = ? AND (om_events.subject = ? OR om_events.subject = ?) 
GROUP BY subject", + wantArgs: []interface{}{"my_namespace", "event1", "subject1", "subject2"}, + }, + { // Aggregate data with filtering for a single group and single value + query: queryMeter{ + Database: "openmeter", + EventsTableName: "om_events", + Namespace: "my_namespace", + Meter: models.Meter{ + Slug: "meter1", + EventType: "event1", + Aggregation: models.MeterAggregationSum, + ValueProperty: "$.value", + GroupBy: map[string]string{ + "g1": "$.group1", + "g2": "$.group2", + }, + }, + FilterGroupBy: map[string][]string{"g1": {"g1v1"}}, + }, + wantSQL: "SELECT tumbleStart(min(om_events.time), toIntervalMinute(1)) AS windowstart, tumbleEnd(max(om_events.time), toIntervalMinute(1)) AS windowend, sum(cast(JSON_VALUE(om_events.data, '$.value'), 'Float64')) AS value FROM openmeter.om_events WHERE om_events.namespace = ? AND om_events.type = ? AND (JSON_VALUE(om_events.data, '$.group1') = 'g1v1')", + wantArgs: []interface{}{"my_namespace", "event1"}, + }, + { // Aggregate data with filtering for a single group and multiple values + query: queryMeter{ + Database: "openmeter", + EventsTableName: "om_events", + Namespace: "my_namespace", + Meter: models.Meter{ + Slug: "meter1", + EventType: "event1", + Aggregation: models.MeterAggregationSum, + ValueProperty: "$.value", + GroupBy: map[string]string{ + "g1": "$.group1", + "g2": "$.group2", + }, + }, + FilterGroupBy: map[string][]string{"g1": {"g1v1", "g1v2"}}, + }, + wantSQL: "SELECT tumbleStart(min(om_events.time), toIntervalMinute(1)) AS windowstart, tumbleEnd(max(om_events.time), toIntervalMinute(1)) AS windowend, sum(cast(JSON_VALUE(om_events.data, '$.value'), 'Float64')) AS value FROM openmeter.om_events WHERE om_events.namespace = ? AND om_events.type = ? 
AND (JSON_VALUE(om_events.data, '$.group1') = 'g1v1' OR JSON_VALUE(om_events.data, '$.group1') = 'g1v2')", + wantArgs: []interface{}{"my_namespace", "event1"}, + }, + { // Aggregate data with filtering for multiple groups and multiple values + query: queryMeter{ + Database: "openmeter", + EventsTableName: "om_events", + Namespace: "my_namespace", + Meter: models.Meter{ + Slug: "meter1", + EventType: "event1", + Aggregation: models.MeterAggregationSum, + ValueProperty: "$.value", + GroupBy: map[string]string{ + "g1": "$.group1", + "g2": "$.group2", + }, + }, + FilterGroupBy: map[string][]string{"g1": {"g1v1", "g1v2"}, "g2": {"g2v1", "g2v2"}}, + }, + wantSQL: "SELECT tumbleStart(min(om_events.time), toIntervalMinute(1)) AS windowstart, tumbleEnd(max(om_events.time), toIntervalMinute(1)) AS windowend, sum(cast(JSON_VALUE(om_events.data, '$.value'), 'Float64')) AS value FROM openmeter.om_events WHERE om_events.namespace = ? AND om_events.type = ? AND (JSON_VALUE(om_events.data, '$.group1') = 'g1v1' OR JSON_VALUE(om_events.data, '$.group1') = 'g1v2') AND (JSON_VALUE(om_events.data, '$.group2') = 'g2v1' OR JSON_VALUE(om_events.data, '$.group2') = 'g2v2')", + wantArgs: []interface{}{"my_namespace", "event1"}, + }, + } + + for _, tt := range tests { + tt := tt + t.Run("", func(t *testing.T) { + gotSql, gotArgs, err := tt.query.toSQL() + if err != nil { + t.Error(err) + return + } + + assert.Equal(t, tt.wantSQL, gotSql) + assert.Equal(t, tt.wantArgs, gotArgs) + }) + } +} + +func TestListMeterSubjects(t *testing.T) { + from, _ := time.Parse(time.RFC3339, "2023-01-01T00:00:00.001Z") + to, _ := time.Parse(time.RFC3339, "2023-01-02T00:00:00Z") + + tests := []struct { + query listMeterSubjectsQuery + wantSQL string + wantArgs []interface{} + }{ + { + query: listMeterSubjectsQuery{ + Database: "openmeter", + EventsTableName: "om_events", + Namespace: "my_namespace", + Meter: models.Meter{ + Slug: "meter1", + EventType: "event1", + Aggregation: models.MeterAggregationSum, + }, + 
}, + wantSQL: "SELECT DISTINCT subject FROM openmeter.om_events WHERE namespace = ? AND type = ? ORDER BY subject", + wantArgs: []interface{}{"my_namespace", "event1"}, + }, + { + query: listMeterSubjectsQuery{ + Database: "openmeter", + EventsTableName: "om_events", + Namespace: "my_namespace", + Meter: models.Meter{ + Slug: "meter1", + EventType: "event1", + Aggregation: models.MeterAggregationSum, + }, + From: &from, + }, + wantSQL: "SELECT DISTINCT subject FROM openmeter.om_events WHERE namespace = ? AND type = ? AND time >= ? ORDER BY subject", + wantArgs: []interface{}{"my_namespace", "event1", from.Unix()}, + }, + { + query: listMeterSubjectsQuery{ + Database: "openmeter", + EventsTableName: "om_events", + Namespace: "my_namespace", + Meter: models.Meter{ + Slug: "meter1", + EventType: "event1", + Aggregation: models.MeterAggregationSum, + }, + From: &from, + To: &to, + }, + wantSQL: "SELECT DISTINCT subject FROM openmeter.om_events WHERE namespace = ? AND type = ? AND time >= ? AND time <= ? 
ORDER BY subject", + wantArgs: []interface{}{"my_namespace", "event1", from.Unix(), to.Unix()}, + }, + } + + for _, tt := range tests { + tt := tt + t.Run("", func(t *testing.T) { + gotSql, gotArgs := tt.query.toSQL() + + assert.Equal(t, tt.wantArgs, gotArgs) + assert.Equal(t, tt.wantSQL, gotSql) + }) + } +} diff --git a/openmeter/streaming/clickhouse_connector/connector.go b/openmeter/streaming/clickhouse_connector/connector.go deleted file mode 100644 index d999b08a5..000000000 --- a/openmeter/streaming/clickhouse_connector/connector.go +++ /dev/null @@ -1,496 +0,0 @@ -package clickhouse_connector - -import ( - "context" - "encoding/json" - "fmt" - "log/slog" - "strings" - "time" - - "github.com/ClickHouse/clickhouse-go/v2" - "github.com/cloudevents/sdk-go/v2/event" - - "github.com/openmeterio/openmeter/api" - "github.com/openmeterio/openmeter/openmeter/meter" - "github.com/openmeterio/openmeter/openmeter/streaming" - "github.com/openmeterio/openmeter/pkg/models" -) - -var ( - tablePrefix = "om_" - EventsTableName = "events" -) - -// ClickhouseConnector implements `ingest.Connector“ and `namespace.Handler interfaces. 
-type ClickhouseConnector struct { - config ClickhouseConnectorConfig -} - -type ClickhouseConnectorConfig struct { - Logger *slog.Logger - ClickHouse clickhouse.Conn - Database string - Meters meter.Repository - CreateOrReplaceMeter bool - PopulateMeter bool -} - -func NewClickhouseConnector(config ClickhouseConnectorConfig) (*ClickhouseConnector, error) { - connector := &ClickhouseConnector{ - config: config, - } - - return connector, nil -} - -func (c *ClickhouseConnector) ListEvents(ctx context.Context, namespace string, params streaming.ListEventsParams) ([]api.IngestedEvent, error) { - if namespace == "" { - return nil, fmt.Errorf("namespace is required") - } - - events, err := c.queryEventsTable(ctx, namespace, params) - if err != nil { - if _, ok := err.(*models.NamespaceNotFoundError); ok { - return nil, err - } - - return nil, fmt.Errorf("query events: %w", err) - } - - return events, nil -} - -func (c *ClickhouseConnector) CreateMeter(ctx context.Context, namespace string, meter *models.Meter) error { - if namespace == "" { - return fmt.Errorf("namespace is required") - } - - err := c.createMeterView(ctx, namespace, meter) - if err != nil { - return fmt.Errorf("init: %w", err) - } - - return nil -} - -func (c *ClickhouseConnector) DeleteMeter(ctx context.Context, namespace string, meterSlug string) error { - if namespace == "" { - return fmt.Errorf("namespace is required") - } - if meterSlug == "" { - return fmt.Errorf("slug is required") - } - - err := c.deleteMeterView(ctx, namespace, meterSlug) - if err != nil { - if _, ok := err.(*models.MeterNotFoundError); ok { - return err - } - - return fmt.Errorf("delete meter view: %w", err) - } - - return nil -} - -func (c *ClickhouseConnector) QueryMeter(ctx context.Context, namespace string, meterSlug string, params *streaming.QueryParams) ([]models.MeterQueryRow, error) { - if namespace == "" { - return nil, fmt.Errorf("namespace is required") - } - - values, err := c.queryMeterView(ctx, namespace, 
meterSlug, params) - if err != nil { - if _, ok := err.(*models.MeterNotFoundError); ok { - return nil, err - } - - return nil, fmt.Errorf("get values: %w", err) - } - - // If the total usage is queried for a single period (no window size), - // replace the window start and end with the period for each row. - // We can still have multiple rows for a single period due to group bys. - if params.WindowSize == nil { - for i := range values { - if params.From != nil { - values[i].WindowStart = *params.From - } - if params.To != nil { - values[i].WindowEnd = *params.To - } - } - } - - return values, nil -} - -func (c *ClickhouseConnector) ListMeterSubjects(ctx context.Context, namespace string, meterSlug string, from *time.Time, to *time.Time) ([]string, error) { - if namespace == "" { - return nil, fmt.Errorf("namespace is required") - } - if meterSlug == "" { - return nil, fmt.Errorf("slug is required") - } - - subjects, err := c.listMeterViewSubjects(ctx, namespace, meterSlug, from, to) - if err != nil { - if _, ok := err.(*models.MeterNotFoundError); ok { - return nil, err - } - - return nil, fmt.Errorf("list meter subjects: %w", err) - } - - return subjects, nil -} - -func (c *ClickhouseConnector) CreateNamespace(ctx context.Context, namespace string) error { - err := c.createEventsTable(ctx) - if err != nil { - return fmt.Errorf("create namespace in clickhouse: %w", err) - } - - return nil -} - -func (c *ClickhouseConnector) DeleteNamespace(ctx context.Context, namespace string) error { - err := c.deleteNamespace(ctx, namespace) - if err != nil { - return fmt.Errorf("delete namespace in clickhouse: %w", err) - } - return nil -} - -// DeleteNamespace deletes the namespace related resources from Clickhouse -// We don't delete the events table as it it reused between namespaces -// We only delete the materialized views for the meters -func (c *ClickhouseConnector) deleteNamespace(ctx context.Context, namespace string) error { - // Retrieve meters belonging to the 
namespace - meters, err := c.config.Meters.ListMeters(ctx, namespace) - if err != nil { - return fmt.Errorf("failed to list meters: %w", err) - } - - for _, meter := range meters { - err := c.deleteMeterView(ctx, namespace, meter.Slug) - if err != nil { - // If the meter view does not exist, we ignore the error - if _, ok := err.(*models.MeterNotFoundError); ok { - return nil - } - return fmt.Errorf("delete meter view: %w", err) - } - } - - return nil -} - -func (c *ClickhouseConnector) CountEvents(ctx context.Context, namespace string, params streaming.CountEventsParams) ([]streaming.CountEventRow, error) { - if namespace == "" { - return nil, fmt.Errorf("namespace is required") - } - - rows, err := c.queryCountEvents(ctx, namespace, params) - if err != nil { - if _, ok := err.(*models.NamespaceNotFoundError); ok { - return nil, err - } - - return nil, fmt.Errorf("query count events: %w", err) - } - - return rows, nil -} - -func (c *ClickhouseConnector) createEventsTable(ctx context.Context) error { - table := createEventsTable{ - Database: c.config.Database, - } - - err := c.config.ClickHouse.Exec(ctx, table.toSQL()) - if err != nil { - return fmt.Errorf("create events table: %w", err) - } - - return nil -} - -func (c *ClickhouseConnector) queryEventsTable(ctx context.Context, namespace string, params streaming.ListEventsParams) ([]api.IngestedEvent, error) { - table := queryEventsTable{ - Database: c.config.Database, - Namespace: namespace, - From: params.From, - To: params.To, - IngestedAtFrom: params.IngestedAtFrom, - IngestedAtTo: params.IngestedAtTo, - ID: params.ID, - Subject: params.Subject, - HasError: params.HasError, - Limit: params.Limit, - } - - sql, args := table.toSQL() - - rows, err := c.config.ClickHouse.Query(ctx, sql, args...) 
- if err != nil { - if strings.Contains(err.Error(), "code: 60") { - return nil, &models.NamespaceNotFoundError{Namespace: namespace} - } - - return nil, fmt.Errorf("query events table query: %w", err) - } - - events := []api.IngestedEvent{} - - for rows.Next() { - var id string - var eventType string - var subject string - var source string - var eventTime time.Time - var dataStr string - var validationError string - var ingestedAt time.Time - var storedAt time.Time - - if err = rows.Scan(&id, &eventType, &subject, &source, &eventTime, &dataStr, &validationError, &ingestedAt, &storedAt); err != nil { - return nil, err - } - - // Parse data - var data interface{} - err := json.Unmarshal([]byte(dataStr), &data) - if err != nil { - return nil, fmt.Errorf("query events parse data: %w", err) - } - - event := event.New() - event.SetID(id) - event.SetType(eventType) - event.SetSubject(subject) - event.SetSource(source) - event.SetTime(eventTime) - err = event.SetData("application/json", data) - if err != nil { - return nil, fmt.Errorf("query events set data: %w", err) - } - - ingestedEvent := api.IngestedEvent{ - Event: event, - } - - if validationError != "" { - ingestedEvent.ValidationError = &validationError - } - - ingestedEvent.IngestedAt = ingestedAt - ingestedEvent.StoredAt = storedAt - - events = append(events, ingestedEvent) - } - - return events, nil -} - -func (c *ClickhouseConnector) queryCountEvents(ctx context.Context, namespace string, params streaming.CountEventsParams) ([]streaming.CountEventRow, error) { - table := queryCountEvents{ - Database: c.config.Database, - Namespace: namespace, - From: params.From, - } - - sql, args := table.toSQL() - - rows, err := c.config.ClickHouse.Query(ctx, sql, args...) 
- if err != nil { - if strings.Contains(err.Error(), "code: 60") { - return nil, &models.NamespaceNotFoundError{Namespace: namespace} - } - - return nil, fmt.Errorf("query events count query: %w", err) - } - - results := []streaming.CountEventRow{} - - for rows.Next() { - result := streaming.CountEventRow{} - - if err = rows.Scan(&result.Count, &result.Subject, &result.IsError); err != nil { - return nil, err - } - - results = append(results, result) - } - - return results, nil -} - -func (c *ClickhouseConnector) createMeterView(ctx context.Context, namespace string, meter *models.Meter) error { - // CreateOrReplace is used to force the recreation of the materialized view - // This is not safe to use in production as it will drop the existing views - if c.config.CreateOrReplaceMeter { - err := c.deleteMeterView(ctx, namespace, meter.Slug) - if err != nil { - return fmt.Errorf("drop meter view: %w", err) - } - } - - view := createMeterView{ - Populate: c.config.PopulateMeter, - Database: c.config.Database, - Namespace: namespace, - MeterSlug: meter.Slug, - Aggregation: meter.Aggregation, - EventType: meter.EventType, - ValueProperty: meter.ValueProperty, - GroupBy: meter.GroupBy, - } - sql, args, err := view.toSQL() - if err != nil { - return fmt.Errorf("create meter view: %w", err) - } - err = c.config.ClickHouse.Exec(ctx, sql, args...) 
- if err != nil { - return fmt.Errorf("create meter view: %w", err) - } - - return nil -} - -func (c *ClickhouseConnector) deleteMeterView(ctx context.Context, namespace string, meterSlug string) error { - query := deleteMeterView{ - Database: c.config.Database, - Namespace: namespace, - MeterSlug: meterSlug, - } - - sql := query.toSQL() - - err := c.config.ClickHouse.Exec(ctx, sql) - if err != nil { - if strings.Contains(err.Error(), "code: 60") { - return &models.MeterNotFoundError{MeterSlug: meterSlug} - } - - return fmt.Errorf("delete meter view: %w", err) - } - - return nil -} - -func (c *ClickhouseConnector) queryMeterView(ctx context.Context, namespace string, meterSlug string, params *streaming.QueryParams) ([]models.MeterQueryRow, error) { - queryMeter := queryMeterView{ - Database: c.config.Database, - Namespace: namespace, - MeterSlug: meterSlug, - Aggregation: params.Aggregation, - From: params.From, - To: params.To, - Subject: params.FilterSubject, - FilterGroupBy: params.FilterGroupBy, - GroupBy: params.GroupBy, - WindowSize: params.WindowSize, - WindowTimeZone: params.WindowTimeZone, - } - - values := []models.MeterQueryRow{} - - sql, args, err := queryMeter.toSQL() - if err != nil { - return values, fmt.Errorf("query meter view: %w", err) - } - - start := time.Now() - rows, err := c.config.ClickHouse.Query(ctx, sql, args...) 
- if err != nil { - if strings.Contains(err.Error(), "code: 60") { - return nil, &models.MeterNotFoundError{MeterSlug: meterSlug} - } - - return values, fmt.Errorf("query meter view query: %w", err) - } - elapsed := time.Since(start) - slog.Debug("query meter view", "elapsed", elapsed.String(), "sql", sql, "args", args) - - for rows.Next() { - value := models.MeterQueryRow{ - GroupBy: map[string]*string{}, - } - - args := []interface{}{&value.WindowStart, &value.WindowEnd, &value.Value} - argCount := len(args) - - for range queryMeter.GroupBy { - tmp := "" - args = append(args, &tmp) - } - - if err := rows.Scan(args...); err != nil { - return values, fmt.Errorf("query meter view row scan: %w", err) - } - - for i, key := range queryMeter.GroupBy { - if s, ok := args[i+argCount].(*string); ok { - if key == "subject" { - value.Subject = s - continue - } - - // We treat empty string as nil - if s != nil && *s == "" { - value.GroupBy[key] = nil - } else { - value.GroupBy[key] = s - } - } - } - - // an empty row is returned when there are no values for the meter - if value.WindowStart.IsZero() && value.WindowEnd.IsZero() && value.Value == 0 { - continue - } - - values = append(values, value) - } - rows.Close() - err = rows.Err() - if err != nil { - return values, fmt.Errorf("query meter rows error: %w", err) - } - - return values, nil -} - -func (c *ClickhouseConnector) listMeterViewSubjects(ctx context.Context, namespace string, meterSlug string, from *time.Time, to *time.Time) ([]string, error) { - query := listMeterViewSubjects{ - Database: c.config.Database, - Namespace: namespace, - MeterSlug: meterSlug, - From: from, - To: to, - } - - sql, args := query.toSQL() - - rows, err := c.config.ClickHouse.Query(ctx, sql, args...) 
- if err != nil { - if strings.Contains(err.Error(), "code: 60") { - return nil, &models.MeterNotFoundError{MeterSlug: meterSlug} - } - - return nil, fmt.Errorf("list meter view subjects: %w", err) - } - - subjects := []string{} - for rows.Next() { - var subject string - if err = rows.Scan(&subject); err != nil { - return nil, err - } - - subjects = append(subjects, subject) - } - - return subjects, nil -} diff --git a/openmeter/streaming/clickhouse_connector/model.go b/openmeter/streaming/clickhouse_connector/model.go deleted file mode 100644 index 915b5def3..000000000 --- a/openmeter/streaming/clickhouse_connector/model.go +++ /dev/null @@ -1,9 +0,0 @@ -package clickhouse_connector - -import "github.com/openmeterio/openmeter/pkg/models" - -type MeterView struct { - Slug string - Aggregation models.MeterAggregation - GroupBy []string -} diff --git a/openmeter/streaming/connector.go b/openmeter/streaming/connector.go index 3d61b3cbd..29cffdc71 100644 --- a/openmeter/streaming/connector.go +++ b/openmeter/streaming/connector.go @@ -5,6 +5,7 @@ import ( "time" "github.com/openmeterio/openmeter/api" + "github.com/openmeterio/openmeter/openmeter/namespace" "github.com/openmeterio/openmeter/pkg/models" ) @@ -30,12 +31,34 @@ type CountEventRow struct { IsError bool } +type ListMeterSubjectsParams struct { + From *time.Time + To *time.Time +} + +// RawEvent represents a single raw event +type RawEvent struct { + Namespace string + ValidationError string + ID string + Type string + Source string + Subject string + Time time.Time + Data string + IngestedAt time.Time + StoredAt time.Time +} + type Connector interface { + namespace.Handler + CountEvents(ctx context.Context, namespace string, params CountEventsParams) ([]CountEventRow, error) ListEvents(ctx context.Context, namespace string, params ListEventsParams) ([]api.IngestedEvent, error) - CreateMeter(ctx context.Context, namespace string, meter *models.Meter) error - DeleteMeter(ctx context.Context, namespace string, 
meterSlug string) error - QueryMeter(ctx context.Context, namespace string, meterSlug string, params *QueryParams) ([]models.MeterQueryRow, error) - ListMeterSubjects(ctx context.Context, namespace string, meterSlug string, from *time.Time, to *time.Time) ([]string, error) + CreateMeter(ctx context.Context, namespace string, meter models.Meter) error + DeleteMeter(ctx context.Context, namespace string, meter models.Meter) error + QueryMeter(ctx context.Context, namespace string, meter models.Meter, params QueryParams) ([]models.MeterQueryRow, error) + ListMeterSubjects(ctx context.Context, namespace string, meter models.Meter, params ListMeterSubjectsParams) ([]string, error) + BatchInsert(ctx context.Context, events []RawEvent) error // Add more methods as needed ... } diff --git a/openmeter/streaming/query_params.go b/openmeter/streaming/query_params.go index 27bdccbb8..4fdd3e023 100644 --- a/openmeter/streaming/query_params.go +++ b/openmeter/streaming/query_params.go @@ -14,7 +14,6 @@ type QueryParams struct { FilterSubject []string FilterGroupBy map[string][]string GroupBy []string - Aggregation models.MeterAggregation WindowSize *models.WindowSize WindowTimeZone *time.Location } diff --git a/openmeter/streaming/testutils/streaming.go b/openmeter/streaming/testutils/streaming.go index 071cb28ad..cf737ce0e 100644 --- a/openmeter/streaming/testutils/streaming.go +++ b/openmeter/streaming/testutils/streaming.go @@ -11,6 +11,8 @@ import ( "github.com/openmeterio/openmeter/pkg/models" ) +var _ streaming.Connector = &MockStreamingConnector{} + func NewMockStreamingConnector(t testing.TB) *MockStreamingConnector { t.Helper() return &MockStreamingConnector{ @@ -49,6 +51,14 @@ func (m *MockStreamingConnector) AddRow(meterSlug string, row models.MeterQueryR m.rows[meterSlug] = append(m.rows[meterSlug], row) } +func (c *MockStreamingConnector) CreateNamespace(ctx context.Context, namespace string) error { + return nil +} + +func (c *MockStreamingConnector) 
DeleteNamespace(ctx context.Context, namespace string) error { + return nil +} + func (m *MockStreamingConnector) CountEvents(ctx context.Context, namespace string, params streaming.CountEventsParams) ([]streaming.CountEventRow, error) { return []streaming.CountEventRow{}, nil } @@ -57,28 +67,28 @@ func (m *MockStreamingConnector) ListEvents(ctx context.Context, namespace strin return []api.IngestedEvent{}, nil } -func (m *MockStreamingConnector) CreateMeter(ctx context.Context, namespace string, meter *models.Meter) error { +func (m *MockStreamingConnector) CreateMeter(ctx context.Context, namespace string, meter models.Meter) error { return nil } -func (m *MockStreamingConnector) DeleteMeter(ctx context.Context, namespace string, meterSlug string) error { +func (m *MockStreamingConnector) DeleteMeter(ctx context.Context, namespace string, meter models.Meter) error { return nil } // Returns the result query set for the given params. If the query set is not found, // it will try to approximate the result by aggregating the simple events -func (m *MockStreamingConnector) QueryMeter(ctx context.Context, namespace string, meterSlug string, params *streaming.QueryParams) ([]models.MeterQueryRow, error) { +func (m *MockStreamingConnector) QueryMeter(ctx context.Context, namespace string, meter models.Meter, params streaming.QueryParams) ([]models.MeterQueryRow, error) { rows := []models.MeterQueryRow{} - _, rowOk := m.rows[meterSlug] + _, rowOk := m.rows[meter.Slug] if rowOk { - for _, row := range m.rows[meterSlug] { + for _, row := range m.rows[meter.Slug] { if row.WindowStart.Equal(*params.From) && row.WindowEnd.Equal(*params.To) { rows = append(rows, row) } } } else { - row, err := m.aggregateEvents(meterSlug, params) + row, err := m.aggregateEvents(meter.Slug, params) if err != nil { return rows, err } @@ -88,12 +98,16 @@ func (m *MockStreamingConnector) QueryMeter(ctx context.Context, namespace strin return rows, nil } +func (m *MockStreamingConnector) 
BatchInsert(ctx context.Context, events []streaming.RawEvent) error { + return nil +} + func windowSizeToDuration(windowSize models.WindowSize) time.Duration { return windowSize.Duration() } // We approximate the actual logic by a simple filter + aggregation for most cases -func (m *MockStreamingConnector) aggregateEvents(meterSlug string, params *streaming.QueryParams) ([]models.MeterQueryRow, error) { +func (m *MockStreamingConnector) aggregateEvents(meterSlug string, params streaming.QueryParams) ([]models.MeterQueryRow, error) { events, ok := m.events[meterSlug] from := defaultx.WithDefault(params.From, time.Now().AddDate(-10, 0, 0)) to := defaultx.WithDefault(params.To, time.Now()) @@ -154,6 +168,6 @@ func (m *MockStreamingConnector) aggregateEvents(meterSlug string, params *strea return rows, nil } -func (m *MockStreamingConnector) ListMeterSubjects(ctx context.Context, namespace string, meterSlug string, from *time.Time, to *time.Time) ([]string, error) { +func (m *MockStreamingConnector) ListMeterSubjects(ctx context.Context, namespace string, meter models.Meter, params streaming.ListMeterSubjectsParams) ([]string, error) { return []string{}, nil } diff --git a/openmeter/streaming/testutils/streaming_test.go b/openmeter/streaming/testutils/streaming_test.go index c02ba631c..669eccb54 100644 --- a/openmeter/streaming/testutils/streaming_test.go +++ b/openmeter/streaming/testutils/streaming_test.go @@ -16,11 +16,15 @@ import ( func TestMockStreamingConnector(t *testing.T) { defaultMeterSlug := "default-meter" + defaultMeter := models.Meter{ + Slug: defaultMeterSlug, + } + type tc struct { Name string Events []SimpleEvent Rows []models.MeterQueryRow - Query *streaming.QueryParams + Query streaming.QueryParams Expected []models.MeterQueryRow ExpectedError error } @@ -31,7 +35,7 @@ func TestMockStreamingConnector(t *testing.T) { tt := []tc{ { Name: "Should return error if meter not found", - Query: &streaming.QueryParams{ + Query: streaming.QueryParams{ From: 
convert.ToPointer(now.Add(-time.Hour)), To: convert.ToPointer(now), }, @@ -39,7 +43,7 @@ func TestMockStreamingConnector(t *testing.T) { }, { Name: "Should error if meter exists but doesnt match", - Query: &streaming.QueryParams{ + Query: streaming.QueryParams{ From: convert.ToPointer(now.Add(-time.Hour)), To: convert.ToPointer(now), }, @@ -48,7 +52,7 @@ func TestMockStreamingConnector(t *testing.T) { }, { Name: "Should return empty rows if no rows and no events", - Query: &streaming.QueryParams{ + Query: streaming.QueryParams{ From: convert.ToPointer(now.Add(-time.Hour)), To: convert.ToPointer(now), }, @@ -64,7 +68,7 @@ func TestMockStreamingConnector(t *testing.T) { }, { Name: "Should return exact row", - Query: &streaming.QueryParams{ + Query: streaming.QueryParams{ From: convert.ToPointer(now.Add(-time.Hour)), To: convert.ToPointer(now), }, @@ -83,7 +87,7 @@ func TestMockStreamingConnector(t *testing.T) { }, { Name: "Should return event sum", - Query: &streaming.QueryParams{ + Query: streaming.QueryParams{ From: convert.ToPointer(now.Add(-time.Hour)), To: convert.ToPointer(now), }, @@ -100,7 +104,7 @@ func TestMockStreamingConnector(t *testing.T) { }, { Name: "Should aggregate events as if they were windowed", - Query: &streaming.QueryParams{ + Query: streaming.QueryParams{ From: convert.ToPointer(now.Truncate(time.Minute).Add(time.Second * 30).Add(-time.Minute * 2)), To: convert.ToPointer(now.Truncate(time.Minute).Add(time.Second * 30)), }, @@ -133,7 +137,7 @@ func TestMockStreamingConnector(t *testing.T) { }, { Name: "Should return events windowed", - Query: &streaming.QueryParams{ + Query: streaming.QueryParams{ From: convert.ToPointer(now.Add(-time.Minute * 3)), To: convert.ToPointer(now), WindowSize: convert.ToPointer(models.WindowSizeMinute), @@ -168,7 +172,7 @@ func TestMockStreamingConnector(t *testing.T) { }, { Name: "Should return row for queried period if window is larger than period", - Query: &streaming.QueryParams{ + Query: streaming.QueryParams{ 
From: convert.ToPointer(now.Add(-time.Minute * 3)), To: convert.ToPointer(now), WindowSize: convert.ToPointer(models.WindowSizeHour), @@ -203,7 +207,7 @@ func TestMockStreamingConnector(t *testing.T) { streamingConnector.AddRow(defaultMeterSlug, row) } - result, err := streamingConnector.QueryMeter(context.Background(), "namespace", defaultMeterSlug, tc.Query) + result, err := streamingConnector.QueryMeter(context.Background(), "namespace", defaultMeter, tc.Query) if tc.ExpectedError != nil { assert.Error(t, err) assert.Equal(t, tc.ExpectedError, err)