Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .golangci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ linters:
- name: atomic
- name: add-constant
arguments:
- allowFloats: 0.0,0.,1.0,1.,2.0,2.,3.0,3.
- allowFloats: 0.0,0.,1.0,1.,2.0,2.,3.0,3.,100.0
allowInts: 0,1,2,3,10,8,16,32,64,100,128,192,256,512,1024,2048,4096,8192,16384,32768,65536
allowStrs: '""'
maxLitCount: "3"
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ GO_BINARY_DIRS = \
$(NULL)

GO_COMMAND_PACKAGE_DIRS = \
$(foreach parent_dir,$(GO_BINARY_DIRS),$(foreach src_dir,$(wildcard $(parent_dir)/*/),$(patsubst %/,%,$(src_dir)))) \
$(patsubst %/,%,$(filter %/,$(foreach parent_dir,$(GO_BINARY_DIRS),$(wildcard $(parent_dir)/*/)))) \
$(NULL)

GO_BINARIES = \
Expand Down
26 changes: 26 additions & 0 deletions app/domain/metric_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
"github.com/rs/zerolog/log"

config "github.com/cloudzero/cloudzero-agent/app/config/gator"
"github.com/cloudzero/cloudzero-agent/app/domain/transform"
"github.com/cloudzero/cloudzero-agent/app/types"
)

Expand Down Expand Up @@ -113,6 +114,9 @@ type MetricCollector struct {
// filter implements metric classification logic to separate cost from observability metrics.
filter *MetricFilter

// transformer handles vendor-specific metric transformation (e.g., DCGM GPU metrics).
transformer types.MetricTransformer

// clock provides time abstraction for testing and consistent timestamping.
clock types.TimeProvider

Expand Down Expand Up @@ -144,6 +148,7 @@ func NewMetricCollector(s *config.Settings, clock types.TimeProvider, costStore
costStore: costStore,
observabilityStore: observabilityStore,
filter: filter,
transformer: transform.NewMetricTransformer(),
clock: clock,
cancelFunc: cancel,
}
Expand Down Expand Up @@ -192,6 +197,27 @@ func (d *MetricCollector) PutMetrics(ctx context.Context, contentType, encodingT
return nil, fmt.Errorf("unsupported content type: %s", contentType)
}

// Log complete DCGM metrics for debugging GPU transformation
for _, metric := range metrics {
if strings.HasPrefix(metric.MetricName, "DCGM_FI_DEV_") {
log.Ctx(ctx).Info().
Str("metricName", metric.MetricName).
Str("value", metric.Value).
Str("nodeName", metric.NodeName).
Interface("labels", metric.Labels).
Time("timestamp", metric.TimeStamp).
Str("clusterName", metric.ClusterName).
Str("cloudAccountID", metric.CloudAccountID).
Msg("DCGM metric received")
}
}

// Transform vendor-specific metrics (e.g., DCGM GPU metrics) before filtering
metrics, err = d.transformer.Transform(ctx, metrics)
if err != nil {
return stats, fmt.Errorf("failed to transform metrics: %w", err)
}

costMetrics, observabilityMetrics, droppedMetrics := d.filter.Filter(metrics)

metricsReceived.WithLabelValues().Add(float64(len(metrics)))
Expand Down
65 changes: 65 additions & 0 deletions app/domain/transform/catalog/catalog.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// SPDX-FileCopyrightText: Copyright (c) 2016-2025, CloudZero, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

// Package catalog provides a catalog-based metric transformer that routes
// metrics to registered specialized transformers.
//
// The catalog transformer orchestrates multiple specialized transformers to
// provide automatic routing based on metric characteristics.
package catalog

import (
"context"

"github.com/cloudzero/cloudzero-agent/app/types"
)

// Transformer implements types.MetricTransformer using a catalog of specialized
// transformers.
//
// Each transformer in the catalog processes all metrics sequentially.
// Transformers identify which metrics they can handle and transform those while
// passing through others unchanged.
type Transformer struct {
transformers []types.MetricTransformer
}

// NewTransformer creates a new catalog transformer with the provided
// specialized transformers.
//
// Transformers are applied sequentially - each transformer receives all metrics
// and decides which ones to transform based on implementation-specific logic
// (e.g., metric name patterns).
func NewTransformer(transformers ...types.MetricTransformer) *Transformer {
return &Transformer{
transformers: transformers,
}
}

// Transform processes metrics by routing them sequentially through specialized
// transformers.
//
// Processing flow:
// 1. Pass metrics through first transformer
// 2. Pass results through second transformer
// 3. Continue until all transformers have processed the metrics
//
// This implements the types.MetricTransformer interface.
func (t *Transformer) Transform(ctx context.Context, metrics []types.Metric) ([]types.Metric, error) {
if len(t.transformers) == 0 {
return metrics, nil
}

// Process through each transformer in sequence
result := metrics
var err error

for _, transformer := range t.transformers {
result, err = transformer.Transform(ctx, result)
if err != nil {
return nil, err
}
}

return result, nil
}
Loading
Loading