feat(gcp+aws): Add last_scrape_time metric #159

Merged: 3 commits, May 2, 2024
74 changes: 72 additions & 2 deletions pkg/aws/aws.go
@@ -29,12 +29,62 @@ type AWS struct {
}

var (
providerLastScrapeErrorDesc = prometheus.NewDesc(
prometheus.BuildFQName(cloudcost_exporter.ExporterName, "", "last_scrape_error"),
"Was the last scrape an error. 1 indicates an error.",
[]string{"provider"},
nil,
)
collectorSuccessDesc = prometheus.NewDesc(
prometheus.BuildFQName(cloudcost_exporter.ExporterName, subsystem, "collector_success"),
"Was the last scrape of the AWS metrics successful.",
[]string{"collector"},
nil,
)
collectorLastScrapeErrorDesc = prometheus.NewDesc(
prometheus.BuildFQName(cloudcost_exporter.ExporterName, "collector", "last_scrape_error"),
"Was the last scrape an error. 1 indicates an error.",
[]string{"provider", "collector"},
nil,
)
collectorDurationDesc = prometheus.NewDesc(
prometheus.BuildFQName(cloudcost_exporter.ExporterName, "collector", "last_scrape_duration_seconds"),
"Duration of the last scrape in seconds.",
[]string{"provider", "collector"},
nil,
)
collectorScrapesTotalCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: prometheus.BuildFQName(cloudcost_exporter.ExporterName, "collector", "scrapes_total"),
Help: "Total number of scrapes for a collector.",
},
[]string{"provider", "collector"},
)
collectorLastScrapeTime = prometheus.NewDesc(
prometheus.BuildFQName(cloudcost_exporter.ExporterName, "collector", "last_scrape_time"),
"Time of the last scrape.W",
[]string{"provider", "collector"},
nil,
)
providerLastScrapeTime = prometheus.NewDesc(
prometheus.BuildFQName(cloudcost_exporter.ExporterName, "", "last_scrape_time"),
"Time of the last scrape.",
[]string{"provider"},
nil,
)
providerLastScrapeDurationDesc = prometheus.NewDesc(
prometheus.BuildFQName(cloudcost_exporter.ExporterName, "", "last_scrape_duration_seconds"),
"Duration of the last scrape in seconds.",
[]string{"provider"},
nil,
)
providerScrapesTotalCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: prometheus.BuildFQName(cloudcost_exporter.ExporterName, "", "scrapes_total"),
Help: "Total number of scrapes.",
},
[]string{"provider"},
)
)

var services = []string{"S3"}
@@ -85,6 +135,9 @@ func New(config *Config) (*AWS, error) {

func (a *AWS) RegisterCollectors(registry provider.Registry) error {
log.Printf("Registering %d collectors for AWS", len(a.collectors))
registry.MustRegister(
collectorScrapesTotalCounter,
)
for _, c := range a.collectors {
if err := c.Register(registry); err != nil {
return err
@@ -94,6 +147,13 @@ func (a *AWS) RegisterCollectors(registry provider.Registry) error {
}

func (a *AWS) Describe(ch chan<- *prometheus.Desc) {
ch <- collectorLastScrapeErrorDesc
ch <- collectorDurationDesc
ch <- providerLastScrapeErrorDesc
ch <- providerLastScrapeDurationDesc
ch <- collectorLastScrapeTime
ch <- providerLastScrapeTime
ch <- collectorSuccessDesc
for _, c := range a.collectors {
if err := c.Describe(ch); err != nil {
log.Printf("Error describing collector %s: %s", c.Name(), err)
@@ -102,18 +162,28 @@ func (a *AWS) Describe(ch chan<- *prometheus.Desc) {
}

func (a *AWS) Collect(ch chan<- prometheus.Metric) {
start := time.Now()
wg := &sync.WaitGroup{}
wg.Add(len(a.collectors))
for _, c := range a.collectors {
go func(c provider.Collector) {
now := time.Now()
defer wg.Done()
- collectorSuccess := 1.0
+ collectorSuccess := 0.0
if err := c.Collect(ch); err != nil {
- collectorSuccess = 0.0
+ collectorSuccess = 1.0
log.Printf("Error collecting metrics from collector %s: %s", c.Name(), err)
}
ch <- prometheus.MustNewConstMetric(collectorLastScrapeErrorDesc, prometheus.GaugeValue, collectorSuccess, subsystem, c.Name())
ch <- prometheus.MustNewConstMetric(collectorDurationDesc, prometheus.GaugeValue, time.Since(now).Seconds(), subsystem, c.Name())
ch <- prometheus.MustNewConstMetric(collectorLastScrapeTime, prometheus.GaugeValue, float64(time.Now().Unix()), subsystem, c.Name())
ch <- prometheus.MustNewConstMetric(collectorSuccessDesc, prometheus.GaugeValue, collectorSuccess, c.Name())
collectorScrapesTotalCounter.WithLabelValues(subsystem, c.Name()).Inc()
}(c)
}
wg.Wait()
ch <- prometheus.MustNewConstMetric(providerLastScrapeErrorDesc, prometheus.GaugeValue, 0.0, subsystem)
ch <- prometheus.MustNewConstMetric(providerLastScrapeDurationDesc, prometheus.GaugeValue, time.Since(start).Seconds(), subsystem)
ch <- prometheus.MustNewConstMetric(providerLastScrapeTime, prometheus.GaugeValue, float64(time.Now().Unix()), subsystem)
providerScrapesTotalCounter.WithLabelValues(subsystem).Inc()
}
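For orientation, the new timestamp metrics use client_golang's const-metric pattern: nothing stateful is registered for them; instead Collect stamps float64(time.Now().Unix()) into a fresh gauge sample on every scrape. Below is a minimal, self-contained sketch of that pattern. The demoDesc and demoCollector names and the main harness are invented for illustration; only the descriptor shape mirrors providerLastScrapeTime above.

package main

import (
	"fmt"
	"time"

	"github.com/prometheus/client_golang/prometheus"
)

// demoDesc mirrors the shape of providerLastScrapeTime in pkg/aws/aws.go.
var demoDesc = prometheus.NewDesc(
	prometheus.BuildFQName("cloudcost_exporter", "", "last_scrape_time"),
	"Time of the last scrape.",
	[]string{"provider"},
	nil,
)

type demoCollector struct{}

func (demoCollector) Describe(ch chan<- *prometheus.Desc) { ch <- demoDesc }

func (demoCollector) Collect(ch chan<- prometheus.Metric) {
	// Stamp the current Unix time at collection, as the Collect methods above do.
	ch <- prometheus.MustNewConstMetric(
		demoDesc, prometheus.GaugeValue, float64(time.Now().Unix()), "aws")
}

func main() {
	reg := prometheus.NewRegistry()
	reg.MustRegister(demoCollector{})
	families, err := reg.Gather()
	if err != nil {
		panic(err)
	}
	for _, mf := range families {
		fmt.Println(mf.GetName(), mf.GetMetric()[0].GetGauge().GetValue())
	}
}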
1 change: 1 addition & 0 deletions pkg/aws/aws_test.go
@@ -63,6 +63,7 @@ func Test_RegisterCollectors(t *testing.T) {
t.Run(tc.name, func(t *testing.T) {
ctrl := gomock.NewController(t)
r := mock_provider.NewMockRegistry(ctrl)
r.EXPECT().MustRegister(gomock.Any()).AnyTimes()
c := mock_provider.NewMockCollector(ctrl)
if tc.register != nil {
c.EXPECT().Register(r).DoAndReturn(tc.register).Times(tc.numCollectors)
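The added expectation is needed because RegisterCollectors now calls registry.MustRegister for the provider-level scrapes_total counter before iterating over the collectors; without r.EXPECT().MustRegister(gomock.Any()).AnyTimes() the mock registry would fail the test on that unexpected call. AnyTimes() keeps the expectation permissive across every table case, including those that register zero collectors.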
16 changes: 16 additions & 0 deletions pkg/google/gcp.go
@@ -64,6 +64,18 @@ var (
},
[]string{"provider", "collector"},
)
collectorLastScrapeTime = prometheus.NewDesc(
prometheus.BuildFQName(cloudcost_exporter.ExporterName, "collector", "last_scrape_time"),
"Time of the last scrape.W",
[]string{"provider", "collector"},
nil,
)
providerLastScrapeTime = prometheus.NewDesc(
prometheus.BuildFQName(cloudcost_exporter.ExporterName, "", "last_scrape_time"),
"Time of the last scrape.",
[]string{"provider"},
nil,
)
)

type GCP struct {
@@ -163,6 +175,8 @@ func (g *GCP) Describe(ch chan<- *prometheus.Desc) {
ch <- collectorDurationDesc
ch <- providerLastScrapeErrorDesc
ch <- providerLastScrapeDurationDesc
ch <- collectorLastScrapeTime
ch <- providerLastScrapeTime
for _, c := range g.collectors {
if err := c.Describe(ch); err != nil {
log.Printf("Error describing collector %s: %s", c.Name(), err)
@@ -187,12 +201,14 @@ func (g *GCP) Collect(ch chan<- prometheus.Metric) {
log.Printf("Collector(%s) collect respose=%.2f", c.Name(), collectorSuccess)
ch <- prometheus.MustNewConstMetric(collectorLastScrapeErrorDesc, prometheus.GaugeValue, collectorSuccess, subsystem, c.Name())
ch <- prometheus.MustNewConstMetric(collectorDurationDesc, prometheus.GaugeValue, time.Since(now).Seconds(), subsystem, c.Name())
ch <- prometheus.MustNewConstMetric(collectorLastScrapeTime, prometheus.GaugeValue, float64(time.Now().Unix()), subsystem, c.Name())
collectorScrapesTotalCounter.WithLabelValues(subsystem, c.Name()).Inc()
}(c)
}
wg.Wait()
// When can the error actually happen? Potentially if all the collectors fail?
ch <- prometheus.MustNewConstMetric(providerLastScrapeErrorDesc, prometheus.GaugeValue, 0.0, subsystem)
ch <- prometheus.MustNewConstMetric(providerLastScrapeDurationDesc, prometheus.GaugeValue, time.Since(start).Seconds(), subsystem)
ch <- prometheus.MustNewConstMetric(providerLastScrapeTime, prometheus.GaugeValue, float64(time.Now().Unix()), subsystem)
providerScrapesTotalCounter.WithLabelValues(subsystem).Inc()
}
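A sketch of one consumer-side use of the new gauge, under stated assumptions: scrape the exporter and flag providers whose last scrape looks stale. The listen address and the five-minute cutoff are invented for the demo; in Prometheus itself an expression like time() - cloudcost_exporter_last_scrape_time > 300 serves the same purpose. Parsing relies on github.com/prometheus/common/expfmt.

package main

import (
	"fmt"
	"net/http"
	"time"

	"github.com/prometheus/common/expfmt"
)

func main() {
	// Assumed address; the exporter's real listen address depends on configuration.
	resp, err := http.Get("http://localhost:8080/metrics")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var parser expfmt.TextParser
	families, err := parser.TextToMetricFamilies(resp.Body)
	if err != nil {
		panic(err)
	}

	mf, ok := families["cloudcost_exporter_last_scrape_time"]
	if !ok {
		fmt.Println("last_scrape_time not found in scrape")
		return
	}
	for _, m := range mf.GetMetric() {
		// The gauge value is a Unix timestamp in seconds.
		age := time.Since(time.Unix(int64(m.GetGauge().GetValue()), 0))
		if age > 5*time.Minute {
			fmt.Printf("stale provider scrape: %v (age %s)\n", m.GetLabel(), age)
		}
	}
}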
32 changes: 26 additions & 6 deletions pkg/google/gcp_test.go
@@ -88,6 +88,12 @@ func TestGCP_CollectMetrics(t *testing.T) {
Value: 0,
MetricType: prometheus.GaugeValue,
},
{
FqName: "cloudcost_exporter_collector_last_scrape_time",
Labels: utils.LabelMap{"provider": "gcp", "collector": "test"},
Value: 0,
MetricType: prometheus.GaugeValue,
},
{
FqName: "cloudcost_exporter_last_scrape_error",
Labels: utils.LabelMap{"provider": "gcp"},
@@ -117,6 +123,12 @@
Value: 0,
MetricType: prometheus.GaugeValue,
}, {
FqName: "cloudcost_exporter_collector_last_scrape_time",
Labels: utils.LabelMap{"provider": "gcp", "collector": "test"},
Value: 0,
MetricType: prometheus.GaugeValue,
},
{
FqName: "cloudcost_exporter_collector_last_scrape_error",
Labels: utils.LabelMap{"provider": "gcp", "collector": "test"},
Value: 0,
@@ -128,14 +140,16 @@
Value: 0,
MetricType: prometheus.GaugeValue,
},

{
FqName: "cloudcost_exporter_last_scrape_error",
Labels: utils.LabelMap{"provider": "gcp"},
FqName: "cloudcost_exporter_collector_last_scrape_time",
Labels: utils.LabelMap{"provider": "gcp", "collector": "test"},
Value: 0,
MetricType: prometheus.GaugeValue,
},

{
FqName: "cloudcost_exporter_last_scrape_duration_seconds",
FqName: "cloudcost_exporter_last_scrape_error",
Labels: utils.LabelMap{"provider": "gcp"},
Value: 0,
MetricType: prometheus.GaugeValue,
Expand Down Expand Up @@ -176,13 +190,19 @@ func TestGCP_CollectMetrics(t *testing.T) {
wg.Done()

wg.Wait()
ignoredMetricSuffix := []string{"duration_seconds", "last_scrape_time"}
// I don't love using a named loop, but this allows the inner loop to properly continue if the condition has been met.
metricsLoop:
for _, expectedMetric := range tt.expectedMetrics {
metric := utils.ReadMetrics(<-ch)
// We don't care about the value for scrape durations or timestamps, just that the metric exists and is returned in the order we expect.
- if strings.Contains(metric.FqName, "duration_seconds") {
- require.Equal(t, expectedMetric.FqName, metric.FqName)
- continue
+ for _, suffix := range ignoredMetricSuffix {
+ if strings.Contains(metric.FqName, suffix) {
+ require.Equal(t, expectedMetric.FqName, metric.FqName)
+ continue metricsLoop
+ }
}

require.Equal(t, expectedMetric, metric)
}

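The metricsLoop label in the test above is ordinary Go labeled continue: when a suffix matches, control jumps straight to the next expected metric in the outer loop rather than merely leaving the inner suffix loop. A standalone illustration, with invented metric names:

package main

import (
	"fmt"
	"strings"
)

func main() {
	ignored := []string{"duration_seconds", "last_scrape_time"}
	metrics := []string{
		"cloudcost_exporter_collector_last_scrape_error",
		"cloudcost_exporter_collector_last_scrape_duration_seconds",
		"cloudcost_exporter_collector_last_scrape_time",
	}

metricsLoop:
	for _, name := range metrics {
		for _, suffix := range ignored {
			if strings.Contains(name, suffix) {
				fmt.Println("skipping value check for", name)
				continue metricsLoop // advance the outer loop directly
			}
		}
		fmt.Println("checking value for", name)
	}
}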