Skip to content

Commit 568ee42

Browse files
Hot reload to override individual metrics (#374)
Signed-off-by: Anders Swanson <[email protected]>
1 parent 16d66b8 commit 568ee42

File tree

9 files changed

+67
-17
lines changed

9 files changed

+67
-17
lines changed

collector/cache.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ import (
88
"time"
99
)
1010

11-
func NewMetricsCache(metrics []*Metric) *MetricsCache {
11+
func NewMetricsCache(metrics map[string]*Metric) *MetricsCache {
1212
c := map[*Metric]*MetricCacheRecord{}
1313

1414
for _, metric := range metrics {

collector/collector.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,7 @@ func (e *Exporter) scrapeDatabase(ch chan<- prometheus.Metric, errChan chan<- er
252252
e.logger.Debug("Successfully pinged Oracle database: "+maskDsn(d.Config.URL), "database", d.Name)
253253

254254
metricsToScrape := 0
255-
for _, metric := range e.metricsToScrape.Metric {
255+
for _, metric := range e.metricsToScrape {
256256
metric := metric //https://golang.org/doc/faq#closures_and_goroutines
257257
isScrapeMetric := e.isScrapeMetric(tick, metric, d)
258258
metricsToScrape++
@@ -324,7 +324,7 @@ func (e *Exporter) scrapeDatabase(ch chan<- prometheus.Metric, errChan chan<- er
324324

325325
func (e *Exporter) scrape(ch chan<- prometheus.Metric, tick *time.Time) {
326326
e.totalScrapes.Inc()
327-
errChan := make(chan error, len(e.metricsToScrape.Metric)*len(e.databases))
327+
errChan := make(chan error, len(e.metricsToScrape)*len(e.databases))
328328
begun := time.Now()
329329
if e.checkIfMetricsChanged() {
330330
e.reloadMetrics()
@@ -529,7 +529,7 @@ func (e *Exporter) generatePrometheusMetrics(d *Database, parse func(row map[str
529529

530530
func (e *Exporter) initCache() {
531531
for _, d := range e.databases {
532-
d.initCache(e.metricsToScrape.Metric)
532+
d.initCache(e.metricsToScrape)
533533
}
534534
}
535535

collector/data_loader.go

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,8 @@ import (
1414
)
1515

1616
func (e *Exporter) reloadMetrics() {
17-
// Truncate metricsToScrape
18-
e.metricsToScrape.Metric = []*Metric{}
19-
20-
// Load default metrics
21-
defaultMetrics := e.DefaultMetrics()
22-
e.metricsToScrape.Metric = defaultMetrics.Metric
17+
// reload default metrics
18+
e.metricsToScrape = e.DefaultMetrics()
2319

2420
// If custom metrics, load it
2521
if len(e.CustomMetricsFiles()) > 0 {
@@ -32,15 +28,22 @@ func (e *Exporter) reloadMetrics() {
3228
} else {
3329
e.logger.Info("Successfully loaded custom metrics from " + _customMetrics)
3430
}
35-
36-
e.metricsToScrape.Metric = append(e.metricsToScrape.Metric, metrics.Metric...)
31+
// Merge custom metrics into default metrics.
32+
// Any collisions (by ID) will overwrite the old metric value.
33+
e.merge(metrics)
3734
}
3835
} else {
3936
e.logger.Debug("No custom metrics defined.")
4037
}
4138
e.initCache()
4239
}
4340

41+
func (e *Exporter) merge(metrics *Metrics) {
42+
for _, metric := range metrics.Metric {
43+
e.metricsToScrape[metric.ID()] = metric
44+
}
45+
}
46+
4447
func loadYamlMetricsConfig(_metricsFileName string, metrics *Metrics) error {
4548
yamlBytes, err := os.ReadFile(_metricsFileName)
4649
if err != nil {

collector/database.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ func NewDatabase(logger *slog.Logger, dbname string, dbconfig DatabaseConfig) *D
9898
}
9999

100100
// initCache resets the metrics cached. Used on startup and when metrics are reloaded.
101-
func (d *Database) initCache(metrics []*Metric) {
101+
func (d *Database) initCache(metrics map[string]*Metric) {
102102
d.MetricsCache = NewMetricsCache(metrics)
103103
}
104104

collector/default_metrics.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,19 +17,19 @@ import (
1717
var defaultMetricsToml string
1818

1919
// DefaultMetrics is a somewhat hacky way to load the default metrics
20-
func (e *Exporter) DefaultMetrics() Metrics {
20+
func (e *Exporter) DefaultMetrics() map[string]*Metric {
2121
var metricsToScrape Metrics
2222
if e.Metrics.Default != "" {
2323
if err := loadMetricsConfig(filepath.Clean(e.Metrics.Default), &metricsToScrape); err != nil {
2424
e.logger.Error(fmt.Sprintf("there was an issue while loading specified default metrics file at: "+e.Metrics.Default+", proceeding to run with default metrics."),
2525
"error", err)
2626
}
27-
return metricsToScrape
27+
return metricsToScrape.toMap()
2828
}
2929

3030
if _, err := toml.Decode(defaultMetricsToml, &metricsToScrape); err != nil {
3131
e.logger.Error("failed to load default metrics", "error", err)
3232
panic(errors.New("Error while loading " + defaultMetricsToml))
3333
}
34-
return metricsToScrape
34+
return metricsToScrape.toMap()
3535
}

collector/metrics.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,3 +112,11 @@ func (m *Metric) IsEnabledForDatabase(d *Database) bool {
112112
}
113113
return false
114114
}
115+
116+
func (metrics Metrics) toMap() map[string]*Metric {
117+
m := map[string]*Metric{}
118+
for _, metric := range metrics.Metric {
119+
m[metric.ID()] = metric
120+
}
121+
return m
122+
}

collector/types.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ import (
1515
type Exporter struct {
1616
*MetricsConfiguration
1717
mu *sync.Mutex
18-
metricsToScrape Metrics
18+
metricsToScrape map[string]*Metric
1919
duration, error prometheus.Gauge
2020
totalScrapes prometheus.Counter
2121
scrapeErrors *prometheus.CounterVec

site/docs/configuration/custom-metrics.md

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@ metrics:
2323
2424
You may also use the `--custom.metrics` flag followed by a comma-separated list of TOML or YAML files, or export the `CUSTOM_METRICS` environment variable (`export CUSTOM_METRICS=my-custom-metrics.toml,my-other-custom-metrics.toml`).
2525

26+
### Metric Hot Reload
27+
28+
The exporter watches custom metrics files for changes. When any of these files change, the exporter hot reloads the metric definitions and serves the new metrics on the next scrape.
29+
2630
### Metric Schema
2731

2832
Metrics files must contain a series of `[[metric]]` definitions, in TOML, or the equivalent definition in a YAML file. Each metric definition must follow the exporter's metric schema:
@@ -123,6 +127,40 @@ oracledb_test_value_2 2
123127
You can find [working examples](https://github.com/oracle/oracle-db-appdev-monitoring/blob/main/custom-metrics-example/custom-metrics.toml) of custom metrics for slow queries, big queries and top 100 tables.
124128
An example of [custom metrics for Transactional Event Queues](https://github.com/oracle/oracle-db-appdev-monitoring/blob/main/custom-metrics-example/txeventq-metrics.toml) is also provided.
125129

130+
#### Override Existing, Individual Metrics
131+
132+
You may override properties for existing metrics by supplying a new, custom metric definition with the same `context` and `metricsdesc` values. For example, if you have an existing metric like so:
133+
134+
```toml
135+
[[metric]]
136+
context = "my_default_metric"
137+
metricsdesc = { value_1 = "Simple example returning always 1.", value_2 = "Same but returning always 2." }
138+
request = "SELECT 1 as value_1, 2 as value_2 FROM DUAL"
139+
```
140+
141+
You can redefine this metric in a custom metrics file to change any properties other than `context` or `metricsdesc`. For example, overriding the previous metric with `labels`, `scrapeinterval`, and `querytimeout` properties:
142+
143+
```toml
144+
[[metric]]
145+
context = "my_default_metric"
146+
metricsdesc = { value_1 = "Simple example returning always 1.", value_2 = "Same but returning always 2." }
147+
labels = [ "label_1", "label_2" ]
148+
request = "SELECT 1 as value_1, 2 as value_2 FROM DUAL"
149+
scrapeinterval = "30s"
150+
querytimeout = "10s"
151+
```
152+
153+
Then, provide any metrics overrides as custom metrics files in the [exporter configuration file](config-file.md):
154+
155+
```yaml
156+
metrics:
157+
## Paths to any custom metrics files
158+
custom:
159+
- my-custom-metrics.toml
160+
```
161+
162+
If any metric appears more than once in the custom metrics file list, the metric definition in the last file provided takes precedence.
163+
126164
### YAML Metrics
127165

128166
Metrics may be defined with YAML instead of TOML. YAML metric field names correspond to TOML metric field names.

site/docs/releases/changelog.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ Our current priorities to support metrics for advanced database features and use
1313

1414
- Updated project dependencies.
1515
- Standardize multi-arch builds and document supported database versions.
16+
- The metrics override capability is extended, allowing users to redefine individual existing metrics in custom metrics files. This allows users to modify individual default metrics without wholly replacing the default metrics file.
1617
- If the exporter fails to connect to a database due to invalid or locked credentials (ORA-01017 or ORA-28000 errors), that database configuration will be invalidated and the exporter will not attempt to re-establish the database connection. Other databases will continue to be scraped.
1718
- Metrics with an empty databases array (`databases = []`) are now considered disabled, and will not be scraped.
1819
- Increased the default query timeout for the `top_sql` metric to 10 seconds (previously 5 seconds).

0 commit comments

Comments
 (0)