Skip to content

Commit 6f9d1f9

Browse files
authored
Merge pull request #8 from lyubent/hint_metrics
Add hint metrics
2 parents 37a133d + 146eb6f commit 6f9d1f9

File tree

6 files changed

+79
-0
lines changed

6 files changed

+79
-0
lines changed

README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,15 @@ These metrics come from Cassandra's storage service which keeps track of the clu
140140
| `seastat_storage_tokens` | Number of tokens reported by Cassandra | Gauge |
141141
| `seastat_storage_node_status` | Status (`live`, `unreachable`, `joining`, `moving`, `leaving`) of each node in the cluster (tagged by node and status) | Gauge |
142142

143+
## Hint Metrics
144+
145+
These metrics come from the [Storage](https://cassandra.apache.org/doc/latest/operating/metrics.html#storage-metrics) metric which keeps track of hints, node load and storage exceptions.
146+
147+
| Name | Description | Type |
148+
| ------------- | ------------- | ---- |
149+
| `seastat_hints_total` | Number of hint messages written to this node since [re]start. Includes one entry for each host to be hinted per hint. | Counter |
150+
| `seastat_hints_in_progress` | Number of hints attempting to be sent currently from this node. | Gauge |
151+
143152
## Scrape Metrics
144153

145154
Seastat also exposes some internal metrics of how long the scrape took and the timestamp of the last scrape

jolokia/client.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -442,6 +442,27 @@ func (c *jolokiaClient) StorageStats() (StorageStats, error) {
442442
return stats, nil
443443
}
444444

445+
// HintStats gives information on hints being created and handed off in Cassandra
446+
// Hint metrics are ephemeral and reset when the Cassandra process restarts
447+
func (c *jolokiaClient) HintStats() (HintStats, error) {
448+
v, err := c.read("org.apache.cassandra.metrics", "type=Storage", "name=*")
449+
if err != nil {
450+
return HintStats{}, fmt.Errorf("err reading CQL stats: %v", err)
451+
}
452+
453+
stats := HintStats{}
454+
v.Get("value").GetObject().Visit(func(key []byte, val *fastjson.Value) {
455+
attributes := extractAttributes(string(key))
456+
switch attributes["name"] {
457+
case "TotalHintsInProgress":
458+
stats.TotalHintsInProgress = Gauge(val.Get("Count").GetInt64())
459+
case "TotalHints":
460+
stats.TotalHints = Counter(val.Get("Count").GetInt64())
461+
}
462+
})
463+
return stats, nil
464+
}
465+
445466
// get makes a GET request to the targetPath and returns the contents of the
446467
// body as a JSON value ready for items to be plucked. If any part of the
447468
// request pipeline fails, an err is returned

jolokia/jolokia.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,10 @@ type Client interface {
8484
// which encapsulates things like number of keyspaces and what nodes
8585
// are part of the cluster
8686
StorageStats() (StorageStats, error)
87+
88+
// HintStats gives information on hints being created and handed off in Cassandra
89+
// Hint metrics are ephemeral and reset when the Cassandra process restarts
90+
HintStats() (HintStats, error)
8791
}
8892

8993
// Table embeds information about a Keyspace and Table that exists in
@@ -188,3 +192,10 @@ type StorageStats struct {
188192
MovingNodes []string
189193
LeavingNodes []string
190194
}
195+
196+
// HintStats embeds information gathered from the Storage metric in
197+
// cassandra such as the number of total hints and hints being handed off
198+
type HintStats struct {
199+
TotalHintsInProgress Gauge
200+
TotalHints Counter
201+
}

server/collector.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,10 @@ func (c *SeastatCollector) Describe(ch chan<- *prometheus.Desc) {
8484
PromStorageKeyspaces,
8585
PromStorageTokens,
8686
PromStorageNodeStatus,
87+
88+
// HintStats
89+
PromTotalHintsInProgress,
90+
PromTotalHints,
8791
}
8892

8993
for _, desc := range descs {
@@ -112,6 +116,7 @@ func (c *SeastatCollector) Collect(ch chan<- prometheus.Metric) {
112116
addMemoryStats(metrics, ch)
113117
addGCStats(metrics, ch)
114118
addStorageStats(metrics, ch)
119+
addHintStats(metrics, ch)
115120
}
116121

117122
func addTableStats(metrics ScrapedMetrics, ch chan<- prometheus.Metric) {
@@ -398,3 +403,14 @@ func addStorageStats(metrics ScrapedMetrics, ch chan<- prometheus.Metric) {
398403
}
399404
}
400405
}
406+
407+
func addHintStats(metrics ScrapedMetrics, ch chan<- prometheus.Metric) {
408+
if metrics.HintStats == nil {
409+
return
410+
}
411+
412+
ch <- prometheus.MustNewConstMetric(PromTotalHintsInProgress,
413+
prometheus.GaugeValue, float64(metrics.HintStats.TotalHintsInProgress))
414+
ch <- prometheus.MustNewConstMetric(PromTotalHints,
415+
prometheus.CounterValue, float64(metrics.HintStats.TotalHints))
416+
}

server/prom_metrics.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -343,3 +343,17 @@ var (
343343
[]string{"node", "status"}, nil,
344344
)
345345
)
346+
347+
var (
348+
PromTotalHintsInProgress = prometheus.NewDesc(
349+
"seastat_hints_in_progress",
350+
"Number of hints attempting to be handed off since Cassandra started",
351+
[]string{}, nil,
352+
)
353+
354+
PromTotalHints = prometheus.NewDesc(
355+
"seastat_hints_total",
356+
"Number of hint messages written to this node since Cassandra started",
357+
[]string{}, nil,
358+
)
359+
)

server/scraper.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ type ScrapedMetrics struct {
4141
MemoryStats *jolokia.MemoryStats
4242
GCStats []jolokia.GCStats
4343
StorageStats *jolokia.StorageStats
44+
HintStats *jolokia.HintStats
4445

4546
ScrapeDuration time.Duration
4647
ScrapeTime time.Time
@@ -191,6 +192,13 @@ func (s *Scraper) scrapeAllMetrics() ScrapedMetrics {
191192
out.StorageStats = &storageStats
192193
}
193194

195+
hintStats, err := s.client.HintStats()
196+
if err != nil {
197+
logrus.Debugf("🦂 Could not get Hint stats: %v", err)
198+
} else {
199+
out.HintStats = &hintStats
200+
}
201+
194202
out.ScrapeDuration = time.Since(scrapeStart)
195203
out.ScrapeTime = time.Now()
196204
return out

0 commit comments

Comments
 (0)