Skip to content

Commit

Permalink
WIP: Instrument client object refs metrics
Browse files Browse the repository at this point in the history
Adds a new gauge metric with the cache_key label.

E.g:
 vso_client_factory_client_refs{cache_key="kubernetes-f42f026de00efe6e8aff24"} 1
  • Loading branch information
benashz committed Jun 13, 2024
1 parent 3377c25 commit 6b94ecc
Show file tree
Hide file tree
Showing 6 changed files with 104 additions and 6 deletions.
2 changes: 2 additions & 0 deletions internal/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,14 @@ const (

NameConfig = "config"
NameLength = "length"
NameTainted = "tainted"
NameOperationsTotal = "operations_total"
NameOperationsErrorsTotal = "operations_errors_total"
NameOperationsTimeSeconds = "operations_time_seconds"
NameRequestsTotal = "requests_total"
NameRequestsErrorsTotal = "requests_errors_total"
NameTaintedClients = "tainted_clients"
NameClientRefs = "client_refs"
)

var ResourceStatus = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Expand Down
34 changes: 34 additions & 0 deletions internal/vault/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@ import (
// In the case where the return value is true, the Client will be removed from the cache.
type ClientCachePruneFilterFunc func(Client) bool

type CacheStats struct {
Tainted int
Refs int
Len int
}

// ClientCache provides an interface for Caching a Client.
type ClientCache interface {
Get(ClientCacheKey) (Client, bool)
Expand All @@ -26,6 +32,7 @@ type ClientCache interface {
Prune(filterFunc ClientCachePruneFilterFunc) []Client
Contains(key ClientCacheKey) bool
Purge() []ClientCacheKey
Stats() CacheStats
}

var _ ClientCache = (*clientCache)(nil)
Expand All @@ -40,6 +47,33 @@ type clientCache struct {
evictionCloneGauge prometheus.Gauge
hitCloneCounter prometheus.Counter
missCloneCounter prometheus.Counter
cacheStats *CacheStats
}

func (c *clientCache) Stats() CacheStats {
// TODO: cache this result somehow.
if c.cacheStats != nil {
return CacheStats{
Len: c.cacheStats.Len,
Refs: c.cacheStats.Refs,
}
}

stats := CacheStats{
Len: c.Len(),
}
for _, key := range c.Keys() {
if client, ok := c.cache.Peek(key); ok {
if client.Tainted() {
stats.Tainted++
}
if client.Stat() != nil {
stats.Refs += client.Stat().RefCount()
}
}
}

return stats
}

func (c *clientCache) Keys() []ClientCacheKey {
Expand Down
33 changes: 28 additions & 5 deletions internal/vault/cache_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,26 +47,41 @@ var (
// metricsFQNClientCacheEvictions for the ClientCache.
metricsFQNClientCloneCacheEvictions = prometheus.BuildFQName(
metrics.Namespace, subsystemClientCloneCache, "evictions")

// metricsFQNClientCacheTaintedClients for the ClientCache.
metricsFQNClientCacheTaintedClients = prometheus.BuildFQName(
metrics.Namespace, subsystemClientCache, metrics.NameTainted)

// metricsFQNClientCacheClientRefs for the ClientCache.
metricsFQNClientCacheClientRefs = prometheus.BuildFQName(
metrics.Namespace, subsystemClientCache, metrics.NameClientRefs)
)

var _ prometheus.Collector = (*clientCacheCollector)(nil)

// clientCacheCollector provides a prometheus.Collector for ClientCache metrics.
type clientCacheCollector struct {
cache ClientCache
size float64
sizeDesc *prometheus.Desc
lenDesc *prometheus.Desc
cache ClientCache
size float64
sizeDesc *prometheus.Desc
lenDesc *prometheus.Desc
taintedDesc *prometheus.Desc
refsDesc *prometheus.Desc
}

func (c clientCacheCollector) Describe(ch chan<- *prometheus.Desc) {
ch <- c.sizeDesc
ch <- c.lenDesc
ch <- c.taintedDesc
ch <- c.refsDesc
}

func (c clientCacheCollector) Collect(ch chan<- prometheus.Metric) {
stats := c.cache.Stats()
ch <- prometheus.MustNewConstMetric(c.sizeDesc, prometheus.GaugeValue, c.size)
ch <- prometheus.MustNewConstMetric(c.lenDesc, prometheus.GaugeValue, float64(c.cache.Len()))
ch <- prometheus.MustNewConstMetric(c.lenDesc, prometheus.GaugeValue, float64(stats.Len))
ch <- prometheus.MustNewConstMetric(c.taintedDesc, prometheus.GaugeValue, float64(stats.Tainted))
ch <- prometheus.MustNewConstMetric(c.refsDesc, prometheus.GaugeValue, float64(stats.Refs))
}

func newClientCacheCollector(cache ClientCache, size int) prometheus.Collector {
Expand All @@ -81,5 +96,13 @@ func newClientCacheCollector(cache ClientCache, size int) prometheus.Collector {
metricsFQNClientCacheLength,
"Number of Vault Clients in the cache.",
nil, nil),
taintedDesc: prometheus.NewDesc(
metricsFQNClientCacheTaintedClients,
"Number of tainted Vault Clients in the cache.",
nil, nil),
refsDesc: prometheus.NewDesc(
metricsFQNClientCacheClientRefs,
"Total number of client object references.",
nil, nil),
}
}
15 changes: 14 additions & 1 deletion internal/vault/cache_metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ func Test_clientCacheCollector_Collect(t *testing.T) {

for i := 0; i < tt.clientCount; i++ {
_, err := cache.Add(&defaultClient{
clientStat: &clientStat{
refCount: 1,
},
authObj: &secretsv1beta1.VaultAuth{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("auth-%d", i),
Expand Down Expand Up @@ -79,7 +82,7 @@ func Test_clientCacheCollector_Collect(t *testing.T) {
require.NoError(t, err)

var found int
assert.Len(t, mfs, 2)
assert.Len(t, mfs, 4)
for _, mf := range mfs {
name := mf.GetName()
m := mf.GetMetric()
Expand All @@ -96,6 +99,16 @@ func Test_clientCacheCollector_Collect(t *testing.T) {
found++
assert.Equal(t, float64(tt.size), *m[0].Gauge.Value)
}

if name == metricsFQNClientCacheTaintedClients {
found++
assert.Equal(t, float64(0), *m[0].Gauge.Value)
}

if name == metricsFQNClientCacheClientRefs {
found++
assert.Equal(t, float64(tt.size), *m[0].Gauge.Value)
}
}
require.Equal(t, len(mfs), found, "not all metrics found")
})
Expand Down
23 changes: 23 additions & 0 deletions internal/vault/client_factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ type cachingClientFactory struct {
requestCounterVec *prometheus.CounterVec
requestErrorCounterVec *prometheus.CounterVec
taintedClientGauge *prometheus.GaugeVec
clientRefGauge *prometheus.GaugeVec
revokeOnEvict bool
pruneStorageOnEvict bool
ctrlClient ctrlclient.Client
Expand Down Expand Up @@ -299,6 +300,9 @@ func (m *cachingClientFactory) onClientEvict(ctx context.Context, client ctrlcli
}

m.removeClientLock(cacheKey)
if m.clientRefGauge != nil {
m.clientRefGauge.DeleteLabelValues(cacheKey.String())
}
}

// Restore will attempt to restore a Client from storage. If storage is not enabled then no restoration will take place.
Expand Down Expand Up @@ -590,6 +594,11 @@ func (m *cachingClientFactory) updateClientStatsAfterGet(ctx context.Context, ca
"creationTimestamp", c.Stat().CreationTimestamp(),
"reason", decrementReason,
)
if m.clientRefGauge != nil {
m.clientRefGauge.WithLabelValues(cacheKey.String()).Set(
float64(c.Stat().RefCount()),
)
}
// send the client to the cache pruner.
m.orphanPrunerClientCh <- c
}
Expand All @@ -604,6 +613,11 @@ func (m *cachingClientFactory) updateClientStatsAfterGet(ctx context.Context, ca
"creationTimestamp", c.Stat().CreationTimestamp(),
"reason", incrementReason,
)
if m.clientRefGauge != nil {
m.clientRefGauge.WithLabelValues(cacheKey.String()).Set(
float64(c.Stat().RefCount()),
)
}
}
}
}
Expand Down Expand Up @@ -1004,6 +1018,14 @@ func NewCachingClientFactory(ctx context.Context, client ctrlclient.Client, cach
metrics.LabelCacheKey,
},
),
clientRefGauge: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: metricsFQNClientRefs,
Help: "Client factory number of object references",
}, []string{
metrics.LabelCacheKey,
},
),
}

if config.CollectClientCacheMetrics {
Expand All @@ -1016,6 +1038,7 @@ func NewCachingClientFactory(ctx context.Context, client ctrlclient.Client, cach
factory.requestCounterVec,
factory.requestErrorCounterVec,
factory.taintedClientGauge,
factory.clientRefGauge,
clientFactoryOperationTimes,
)
}
Expand Down
3 changes: 3 additions & 0 deletions internal/vault/client_factory_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ var (
metricsFQNClientFactoryTaintedClients = prometheus.BuildFQName(
metrics.Namespace, subsystemClientFactory, metrics.NameTaintedClients)

metricsFQNClientRefs = prometheus.BuildFQName(
metrics.Namespace, subsystemClientFactory, metrics.NameClientRefs)

// TODO: update to use Native Histograms once it is no longer an experimental Prometheus feature
// ref: https://github.com/prometheus/prometheus/milestone/10
clientFactoryOperationTimes = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Expand Down

0 comments on commit 6b94ecc

Please sign in to comment.