Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add client object refs metrics #814

Draft
wants to merge 1 commit into
base: VAULT-27944/core-prune-orphaned-vault-clients
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions internal/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,14 @@ const (

NameConfig = "config"
NameLength = "length"
NameTainted = "tainted"
NameOperationsTotal = "operations_total"
NameOperationsErrorsTotal = "operations_errors_total"
NameOperationsTimeSeconds = "operations_time_seconds"
NameRequestsTotal = "requests_total"
NameRequestsErrorsTotal = "requests_errors_total"
NameTaintedClients = "tainted_clients"
NameClientRefs = "client_refs"
)

var ResourceStatus = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Expand Down
34 changes: 34 additions & 0 deletions internal/vault/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@ import (
// In the case where the return value is true, the Client will be removed from the cache.
type ClientCachePruneFilterFunc func(Client) bool

type CacheStats struct {
Tainted int
Refs int
Len int
}

// ClientCache provides an interface for Caching a Client.
type ClientCache interface {
Get(ClientCacheKey) (Client, bool)
Expand All @@ -26,6 +32,7 @@ type ClientCache interface {
Prune(filterFunc ClientCachePruneFilterFunc) []Client
Contains(key ClientCacheKey) bool
Purge() []ClientCacheKey
Stats() CacheStats
}

var _ ClientCache = (*clientCache)(nil)
Expand All @@ -40,6 +47,33 @@ type clientCache struct {
evictionCloneGauge prometheus.Gauge
hitCloneCounter prometheus.Counter
missCloneCounter prometheus.Counter
cacheStats *CacheStats
}

func (c *clientCache) Stats() CacheStats {
// TODO: cache this result somehow.
if c.cacheStats != nil {
return CacheStats{
Len: c.cacheStats.Len,
Refs: c.cacheStats.Refs,
}
}

stats := CacheStats{
Len: c.Len(),
}
for _, key := range c.Keys() {
if client, ok := c.cache.Peek(key); ok {
if client.Tainted() {
stats.Tainted++
}
if client.Stat() != nil {
stats.Refs += client.Stat().RefCount()
}
}
}

return stats
}

func (c *clientCache) Keys() []ClientCacheKey {
Expand Down
33 changes: 28 additions & 5 deletions internal/vault/cache_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,26 +47,41 @@ var (
// metricsFQNClientCacheEvictions for the ClientCache.
metricsFQNClientCloneCacheEvictions = prometheus.BuildFQName(
metrics.Namespace, subsystemClientCloneCache, "evictions")

// metricsFQNClientCacheTaintedClients for the ClientCache.
metricsFQNClientCacheTaintedClients = prometheus.BuildFQName(
metrics.Namespace, subsystemClientCache, metrics.NameTainted)

// metricsFQNClientCacheClientRefs for the ClientCache.
metricsFQNClientCacheClientRefs = prometheus.BuildFQName(
metrics.Namespace, subsystemClientCache, metrics.NameClientRefs)
)

var _ prometheus.Collector = (*clientCacheCollector)(nil)

// clientCacheCollector provides a prometheus.Collector for ClientCache metrics.
type clientCacheCollector struct {
cache ClientCache
size float64
sizeDesc *prometheus.Desc
lenDesc *prometheus.Desc
cache ClientCache
size float64
sizeDesc *prometheus.Desc
lenDesc *prometheus.Desc
taintedDesc *prometheus.Desc
refsDesc *prometheus.Desc
}

func (c clientCacheCollector) Describe(ch chan<- *prometheus.Desc) {
ch <- c.sizeDesc
ch <- c.lenDesc
ch <- c.taintedDesc
ch <- c.refsDesc
}

func (c clientCacheCollector) Collect(ch chan<- prometheus.Metric) {
stats := c.cache.Stats()
ch <- prometheus.MustNewConstMetric(c.sizeDesc, prometheus.GaugeValue, c.size)
ch <- prometheus.MustNewConstMetric(c.lenDesc, prometheus.GaugeValue, float64(c.cache.Len()))
ch <- prometheus.MustNewConstMetric(c.lenDesc, prometheus.GaugeValue, float64(stats.Len))
ch <- prometheus.MustNewConstMetric(c.taintedDesc, prometheus.GaugeValue, float64(stats.Tainted))
ch <- prometheus.MustNewConstMetric(c.refsDesc, prometheus.GaugeValue, float64(stats.Refs))
}

func newClientCacheCollector(cache ClientCache, size int) prometheus.Collector {
Expand All @@ -81,5 +96,13 @@ func newClientCacheCollector(cache ClientCache, size int) prometheus.Collector {
metricsFQNClientCacheLength,
"Number of Vault Clients in the cache.",
nil, nil),
taintedDesc: prometheus.NewDesc(
metricsFQNClientCacheTaintedClients,
"Number of tainted Vault Clients in the cache.",
nil, nil),
refsDesc: prometheus.NewDesc(
metricsFQNClientCacheClientRefs,
"Total number of client object references.",
nil, nil),
}
}
15 changes: 14 additions & 1 deletion internal/vault/cache_metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ func Test_clientCacheCollector_Collect(t *testing.T) {

for i := 0; i < tt.clientCount; i++ {
_, err := cache.Add(&defaultClient{
clientStat: &clientStat{
refCount: 1,
},
authObj: &secretsv1beta1.VaultAuth{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("auth-%d", i),
Expand Down Expand Up @@ -79,7 +82,7 @@ func Test_clientCacheCollector_Collect(t *testing.T) {
require.NoError(t, err)

var found int
assert.Len(t, mfs, 2)
assert.Len(t, mfs, 4)
for _, mf := range mfs {
name := mf.GetName()
m := mf.GetMetric()
Expand All @@ -96,6 +99,16 @@ func Test_clientCacheCollector_Collect(t *testing.T) {
found++
assert.Equal(t, float64(tt.size), *m[0].Gauge.Value)
}

if name == metricsFQNClientCacheTaintedClients {
found++
assert.Equal(t, float64(0), *m[0].Gauge.Value)
}

if name == metricsFQNClientCacheClientRefs {
found++
assert.Equal(t, float64(tt.size), *m[0].Gauge.Value)
}
}
require.Equal(t, len(mfs), found, "not all metrics found")
})
Expand Down
23 changes: 23 additions & 0 deletions internal/vault/client_factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ type cachingClientFactory struct {
requestCounterVec *prometheus.CounterVec
requestErrorCounterVec *prometheus.CounterVec
taintedClientGauge *prometheus.GaugeVec
clientRefGauge *prometheus.GaugeVec
revokeOnEvict bool
pruneStorageOnEvict bool
ctrlClient ctrlclient.Client
Expand Down Expand Up @@ -299,6 +300,9 @@ func (m *cachingClientFactory) onClientEvict(ctx context.Context, client ctrlcli
}

m.removeClientLock(cacheKey)
if m.clientRefGauge != nil {
m.clientRefGauge.DeleteLabelValues(cacheKey.String())
}
}

// Restore will attempt to restore a Client from storage. If storage is not enabled then no restoration will take place.
Expand Down Expand Up @@ -590,6 +594,11 @@ func (m *cachingClientFactory) updateClientStatsAfterGet(ctx context.Context, ca
"creationTimestamp", c.Stat().CreationTimestamp(),
"reason", decrementReason,
)
if m.clientRefGauge != nil {
m.clientRefGauge.WithLabelValues(cacheKey.String()).Set(
float64(c.Stat().RefCount()),
)
}
// send the client to the cache pruner.
m.orphanPrunerClientCh <- c
}
Expand All @@ -604,6 +613,11 @@ func (m *cachingClientFactory) updateClientStatsAfterGet(ctx context.Context, ca
"creationTimestamp", c.Stat().CreationTimestamp(),
"reason", incrementReason,
)
if m.clientRefGauge != nil {
m.clientRefGauge.WithLabelValues(cacheKey.String()).Set(
float64(c.Stat().RefCount()),
)
}
}
}
}
Expand Down Expand Up @@ -1004,6 +1018,14 @@ func NewCachingClientFactory(ctx context.Context, client ctrlclient.Client, cach
metrics.LabelCacheKey,
},
),
clientRefGauge: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: metricsFQNClientRefs,
Help: "Client factory number of object references",
}, []string{
metrics.LabelCacheKey,
},
),
}

if config.CollectClientCacheMetrics {
Expand All @@ -1016,6 +1038,7 @@ func NewCachingClientFactory(ctx context.Context, client ctrlclient.Client, cach
factory.requestCounterVec,
factory.requestErrorCounterVec,
factory.taintedClientGauge,
factory.clientRefGauge,
clientFactoryOperationTimes,
)
}
Expand Down
3 changes: 3 additions & 0 deletions internal/vault/client_factory_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ var (
metricsFQNClientFactoryTaintedClients = prometheus.BuildFQName(
metrics.Namespace, subsystemClientFactory, metrics.NameTaintedClients)

metricsFQNClientRefs = prometheus.BuildFQName(
metrics.Namespace, subsystemClientFactory, metrics.NameClientRefs)

// TODO: update to use Native Histograms once it is no longer an experimental Prometheus feature
// ref: https://github.com/prometheus/prometheus/milestone/10
clientFactoryOperationTimes = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Expand Down