Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix cke_node_reboot_status metrics #660

Merged
merged 3 commits into from
Sep 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 71 additions & 4 deletions metrics/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import (
"github.com/cybozu-go/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
v3 "go.etcd.io/etcd/client/v3"
)

type logger struct{}
Expand Down Expand Up @@ -40,10 +39,13 @@ type metricGroup struct {
// This abstraction exists so that tests can substitute a mock for the real storage.
type storage interface {
// NOTE(review): presumably reports whether sabakan integration is turned off — confirm against the implementation.
IsSabakanDisabled(context.Context) (bool, error)
// GetRebootsEntries returns reboot queue entries; nodeMetricsCollector.Collect counts them and derives per-node status from them.
GetRebootsEntries(ctx context.Context) ([]*cke.RebootQueueEntry, error)
// GetCluster returns the cluster definition; nodeMetricsCollector.Collect reads its Nodes field.
GetCluster(ctx context.Context) (*cke.Cluster, error)
}

// NewCollector returns a new prometheus.Collector.
func NewCollector(client *v3.Client) prometheus.Collector {
func NewCollector(storage storage) prometheus.Collector {

return &collector{
metrics: map[string]metricGroup{
"leader": {
Expand All @@ -55,15 +57,15 @@ func NewCollector(client *v3.Client) prometheus.Collector {
isAvailable: isOperationPhaseAvailable,
},
"reboot": {
collectors: []prometheus.Collector{rebootQueueEntries, rebootQueueItems, nodeRebootStatus},
collectors: []prometheus.Collector{nodeMetricsCollector{storage}},
isAvailable: isRebootAvailable,
},
"sabakan_integration": {
collectors: []prometheus.Collector{sabakanIntegrationSuccessful, sabakanIntegrationTimestampSeconds, sabakanWorkers, sabakanUnusedMachines},
isAvailable: isSabakanIntegrationAvailable,
},
},
storage: &cke.Storage{Client: client},
storage: storage,
}
}

Expand Down Expand Up @@ -120,3 +122,68 @@ func (c collector) Collect(ch chan<- prometheus.Metric) {
}
wg.Wait()
}

// nodeMetricsCollector implements prometheus.Collector interface.
type nodeMetricsCollector struct {
morimoto-cybozu marked this conversation as resolved.
Show resolved Hide resolved
storage storage
}

var _ prometheus.Collector = &nodeMetricsCollector{}

// Describe sends the descriptor of every reboot metric to ch, in the order
// rebootQueueEntries, rebootQueueItems, nodeRebootStatus.
func (c nodeMetricsCollector) Describe(ch chan<- *prometheus.Desc) {
	descs := [...]*prometheus.Desc{
		rebootQueueEntries,
		rebootQueueItems,
		nodeRebootStatus,
	}
	for _, d := range descs {
		ch <- d
	}
}

func (c nodeMetricsCollector) Collect(ch chan<- prometheus.Metric) {
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
defer cancel()

rqEntries, err := c.storage.GetRebootsEntries(ctx)
if err != nil {
log.Error("failed to get reboots entries", map[string]interface{}{
log.FnError: err,
})
return
morimoto-cybozu marked this conversation as resolved.
Show resolved Hide resolved
}

cluster, err := c.storage.GetCluster(ctx)
if err != nil {
log.Error("failed to get cluster", map[string]interface{}{
log.FnError: err,
})
return
morimoto-cybozu marked this conversation as resolved.
Show resolved Hide resolved
}
itemCounts := cke.CountRebootQueueEntries(rqEntries)
nodeStatus := cke.BuildNodeRebootStatus(cluster.Nodes, rqEntries)

ch <- prometheus.MustNewConstMetric(
rebootQueueEntries,
prometheus.GaugeValue,
float64(len(rqEntries)),
)
for status, count := range itemCounts {
ch <- prometheus.MustNewConstMetric(
rebootQueueItems,
prometheus.GaugeValue,
float64(count),
status,
)
}
for node, statuses := range nodeStatus {
for status, matches := range statuses {
value := float64(0)
if matches {
value = 1
}
ch <- prometheus.MustNewConstMetric(
nodeRebootStatus,
prometheus.GaugeValue,
value,
node,
status,
)
}
}
}
32 changes: 14 additions & 18 deletions metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,29 +33,25 @@ var operationPhaseTimestampSeconds = prometheus.NewGauge(
},
)

var rebootQueueEntries = prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: namespace,
Name: "reboot_queue_entries",
Help: "The number of reboot queue entries remaining.",
},
// rebootQueueEntries describes the "reboot_queue_entries" metric.
// nodeMetricsCollector.Collect emits it as a gauge holding the number of
// reboot queue entries.
var rebootQueueEntries = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "reboot_queue_entries"),
"The number of reboot queue entries remaining.",
nil, // no variable labels
nil, // no constant labels
)

var rebootQueueItems = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: namespace,
Name: "reboot_queue_items",
Help: "The number of reboot queue entries remaining per status.",
},
// rebootQueueItems describes the "reboot_queue_items" metric.
// nodeMetricsCollector.Collect emits one gauge sample per "status" label value.
var rebootQueueItems = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "reboot_queue_items"),
"The number of reboot queue entries remaining per status.",
[]string{"status"},
nil, // no constant labels
)

var nodeRebootStatus = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: namespace,
Name: "node_reboot_status",
Help: "The reboot status of a node.",
}, []string{"node", "status"},
// nodeRebootStatus describes the "node_reboot_status" metric.
// nodeMetricsCollector.Collect emits one gauge sample per ("node", "status")
// pair, with value 1 when the status matches and 0 otherwise.
var nodeRebootStatus = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "node_reboot_status"),
"The reboot status of a node.",
[]string{"node", "status"},
nil, // no constant labels
)

var sabakanIntegrationSuccessful = prometheus.NewGauge(
Expand Down
30 changes: 0 additions & 30 deletions metrics/updater.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,36 +39,6 @@ func isOperationPhaseAvailable(_ context.Context, _ storage) (bool, error) {
return isLeader, nil
}

// UpdateRebootQueueEntries sets the "reboot_queue_entries" gauge to numEntries.
func UpdateRebootQueueEntries(numEntries int) {
	value := float64(numEntries)
	rebootQueueEntries.Set(value)
}

// UpdateRebootQueueItems sets the "reboot_queue_items" gauge for every status
// present in counts. Statuses that are absent from counts are left untouched.
func UpdateRebootQueueItems(counts map[string]int) {
	for status, n := range counts {
		labels := map[string]string{"status": status}
		rebootQueueItems.With(labels).Set(float64(n))
	}
}

// UpdateNodeRebootStatus sets the "node_reboot_status" gauge for every
// (node, status) pair in nodeStatus: 1 when the pair's flag is true, 0 when
// it is false. Pairs that are absent from nodeStatus are left untouched.
func UpdateNodeRebootStatus(nodeStatus map[string]map[string]bool) {
	for node, flags := range nodeStatus {
		for status, on := range flags {
			var v float64
			if on {
				v = 1
			}
			labels := map[string]string{
				"node":   node,
				"status": status,
			}
			nodeRebootStatus.With(labels).Set(v)
		}
	}
}

// isRebootAvailable is the isAvailable check of the "reboot" metric group.
// It ignores both arguments and returns the isLeader value (defined elsewhere
// in the package) together with a nil error.
func isRebootAvailable(_ context.Context, _ storage) (bool, error) {
return isLeader, nil
}
Expand Down
Loading