Skip to content

Commit 8661c5f

Browse files
authored
Expose prometheus compatible metrics endpoint (#357)
* Expose prometheus compatible metrics endpoint * add docs for prometheus endpoint * Add explicit callout for additional options for metrics command * move example daemonset out of README to its own file * rename to metrics_server.go * copyright and license header * rename shortenMetricName to sanitizeMetricName to match what it does * remove extra log message * remove extra log message * remove debug message * only create prometheus metrics if enabled * fix conditional for updating prom metrics in printMetricsAsync
1 parent 6865e4a commit 8661c5f

File tree

7 files changed

+265
-42
lines changed

7 files changed

+265
-42
lines changed

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@ If neither sudo nor root access is available, an administrator must apply the fo
5252

5353
Once the configuration changes are applied, use the `--noroot` flag on the command line, for example, `perfspect metrics --noroot`.
5454

55+
##### Prometheus Endpoint
56+
The `metrics` command can expose metrics through a Prometheus-compatible `/metrics` HTTP endpoint, which allows integration with Prometheus monitoring systems. To enable the endpoint, use the `--prometheus-server` flag, for example, `perfspect metrics --prometheus-server`. By default, the endpoint listens on port 9090 on all interfaces (`:9090`). To listen on a different address or port, pass a `host:port` value to the `--prometheus-server-addr` flag; setting this flag also enables the endpoint. See the [example daemonset](docs/perfspect-daemonset.md) for deploying in Kubernetes.
57+
5558
See `perfspect metrics -h` for the extensive set of options and examples.
5659

5760
#### Report Command

cmd/metrics/metrics.go

Lines changed: 80 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"fmt"
1212
"io"
1313
"log/slog"
14+
"net"
1415
"os"
1516
"os/exec"
1617
"os/signal"
@@ -28,6 +29,8 @@ import (
2829
"perfspect/internal/target"
2930
"perfspect/internal/util"
3031

32+
"github.com/prometheus/client_golang/prometheus"
33+
3134
"slices"
3235

3336
"github.com/spf13/cobra"
@@ -102,16 +105,18 @@ var (
102105
flagLive bool
103106
flagTransactionRate float64
104107
// advanced options
105-
flagShowMetricNames bool
106-
flagMetricsList []string
107-
flagEventFilePath string
108-
flagMetricFilePath string
109-
flagPerfPrintInterval int
110-
flagPerfMuxInterval int
111-
flagNoRoot bool
112-
flagWriteEventsToFile bool
113-
flagInput string
114-
flagNoSystemSummary bool
108+
flagShowMetricNames bool
109+
flagMetricsList []string
110+
flagEventFilePath string
111+
flagMetricFilePath string
112+
flagPerfPrintInterval int
113+
flagPerfMuxInterval int
114+
flagNoRoot bool
115+
flagWriteEventsToFile bool
116+
flagInput string
117+
flagNoSystemSummary bool
118+
flagPrometheusServer bool
119+
flagPrometheusServerAddr string
115120

116121
// positional arguments
117122
argsApplication []string
@@ -131,16 +136,18 @@ const (
131136
flagLiveName = "live"
132137
flagTransactionRateName = "txnrate"
133138

134-
flagShowMetricNamesName = "list"
135-
flagMetricsListName = "metrics"
136-
flagEventFilePathName = "eventfile"
137-
flagMetricFilePathName = "metricfile"
138-
flagPerfPrintIntervalName = "interval"
139-
flagPerfMuxIntervalName = "muxinterval"
140-
flagNoRootName = "noroot"
141-
flagWriteEventsToFileName = "raw"
142-
flagInputName = "input"
143-
flagNoSystemSummaryName = "no-summary"
139+
flagShowMetricNamesName = "list"
140+
flagMetricsListName = "metrics"
141+
flagEventFilePathName = "eventfile"
142+
flagMetricFilePathName = "metricfile"
143+
flagPerfPrintIntervalName = "interval"
144+
flagPerfMuxIntervalName = "muxinterval"
145+
flagNoRootName = "noroot"
146+
flagWriteEventsToFileName = "raw"
147+
flagInputName = "input"
148+
flagNoSystemSummaryName = "no-summary"
149+
flagPrometheusServerName = "prometheus-server"
150+
flagPrometheusServerAddrName = "prometheus-server-addr"
144151
)
145152

146153
const (
@@ -192,6 +199,8 @@ func init() {
192199
Cmd.Flags().BoolVar(&flagWriteEventsToFile, flagWriteEventsToFileName, false, "")
193200
Cmd.Flags().StringVar(&flagInput, flagInputName, "", "")
194201
Cmd.Flags().BoolVar(&flagNoSystemSummary, flagNoSystemSummaryName, false, "")
202+
Cmd.Flags().BoolVar(&flagPrometheusServer, flagPrometheusServerName, false, "")
203+
Cmd.Flags().StringVar(&flagPrometheusServerAddr, flagPrometheusServerAddrName, ":9090", "")
195204

196205
common.AddTargetFlags(Cmd)
197206

@@ -280,6 +289,14 @@ func getFlagGroups() []common.FlagGroup {
280289
Name: flagTransactionRateName,
281290
Help: "number of transactions per second. Will divide relevant metrics by transactions/second.",
282291
},
292+
{
293+
Name: flagPrometheusServerName,
294+
Help: "enable promtheus metrics server",
295+
},
296+
{
297+
Name: flagPrometheusServerAddrName,
298+
Help: "address (e.g., host:port) to start Prometheus metrics server on (implies --promtheus-server true)",
299+
},
283300
}
284301
groups = append(groups, common.FlagGroup{
285302
GroupName: "Output Options",
@@ -518,6 +535,23 @@ func validateFlags(cmd *cobra.Command, args []string) error {
518535
if err := common.ValidateTargetFlags(cmd); err != nil {
519536
return common.FlagValidationError(cmd, err.Error())
520537
}
538+
// prometheus server address
539+
if cmd.Flags().Changed(flagPrometheusServerAddrName) {
540+
flagPrometheusServer = true
541+
_, port, err := net.SplitHostPort(flagPrometheusServerAddr)
542+
if err != nil {
543+
slog.Error(err.Error())
544+
err = fmt.Errorf("invalid prometheus server address format: %s, expected host:port", flagPrometheusServerAddr)
545+
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
546+
return err
547+
}
548+
if _, err := strconv.Atoi(port); err != nil {
549+
slog.Error(err.Error())
550+
err = fmt.Errorf("invalid port in prometheus server address: %s, port must be an integer", flagPrometheusServerAddr)
551+
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
552+
return err
553+
}
554+
}
521555
return nil
522556
}
523557

@@ -921,7 +955,7 @@ func runCmd(cmd *cobra.Command, args []string) error {
921955
return nil
922956
}
923957
// create the local output directory
924-
if !flagLive {
958+
if !flagLive && !flagPrometheusServer {
925959
err = common.CreateOutputDir(localOutputDir)
926960
if err != nil {
927961
err = fmt.Errorf("failed to create output directory: %w", err)
@@ -948,6 +982,13 @@ func runCmd(cmd *cobra.Command, args []string) error {
948982
if flagLive {
949983
multiSpinner.Finish()
950984
}
985+
// Start Prometheus server if requested
986+
if flagPrometheusServer && flagPrometheusServerAddr != "" {
987+
multiSpinner.Finish()
988+
fmt.Printf("starting metrics server on %s\n", flagPrometheusServerAddr)
989+
startPrometheusServer(flagPrometheusServerAddr)
990+
cmd.SilenceUsage = true
991+
}
951992
// wait for all collectOnTarget goroutines to finish
952993
collectOnTargetWG.Wait()
953994
// finalize the spinner status, capture any errors, and create output files
@@ -983,7 +1024,7 @@ func runCmd(cmd *cobra.Command, args []string) error {
9831024
}
9841025
}
9851026
}
986-
if !flagLive {
1027+
if !flagLive && !flagPrometheusServer {
9871028
multiSpinner.Finish()
9881029
printOutputFileNames(allPrintedFileNames)
9891030
}
@@ -1133,6 +1174,23 @@ func prepareMetrics(targetContext *targetContext, localTempDir string, channelEr
11331174
channelError <- targetError{target: myTarget, err: err}
11341175
return
11351176
}
1177+
if flagPrometheusServer {
1178+
for _, def := range targetContext.metricDefinitions {
1179+
desc := fmt.Sprintf("%s (expr: %s)", def.Name, def.Expression)
1180+
name := promMetricPrefix + sanitizeMetricName(def.Name)
1181+
gauge := prometheus.NewGaugeVec(
1182+
prometheus.GaugeOpts{
1183+
Name: name,
1184+
Help: desc,
1185+
},
1186+
[]string{"socket", "cpu", "cgroup", "pid", "cmd"},
1187+
)
1188+
promMetrics[name] = gauge
1189+
}
1190+
for _, m := range promMetrics {
1191+
prometheus.MustRegister(m)
1192+
}
1193+
}
11361194
channelError <- targetError{target: myTarget, err: nil}
11371195
}
11381196

cmd/metrics/metrics_server.go

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
package metrics
2+
3+
// Copyright 2025 Google LLC.
4+
// SPDX-License-Identifier: BSD-3-Clause
5+
6+
import (
	"log/slog"
	"math"
	"net/http"
	"regexp"
	"strings"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)
16+
17+
const promMetricPrefix = "perfspect_"
18+
19+
var prometheusMetricsGaugeVec = prometheus.NewGaugeVec(
20+
prometheus.GaugeOpts{
21+
Name: "perfspect_metrics",
22+
Help: "PerfSpect metrics",
23+
},
24+
[]string{"metric_name", "socket", "cpu", "cgroup", "pid", "cmd"},
25+
)
26+
var rxTrailingChars = regexp.MustCompile(`\)$`)
27+
var promMetrics = make(map[string]*prometheus.GaugeVec)
28+
29+
func sanitizeMetricName(name string) string {
30+
sanitized := rxTrailingChars.ReplaceAllString(name, "")
31+
sanitized = strings.ReplaceAll(sanitized, "%", "pct")
32+
return sanitized
33+
}
34+
func startPrometheusServer(listenAddr string) {
35+
prometheus.MustRegister(prometheusMetricsGaugeVec)
36+
mux := http.NewServeMux()
37+
mux.Handle("/metrics", promhttp.Handler())
38+
slog.Info("Starting Prometheus metrics server", slog.String("address", listenAddr))
39+
go func() {
40+
err := http.ListenAndServe(listenAddr, mux)
41+
if err != nil && err != http.ErrServerClosed {
42+
slog.Error("Prometheus HTTP server ListenAndServe error", slog.String("error", err.Error()))
43+
}
44+
}()
45+
}
46+
47+
func updatePrometheusMetrics(metricFrames []MetricFrame) {
48+
for _, frame := range metricFrames {
49+
for _, metric := range frame.Metrics {
50+
if !math.IsNaN(metric.Value) {
51+
metricKey := promMetricPrefix + sanitizeMetricName(metric.Name)
52+
if m, ok := promMetrics[metricKey]; ok {
53+
m.WithLabelValues(
54+
frame.Socket,
55+
frame.CPU,
56+
frame.Cgroup,
57+
frame.PID,
58+
frame.Cmd,
59+
).Set(metric.Value)
60+
} else {
61+
slog.Warn("Unable to find metric", slog.String("metric", metricKey))
62+
}
63+
}
64+
}
65+
}
66+
}

cmd/metrics/print.go

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,29 +17,33 @@ import (
1717
)
1818

1919
func printMetrics(metricFrames []MetricFrame, frameCount int, targetName string, collectionStartTime time.Time, outputDir string) (printedFiles []string) {
20-
fileName, err := printMetricsTxt(metricFrames, targetName, collectionStartTime, flagLive && flagOutputFormat[0] == formatTxt, !flagLive && slices.Contains(flagOutputFormat, formatTxt), outputDir)
20+
printToFile := !flagLive && !flagPrometheusServer && slices.Contains(flagOutputFormat, formatTxt)
21+
fileName, err := printMetricsTxt(metricFrames, targetName, collectionStartTime, flagLive && flagOutputFormat[0] == formatTxt, printToFile, outputDir)
2122
if err != nil {
2223
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
2324
slog.Error(err.Error())
2425
} else if fileName != "" {
2526
printedFiles = util.UniqueAppend(printedFiles, fileName)
2627
}
27-
fileName, err = printMetricsJSON(metricFrames, targetName, collectionStartTime, flagLive && flagOutputFormat[0] == formatJSON, !flagLive && slices.Contains(flagOutputFormat, formatJSON), outputDir)
28+
printToFile = !flagLive && !flagPrometheusServer && slices.Contains(flagOutputFormat, formatJSON)
29+
fileName, err = printMetricsJSON(metricFrames, targetName, collectionStartTime, flagLive && flagOutputFormat[0] == formatJSON, printToFile, outputDir)
2830
if err != nil {
2931
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
3032
slog.Error(err.Error())
3133
} else if fileName != "" {
3234
printedFiles = util.UniqueAppend(printedFiles, fileName)
3335
}
3436
// csv is always written to file unless no files are requested -- we need it to create the summary reports
35-
fileName, err = printMetricsCSV(metricFrames, frameCount, targetName, collectionStartTime, flagLive && flagOutputFormat[0] == formatCSV, !flagLive, outputDir)
37+
printToFile = !flagLive && !flagPrometheusServer
38+
fileName, err = printMetricsCSV(metricFrames, frameCount, targetName, collectionStartTime, flagLive && flagOutputFormat[0] == formatCSV, printToFile, outputDir)
3639
if err != nil {
3740
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
3841
slog.Error(err.Error())
3942
} else if fileName != "" {
4043
printedFiles = util.UniqueAppend(printedFiles, fileName)
4144
}
42-
fileName, err = printMetricsWide(metricFrames, frameCount, targetName, collectionStartTime, flagLive && flagOutputFormat[0] == formatWide, !flagLive && slices.Contains(flagOutputFormat, formatWide), outputDir)
45+
printToFile = !flagLive && !flagPrometheusServer && slices.Contains(flagOutputFormat, formatWide)
46+
fileName, err = printMetricsWide(metricFrames, frameCount, targetName, collectionStartTime, flagLive && flagOutputFormat[0] == formatWide, printToFile, outputDir)
4347
if err != nil {
4448
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
4549
slog.Error(err.Error())
@@ -77,6 +81,9 @@ func printMetricsAsync(targetContext *targetContext, outputDir string, frameChan
7781
// block until next set of metric frames arrives, will exit loop when frameChannel is closed
7882
for metricFrames := range frameChannel {
7983
printedFiles := printMetrics(metricFrames, frameCount, targetContext.target.GetName(), targetContext.perfStartTime, outputDir)
84+
if flagPrometheusServer {
85+
updatePrometheusMetrics(metricFrames)
86+
}
8087
for _, file := range printedFiles {
8188
allPrintedFiles = util.UniqueAppend(allPrintedFiles, file)
8289
}

docs/perfspect-daemonset.md

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
## Example DaemonSet for PerfSpect for GKE
2+
3+
This is an example DaemonSet that exposes PerfSpect metrics through a Prometheus-compatible metrics endpoint. The example assumes Google Kubernetes Engine (GKE) and uses the `PodMonitoring` resource to collect metrics from that endpoint.
4+
5+
```
6+
apiVersion: apps/v1
7+
kind: DaemonSet
8+
metadata:
9+
name: perfspect
10+
namespace: default
11+
labels:
12+
name: perfspect
13+
spec:
14+
selector:
15+
matchLabels:
16+
name: perfspect
17+
template:
18+
metadata:
19+
labels:
20+
name: perfspect
21+
spec:
22+
containers:
23+
- name: perfspect
24+
image: docker.registry/user-sandbox/ar-us/perfspect
25+
imagePullPolicy: Always
26+
securityContext:
27+
privileged: true
28+
args:
29+
- "/perfspect"
30+
- "metrics"
31+
- "--log-stdout"
32+
- "--granularity"
33+
- "cpu"
34+
- "--noroot"
35+
- "--interval"
36+
- "15"
37+
- "--prometheus-server-addr"
38+
- ":9090"
39+
ports:
40+
- name: metrics-port # Name of the port, referenced by PodMonitoring
41+
containerPort: 9090 # The port your application inside the container listens on for metrics
42+
protocol: TCP
43+
resources:
44+
requests:
45+
memory: "200Mi"
46+
cpu: "500m"
47+
48+
---
49+
apiVersion: monitoring.googleapis.com/v1
50+
kind: PodMonitoring
51+
metadata:
52+
name: perfspect-podmonitoring
53+
namespace: default
54+
labels:
55+
name: perfspect
56+
spec:
57+
selector:
58+
matchLabels:
59+
name: perfspect
60+
endpoints:
61+
- port: metrics-port
62+
interval: 30s
63+
```
64+
* Replace `docker.registry/user-sandbox/ar-us/perfspect` with the location of your perfspect container image.

0 commit comments

Comments
 (0)