Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: metrics for services and checks #519

Open
wants to merge 49 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
f76470c
poc: a metrics module for pebble
IronCore864 Nov 13, 2024
6d8ee59
chore: undo unnecessary change
IronCore864 Nov 14, 2024
b4abc9a
chore: undo unnecessary change
IronCore864 Nov 14, 2024
a274276
chore: undo unnecessary change
IronCore864 Nov 14, 2024
a2c07e6
chore: metrics identity basic auth poc
IronCore864 Nov 26, 2024
4ebb633
chore: a poc for metrics with labels
IronCore864 Nov 27, 2024
7468b95
poc: remove adding identities using env vars according to comment in …
IronCore864 Nov 28, 2024
790a8f9
chore: update tests for the metrics lib poc
IronCore864 Nov 28, 2024
272005b
chore: refactor identities and access according to spec review
IronCore864 Dec 9, 2024
5be3e96
feat: use sha512 to verify password
IronCore864 Jan 21, 2025
a6c374d
feat: move the metrics api to /v1/metrics
IronCore864 Jan 21, 2025
1bd54cb
chore: remove Username from apiBasicIdentity
IronCore864 Jan 21, 2025
98ea11e
chore: revert changes on user state
IronCore864 Jan 21, 2025
68c18b7
Merge branch 'master' into poc-custom-metrics-lib
IronCore864 Jan 21, 2025
7fc255e
chore: fix failed identity tests
IronCore864 Jan 21, 2025
31a0617
test: unit tests for basic identity
IronCore864 Jan 22, 2025
306f2d3
feat: add basic identity
IronCore864 Jan 23, 2025
6c53491
chore: update comments
IronCore864 Jan 23, 2025
a57f041
chore: rework the metrics for services
IronCore864 Jan 24, 2025
b7a442f
chore: add metrics for checks, not done
IronCore864 Jan 24, 2025
e49419d
chore: refactor according to review and add more unit tests
IronCore864 Feb 10, 2025
8ebebb8
chore: refactor metrics, add open telemetry writer
IronCore864 Feb 11, 2025
363eaf0
Merge branch 'master' into poc-custom-metrics-lib
IronCore864 Feb 11, 2025
a1db1a6
chore: refactor according to review, fix check counter reset issue
IronCore864 Feb 11, 2025
047cc42
Merge branch 'master' into poc-custom-metrics-lib
IronCore864 Feb 11, 2025
c527344
chore: add a test for check metrics
IronCore864 Feb 12, 2025
5476299
test: add tests for open telemetry writer
IronCore864 Feb 12, 2025
9e5b65a
test: service metrics
IronCore864 Feb 12, 2025
d678766
chore: update tests
IronCore864 Feb 12, 2025
c99fa53
chore: fix linting
IronCore864 Feb 12, 2025
35794e6
Merge branch 'master' into basic-identity
IronCore864 Feb 13, 2025
c8f3aba
chore: prioritize basic type identity, add tests
IronCore864 Feb 13, 2025
af5e0b2
test: add more tests
IronCore864 Feb 13, 2025
edccbac
chore: prioritize basic type
IronCore864 Feb 13, 2025
83c1717
chore: fix some of the comments according to review
IronCore864 Feb 14, 2025
467d8fa
chore: unexport check metrics and update tests
IronCore864 Feb 14, 2025
8bc87f1
chore: use buffer in api metrics
IronCore864 Feb 14, 2025
6963e5e
chore: use writer for metrics labels
IronCore864 Feb 14, 2025
cfa73a5
test: add test for api metrics
IronCore864 Feb 14, 2025
f37f94f
chore: fix linting
IronCore864 Feb 14, 2025
7369fe3
chore: remove unnecessary changes
IronCore864 Feb 14, 2025
136ae06
chore: refactor according to review
IronCore864 Feb 17, 2025
13aa3b1
chore: create internal checkData
IronCore864 Feb 18, 2025
5fa2d92
chore: change check metrics to success count and failure count
IronCore864 Feb 18, 2025
45f206f
chore: revert unnecessary change
IronCore864 Feb 18, 2025
054b702
Merge branch 'basic-identity' into poc-custom-metrics-lib
IronCore864 Feb 18, 2025
c49f565
chore: refactor after review
IronCore864 Feb 20, 2025
8fe03da
Merge branch 'basic-identity' into poc-custom-metrics-lib
IronCore864 Feb 20, 2025
2bb6414
chore: refactor after review
IronCore864 Feb 20, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 14 additions & 5 deletions internals/daemon/api_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
package daemon

import (
"bytes"
"net/http"

"github.com/canonical/pebble/internals/logger"
Expand All @@ -37,18 +38,26 @@ type metricsResponse struct {
}

func (r metricsResponse) ServeHTTP(w http.ResponseWriter, req *http.Request) {
openTelemetryWriter := metrics.NewOpenTelemetryWriter(w)
var buf bytes.Buffer
metricsWriter := metrics.NewOpenTelemetryWriter(&buf)

err := r.svcMgr.WriteMetrics(openTelemetryWriter)
err := r.svcMgr.WriteMetrics(metricsWriter)
if err != nil {
logger.Noticef("Cannot write to HTTP response: %v", err.Error())
logger.Noticef("Cannot write service metrics: %v", err)
http.Error(w, "# internal server error", http.StatusInternalServerError)
return
}

err = r.chkMgr.WriteMetrics(openTelemetryWriter)
err = r.chkMgr.WriteMetrics(metricsWriter)
if err != nil {
logger.Noticef("Cannot write to HTTP response: %v", err.Error())
logger.Noticef("Cannot write check metrics: %v", err)
http.Error(w, "# internal server error", http.StatusInternalServerError)
return
}

_, err = buf.WriteTo(w)
if err != nil {
logger.Noticef("Cannot write to HTTP response: %v", err)
http.Error(w, "# internal server error", http.StatusInternalServerError)
return
}
Expand Down
78 changes: 78 additions & 0 deletions internals/daemon/api_metrics_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
// Copyright (c) 2025 Canonical Ltd
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License version 3 as
// published by the Free Software Foundation.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

package daemon

import (
"bytes"
"net/http"
"net/http/httptest"
"time"

. "gopkg.in/check.v1"

"github.com/canonical/pebble/internals/overlord/servstate"
)

// TestMetrics starts a single service through the services API, waits for it
// to become active, and then checks that GET /v1/metrics responds with status
// 200 and exactly the expected start-count and active metrics for that
// service.
func (s *apiSuite) TestMetrics(c *C) {
	// "test1" must be nested under "services:" (and its fields under
	// "test1:") for the layer to actually define the service.
	writeTestLayer(s.pebbleDir, `
services:
    test1:
        override: replace
        command: sleep 10
`)
	d := s.daemon(c)
	d.overlord.Loop()

	// Start test service.
	payload := bytes.NewBufferString(`{"action": "start", "services": ["test1"]}`)
	req, err := http.NewRequest("POST", "/v1/services", payload)
	c.Assert(err, IsNil)
	rsp := v1PostServices(apiCmd("/v1/services"), req, nil).(*resp)
	rec := httptest.NewRecorder()
	rsp.ServeHTTP(rec, req)
	c.Check(rec.Result().StatusCode, Equals, 202)

	// Wait for it to be running. Poll every 5ms and give up after a little
	// over 50 attempts (roughly 250ms) so a broken start fails fast instead
	// of hanging the test run.
	serviceMgr := d.overlord.ServiceManager()
	for i := 0; ; i++ {
		if i > 50 {
			c.Fatalf("timed out waiting for service to start")
		}
		services, err := serviceMgr.Services([]string{"test1"})
		c.Assert(err, IsNil)
		if len(services) == 1 && services[0].Current == servstate.StatusActive {
			break
		}
		time.Sleep(5 * time.Millisecond)
	}

	// Get metrics.
	metricsCmd := apiCmd("/v1/metrics")
	metricsReq, err := http.NewRequest("GET", "/v1/metrics", nil)
	c.Assert(err, IsNil)
	metricsRec := httptest.NewRecorder()
	metricsRsp := v1GetMetrics(metricsCmd, metricsReq, nil).(metricsResponse)
	metricsRsp.ServeHTTP(metricsRec, metricsReq)
	c.Check(metricsRec.Code, Equals, 200)

	// The [1:] drops the leading newline of the raw string so the literal
	// can start on its own line.
	expected := `
# HELP pebble_service_start_count Number of times the service has started
# TYPE pebble_service_start_count counter
pebble_service_start_count{service="test1"} 1
# HELP pebble_service_active Whether the service is currently active (1) or not (0)
# TYPE pebble_service_active gauge
pebble_service_active{service="test1"} 1
`[1:]
	c.Assert(metricsRec.Body.String(), Equals, expected)
}
1 change: 0 additions & 1 deletion internals/daemon/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,6 @@ func (d *Daemon) Init() error {
}

logger.Noticef("Started daemon.")

return nil
}

Expand Down
45 changes: 28 additions & 17 deletions internals/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ package metrics
import (
"fmt"
"io"
"strings"
)

type MetricType int
Expand All @@ -27,6 +26,17 @@ const (
TypeGaugeInt
)

// String returns the textual name of the metric type: "counter" for
// TypeCounterInt and "gauge" for TypeGaugeInt. It panics when called on any
// other value.
func (mt MetricType) String() string {
	if mt == TypeCounterInt {
		return "counter"
	}
	if mt == TypeGaugeInt {
		return "gauge"
	}
	panic("invalid metric type")
}

// Metric represents a single metric.
type Metric struct {
Name string
Expand Down Expand Up @@ -63,28 +73,29 @@ func NewOpenTelemetryWriter(w io.Writer) *OpenTelemetryWriter {
}

func (otw *OpenTelemetryWriter) Write(m Metric) error {
var metricType string
switch m.Type {
case TypeCounterInt:
metricType = "counter"
case TypeGaugeInt:
metricType = "gauge"
if m.Comment != "" {
_, err := fmt.Fprintf(otw.w, "# HELP %s %s\n", m.Name, m.Comment)
if err != nil {
return err
}
}

_, err := fmt.Fprintf(otw.w, "# HELP %s %s\n", m.Name, m.Comment)
if err != nil {
return err
}
_, err = fmt.Fprintf(otw.w, "# TYPE %s %s\n", m.Name, metricType)
_, err := fmt.Fprintf(otw.w, "# TYPE %s %s\n", m.Name, m.Type.String())
if err != nil {
return err
}

labels := make([]string, len(m.Labels))
for i, label := range m.Labels {
labels[i] = fmt.Sprintf("%s=%s", label.key, label.value)
io.WriteString(otw.w, m.Name)
if len(m.Labels) > 0 {
io.WriteString(otw.w, "{")
for i, label := range m.Labels {
if i > 0 {
io.WriteString(otw.w, ",")
}
fmt.Fprintf(otw.w, "%s=%q", label.key, label.value) // Use %q to quote values.
}
io.WriteString(otw.w, "}")
}

_, err = fmt.Fprintf(otw.w, "%s{%s} %d\n", m.Name, strings.Join(labels, ","), m.ValueInt64)
fmt.Fprintf(otw.w, " %d\n", m.ValueInt64)
return err
}
32 changes: 20 additions & 12 deletions internals/metrics/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,17 @@ package metrics_test

import (
"bytes"
"testing"

. "gopkg.in/check.v1"

"github.com/canonical/pebble/internals/metrics"
)

// Test is the "go test" entry point for this package; it hands control to
// gocheck's TestingT so the suites registered with Suite are run.
func Test(t *testing.T) {
TestingT(t)
}

// OpenTelemetryWriterSuite groups the gocheck tests for the OpenTelemetryWriter.
type OpenTelemetryWriterSuite struct{}

// Register the suite with gocheck.
var _ = Suite(&OpenTelemetryWriterSuite{})
Expand All @@ -44,10 +49,11 @@ func (s *OpenTelemetryWriterSuite) TestOpenTelemetryWriter(c *C) {
metrics.NewLabel("key2", "value2"),
},
},
expected: `# HELP my_counter A simple counter
expected: `
# HELP my_counter A simple counter
# TYPE my_counter counter
my_counter{key1=value1,key2=value2} 42
`,
my_counter{key1="value1",key2="value2"} 42
`[1:],
},
{
name: "GaugeInt",
Expand All @@ -58,10 +64,11 @@ my_counter{key1=value1,key2=value2} 42
Comment: "A simple gauge",
Labels: []metrics.Label{}, // Test with no labels
},
expected: `# HELP my_gauge A simple gauge
expected: `
# HELP my_gauge A simple gauge
# TYPE my_gauge gauge
my_gauge{} 1
`,
my_gauge 1
`[1:],
},
{
name: "NoComment", // Test without comment
Expand All @@ -71,10 +78,10 @@ my_gauge{} 1
ValueInt64: 42,
Labels: []metrics.Label{metrics.NewLabel("env", "prod")},
},
expected: `# HELP no_comment_metric
expected: `
# TYPE no_comment_metric counter
no_comment_metric{env=prod} 42
`,
no_comment_metric{env="prod"} 42
`[1:],
},

{
Expand All @@ -89,10 +96,11 @@ no_comment_metric{env=prod} 42
metrics.NewLabel("key-with-dash", "value-with-dash"),
},
},
expected: `# HELP special_chars Metric with special characters
expected: `
# HELP special_chars Metric with special characters
# TYPE special_chars gauge
special_chars{key_with_underscore=value_with_underscore,key-with-dash=value-with-dash} 42
`,
special_chars{key_with_underscore="value_with_underscore",key-with-dash="value-with-dash"} 42
`[1:],
},
}

Expand Down
2 changes: 1 addition & 1 deletion internals/overlord/checkstate/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ func (m *CheckManager) doPerformCheck(task *state.Task, tomb *tombpkg.Tomb) erro
select {
case <-ticker.C:
err := runCheck(tomb.Context(nil), chk, config.Timeout.Value)
m.incPerformCheckCount(config)
if !tomb.Alive() {
return checkStopped(config.Name, task.Kind(), tomb.Err())
}
Expand Down Expand Up @@ -91,7 +92,6 @@ func (m *CheckManager) doPerformCheck(task *state.Task, tomb *tombpkg.Tomb) erro
task.Set(checkDetailsAttr, &details)
m.state.Unlock()
}
m.incPerformCheckCount(config)

case <-tomb.Dying():
return checkStopped(config.Name, task.Kind(), tomb.Err())
Expand Down
Loading
Loading