Skip to content

Commit

Permalink
merge: branch '3605-instrumentation' into 'main'
Browse files Browse the repository at this point in the history
Push metrics via OpenTelemetry [#3605]

Closes #3605

See merge request accumulatenetwork/accumulate!1078
  • Loading branch information
firelizzard18 committed Jul 10, 2024
2 parents c1e8c87 + d12b7f3 commit e7299fe
Show file tree
Hide file tree
Showing 14 changed files with 1,199 additions and 74 deletions.
2 changes: 1 addition & 1 deletion .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,6 @@ issues:
- goheader
- gosimple

- path: ^test/util/goroutine_leaks\.go|exp/tendermint/http.go$
- path: ^(test/util/goroutine_leaks\.go|exp/tendermint/http\.go|exp/telemetry/translate\.go)$
linters:
- goheader
13 changes: 13 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,19 @@
"${workspaceFolder}/.genesis/fozzie/network.json",
]
},
{
"name": "Run bootstrap node",
"presentation": {
"group": "20-Run"
},
"type": "go",
"request": "launch",
"mode": "auto",
"program": "${workspaceFolder}/cmd/accumulated",
"args": [
"${workspaceFolder}/.nodes/devnet/bootstrap",
]
},
// Services
{
"name": "API (mainnet)",
Expand Down
51 changes: 51 additions & 0 deletions cmd/accumulated/run/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ Config:
type: Instrumentation
marshal-as: reference
pointer: true
- name: Telemetry
type: Telemetry
marshal-as: reference
pointer: true
- name: P2P
type: P2P
marshal-as: reference
Expand Down Expand Up @@ -88,6 +92,53 @@ Instrumentation:
type: p2p.Multiaddr
marshal-as: union

# Telemetry declares the schema of the telemetry configuration section.
# Every scalar field is a pointer, so an unset value can be told apart from an
# explicit false.
Telemetry:
non-binary: true
fields:
- name: Enabled
type: bool
pointer: true
- name: Stdout
type: bool
pointer: true
# Otlp holds the OTLP settings (see OtlpConfig).
- name: Otlp
type: OtlpConfig
pointer: true
marshal-as: reference
# Export is an HttpListener; presumably the endpoint metrics are served on —
# TODO confirm against the Telemetry.start implementation.
- name: Export
type: HttpListener
pointer: true
marshal-as: reference
# Rules is a repeatable list of TelemetryRule entries.
- name: Rules
type: TelemetryRule
pointer: true
repeatable: true
marshal-as: reference

# TelemetryRule declares one entry of Telemetry.Rules: a list of match strings
# plus a drop flag and a duration.
# NOTE(review): the match syntax and the exact meaning of Rate are not visible
# here — confirm against the code that consumes the rules.
TelemetryRule:
non-binary: true
fields:
- name: Match
type: string
repeatable: true
- name: Drop
type: bool
- name: Rate
type: duration

# OtlpConfig declares the settings referenced by Telemetry.Otlp: an optional
# enable flag, an endpoint, and a username/password pair.
# NOTE(review): Username and Password are plain string fields, so credentials
# sit in the configuration as-is.
OtlpConfig:
non-binary: true
fields:
- name: Enabled
type: bool
pointer: true
- name: Endpoint
type: string
- name: Username
type: string
- name: Password
type: string

Monitor:
non-binary: true
fields:
Expand Down
35 changes: 33 additions & 2 deletions cmd/accumulated/run/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,19 @@ import (
"sync"
"time"

"github.com/google/uuid"
"gitlab.com/accumulatenetwork/accumulate/exp/ioc"
"gitlab.com/accumulatenetwork/accumulate/pkg/api/v3/p2p"
"gitlab.com/accumulatenetwork/accumulate/pkg/errors"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
"golang.org/x/exp/slices"
)

// meter is the OpenTelemetry meter that owns this package's instruments.
var meter = otel.Meter("gitlab.com/accumulatenetwork/accumulate/cmd/accumulated/run")

// serviceUp tracks the number of running services, labelled by service type.
// It is incremented when a service starts and decremented when the service is
// cleaned up, so it must be an up/down counter: a plain Counter is monotonic
// and must not be given negative increments.
var serviceUp = must(meter.Int64UpDownCounter("accumulated_service_up"))

type Instance struct {
config *Config
rootDir string
Expand Down Expand Up @@ -66,6 +73,15 @@ func New(ctx context.Context, cfg *Config) (*Instance, error) {
return nil, errors.UnknownError.WithFormat("start logging: %w", err)
}

// Set the ID
setDefaultVal(&cfg.P2P, new(P2P))
setDefaultVal[PrivateKey](&cfg.P2P.Key, new(TransientPrivateKey))
if key, err := getPrivateKey(cfg.P2P.Key, inst); err != nil {
return nil, errors.UnknownError.WithFormat("load key: %w", err)
} else {
inst.id = uuid.NewSHA1(uuid.Nil, key[32:]).String()
}

return inst, nil
}

Expand Down Expand Up @@ -108,13 +124,19 @@ func (inst *Instance) StartFiltered(predicate func(Service) bool) (err error) {
}
}()

// Start instrumentation
// Start instrumentation and telemetry
setDefaultVal(&inst.config.Instrumentation, new(Instrumentation))
err = inst.config.Instrumentation.start(inst)
if err != nil {
return err
}

setDefaultVal(&inst.config.Telemetry, new(Telemetry))
err = inst.config.Telemetry.start(inst)
if err != nil {
return err
}

// Ensure the disk does not fill up (and is not currently full; requires
// logging)
free, err := diskUsage(inst.rootDir)
Expand Down Expand Up @@ -168,11 +190,20 @@ func (inst *Instance) StartFiltered(predicate func(Service) bool) (err error) {
// Start services
for _, services := range services {
for _, svc := range services {
inst.logger.InfoContext(inst.context, "Starting", "module", "run", "service", svc.Type())
slog.InfoContext(inst.context, "Starting", "module", "run", "service", svc.Type())
err := svc.start(inst)
if err != nil {
return errors.UnknownError.WithFormat("start service %v: %w", svc.Type(), err)
}

serviceUp.Add(inst.context, 1, metric.WithAttributes(
attribute.String("type", svc.Type().String())))

inst.cleanup(func(ctx context.Context) error {
serviceUp.Add(inst.context, -1, metric.WithAttributes(
attribute.String("type", svc.Type().String())))
return nil
})
}
}

Expand Down
4 changes: 4 additions & 0 deletions cmd/accumulated/run/instrumentation.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,10 @@ func (i *Instrumentation) startPprof(inst *Instance) error {
}

func (m *Monitor) start(inst *Instance) error {
if m == nil {
m = new(Monitor)
}

setDefaultPtr(&m.ProfileMemory, false) // Enabled = false
setDefaultPtr(&m.MemoryPollingRate, time.Minute) // Polling rate = every minute
setDefaultPtr(&m.AllocRateTrigger, 50<<20) // Trigger rate = 50 MiB/s
Expand Down
10 changes: 1 addition & 9 deletions cmd/accumulated/run/p2p.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,12 @@ import (
)

func (p *P2P) start(inst *Instance) error {
if p == nil {
p = new(P2P)
}
if p.Key == nil {
p.Key = new(TransientPrivateKey)
}

sk, err := getPrivateKey(p.Key, inst)
if err != nil {
return err
}

setDefaultPtr(&p.PeerDB, "")

node, err := p2p.New(p2p.Options{
Key: sk,
Network: inst.config.Network,
Expand All @@ -44,7 +36,7 @@ func (p *P2P) start(inst *Instance) error {
}
inst.p2p = node

slog.InfoContext(inst.context, "We are", "id", node.ID(), "module", "run")
slog.InfoContext(inst.context, "We are", "node-id", node.ID(), "instance-id", inst.id, "module", "run")

inst.cleanup(func(context.Context) error {
err := node.Close()
Expand Down
3 changes: 2 additions & 1 deletion cmd/accumulated/run/subnode.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
package run

import (
"log/slog"
"sync"

"gitlab.com/accumulatenetwork/accumulate/exp/ioc"
Expand Down Expand Up @@ -57,7 +58,7 @@ func (s *SubnodeService) start(inst *Instance) error {
// Start services
for _, services := range services {
for _, svc := range services {
inst.logger.InfoContext(inst.context, "Starting", "subnode", s.Name, "service", svc.Type(), "module", "run")
slog.InfoContext(inst.context, "Starting", "subnode", s.Name, "service", svc.Type(), "module", "run")
err := svc.start(sub)
if err != nil {
return errors.UnknownError.WithFormat("start service %v: %w", svc.Type(), err)
Expand Down
Loading

0 comments on commit e7299fe

Please sign in to comment.