diff --git a/cmd/weaver-kube/deploy.go b/cmd/weaver-kube/deploy.go index ec0bada..877ceae 100644 --- a/cmd/weaver-kube/deploy.go +++ b/cmd/weaver-kube/deploy.go @@ -83,30 +83,7 @@ Container Image Names: You can specify any combination of the various options or none. E.g., listeners.foo = {public = true, serice_name = "unique_name"} - c) Observability - if nothing is specified, the kube deployer will - automatically launch Prometheus, Jaeger, Loki and Grafana to retrieve your - application's metrics, traces, logs, and to provide custom dashboards. - - If you don't want one or more of these services to run, you can simply - disable them. E.g., : - [kube.observability] - prometheus_service = "none" - jaeger_service = "none" - loki_service = "none" - grafana_service = "none" - - If you want to plugin one or more of your existing Prometheus, Jaeger, - Loki, Grafana, you can specify their service name: - [kube.observability] - prometheus_service = "your_prometheus_service_name" - jaeger_service = "your_jaeger_service_name" - loki_service = "your_loki_service_name" - grafana_service = "your_granfa_service_name" - - Note that we support only the Prometheus, Jaeger, Loki, Grafana stack for - observability right now. - - d) Configure resource requirements for the pods [1]. E.g., + c) Configure resource requirements for the pods [1]. E.g., [kube.resources] requests_cpu = "200m" requests_mem = "256Mi" diff --git a/examples/echo/weaver_gen.go b/examples/echo/weaver_gen.go index 6f5069b..01a1981 100644 --- a/examples/echo/weaver_gen.go +++ b/examples/echo/weaver_gen.go @@ -176,7 +176,7 @@ var _ weaver.Main = (*main_client_stub)(nil) // you run "go build" or "go run". var _ codegen.LatestVersion = codegen.Version[[0][20]struct{}](` -ERROR: You generated this file with 'weaver generate' v0.22.0 (codegen +ERROR: You generated this file with 'weaver generate' v0.22.1-0.20231019162801-c2294d1ae0e8 (codegen version v0.20.0). 
The generated code is incompatible with the version of the github.com/ServiceWeaver/weaver module that you're using. The weaver module version can be found in your go.mod file or by running the following command. diff --git a/go.mod b/go.mod index 057ade4..56214f3 100644 --- a/go.mod +++ b/go.mod @@ -3,14 +3,12 @@ module github.com/ServiceWeaver/weaver-kube go 1.21 require ( - github.com/ServiceWeaver/weaver v0.22.0 + github.com/ServiceWeaver/weaver v0.22.1-0.20231019162801-c2294d1ae0e8 github.com/google/uuid v1.3.1 go.opentelemetry.io/otel v1.19.0 - go.opentelemetry.io/otel/exporters/jaeger v1.16.0 - go.opentelemetry.io/otel/sdk v1.16.0 go.opentelemetry.io/otel/trace v1.19.0 golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1 - golang.org/x/sync v0.3.0 + golang.org/x/sync v0.4.0 google.golang.org/protobuf v1.31.0 k8s.io/api v0.27.4 k8s.io/apimachinery v0.27.4 @@ -54,14 +52,15 @@ require ( go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.45.0 // indirect go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.16.0 // indirect go.opentelemetry.io/otel/metric v1.19.0 // indirect - golang.org/x/mod v0.12.0 // indirect + go.opentelemetry.io/otel/sdk v1.16.0 // indirect + golang.org/x/mod v0.13.0 // indirect golang.org/x/net v0.17.0 // indirect golang.org/x/oauth2 v0.0.0-20220223155221-ee480838109b // indirect golang.org/x/sys v0.13.0 // indirect golang.org/x/term v0.13.0 // indirect golang.org/x/text v0.13.0 // indirect golang.org/x/time v0.0.0-20220210224613-90d013bbcef8 // indirect - golang.org/x/tools v0.11.0 // indirect + golang.org/x/tools v0.14.0 // indirect google.golang.org/appengine v1.6.7 // indirect google.golang.org/genproto v0.0.0-20230306155012-7f2fa6fef1f4 // indirect gopkg.in/inf.v0 v0.9.1 // indirect diff --git a/go.sum b/go.sum index b9c0f84..5003d09 100644 --- a/go.sum +++ b/go.sum @@ -39,8 +39,8 @@ github.com/DataDog/hyperloglog v0.0.0-20220804205443-1806d9b66146 h1:S5WsRc58vIe github.com/DataDog/hyperloglog 
v0.0.0-20220804205443-1806d9b66146/go.mod h1:hFPkswc42pKhRbeKDKXy05mRi7J1kJ2vMNbvd9erH0M= github.com/DataDog/mmh3 v0.0.0-20210722141835-012dc69a9e49 h1:EbzDX8HPk5uE2FsJYxD74QmMw0/3CqSKhEr6teh0ncQ= github.com/DataDog/mmh3 v0.0.0-20210722141835-012dc69a9e49/go.mod h1:SvsjzyJlSg0rKsqYgdcFxeEVflx3ZNAyFfkUHP0TxXg= -github.com/ServiceWeaver/weaver v0.22.0 h1:1VrW9pn2iTnaF23XkviQlVIw6AGP1VNhOjKESY1AXEI= -github.com/ServiceWeaver/weaver v0.22.0/go.mod h1:fOFTt+d1SSxcMxhQi6ft4KHiH1gnOWhS4DEobGD51tk= +github.com/ServiceWeaver/weaver v0.22.1-0.20231019162801-c2294d1ae0e8 h1:smtruzdiiELIMDNHrXD+fY8/I69p0rQPqMlYERptwA4= +github.com/ServiceWeaver/weaver v0.22.1-0.20231019162801-c2294d1ae0e8/go.mod h1:j27YowX7vVpIrYcEPZ9e1FR+fvVrlH9DweyO3uyOqkg= github.com/antlr/antlr4/runtime/Go/antlr/v4 v4.0.0-20230305170008-8188dc5388df h1:7RFfzj4SSt6nnvCPbCqijJi1nWCd+TqAT3bYCStRC18= github.com/antlr/antlr4/runtime/Go/antlr/v4 v4.0.0-20230305170008-8188dc5388df/go.mod h1:pSwJ0fSY5KhvocuWSx4fz3BA8OrA1bQn+K1Eli3BRwM= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= @@ -208,7 +208,6 @@ github.com/stoewer/go-strcase v1.2.0 h1:Z2iHWqGXH00XYgqDmNgQbIBxf3wrNq0F3feEy0ai github.com/stoewer/go-strcase v1.2.0/go.mod h1:IBiWB2sKIp3wVVQ3Y035++gc+knqhUQag1KpM8ahLw8= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= -github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= @@ -231,8 +230,6 @@ go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.45.0 h1:x8Z78aZ go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp 
v0.45.0/go.mod h1:62CPTSry9QZtOaSsE3tOzhx6LzDhHnXJ6xHeMNNiM6Q= go.opentelemetry.io/otel v1.19.0 h1:MuS/TNf4/j4IXsZuJegVzI1cwut7Qc00344rgH7p8bs= go.opentelemetry.io/otel v1.19.0/go.mod h1:i0QyjOq3UPoTzff0PJB2N66fb4S0+rSbSB15/oyH9fY= -go.opentelemetry.io/otel/exporters/jaeger v1.16.0 h1:YhxxmXZ011C0aDZKoNw+juVWAmEfv/0W2XBOv9aHTaA= -go.opentelemetry.io/otel/exporters/jaeger v1.16.0/go.mod h1:grYbBo/5afWlPpdPZYhyn78Bk04hnvxn2+hvxQhKIQM= go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.16.0 h1:+XWJd3jf75RXJq29mxbuXhCXFDG3S3R4vBUeSI2P7tE= go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.16.0/go.mod h1:hqgzBPTf4yONMFgdZvL/bK42R/iinTyVQtiWihs3SZc= go.opentelemetry.io/otel/metric v1.19.0 h1:aTzpGtV0ar9wlV4Sna9sdJyII5jTVJEvKETPiOKwvpE= @@ -278,8 +275,8 @@ golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzB golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc= -golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/mod v0.13.0 h1:I/DsJXRlw/8l/0c24sM9yb0T4z9liZTduXvdAWYiysY= +golang.org/x/mod v0.13.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -326,8 +323,8 @@ golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync 
v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E= -golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= +golang.org/x/sync v0.4.0 h1:zxkM55ReGkDlKSM+Fu41A+zmbZuaPVbGMzvvdUPznYQ= +golang.org/x/sync v0.4.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -419,8 +416,8 @@ golang.org/x/tools v0.0.0-20200729194436-6467de6f59a7/go.mod h1:njjCfa9FT2d7l9Bc golang.org/x/tools v0.0.0-20200804011535-6c149bb5ef0d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.11.0 h1:EMCa6U9S2LtZXLAMoWiR/R8dAQFRqbAitmbJ2UKhoi8= -golang.org/x/tools v0.11.0/go.mod h1:anzJrxPjNtfgiYQYirP2CPGzGLxrH2u2QBhn6Bf3qY8= +golang.org/x/tools v0.14.0 h1:jvNa2pY0M4r62jkRQ6RwEZZyPcymeL9XZMLBbV7U2nc= +golang.org/x/tools v0.14.0/go.mod h1:uYBEerGOWcJyEORxN+Ek8+TT266gXkNlHdJBwexUsBg= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/internal/impl/babysitter.go b/internal/impl/babysitter.go index 
9c33c24..6a1ddf6 100644 --- a/internal/impl/babysitter.go +++ b/internal/impl/babysitter.go @@ -32,10 +32,7 @@ import ( "github.com/ServiceWeaver/weaver/runtime/metrics" "github.com/ServiceWeaver/weaver/runtime/prometheus" "github.com/ServiceWeaver/weaver/runtime/protos" - "github.com/ServiceWeaver/weaver/runtime/traces" "github.com/google/uuid" - "go.opentelemetry.io/otel/exporters/jaeger" - "go.opentelemetry.io/otel/sdk/trace" "golang.org/x/sync/errgroup" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -54,14 +51,13 @@ var logDir = filepath.Join(runtime.LogsDir(), "kube") // babysitter starts and manages a weavelet inside the Pod. type babysitter struct { - ctx context.Context - cfg *BabysitterConfig - app *protos.AppConfig - envelope *envelope.Envelope - logger *slog.Logger - traceExporter *jaeger.Exporter - clientset *kubernetes.Clientset - printer *logging.PrettyPrinter + ctx context.Context + cfg *BabysitterConfig + app *protos.AppConfig + envelope *envelope.Envelope + logger *slog.Logger + clientset *kubernetes.Clientset + printer *logging.PrettyPrinter mu sync.Mutex watching map[string]struct{} // components being watched @@ -98,18 +94,6 @@ func NewBabysitter(ctx context.Context, app *protos.AppConfig, config *Babysitte Write: logSaver, }) - // Create the trace exporter. - var traceExporter *jaeger.Exporter - if config.TraceServiceUrl != "" { - // Export traces if there is a tracing service running that is able to - // receive these traces. - endpoint := jaeger.WithCollectorEndpoint(jaeger.WithEndpoint(config.TraceServiceUrl)) - traceExporter, err = jaeger.New(endpoint) - if err != nil { - return nil, fmt.Errorf("NewBabysitter: create trace exporter: %w", err) - } - } - // Create a Kubernetes config. kubeConfig, err := rest.InClusterConfig() if err != nil { @@ -122,15 +106,14 @@ func NewBabysitter(ctx context.Context, app *protos.AppConfig, config *Babysitte // Create the babysitter. 
b := &babysitter{ - ctx: ctx, - cfg: config, - app: app, - envelope: e, - logger: logger, - traceExporter: traceExporter, - clientset: clientset, - printer: logging.NewPrettyPrinter(false /*colors disabled*/), - watching: map[string]struct{}{}, + ctx: ctx, + cfg: config, + app: app, + envelope: e, + logger: logger, + clientset: clientset, + printer: logging.NewPrettyPrinter(false /*colors disabled*/), + watching: map[string]struct{}{}, } // Inform the weavelet of the components it should host. @@ -143,9 +126,9 @@ func NewBabysitter(ctx context.Context, app *protos.AppConfig, config *Babysitte func (b *babysitter) Serve() error { // Run an HTTP server that exports metrics. - lis, err := net.Listen("tcp", fmt.Sprintf(":%d", defaultMetricsPort)) + lis, err := net.Listen("tcp", fmt.Sprintf(":%d", prometheusPort)) if err != nil { - return fmt.Errorf("Babysitter.Serve: listen on port %d: %w", defaultMetricsPort, err) + return fmt.Errorf("Babysitter.Serve: listen on port %d: %w", prometheusPort, err) } mux := http.NewServeMux() mux.HandleFunc(prometheusEndpoint, func(w http.ResponseWriter, r *http.Request) { @@ -165,9 +148,7 @@ func (b *babysitter) Serve() error { return b.envelope.Serve(b) }) - err = group.Wait() - b.traceExporter.Shutdown(b.ctx) //nolint:errcheck // response write error - return err + return group.Wait() } // ActivateComponent implements the envelope.EnvelopeHandler interface. @@ -274,14 +255,8 @@ func (b *babysitter) HandleLogEntry(_ context.Context, entry *protos.LogEntry) e // HandleTraceSpans implements the envelope.EnvelopeHandler interface. func (b *babysitter) HandleTraceSpans(ctx context.Context, spans *protos.TraceSpans) error { - if b.traceExporter == nil { - return nil - } - var spansToExport []trace.ReadOnlySpan - for _, span := range spans.Span { - spansToExport = append(spansToExport, &traces.ReadSpan{Span: span}) - } - return b.traceExporter.ExportSpans(ctx, spansToExport) + // TODO(mwhittaker): Implement with plugins. 
+ return nil } // GetSelfCertificate implements the envelope.EnvelopeHandler interface. diff --git a/internal/impl/config.go b/internal/impl/config.go index a2cb222..5b152c6 100644 --- a/internal/impl/config.go +++ b/internal/impl/config.go @@ -64,41 +64,6 @@ type kubeConfig struct { // If a listener isn't specified in the map, default options will be used. Listeners map[string]*listenerConfig - // Observability controls how the deployer will export observability information - // such as logs, metrics and traces, keyed by service. If no options are - // specified, the deployer will launch corresponding services for exporting logs, - // metrics and traces automatically. - // - // The key must be one of the following strings: - // "prometheus_service" - to export metrics to Prometheus [1] - // "jaeger_service" - to export traces to Jaeger [2] - // "loki_service" - to export logs to Grafana Loki [3] - // "grafana_service" - to visualize/manipulate observability information [4] - // - // Possible values for each service: - // 1) do not specify a value at all; leave it empty - // this is the default behavior; kube deployer will automatically create the - // observability service for you. - // - // 2) "none" - // kube deployer will not export the corresponding observability information to - // any service. E.g., prometheus_service = "none" means that the user will not - // be able to see any metrics at all. This can be useful for testing or - // benchmarking the performance of your application. - // - // 3) "your_observability_service_name" - // if you already have a running service to collect metrics, traces or logs, - // then you can simply specify the service name, and your application will - // automatically export the corresponding information to your service. E.g., - // jaeger_service = "jaeger-all-in-one" will enable your running Jaeger - // "service/jaeger-all-in-one" to capture all the app traces. 
- // - // [1] - https://prometheus.io/ - // [2] - https://www.jaegertracing.io/ - // [3] - https://grafana.com/oss/loki/ - // [4] - https://grafana.com/ - Observability map[string]string - // Resources needed to run the pods. Note that the resources should satisfy // the format specified in [1]. // diff --git a/internal/impl/kube.go b/internal/impl/kube.go index 11c60ce..ceee5a2 100644 --- a/internal/impl/kube.go +++ b/internal/impl/kube.go @@ -16,6 +16,7 @@ package impl import ( "bytes" + "crypto/sha256" _ "embed" "fmt" "html/template" @@ -27,6 +28,7 @@ import ( "github.com/ServiceWeaver/weaver/runtime/bin" "github.com/ServiceWeaver/weaver/runtime/protos" + "github.com/google/uuid" "golang.org/x/exp/maps" "google.golang.org/protobuf/encoding/prototext" appsv1 "k8s.io/api/apps/v1" @@ -37,16 +39,21 @@ import ( "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" + "sigs.k8s.io/yaml" ) const ( // Name of the container that hosts the application binary. appContainerName = "serviceweaver" + // The exported port by the Service Weaver services. servicePort = 80 // Port used by the weavelets to listen for internal traffic. internalPort = 10000 + + // Default Prometheus port. + prometheusPort = 9090 ) // Start value for ports used by the public and private listeners. @@ -58,12 +65,11 @@ var externalPort int32 = 20000 // Note that this is different from a Kubernetes Deployment. A deployed Service // Weaver application consists of many Kubernetes Deployments. 
type deployment struct { - deploymentId string // globally unique deployment id - image string // Docker image URI - traceServiceURL string // where traces are exported to, if not empty - config *kubeConfig // [kube] config from weaver.toml - app *protos.AppConfig // parsed weaver.toml - groups []group // groups + deploymentId string // globally unique deployment id + image string // Docker image URI + config *kubeConfig // [kube] config from weaver.toml + app *protos.AppConfig // parsed weaver.toml + groups []group // groups } // group contains information about a possibly replicated group of components. @@ -115,9 +121,6 @@ func buildDeployment(d deployment, g group) (*appsv1.Deployment, error) { "serviceweaver/app": d.app.Name, "serviceweaver/version": d.deploymentId[:8], } - if d.config.Observability[metricsConfigKey] != disabled { - podLabels["metrics"] = d.app.Name // Needed by Prometheus to scrape the metrics. - } // Pick DNS policy. dnsPolicy := corev1.DNSClusterFirst @@ -285,18 +288,10 @@ func buildContainer(d deployment, g group) (corev1.Container, error) { ContainerPort: l.port, }) } - if d.config.Observability[metricsConfigKey] != disabled { - // Expose the metrics port from the container, so it can be - // discoverable for scraping by Prometheus. - // - // TODO(rgrandl): We may want to have a default metrics port that can - // be scraped by any metrics collection system. For now, disable the - // port if Prometheus will not collect the metrics. - ports = append(ports, corev1.ContainerPort{ - Name: "prometheus", - ContainerPort: defaultMetricsPort, - }) - } + ports = append(ports, corev1.ContainerPort{ + Name: "prometheus", + ContainerPort: prometheusPort, + }) // Gather the set of resources. 
resources, err := computeResourceRequirements(d.config.Resources) @@ -433,11 +428,6 @@ func generateYAMLs(configFilename string, app *protos.AppConfig, cfg *kubeConfig return fmt.Errorf("unable to create kube app deployment: %w", err) } - // Generate deployment info needed to get insights into the application. - if err := generateObservabilityYAMLs(&b, app.Name, cfg); err != nil { - return fmt.Errorf("unable to create configuration information: %w", err) - } - // Write the generated kube info into a file. f, err := os.OpenFile(yamlFile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) if err != nil { @@ -620,10 +610,9 @@ func generateConfigMap(w io.Writer, configFilename string, d deployment) error { } } babysitterConfig := &BabysitterConfig{ - Namespace: d.config.Namespace, - DeploymentId: d.deploymentId, - TraceServiceUrl: d.traceServiceURL, - Listeners: listeners, + Namespace: d.config.Namespace, + DeploymentId: d.deploymentId, + Listeners: listeners, } configTextpb, err := prototext.MarshalOptions{Multiline: true}.Marshal(babysitterConfig) if err != nil { @@ -741,26 +730,12 @@ func newDeployment(app *protos.AppConfig, cfg *kubeConfig, depId, image string) return sorted[i].name < sorted[j].name }) - // Compute the URL of the export traces service. - var traceServiceURL string - switch jservice := cfg.Observability[tracesConfigKey]; { - case jservice == auto: - // Point to the service launched by the kube deployer. - traceServiceURL = fmt.Sprintf("http://%s:%d/api/traces", name{app.Name, jaegerAppName}.DNSLabel(), defaultJaegerCollectorPort) - case jservice != disabled: - // Point to the service launched by the user. - traceServiceURL = fmt.Sprintf("http://%s:%d/api/traces", jservice, defaultJaegerCollectorPort) - default: - // No trace to export. 
- } - return deployment{ - deploymentId: depId, - image: image, - traceServiceURL: traceServiceURL, - config: cfg, - app: app, - groups: sorted, + deploymentId: depId, + image: image, + config: cfg, + app: app, + groups: sorted, }, nil } @@ -855,3 +830,30 @@ func readComponentsAndListeners(binary string) (map[string][]string, error) { return components, nil } + +// hash8 computes a stable 8-character hexadecimal hash over the provided strings. +func hash8(strs []string) string { + h := sha256.New() + var data []byte + for _, str := range strs { + h.Write([]byte(str)) + h.Write(data) + data = h.Sum(data) + } + return uuid.NewHash(h, uuid.Nil, data, 0).String()[:8] +} + +// marshalResource marshals the provided Kubernetes resource to YAML and writes it +// to the provided writer, prefixing it with the provided comment. +func marshalResource(w io.Writer, resource any, comment string) error { + bytes, err := yaml.Marshal(resource) + if err != nil { + return err + } + fmt.Fprintf(w, "\n# %s\n", comment) + if _, err := w.Write(bytes); err != nil { + return err + } + fmt.Fprintf(w, "\n---\n") + return nil +} diff --git a/internal/impl/kube.pb.go b/internal/impl/kube.pb.go index 7f1d39b..29c5c75 100644 --- a/internal/impl/kube.pb.go +++ b/internal/impl/kube.pb.go @@ -40,10 +40,9 @@ type BabysitterConfig struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Namespace string `protobuf:"bytes,1,opt,name=namespace,proto3" json:"namespace,omitempty"` // Kubernetes namespace - DeploymentId string `protobuf:"bytes,2,opt,name=deployment_id,json=deploymentId,proto3" json:"deployment_id,omitempty"` // globally unique deployment id - TraceServiceUrl string `protobuf:"bytes,3,opt,name=trace_service_url,json=traceServiceUrl,proto3" json:"trace_service_url,omitempty"` // if not empty, where to send traces - Listeners map[string]int32 `protobuf:"bytes,4,rep,name=listeners,proto3" json:"listeners,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" 
protobuf_val:"varint,2,opt,name=value,proto3"` // a map from listener name to port + Namespace string `protobuf:"bytes,1,opt,name=namespace,proto3" json:"namespace,omitempty"` // Kubernetes namespace + DeploymentId string `protobuf:"bytes,2,opt,name=deployment_id,json=deploymentId,proto3" json:"deployment_id,omitempty"` // globally unique deployment id + Listeners map[string]int32 `protobuf:"bytes,3,rep,name=listeners,proto3" json:"listeners,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"varint,2,opt,name=value,proto3"` // a map from listener name to port } func (x *BabysitterConfig) Reset() { @@ -92,13 +91,6 @@ func (x *BabysitterConfig) GetDeploymentId() string { return "" } -func (x *BabysitterConfig) GetTraceServiceUrl() string { - if x != nil { - return x.TraceServiceUrl - } - return "" -} - func (x *BabysitterConfig) GetListeners() map[string]int32 { if x != nil { return x.Listeners @@ -111,27 +103,24 @@ var File_internal_impl_kube_proto protoreflect.FileDescriptor var file_internal_impl_kube_proto_rawDesc = []byte{ 0x0a, 0x18, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x2f, 0x69, 0x6d, 0x70, 0x6c, 0x2f, 0x6b, 0x75, 0x62, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x04, 0x69, 0x6d, 0x70, 0x6c, - 0x22, 0x84, 0x02, 0x0a, 0x10, 0x42, 0x61, 0x62, 0x79, 0x73, 0x69, 0x74, 0x74, 0x65, 0x72, 0x43, + 0x22, 0xd8, 0x01, 0x0a, 0x10, 0x42, 0x61, 0x62, 0x79, 0x73, 0x69, 0x74, 0x74, 0x65, 0x72, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x12, 0x1c, 0x0a, 0x09, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x12, 0x23, 0x0a, 0x0d, 0x64, 0x65, 0x70, 0x6c, 0x6f, 0x79, 0x6d, 0x65, 0x6e, 0x74, 0x5f, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x64, 0x65, 0x70, 0x6c, - 0x6f, 0x79, 0x6d, 0x65, 0x6e, 0x74, 0x49, 0x64, 0x12, 0x2a, 0x0a, 0x11, 0x74, 0x72, 0x61, 0x63, - 0x65, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x5f, 0x75, 
0x72, 0x6c, 0x18, 0x03, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x0f, 0x74, 0x72, 0x61, 0x63, 0x65, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, - 0x65, 0x55, 0x72, 0x6c, 0x12, 0x43, 0x0a, 0x09, 0x6c, 0x69, 0x73, 0x74, 0x65, 0x6e, 0x65, 0x72, - 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x25, 0x2e, 0x69, 0x6d, 0x70, 0x6c, 0x2e, 0x42, - 0x61, 0x62, 0x79, 0x73, 0x69, 0x74, 0x74, 0x65, 0x72, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x2e, - 0x4c, 0x69, 0x73, 0x74, 0x65, 0x6e, 0x65, 0x72, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x09, - 0x6c, 0x69, 0x73, 0x74, 0x65, 0x6e, 0x65, 0x72, 0x73, 0x1a, 0x3c, 0x0a, 0x0e, 0x4c, 0x69, 0x73, - 0x74, 0x65, 0x6e, 0x65, 0x72, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, - 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, - 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x76, 0x61, - 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x42, 0x34, 0x5a, 0x32, 0x67, 0x69, 0x74, 0x68, 0x75, - 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x57, 0x65, 0x61, - 0x76, 0x65, 0x72, 0x2f, 0x77, 0x65, 0x61, 0x76, 0x65, 0x72, 0x2d, 0x6b, 0x75, 0x62, 0x65, 0x2f, - 0x69, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x2f, 0x69, 0x6d, 0x70, 0x6c, 0x62, 0x06, 0x70, - 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x6f, 0x79, 0x6d, 0x65, 0x6e, 0x74, 0x49, 0x64, 0x12, 0x43, 0x0a, 0x09, 0x6c, 0x69, 0x73, 0x74, + 0x65, 0x6e, 0x65, 0x72, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x25, 0x2e, 0x69, 0x6d, + 0x70, 0x6c, 0x2e, 0x42, 0x61, 0x62, 0x79, 0x73, 0x69, 0x74, 0x74, 0x65, 0x72, 0x43, 0x6f, 0x6e, + 0x66, 0x69, 0x67, 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x65, 0x6e, 0x65, 0x72, 0x73, 0x45, 0x6e, 0x74, + 0x72, 0x79, 0x52, 0x09, 0x6c, 0x69, 0x73, 0x74, 0x65, 0x6e, 0x65, 0x72, 0x73, 0x1a, 0x3c, 0x0a, + 0x0e, 0x4c, 0x69, 0x73, 0x74, 0x65, 0x6e, 0x65, 0x72, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, + 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 
0x52, 0x03, 0x6b, 0x65, + 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, + 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x42, 0x34, 0x5a, 0x32, 0x67, + 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, + 0x65, 0x57, 0x65, 0x61, 0x76, 0x65, 0x72, 0x2f, 0x77, 0x65, 0x61, 0x76, 0x65, 0x72, 0x2d, 0x6b, + 0x75, 0x62, 0x65, 0x2f, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x2f, 0x69, 0x6d, 0x70, + 0x6c, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/internal/impl/kube.proto b/internal/impl/kube.proto index 3015b35..de6f045 100644 --- a/internal/impl/kube.proto +++ b/internal/impl/kube.proto @@ -22,6 +22,5 @@ package impl; message BabysitterConfig { string namespace = 1; // Kubernetes namespace string deployment_id = 2; // globally unique deployment id - string trace_service_url = 3; // if not empty, where to send traces - map listeners = 4; // a map from listener name to port + map listeners = 3; // a map from listener name to port } diff --git a/internal/impl/name.go b/internal/impl/name.go deleted file mode 100644 index 404187a..0000000 --- a/internal/impl/name.go +++ /dev/null @@ -1,265 +0,0 @@ -// Copyright 2023 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package impl - -import ( - "crypto/sha256" - "fmt" - "math" - "strings" - "unicode" - - "github.com/google/uuid" - "k8s.io/apimachinery/pkg/util/validation" -) - -// TODO(rgrandl): Remove duplicate file. - -const ( - // Maximum number of components allowed in the name. - maxNumComponents = 30 - - // Minimum length of a sanitized string. - minSanitizedLen = 1 -) - -func init() { - // Enforce the following invariant on maxNumComponents: if all - // maxNumComponents component names are provided, we guarantee that - // we can always construct a valid Kubernetes name. - const n = maxNumComponents + 1 // one extra component for hash - const minLen = n*minSanitizedLen + n - 1 // components plus '-' separators - if minLen > validation.DNS1035LabelMaxLength { - panic(fmt.Sprintf("bad invariant: type Name cannot support %d components", maxNumComponents)) - } -} - -// name represents a Kubernetes object name, consisting of N component names -// concatenated using the '-' character: -// -// --...- -// -// Component names are arbitrary strings that need not adhere to Kubernetes's -// strict naming convention, which excludes most of the non-alphanumerical -// symbols including '.' and '_'. Likewise, the combined length of the -// component names need not meet the length requirement for Kubernetes object -// names, which can be as low as 63 characters. Instead, when creating object -// names (using DNSLabel() and DNSSubdomain() methods), the component names -// are sanitized in a way that preserves as much of their information as -// possible, while satisfying the Kubernetes naming requirements for the -// concatenated string. -// -// A simplistic sanitization, however, can lead to different names being -// converted to the same name (e.g., "foo_bar" and "foo.bar" may both be -// converted to "foobar"), leading to collisions. 
To prevent these -// collisions, we append a combined hash of all original components to the -// object name: -// -// --...-- -// -// Empty component names are dropped and not concatenated. -type name [maxNumComponents]string - -// DNSLabel returns a human readable name that follows the DNS label -// standard as defined in RFC 1035. -func (n name) DNSLabel() string { - return n.convert(true /*isLabel*/) -} - -// DNSSubdomain returns a human-readable name that follows the DNS -// subdomain standard as defined in RFC 1123. -func (n name) DNSSubdomain() string { - return n.convert(false /*isLabel*/) -} - -func (n name) convert(isLabel bool) string { - var maxTotalLen int // max length for the entire name - if isLabel { - maxTotalLen = validation.DNS1035LabelMaxLength - } else { - maxTotalLen = validation.DNS1123SubdomainMaxLength - } - - // Create a list of non-empty components. - var components []string - for _, c := range n { - if c != "" { - components = append(components, c) - } - } - - // Compute a consistent hash of all of the components and append it - // to the list of components. - hash := hash8(components) - components = append(components, hash) - - // Maximum length for all component names combined. - maxCombinedLen := maxTotalLen - if len(components) > 1 { - // Account for the '-' separators between components. - maxCombinedLen -= (len(components) - 1) - } - - // Step 1: Sanitize all components without any length limits. This gives us - // the upper bound on the sanitized length for each component. - var maxComponentLength [maxNumComponents]int - for i := 0; i < len(components); i++ { - name := sanitizer{ - maxLen: math.MaxInt32, - numStartOK: i > 0 || !isLabel, - dotOK: !isLabel, - }.sanitize(components[i]) - maxComponentLength[i] = len(name) - } - - // Step 2: Allocate an equal target length T for each component. 
- T := maxCombinedLen / len(components) - if T < minSanitizedLen { - panic(fmt.Sprintf("impossible: target component length %d can never be less than %d due to an invariant", T, minSanitizedLen)) - } - - // Step 3: Sanitize all components whose max sanitized length is less than - // or equal to T. Collect all of the unused bytes into a remainder value R. - var R int - for i := 0; i < len(components); i++ { - if maxComponentLength[i] > T { - continue - } - name := sanitizer{ - maxLen: T, - numStartOK: i > 0 || !isLabel, - dotOK: !isLabel, - }.sanitize(components[i]) - components[i] = name - R += T - len(name) - } - - // Step 4: Sanitize all other components, using the remainder budget R. - for i := 0; i < len(components); i++ { - if maxComponentLength[i] <= T { // already sanitized - continue - } - name := sanitizer{ - maxLen: T + R, - numStartOK: i > 0 || !isLabel, - dotOK: !isLabel, - }.sanitize(components[i]) - components[i] = name - R += T - len(name) - } - - return strings.Join(components, "-") -} - -// isAplhanum returns true iff r is in the character set [a-zA-Z0-9]. -func isAlphanum(r rune) bool { - return isAlpha(r) || (r >= '0' && r <= '9') -} - -// isAlpha returns true iff r is in the character set [a-zA-Z]. -func isAlpha(r rune) bool { - return (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') -} - -type sanitizer struct { - maxLen int - numStartOK bool - dotOK bool -} - -// sanitize returns a human-readable variant of name that is a valid -// DNS name and whose length: -// - Greater than zero. -// - Less than or equal to maxLen and len(name). -// -// If s.numStartOK is false, the returned variant will start with a letter. -// If s.dotOK is true, the returned variant may include '.' characters. -// -// REQUIRES: !name.empty() -func (s sanitizer) sanitize(name string) string { - runes := []rune(name) - - // Remove/replace all runes not in the set [-.a-zA-Z0-9]. 
- idx := 0 - for i := 0; i < len(runes); i++ { - r := runes[i] - switch { - case r == '.': - if !s.dotOK { - r = '-' - } - case r == '_' || r == '-': - r = '-' - case r == '/': - if s.dotOK { - r = '.' - } else { - r = '-' - } - case isAlphanum(r): - r = unicode.ToLower(r) - default: - continue // drop the rune - } - - if !isAlphanum(r) && idx > 0 && !isAlphanum(runes[idx-1]) { - // Don't keep adjacent non-alphanumeric characters. - continue - } - runes[idx] = r - idx++ - } - runes = runes[:idx] - - // Shorten the runes length, if necessary. We remove the prefix, since - // the suffix carries more information (e.g., object type). - if len(runes) > s.maxLen { - runes = runes[len(runes)-s.maxLen:] - } - - // Ensure that the start and the end runes are alphanumeric characters. - // If necessary, ensure that the start rune is an alphabetic character. - start := 0 - for ; start < len(runes); start++ { - r := runes[start] - if isAlpha(r) || (s.numStartOK && isAlphanum(r)) { - break - } - } - runes = runes[start:] - end := len(runes) - 1 - for ; end >= 0 && !isAlphanum(runes[end]); end-- { - } - runes = runes[:end+1] - - // Ensure a non-empty set of runes remains. - if len(runes) == 0 { - runes = []rune{'a'} - } - - return string(runes) -} - -// hash computes a stable 8-byte hash over the provided strings. -func hash8(strs []string) string { - h := sha256.New() - var data []byte - for _, str := range strs { - h.Write([]byte(str)) - h.Write(data) - data = h.Sum(data) - } - return uuid.NewHash(h, uuid.Nil, data, 0).String()[:8] -} diff --git a/internal/impl/observability.go b/internal/impl/observability.go deleted file mode 100644 index 70d975f..0000000 --- a/internal/impl/observability.go +++ /dev/null @@ -1,1397 +0,0 @@ -// Copyright 2023 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package impl - -import ( - _ "embed" - "fmt" - "io" - "os" - "time" - - appsv1 "k8s.io/api/apps/v1" - _ "k8s.io/api/autoscaling/v2beta2" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/util/intstr" - "sigs.k8s.io/yaml" -) - -// TODO(rgrandl): We might want to revisit the way we integrate with external -// systems to export observability information. For example, we might want to -// create an agent abstraction that is used by the babysitters to export -// observability info. Different implementations of these agents will behave -// differently. For example, a Jaeger agent, will simply export traces to a Jaeger -// service. A Prometheus agent will export a /metrics endpoint that can be scraped -// by a Prometheus service. Another agent might want to convert otel traces to -// a different format and export it (e.g., Elastic). This way, we can add agent -// implementations for any observability systems. - -const ( - // The names of the observability services that interact with the application. - tracesConfigKey = "jaeger_service" - metricsConfigKey = "prometheus_service" - logsConfigKey = "loki_service" - grafanaConfigKey = "grafana_service" - - // Jaeger related configs. - - // Name of the Jaeger application. - jaegerAppName = "jaeger" - - // Name of the Jaeger [1] container image used for automatically started Jaeger service. 
- // - // all-in-one[1] combines the three Jaeger components: agent, collector, and - // query service/UI in a single binary, which is enough for handling the traces - // in a kubernetes deployment. However, we don't really need an agent. Also, - // we may want to launch separate collector and query services later on. Or, - // we may want to launch an otel collector service as well, to ensure that the - // traces are available, even if the deployment is deleted. - // - // [1] https://www.jaegertracing.io/docs/1.45/deployment/#all-in-one - autoJaegerImageName = "jaegertracing/all-in-one" - - // The port on which the Jaeger UI agent is listening on. - // - // Note that this is expected to be the default port [1] for both the automatically - // started Jaeger UI agent and the one started by the user. - // - // [1] https://www.jaegertracing.io/docs/1.6/getting-started/ - defaultJaegerUIPort = 16686 - - // The port on which the Jaeger collector is receiving traces from the - // clients when using the Jaeger exporter. - // - // Note that this is expected to be the default port [1] for both the automatically - // started Jaeger collector and the one started by the user. - // - // [1] https://www.jaegertracing.io/docs/1.6/getting-started/ - defaultJaegerCollectorPort = 14268 - - // Prometheus related configs. - - // Name of the Prometheus [1] container image used for automatically started Prometheus service. - // - // [1] https://prometheus.io/ - autoPrometheusImageName = "prom/prometheus:v2.30.3" - - // The port on which the weavelets are exporting the metrics. - // - // Note that this is expected to be the default port [1] for both the automatically - // started Prometheus and the one started by the user. - // - // [1] https://opensource.com/article/18/12/introduction-prometheus - defaultMetricsPort = 9090 - - // Loki related configs. - - // Name of the Loki [1] container image used for automatically started Loki service. 
- // - // [1] https://grafana.com/oss/loki/ - autoLokiImageName = "grafana/loki" - - // The port on which Loki is exporting the logs. - // - // Note that this is expected to be the default port [1] for both the automatically - // started Loki and the one started by the user. - // - // [1] https://grafana.com/docs/loki/latest/configuration/ - defaultLokiPort = 3100 - - // Promtail related configs. - - // Name of the Promtail [1] container image used for automatically started Promtail. - // - // [1] https://grafana.com/docs/loki/latest/clients/promtail/ - autoPromtailImageName = "grafana/promtail" - - // Grafana related configs. - - // Name of the Grafana [1] container image used for automatically started Grafana. - // - // [1] https://grafana.com/ - autoGrafanaImageName = "grafana/grafana" - - // The default Grafana web server port. - // - // Note that this is expected to be the default port [1] for both the automatically - // started Grafana and the one started by the user. - // - // [1] https://grafana.com/docs/grafana/latest/setup-grafana/configure-grafana/ - defaultGrafanaPort = 3000 - - // The below values are used to check the values passed through app config - // for different observability services. - - // Set iff the kube deployer should generate Kubernetes configs to deploy - // a service. - auto = "" - - // Set iff the user requires that the corresponding service should be disabled - // entirely. I.e., neither the user started the service nor the Kube deployer. - disabled = "none" -) - -// dashboard was generated using the Grafana UI. Then, we saved the content as -// a JSON file. -// -//go:embed dashboard.txt -var dashboardContent string - -// generateObservabilityYAMLs generates Kubernetes YAMLs for exporting -// applications' metrics, logs, and traces. 
-func generateObservabilityYAMLs(w io.Writer, appName string, cfg *kubeConfig) error { - if err := generateConfigsToExportTraces(w, appName, cfg); err != nil { - return fmt.Errorf("unable to create kube configs to export traces: %w", err) - } - - if err := generateConfigsToExportMetrics(w, appName, cfg); err != nil { - return fmt.Errorf("unable to create kube configs to export metrics: %w", err) - } - - if err := generateConfigsToExportLogs(w, appName, cfg); err != nil { - return fmt.Errorf("unable to create kube configs to export logs: %w", err) - } - - // Generate Kubernetes configs to export logs, traces and metrics to Grafana. - if err := generateConfigsToExportToGrafana(w, appName, cfg); err != nil { - return fmt.Errorf("unable to create kube configs to export data to Grafana: %w", err) - } - return nil -} - -// generateConfigsToExportTraces generates Jaeger Kubernetes deployment -// configurations for a given app. -// -// Note that the configs should be generated iff the kube deployer automatically -// runs a Jaeger service along with the app. -// -// Note that we run a single instance of Jaeger. This is because we are using -// a Jaeger image that combines three Jaeger components, agent, collector, and -// query service/UI in a single image. -// -// TODO(rgrandl): If the trace volume can't be handled by a single instance, we -// should scale these components independently, and use different image(s). -// -// TODO(rgrandl): Convert the below comments into docs. -// How to integrate with an external Jaeger service? -// E.g., if you use Helm [1] to install Jaeger, you can simply do the following: -// 1) You install Jaeger using a command similar to the one below. -// helm install jaeger-all-in-one jaeger-all-in-one/jaeger-all-in-one -// -// 2) Your Jaeger service has the name 'jaeger-all-in-one'. -// -// 3) in your config.toml, set the 'jaeger_service' info as follows: -// config.toml -// ... 
-// [kube] -// observability = {jaeger_service = "jaeger-all-in-one"} -// -// [1] https://helm.sh/ -func generateConfigsToExportTraces(w io.Writer, appName string, cfg *kubeConfig) error { - // The user disabled exporting the traces, don't generate anything. - if cfg.Observability[tracesConfigKey] != auto { - return nil - } - - jname := name{appName, jaegerAppName}.DNSLabel() - - // Generate the Jaeger deployment. - d := &appsv1.Deployment{ - TypeMeta: metav1.TypeMeta{ - APIVersion: "apps/v1", - Kind: "Deployment", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: jname, - Namespace: cfg.Namespace, - }, - Spec: appsv1.DeploymentSpec{ - Replicas: ptrOf(int32(1)), - Selector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "jaeger": jname, - }, - }, - Template: corev1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{ - "jaeger": jname, - }, - Namespace: cfg.Namespace, - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: jname, - Image: fmt.Sprintf("%s:latest", autoJaegerImageName), - ImagePullPolicy: corev1.PullIfNotPresent, - }, - }, - }, - }, - Strategy: appsv1.DeploymentStrategy{ - Type: "RollingUpdate", - RollingUpdate: &appsv1.RollingUpdateDeployment{}, - }, - }, - } - if err := marshalResource(w, d, "Jaegar Deployment"); err != nil { - return err - } - fmt.Fprintf(os.Stderr, "Generated Jaeger deployment\n") - - // Generate the Jaeger service. 
- s := &corev1.Service{ - TypeMeta: metav1.TypeMeta{ - APIVersion: "v1", - Kind: "Service", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: jname, - Namespace: cfg.Namespace, - }, - Spec: corev1.ServiceSpec{ - Selector: map[string]string{"jaeger": jname}, - Ports: []corev1.ServicePort{ - { - Name: "ui-port", - Port: defaultJaegerUIPort, - Protocol: "TCP", - TargetPort: intstr.IntOrString{IntVal: int32(defaultJaegerUIPort)}, - }, - { - Name: "collector-port", - Port: defaultJaegerCollectorPort, - Protocol: "TCP", - TargetPort: intstr.IntOrString{IntVal: int32(defaultJaegerCollectorPort)}, - }, - }, - }, - } - if err := marshalResource(w, s, "Jaegar Service"); err != nil { - return err - } - fmt.Fprintf(os.Stderr, "Generated Jaeger service\n") - return nil -} - -// generateConfigsToExportMetrics generates the Prometheus kubernetes deployment -// information for a given app. -// -// TODO(rgrandl): Convert the below comments into docs. -// How to integrate with an external Prometheus service? -// E.g., if you use Helm [1] to install Prometheus, you can simply do the following: -// 1) You install Prometheus using a command similar to the one below. -// helm install prometheus prometheus-community/prometheus -// -// 2) Your Prometheus service has the name 'prometheus-server'. -// -// 3) Write a simple manifest file prom.yaml that contains the scrape config info -// generated by the kube deployer. The scrape config info should look something like: -// -// ``` -// - job_name: "collatz-prometheus-ca03bb5f" -// metrics_path: /metrics -// ... -// -// You can find the Kube generated config map by running kubectl cm -n . -// It should look something like collatz-prometheus-config-xyz. -// -// 4) The prom.yaml file should look like: -// -// extraScrapeConfigs: | -// - job_name: "collatz-prometheus-ca03bb5f" -// metrics_path: /metrics -// ... -// -// 5) Upgrade your prometheus release with the new manifest file. 
-// helm upgrade prometheus prometheus-community/prometheus -f prom.yaml -// -// 6) Now you should be able to see the app traces with your running Prometheus service. -// -// Note that this will work disregarding whether you disabled the kube deployer -// to generate a Prometheus service as well. -// -// [Optional] However, if you run a Grafana service, and the service is generated -// by the Kube deployer, then if you specify the name of your prometheus service -// in the config.toml file, the Grafana service will automatically import your -// Prometheus service as a datasource. -// -// config.toml -// ... -// [kube] -// observability = {prometheus_service = "prometheus-server"} -// -// [1] https://helm.sh/ -func generateConfigsToExportMetrics(w io.Writer, appName string, cfg *kubeConfig) error { - // The user disabled exporting the metrics, don't generate anything. - if cfg.Observability[metricsConfigKey] == disabled { - return nil - } - - // Generate configs to configure Prometheus to scrape metrics from the app. - if err := generatePrometheusConfigs(w, appName, cfg); err != nil { - return err - } - - // Generate the Prometheus kubernetes deployment info iff the kube deployer - // should automatically start the Prometheus service. - if cfg.Observability[metricsConfigKey] != auto { - return nil - } - - // Generate kubernetes service configs for Prometheus. - if err := generatePrometheusServiceConfigs(w, appName, cfg); err != nil { - return err - } - return nil -} - -// generatePrometheusConfigs generate configs needed by the Prometheus service -// to export metrics. -// -// Note that these configs are needed by both the automatically started Prometheus -// service and the one started by the user. -func generatePrometheusConfigs(w io.Writer, appName string, cfg *kubeConfig) error { - cname := name{appName, "prometheus", "config"}.DNSLabel() - pname := name{appName, "prometheus"}.DNSLabel() - - // Build the config map that holds the prometheus configuration file. 
In the - // config we specify how to scrape the app pods for the metrics. - // - // Note that the info in the config map will be used by the Prometheus service - // deployed along the app by the kube deployer, or by the user if they run their - // own instance of Prometheus. - config := fmt.Sprintf(` -global: - scrape_interval: 15s -scrape_configs: - - job_name: "%s" - metrics_path: %s - kubernetes_sd_configs: - - role: pod - namespaces: - names: - - %s - scheme: http - relabel_configs: - - source_labels: [__meta_kubernetes_pod_label_metrics] - regex: "%s" - action: keep -`, pname, prometheusEndpoint, cfg.Namespace, appName) - - // Create a config map to store the prometheus config. - cm := corev1.ConfigMap{ - TypeMeta: metav1.TypeMeta{ - Kind: "ConfigMap", - APIVersion: "v1", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: cname, - Namespace: cfg.Namespace, - }, - Data: map[string]string{ - "prometheus.yaml": config, - }, - } - if err := marshalResource(w, cm, fmt.Sprintf("Config Map %s", cname)); err != nil { - return err - } - fmt.Fprintf(os.Stderr, "Generated kube deployment for config map %s\n", cname) - return nil -} - -// generatePrometheusServiceConfigs generates the Prometheus kubernetes -// service information for a given app. -// -// Note that the configs should be generated iff the kube deployer automatically -// runs a Prometheus service along with the app. -// -// TODO(rgrandl): We run a single instance of Prometheus for now. We might want -// to scale it up if it becomes a bottleneck. -func generatePrometheusServiceConfigs(w io.Writer, appName string, cfg *kubeConfig) error { - cname := name{appName, "prometheus", "config"}.DNSLabel() - pname := name{appName, "prometheus"}.DNSLabel() - - // Build the kubernetes Prometheus deployment. 
- d := &appsv1.Deployment{ - TypeMeta: metav1.TypeMeta{ - APIVersion: "apps/v1", - Kind: "Deployment", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: pname, - Namespace: cfg.Namespace, - }, - Spec: appsv1.DeploymentSpec{ - Replicas: ptrOf(int32(1)), - Selector: &metav1.LabelSelector{ - MatchLabels: map[string]string{"prometheus": pname}, - }, - Template: corev1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{"prometheus": pname}, - Namespace: cfg.Namespace, - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: pname, - Image: autoPrometheusImageName, - ImagePullPolicy: corev1.PullIfNotPresent, - Args: []string{ - fmt.Sprintf("--config.file=/etc/%s/prometheus.yaml", pname), - fmt.Sprintf("--storage.tsdb.path=/%s", pname), - }, - Ports: []corev1.ContainerPort{{ContainerPort: defaultMetricsPort}}, - VolumeMounts: []corev1.VolumeMount{ - { - Name: cname, - MountPath: fmt.Sprintf("/etc/%s/prometheus.yaml", pname), - SubPath: "prometheus.yaml", - }, - { - Name: fmt.Sprintf("%s-data", pname), - MountPath: fmt.Sprintf("/%s", pname), - }, - }, - }, - }, - Volumes: []corev1.Volume{ - { - Name: cname, - VolumeSource: corev1.VolumeSource{ - ConfigMap: &corev1.ConfigMapVolumeSource{ - LocalObjectReference: corev1.LocalObjectReference{ - Name: cname, - }, - Items: []corev1.KeyToPath{ - { - Key: "prometheus.yaml", - Path: "prometheus.yaml", - }, - }, - }, - }, - }, - { - Name: fmt.Sprintf("%s-data", pname), - }, - }, - }, - }, - Strategy: appsv1.DeploymentStrategy{ - Type: "RollingUpdate", - RollingUpdate: &appsv1.RollingUpdateDeployment{}, - }, - }, - } - if err := marshalResource(w, d, fmt.Sprintf("Prometheus Deployment %s", pname)); err != nil { - return err - } - fmt.Fprintf(os.Stderr, "Generated kube deployment for Prometheus %s\n", pname) - - // Build the kubernetes Prometheus service. 
- s := &corev1.Service{ - TypeMeta: metav1.TypeMeta{ - APIVersion: "v1", - Kind: "Service", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: pname, - Namespace: cfg.Namespace, - }, - Spec: corev1.ServiceSpec{ - Selector: map[string]string{"prometheus": pname}, - Ports: []corev1.ServicePort{ - { - Port: servicePort, - Protocol: "TCP", - TargetPort: intstr.IntOrString{IntVal: int32(defaultMetricsPort)}, - }, - }, - }, - } - if err := marshalResource(w, s, fmt.Sprintf("Prometheus Service %s", pname)); err != nil { - return err - } - fmt.Fprintf(os.Stderr, "Generated kube service for Prometheus %s\n", pname) - return nil -} - -// generateConfigsToExportLogs generates the Loki/Promtail kubernetes deployment -// information for a given app. -// -// Note that for the Loki to be able to aggregate logs, we need to run Promtail -// on each node where the app is deployed. -// -// TODO(rgrandl): Convert the below comments into docs. -// How to integrate with an external Loki service? -// E.g., if you use Helm [1] to install Loki, you can simply do the following: -// 1) You install Loki using a command similar to the one below. -// helm install loki grafana/loki-stack -// -// 2) You install Promtail using a command similar to the one below. -// helm install promtail grafana/promtail -// -// Assume that your Loki service name is 'loki' and the Promtail daemonset name -// is 'promtail'. -// -// 3) You don't need to update the 'loki' service at all. -// -// 4) You have to ugrade the 'promtail' daemonset with the content of the config -// generated by the kube deployer. The kube generated config can be found by running -// kubectl cm -n . It should look something like collatz-promtail-config-xyz: -// -// clients: -// - url: http://loki:3100/loki/api/v1/push -// ... -// scrape_configs: -// - job_name: kubernetes-pods -// -// 5) Write a simple manifest file promtail.yaml that contains the `clients` and the -// `scrape_config` sections from the config. 
-// -// 6) The promtail.yaml file should look like: -// -// clients: -// - url: http://loki:3100/loki/api/v1/push -// extraScrapeConfigs: | -// - job_name: kubernetes-pods -// ... -// -// 7) Upgrade your Promtail release with the new manifest file. -// helm upgrade promtail grafana/promtail -f promtail.yaml -// -// 8) In your config.toml file, you should set the name of the loki service as follows: -// -// config.toml -// ... -// [kube] -// observability = {loki_service = "loki"} -// -// This is optional. However, if you set it, then the kube generated config -// for promtail will set the right path to the Loki service for you, so you -// can easily upgrade your Promtail release. Also, if you launch Grafana, -// it will automatically add your Loki service as a datasource. -// -// [1] https://helm.sh/ -func generateConfigsToExportLogs(w io.Writer, appName string, cfg *kubeConfig) error { - // The user disabled exporting the logs, don't generate anything. - if cfg.Observability[logsConfigKey] == disabled { - return nil - } - - // Generate configs to configure Loki/Promtail. - if err := generateLokiConfigs(w, appName, cfg); err != nil { - return err - } - if err := generatePromtailConfigs(w, appName, cfg); err != nil { - return err - } - - // Generate the Loki/Promtail kubernetes deployment configs iff the kube deployer - // should deploy the Loki/Promtail. - if cfg.Observability[logsConfigKey] != auto { - return nil - } - - // Generate kubernetes service configs for Loki/Promtail. - if err := generateLokiServiceConfigs(w, appName, cfg); err != nil { - return err - } - if err := generatePromtailAgentConfigs(w, appName, cfg); err != nil { - return err - } - return nil -} - -// generateLokiConfigs generate configs needed by a Loki service to -// aggregate app logs. -// -// Note that these configs are needed by both the automatically started Loki -// service and the one started by the user. -// -// TODO(rgrandl): check if we can simplify the configurations. 
-func generateLokiConfigs(w io.Writer, appName string, cfg *kubeConfig) error { - cname := name{appName, "loki", "config"}.DNSLabel() - lname := name{appName, "loki"}.DNSLabel() - - timeSchemaEnabledFromFn := func() string { - current := time.Now() - year, month, day := current.Date() - return fmt.Sprintf("%d-%02d-%02d", year, month, day) - } - - // Build the config map that holds the Loki configuration file. In the - // config we specify how to store the logs and the schema. Right now we have - // a very simple in-memory store [1]. - // - // TODO(rgrandl): There are millions of knobs to tune the config. We might revisit - // this in the future. - // - // [1] https://grafana.com/docs/loki/latest/operations/storage/boltdb-shipper/ - config := fmt.Sprintf(` -auth_enabled: false -server: - http_listen_port: %d - -common: - instance_addr: 127.0.0.1 - path_prefix: /tmp/%s - storage: - filesystem: - chunks_directory: /tmp/%s/chunks - rules_directory: /tmp/%s/rules - replication_factor: 1 - ring: - kvstore: - store: inmemory - -schema_config: - configs: - - from: %s # Marks the starting point of this schema - store: boltdb-shipper - object_store: filesystem - schema: v11 - index: - prefix: index_ - period: 24h -`, defaultLokiPort, lname, lname, lname, timeSchemaEnabledFromFn()) - - // Create a config map to store the Loki config. - cm := corev1.ConfigMap{ - TypeMeta: metav1.TypeMeta{ - Kind: "ConfigMap", - APIVersion: "v1", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: cname, - Namespace: cfg.Namespace, - }, - Data: map[string]string{ - "loki.yaml": config, - }, - } - if err := marshalResource(w, cm, fmt.Sprintf("Grafana Service %s", cname)); err != nil { - return err - } - fmt.Fprintf(os.Stderr, "Generated kube deployment for config map %s\n", cname) - return nil -} - -// generatePromtailConfigs generates configuration needed to enable Promtail -// to retrieve the app logs. 
-// -// Note that these configs are needed by both the automatically started Promtail -// agent and the one started by the user. -// -// TODO(rgrandl): check if we can simplify the configurations. -func generatePromtailConfigs(w io.Writer, appName string, cfg *kubeConfig) error { - promName := name{appName, "promtail"}.DNSLabel() - - var lokiURL string - lservice := cfg.Observability[logsConfigKey] - switch { - case lservice == auto: - // lokiURL should point to the Loki service generated by Kube. - lokiURL = name{appName, "loki"}.DNSLabel() - case lservice != disabled: - // lokiURL should point to the Loki service provided by the user. - lokiURL = lservice - default: - // No Loki service URL to set. - } - - // This configuration is a simplified version of the Promtail config generated - // by helm [1]. Right now we scrape only logs from the pods. We may want to - // scrape system information and nodes info as well. - // - // The scraped logs are sent to Loki for indexing and being stored. - // - // [1] https://helm.sh/docs/topics/charts/. - config := fmt.Sprintf(` -server: - log_format: logfmt - http_listen_port: 3101 - -clients: - - url: http://%s:%d/loki/api/v1/push - -positions: - filename: /run/promtail/positions.yaml - -scrape_configs: - - job_name: kubernetes-pods - kubernetes_sd_configs: - - role: pod - namespaces: - names: - - %s - relabel_configs: - - source_labels: - - __meta_kubernetes_pod_label_appName - regex: ^.*%s.*$ - action: keep - - source_labels: - - __meta_kubernetes_pod_label_appName - action: replace - target_label: app - - source_labels: - - __meta_kubernetes_pod_name - action: replace - target_label: pod - - action: replace - replacement: /var/log/pods/*$1/*.log - separator: / - source_labels: - - __meta_kubernetes_pod_uid - - __meta_kubernetes_pod_container_name - target_label: __path__ -`, lokiURL, defaultLokiPort, cfg.Namespace, appName) - - // Config is stored as a config map in the daemonset. 
- cm := corev1.ConfigMap{ - TypeMeta: metav1.TypeMeta{ - Kind: "ConfigMap", - APIVersion: "v1", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: promName, - Namespace: cfg.Namespace, - }, - Data: map[string]string{ - "promtail.yaml": config, - }, - } - if err := marshalResource(w, cm, fmt.Sprintf("Config Map %s", cm.Name)); err != nil { - return err - } - fmt.Fprintf(os.Stderr, "Generated kube deployment for config map %s\n", cm.Name) - return nil -} - -// generateLokiServiceConfigs generates the Loki kubernetes service information -// for a given app. -// -// Note that the configs should be generated iff the kube deployer automatically -// runs a Loki service along with the app. -// -// TODO(rgrandl): We run a single instance of Loki for now. We might want to -// scale it up if it becomes a bottleneck. -func generateLokiServiceConfigs(w io.Writer, appName string, cfg *kubeConfig) error { - // Build the kubernetes Loki deployment. - cname := name{appName, "loki", "config"}.DNSLabel() - lname := name{appName, "loki"}.DNSLabel() - - d := &appsv1.Deployment{ - TypeMeta: metav1.TypeMeta{ - APIVersion: "apps/v1", - Kind: "Deployment", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: lname, - Namespace: cfg.Namespace, - }, - Spec: appsv1.DeploymentSpec{ - Replicas: ptrOf(int32(1)), - Selector: &metav1.LabelSelector{ - MatchLabels: map[string]string{"loki": lname}, - }, - Template: corev1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{"loki": lname}, - Namespace: cfg.Namespace, - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: lname, - Image: fmt.Sprintf("%s:latest", autoLokiImageName), - ImagePullPolicy: corev1.PullIfNotPresent, - Args: []string{ - fmt.Sprintf("--config.file=/etc/%s/loki.yaml", lname), - }, - Ports: []corev1.ContainerPort{{ContainerPort: defaultLokiPort}}, - VolumeMounts: []corev1.VolumeMount{ - { - Name: cname, - MountPath: fmt.Sprintf("/etc/%s/", lname), - }, - }, - }, - }, - Volumes: []corev1.Volume{ - 
{ - Name: cname, - VolumeSource: corev1.VolumeSource{ - ConfigMap: &corev1.ConfigMapVolumeSource{ - LocalObjectReference: corev1.LocalObjectReference{ - Name: cname, - }, - Items: []corev1.KeyToPath{ - { - Key: "loki.yaml", - Path: "loki.yaml", - }, - }, - }, - }, - }, - }, - }, - }, - Strategy: appsv1.DeploymentStrategy{ - Type: "RollingUpdate", - RollingUpdate: &appsv1.RollingUpdateDeployment{}, - }, - }, - } - if err := marshalResource(w, d, fmt.Sprintf("Loki Deployment %s", lname)); err != nil { - return err - } - fmt.Fprintf(os.Stderr, "Generated kube deployment for Loki %s\n", lname) - - // Build the kubernetes Loki service. - s := &corev1.Service{ - TypeMeta: metav1.TypeMeta{ - APIVersion: "v1", - Kind: "Service", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: lname, - Namespace: cfg.Namespace, - }, - Spec: corev1.ServiceSpec{ - Selector: map[string]string{"loki": lname}, - Ports: []corev1.ServicePort{ - { - Port: defaultLokiPort, - Protocol: "TCP", - TargetPort: intstr.IntOrString{IntVal: int32(defaultLokiPort)}, - }, - }, - }, - } - if err := marshalResource(w, s, fmt.Sprintf("Loki Service %s", lname)); err != nil { - return err - } - fmt.Fprintf(os.Stderr, "Generated kube service for Loki %s\n", lname) - return nil -} - -// generatePromtailAgentConfigs generates the Promtail kubernetes configs -// to deploy Promtail on each node in the cluster. -// -// Note that the configs should be generated iff the kube deployer automatically -// runs Promtail along with the app. -func generatePromtailAgentConfigs(w io.Writer, appName string, cfg *kubeConfig) error { - // Create a Promtail daemonset that will run on each node. The daemonset will - // run in order to scrape the pods running on each node. 
- promName := name{appName, "promtail"}.DNSLabel() - dset := appsv1.DaemonSet{ - TypeMeta: metav1.TypeMeta{ - Kind: "DaemonSet", - APIVersion: "apps/v1", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: promName, - Namespace: cfg.Namespace, - }, - Spec: appsv1.DaemonSetSpec{ - Selector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "promtail": promName, - }, - }, - Template: corev1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{ - "promtail": promName, - }, - Namespace: cfg.Namespace, - }, - Spec: corev1.PodSpec{ - ServiceAccountName: "default", - Containers: []corev1.Container{ - { - Name: promName, - Image: fmt.Sprintf("%s:latest", autoPromtailImageName), - ImagePullPolicy: corev1.PullIfNotPresent, - Args: []string{ - fmt.Sprintf("--config.file=/etc/%s/promtail.yaml", promName), - }, - Ports: []corev1.ContainerPort{{ContainerPort: 3101}}, - Env: []corev1.EnvVar{ - { - Name: "HOSTNAME", - ValueFrom: &corev1.EnvVarSource{ - FieldRef: &corev1.ObjectFieldSelector{ - FieldPath: "spec.nodeName", - }, - }, - }, - }, - VolumeMounts: []corev1.VolumeMount{ - { - Name: "config", - MountPath: fmt.Sprintf("/etc/%s", promName), - }, - { - Name: "run", - MountPath: "/run/promtail", - }, - { - Name: "containers", - MountPath: "/var/lib/docker/containers", - ReadOnly: true, - }, - { - Name: "pods", - MountPath: "/var/log/pods", - ReadOnly: true, - }, - }, - }, - }, - Volumes: []corev1.Volume{ - { - Name: "config", - VolumeSource: corev1.VolumeSource{ - ConfigMap: &corev1.ConfigMapVolumeSource{ - LocalObjectReference: corev1.LocalObjectReference{ - Name: promName, - }, - Items: []corev1.KeyToPath{ - { - Key: "promtail.yaml", - Path: "promtail.yaml", - }, - }, - }, - }, - }, - { - Name: "run", - VolumeSource: corev1.VolumeSource{ - HostPath: &corev1.HostPathVolumeSource{ - Path: "/run/promtail", - }, - }, - }, - { - Name: "containers", - VolumeSource: corev1.VolumeSource{ - HostPath: &corev1.HostPathVolumeSource{ - Path: 
"/var/lib/docker/containers", - }, - }, - }, - { - Name: "pods", - VolumeSource: corev1.VolumeSource{ - HostPath: &corev1.HostPathVolumeSource{ - Path: "/var/log/pods", - }, - }, - }, - }, - }, - }, - UpdateStrategy: appsv1.DaemonSetUpdateStrategy{ - Type: "RollingUpdate", - RollingUpdate: &appsv1.RollingUpdateDaemonSet{}, - }, - }, - } - if err := marshalResource(w, dset, fmt.Sprintf("Promtail DaemonSet %s", promName)); err != nil { - return err - } - fmt.Fprintf(os.Stderr, "Generated kube daemonset for Promtail %s\n", promName) - return nil -} - -// generateConfigsToExportToGrafana generates the Grafana kubernetes deployment -// information for a given app. -// -// TODO(rgrandl): Convert the below comments into docs. -// How to integrate with an external Grafana service? -// E.g., if you use Helm [1] to install Grafana, you can simply do the following: -// 1) You install Grafana using a command similar to the one below. -// helm install grafana grafana/grafana -// -// Assume your running Grafana service is 'grafana' -// -// 2) In your config.toml file, you should specify the name of your Grafana service: -// -// config.toml -// ... -// [kube] -// observability = {grafana_service = "grafana"} -// -// Once the Kube deployer generates the deployment information, you should update -// your grafana release with the datasources and the dashboard from the generated -// config map for Grafana. You can find the config map by running kubectl cm -n . -// The config map should be named something like collatz-grafana-config-xyz. -// -// 3) Create a manifest file containing the datasource and the dashboard (e.g., graf.yaml). -// -// 4) Update your grafana release as follows: -// helm upgrade grafana grafana/grafana -f graf.yaml -// -// 5) Your Grafana UI should be able to load the Service Weaver dashboard, and -// the configured datasources. 
-// -// [1] https://helm.sh/ -func generateConfigsToExportToGrafana(w io.Writer, appName string, cfg *kubeConfig) error { - // The user disabled Grafana, don't generate anything. - if cfg.Observability[grafanaConfigKey] == disabled { - return nil - } - - // Generate configs needed to configure Grafana. - if err := generateGrafanaConfigs(w, appName, cfg); err != nil { - return err - } - - // Generate the Grafana kubernetes deployment info iff the kube deployer should - // deploy the Grafana service. - if cfg.Observability[grafanaConfigKey] != auto { - return nil - } - - // Generate kubernetes service configs needed to run Grafana. - if err := generateGrafanaServiceConfigs(w, appName, cfg); err != nil { - return err - } - return nil -} - -// generateGrafanaConfigs generate configs needed by the Grafana service -// to manipulate various datasources and to export dashboards. -// -// TODO(rgrandl): check if we can simplify the configurations. -func generateGrafanaConfigs(w io.Writer, appName string, cfg *kubeConfig) error { - cname := name{appName, "grafana", "config"}.DNSLabel() - - // Build the config map that holds the Grafana configuration file. In the - // config we specify which data source connections the Grafana service should - // export. By default, we export the Prometheus, Jaeger and Loki services in - // order to have a single dashboard where we can visualize the metrics and the - // traces of the app. - config := ` -apiVersion: 1 -datasources: -` - - // Set up the Jaeger data source (if any). - var jaegerURL string - jservice := cfg.Observability[tracesConfigKey] - switch { - case jservice == auto: - // jaegerURL should point to the Jaeger service generated by the Kube deployer. - jaegerURL = fmt.Sprintf("http://%s:%d", name{appName, jaegerAppName}.DNSLabel(), defaultJaegerUIPort) - case jservice != disabled: - // jaegerURL should point to the Jaeger service provided by the user. 
- jaegerURL = fmt.Sprintf("http://%s:%d", jservice, defaultJaegerUIPort) - default: - // No Jaeger service URL to set. - } - if jaegerURL != "" { - config = fmt.Sprintf(` -%s - - name: Jaeger - type: jaeger - url: %s -`, config, jaegerURL) - } - - // Set up the Prometheus data source (if any). - var prometheusURL string - pservice := cfg.Observability[metricsConfigKey] - switch { - case pservice == auto: - // prometheusURL should point to the Prometheus service generated by the Kube deployer. - prometheusURL = fmt.Sprintf("http://%s", name{appName, "prometheus"}.DNSLabel()) - case pservice != disabled: - // prometheusURL should point to the Prometheus service provided by the user. - prometheusURL = fmt.Sprintf("http://%s", pservice) - default: - // No Prometheus service URL to set. - } - if prometheusURL != "" { - config = fmt.Sprintf(` -%s - - name: Prometheus - type: prometheus - access: proxy - url: %s - isDefault: true -`, config, prometheusURL) - } - - // Set up the Loki data source (if any). - var lokiURL string - lservice := cfg.Observability[logsConfigKey] - switch { - case lservice == auto: - // lokiURL should point to the Loki service generated by the Kube deployer. - lokiURL = fmt.Sprintf("http://%s:%d", name{appName, "loki"}.DNSLabel(), defaultLokiPort) - case lservice != disabled: - // lokiURL should point to the Loki service provided by the user. - lokiURL = fmt.Sprintf("http://%s:%d", lservice, defaultLokiPort) - default: - // No Loki service URL to set. - } - if lokiURL != "" { - // Note that we add a custom HTTP header 'X-Scope-OrgID' to make Grafana - // work with a Loki datasource that runs in multi-tenant mode [1]. - // - // [1] https://stackoverflow.com/questions/76387302/configuring-loki-datasource-for-grafana-error-no-org-id-found - // - // TODO(rgrandl): Investigate how we can do this in a more programmatic way. 
- config = fmt.Sprintf(` -%s - - name: Loki - type: loki - access: proxy - jsonData: - httpHeaderName1: 'X-Scope-OrgID' - secureJsonData: - httpHeaderValue1: 'customvalue' - url: %s -`, config, lokiURL) - } - - // It contains the list of dashboard providers that load dashboards into - // Grafana from the local filesystem [1]. - // - // https://grafana.com/docs/grafana/latest/administration/provisioning/#dashboards - const dashboard = ` -apiVersion: 1 -providers: - - name: 'Service Weaver Dashboard' - options: - path: /etc/grafana/dashboards/default-dashboard.json -` - - // Create a config map to store the Grafana configs and the default dashboards. - cm := corev1.ConfigMap{ - TypeMeta: metav1.TypeMeta{ - Kind: "ConfigMap", - APIVersion: "v1", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: cname, - Namespace: cfg.Namespace, - }, - Data: map[string]string{ - "grafana.yaml": config, - "dashboard-config.yaml": dashboard, - "default-dashboard.json": fmt.Sprintf(dashboardContent, appName), - }, - } - if err := marshalResource(w, cm, fmt.Sprintf("Config Map %s", cname)); err != nil { - return err - } - fmt.Fprintf(os.Stderr, "Generated kube deployment for config map %s\n", cname) - return nil -} - -// generateGrafanaServiceConfigs generates the kubernetes configurations to deploy -// a Grafana service for a given app. -// -// Note that the configs should be generated iff the kube deployer automatically -// runs Grafana along with the app. -// -// TODO(rgrandl): We run a single instance of Grafana for now. We might want -// to scale it up if it becomes a bottleneck. -func generateGrafanaServiceConfigs(w io.Writer, appName string, cfg *kubeConfig) error { - cname := name{appName, "grafana", "config"}.DNSLabel() - gname := name{appName, "grafana"}.DNSLabel() - - // Generate the Grafana deployment. 
- d := &appsv1.Deployment{ - TypeMeta: metav1.TypeMeta{ - APIVersion: "apps/v1", - Kind: "Deployment", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: gname, - Namespace: cfg.Namespace, - }, - Spec: appsv1.DeploymentSpec{ - Replicas: ptrOf(int32(1)), - Selector: &metav1.LabelSelector{ - MatchLabels: map[string]string{"grafana": gname}, - }, - Template: corev1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{"grafana": gname}, - Namespace: cfg.Namespace, - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: gname, - Image: fmt.Sprintf("%s:latest", autoGrafanaImageName), - ImagePullPolicy: corev1.PullIfNotPresent, - Ports: []corev1.ContainerPort{{ContainerPort: defaultGrafanaPort}}, - VolumeMounts: []corev1.VolumeMount{ - { - // By default, we have to store any data source connection that - // should be exported by Grafana under provisioning/datasources. - Name: "datasource-volume", - MountPath: "/etc/grafana/provisioning/datasources/", - }, - { - // By default, we need to store the dashboards config files under - // provisioning/dashboards directory. Each config file can contain - // a list of dashboards providers that load dashboards into Grafana - // from the local filesystem. More info here [1]. - // - // [1] https://grafana.com/docs/grafana/latest/administration/provisioning/#dashboards - Name: "dashboards-config", - MountPath: "/etc/grafana/provisioning/dashboards/", - }, - { - // Mount the volume that stores the predefined dashboards. - Name: "dashboards", - MountPath: "/etc/grafana/dashboards/", - }, - }, - Env: []corev1.EnvVar{ - // TODO(rgrandl): we may want to enable the user to specify their - // credentials in a different way. 
- { - Name: "GF_SECURITY_ADMIN_USER", - Value: "admin", - }, - { - Name: "GF_SECURITY_ADMIN_PASSWORD", - Value: "admin", - }, - }, - }, - }, - Volumes: []corev1.Volume{ - { - Name: "datasource-volume", - VolumeSource: corev1.VolumeSource{ - ConfigMap: &corev1.ConfigMapVolumeSource{ - LocalObjectReference: corev1.LocalObjectReference{ - Name: cname, - }, - Items: []corev1.KeyToPath{ - { - Key: "grafana.yaml", - Path: "grafana.yaml", - }, - }, - }, - }, - }, - { - Name: "dashboards-config", - VolumeSource: corev1.VolumeSource{ - ConfigMap: &corev1.ConfigMapVolumeSource{ - LocalObjectReference: corev1.LocalObjectReference{ - Name: cname, - }, - Items: []corev1.KeyToPath{ - { - Key: "dashboard-config.yaml", - Path: "dashboard-config.yaml", - }, - }, - }, - }, - }, - { - Name: "dashboards", - VolumeSource: corev1.VolumeSource{ - ConfigMap: &corev1.ConfigMapVolumeSource{ - LocalObjectReference: corev1.LocalObjectReference{ - Name: cname, - }, - Items: []corev1.KeyToPath{ - { - Key: "default-dashboard.json", - Path: "default-dashboard.json", - }, - }, - }, - }, - }, - }, - }, - }, - Strategy: appsv1.DeploymentStrategy{ - Type: "RollingUpdate", - RollingUpdate: &appsv1.RollingUpdateDeployment{}, - }, - }, - } - if err := marshalResource(w, d, "Grafana Deployment"); err != nil { - return err - } - fmt.Fprintf(os.Stderr, "Generated Grafana deployment\n") - - // Generate the Grafana service. - // - // TODO(rgrandl): should we create a load balancer instead of a cluster ip? 
- s := &corev1.Service{ - TypeMeta: metav1.TypeMeta{ - APIVersion: "v1", - Kind: "Service", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: gname, - Namespace: cfg.Namespace, - }, - Spec: corev1.ServiceSpec{ - Selector: map[string]string{"grafana": gname}, - Ports: []corev1.ServicePort{ - { - Name: "ui-port", - Port: servicePort, - Protocol: "TCP", - TargetPort: intstr.IntOrString{IntVal: int32(defaultGrafanaPort)}, - }, - }, - }, - } - if err := marshalResource(w, s, "Grafana Service"); err != nil { - return err - } - fmt.Fprintf(os.Stderr, "Generated Grafana service\n") - return nil -} - -// marshalResource marshals the provided Kubernetes resource into YAML into the -// provided writer, prefixing it with the provided comment. -func marshalResource(w io.Writer, resource any, comment string) error { - bytes, err := yaml.Marshal(resource) - if err != nil { - return err - } - fmt.Fprintf(w, "\n# %s\n", comment) - if _, err := w.Write(bytes); err != nil { - return err - } - fmt.Fprintf(w, "\n---\n") - return nil -}