From 77f2537af89fd434c6c8547ce53b20db554afb3f Mon Sep 17 00:00:00 2001 From: Alexey Olshanskiy <234377865+avlllo@users.noreply.github.com> Date: Tue, 19 May 2026 17:36:35 +0300 Subject: [PATCH 01/17] feat: update runtime-monitor to 0.2 --- runtime-monitor/.dockerignore | 1 + runtime-monitor/.helm/Chart.yaml | 2 +- .../.helm/templates/_container_tetragon.tpl | 4 + .../.helm/templates/_extensions.tpl | 2 + .../.helm/templates/clusterrole.yaml | 3 + .../.helm/templates/daemonset.yaml | 36 +- .../.helm/templates/tetragon_configmap.yaml | 16 +- runtime-monitor/.helm/values.yaml | 100 +- runtime-monitor/Dockerfile | 16 +- runtime-monitor/Taskfile.yml | 45 +- runtime-monitor/api/config.pb.go | 136 ++- runtime-monitor/api/config.pb.gw.go | 229 +++- runtime-monitor/api/config.proto | 17 + runtime-monitor/api/config_grpc.pb.go | 117 +- .../api/openapiv2/config.swagger.json | 93 +- runtime-monitor/api/tetragon/bpf.proto | 276 ++--- .../api/tetragon/capabilities.proto | 668 +++++----- runtime-monitor/api/tetragon/events.proto | 326 ++--- runtime-monitor/api/tetragon/sensors.proto | 310 ++--- runtime-monitor/api/tetragon/stack.proto | 20 +- runtime-monitor/api/tetragon/tetragon.proto | 1083 +++++++++-------- runtime-monitor/cmd/runtime-monitor/main.go | 56 +- runtime-monitor/docker-compose.test.yml | 13 +- runtime-monitor/pkg/config/config.go | 8 +- runtime-monitor/pkg/database/config.go | 11 +- runtime-monitor/pkg/database/database.go | 3 +- runtime-monitor/pkg/metrics/metrics.go | 70 ++ runtime-monitor/pkg/model/config.go | 42 +- .../model/tracingpolicy/file-monitoring.yaml | 145 ++- .../pkg/model/tracingpolicy/io-streams.yaml | 72 ++ .../pkg/model/tracingpolicy/permissions.yaml | 71 ++ .../pkg/model/tracingpolicy/rootkit.yml | 45 + .../pkg/model/tracingpolicy/umh.yaml | 61 + runtime-monitor/pkg/monitor/config/config.go | 5 - runtime-monitor/pkg/monitor/monitor.go | 114 +- .../pkg/monitor/publisher/publisher.go | 6 + .../pkg/monitor/updater/updater.go | 4 +- 
runtime-monitor/pkg/server/server.go | 11 +- runtime-monitor/pkg/service/config_audit.go | 65 + runtime-monitor/pkg/service/config_auth.go | 27 + runtime-monitor/pkg/service/config_generic.go | 48 +- runtime-monitor/pkg/service/config_logging.go | 32 +- 42 files changed, 2831 insertions(+), 1578 deletions(-) create mode 100644 runtime-monitor/pkg/metrics/metrics.go create mode 100644 runtime-monitor/pkg/model/tracingpolicy/io-streams.yaml create mode 100644 runtime-monitor/pkg/model/tracingpolicy/permissions.yaml create mode 100644 runtime-monitor/pkg/model/tracingpolicy/rootkit.yml create mode 100644 runtime-monitor/pkg/model/tracingpolicy/umh.yaml create mode 100644 runtime-monitor/pkg/service/config_audit.go diff --git a/runtime-monitor/.dockerignore b/runtime-monitor/.dockerignore index cceb7d1d..2b47e45d 100644 --- a/runtime-monitor/.dockerignore +++ b/runtime-monitor/.dockerignore @@ -31,6 +31,7 @@ docker-compose*.yml .dockerignore /.helm /.task +.build # Misc .env* diff --git a/runtime-monitor/.helm/Chart.yaml b/runtime-monitor/.helm/Chart.yaml index 6afecc6b..b0d10d34 100644 --- a/runtime-monitor/.helm/Chart.yaml +++ b/runtime-monitor/.helm/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: runtime-monitor description: A Helm chart for Kubernetes type: application -version: v0.0.1 +version: 0.0.1 dependencies: - name: common repository: file://../../install/helm/charts/common diff --git a/runtime-monitor/.helm/templates/_container_tetragon.tpl b/runtime-monitor/.helm/templates/_container_tetragon.tpl index 9a2abeb1..718c630f 100644 --- a/runtime-monitor/.helm/templates/_container_tetragon.tpl +++ b/runtime-monitor/.helm/templates/_container_tetragon.tpl @@ -41,6 +41,10 @@ - mountPath: /tmp name: empty-dir subPath: tmp-dir +{{- if and (.Values.tetragon.cri.enabled) (.Values.tetragon.cri.socketHostPath) }} + - mountPath: {{ quote .Values.tetragon.cri.socketHostPath }} + name: cri-socket +{{- end }} {{- range .Values.extraHostPathMounts }} - name: {{ .name }} 
mountPath: {{ .mountPath }} diff --git a/runtime-monitor/.helm/templates/_extensions.tpl b/runtime-monitor/.helm/templates/_extensions.tpl index 8e160597..62a734a9 100644 --- a/runtime-monitor/.helm/templates/_extensions.tpl +++ b/runtime-monitor/.helm/templates/_extensions.tpl @@ -5,3 +5,5 @@ {{- define "tetragon.volumemounts.extra" -}}{{- end }} {{- define "initcontainers.extra" -}}{{- end }} + +{{- define "clusterrole.extra" -}}{{- end }} diff --git a/runtime-monitor/.helm/templates/clusterrole.yaml b/runtime-monitor/.helm/templates/clusterrole.yaml index 4639c11e..8df6d1a8 100644 --- a/runtime-monitor/.helm/templates/clusterrole.yaml +++ b/runtime-monitor/.helm/templates/clusterrole.yaml @@ -9,6 +9,8 @@ rules: - apiGroups: - "" resources: + - namespaces + - nodes - pods - services verbs: @@ -35,4 +37,5 @@ rules: - get - list - watch + {{- include "clusterrole.extra" . | nindent 2 }} {{- end }} diff --git a/runtime-monitor/.helm/templates/daemonset.yaml b/runtime-monitor/.helm/templates/daemonset.yaml index 3c3888d5..b4875765 100644 --- a/runtime-monitor/.helm/templates/daemonset.yaml +++ b/runtime-monitor/.helm/templates/daemonset.yaml @@ -16,6 +16,9 @@ spec: labels: {{- include "common.podLabels" . | nindent 8 }} spec: + {{- with .Values.priorityClassName }} + priorityClassName: "{{ . }}" + {{- end }} {{- with .Values.imagePullSecrets }} imagePullSecrets: {{- toYaml . | nindent 8 }} @@ -25,9 +28,11 @@ spec: securityContext: {{- omit .Values.podSecurityContext "enabled" | toYaml | nindent 8 }} {{- end }} + initContainers: + {{- include "initcontainers.extra" . | nindent 6 }} containers: - {{- include "container.tetragon" . | nindent 8 -}} {{- include "common.cs.container.app" . | nindent 8 -}} + {{- include "container.tetragon" . | nindent 8 -}} {{- with .Values.nodeSelector }} nodeSelector: {{- toYaml . 
| nindent 8 }} @@ -63,28 +68,25 @@ spec: hostPath: path: {{ .Values.tetragon.hostProcPath }} type: Directory -{{- if .Values.tetragon.ociHookSetup.enabled }} - - name: oci-hooks-path + {{- if and (.Values.tetragon.cri.enabled) (.Values.tetragon.cri.socketHostPath) }} + - name: cri-socket hostPath: - path: /usr/share/containers/oci/hooks.d/ - type: Directory - - name: oci-hooks-install-path - hostPath: - path: {{ quote .Values.tetragon.ociHookSetup.installDir }} - type: DirectoryOrCreate -{{- end }} -{{- if not .Values.tetragon.btf }} + path: {{ quote .Values.tetragon.cri.socketHostPath }} + type: Socket + {{- end }} + {{- if not .Values.tetragon.btf }} - emptyDir: {} name: metadata-files -{{- end }} -{{- range .Values.extraHostPathMounts }} + {{- end }} + {{- with .Values.extraVolumes }} + {{- toYaml . | nindent 6 }} + {{- end }} + {{- range .Values.extraHostPathMounts }} - name: {{ .name }} hostPath: path: {{ .mountPath }} -{{- end }} -{{- with (include "common.cs.volumes" . | fromYaml) }} - {{- toYaml .volumes | nindent 6 }} -{{- end }} + {{- end }} + {{- include "common.cs.volumes" . | nindent 6 }} {{- with .Values.updateStrategy }} updateStrategy: {{- toYaml . | nindent 4 }} diff --git a/runtime-monitor/.helm/templates/tetragon_configmap.yaml b/runtime-monitor/.helm/templates/tetragon_configmap.yaml index d4dcaa32..d6289a33 100644 --- a/runtime-monitor/.helm/templates/tetragon_configmap.yaml +++ b/runtime-monitor/.helm/templates/tetragon_configmap.yaml @@ -4,6 +4,10 @@ metadata: name: {{ include "common.name" . }}-tetragon-config labels: {{- include "common.labels" . | nindent 4 }} + {{- with (include "common.annotations" .) }} + annotations: + {{- . 
| nindent 4 }} + {{- end }} data: cluster-name: {{ .Values.tetragon.clusterName | quote }} {{- if .Values.tetragon.btf }} @@ -13,6 +17,7 @@ data: debug: {{ .Values.tetragon.debug | quote }} enable-process-cred: {{ .Values.tetragon.enableProcessCred | quote }} enable-process-ns: {{ .Values.tetragon.enableProcessNs | quote }} + enable-ancestors: {{ .Values.tetragon.processAncestors.enabled }} process-cache-size: {{ .Values.tetragon.processCacheSize | quote }} {{- if .Values.tetragon.exportFilename }} export-filename: {{ .Values.exportDirectory}}/{{ .Values.tetragon.exportFilename }} @@ -58,6 +63,9 @@ data: {{- if .Values.tetragon.enablePolicyFilter }} enable-policy-filter: "true" {{- end }} +{{- if .Values.tetragon.enablePolicyFilterCgroupMap }} + enable-policy-filter-cgroup-map: "true" +{{- end }} {{- if .Values.tetragon.enablePolicyFilterDebug }} enable-policy-filter-debug: "true" {{- end }} @@ -71,9 +79,15 @@ data: {{- end }} event-cache-retries: {{ .Values.tetragon.eventCacheRetries | quote }} event-cache-retry-delay: {{ .Values.tetragon.eventCacheRetryDelay | quote }} -{{- include "configmap.extra" . | nindent 2 }} + {{- include "configmap.extra" . 
| nindent 2 }} {{- if .Values.tetragon.enableKeepSensorsOnExit }} keep-sensors-on-exit: "true" release-pinned-bpf: "false" {{- end }} process-cache-gc-interval: {{ .Values.tetragon.processCacheGCInterval | quote }} + enable-cri: {{ .Values.tetragon.cri.enabled | quote }} +{{- if and (.Values.tetragon.cri.enabled) (.Values.tetragon.cri.socketHostPath) }} + cri-endpoint: "unix://{{ .Values.tetragon.cri.socketHostPath }}" +{{- end }} + enable-cgidmap: {{ .Values.tetragon.cgidmap.enabled | quote }} + enable-pod-annotations: {{ .Values.tetragon.podAnnotations.enabled | default "false" | quote }} diff --git a/runtime-monitor/.helm/values.yaml b/runtime-monitor/.helm/values.yaml index 0f5ae81a..e6449a45 100644 --- a/runtime-monitor/.helm/values.yaml +++ b/runtime-monitor/.helm/values.yaml @@ -73,7 +73,7 @@ startupProbe: successThreshold: 1 failureThreshold: 3 # tetragon values are copied from tetragon helm chart as-is -# https://github.com/cilium/tetragon/blob/v1.3.0/install/kubernetes/tetragon/values.yaml +# https://github.com/cilium/tetragon/blob/v1.5.0/install/kubernetes/tetragon/values.yaml # Tetragon agent settings priorityClassName: "" imagePullPolicy: IfNotPresent @@ -110,10 +110,11 @@ exportDirectory: "/var/run/cilium/tetragon" hostNetwork: true tetragon: enabled: true + nameOverride: "" image: override: ~ repository: quay.io/cilium/tetragon - tag: v1.3.0 + tag: v1.5.0 resources: limits: cpu: 1 @@ -128,6 +129,8 @@ tetragon: # extraEnv: # - name: foo # value: bar + podAnnotations: + enabled: false extraVolumeMounts: [] securityContext: privileged: true @@ -161,7 +164,7 @@ tetragon: # {"namespace":["default"],"event_set":["PROCESS_EXEC"]} exportAllowList: |- {"event_set":["PROCESS_EXEC", "PROCESS_EXIT", "PROCESS_KPROBE", "PROCESS_UPROBE", "PROCESS_TRACEPOINT", "PROCESS_LSM"]} - # -- Denylist for JSON export. For example, to exclude exec events that look similar to + # -- Denylist for JSON export **(for file sinks only; does not filter gRPC output)**. 
For example, to exclude exec events that look similar to # Kubernetes health checks and all the events from kube-system namespace and the host: # # exportDenyList: | @@ -227,6 +230,12 @@ tetragon: enableProcessCred: false # -- Enable Namespaces visibility in exec and kprobe events. enableProcessNs: false + processAncestors: + # -- Comma-separated list of process event types to enable ancestors for. + # Supported event types are: base, kprobe, tracepoint, uprobe, lsm. Unknown event types will be ignored. + # Type "base" is required by all other supported event types for correct reference counting. + # Set it to "" to disable ancestors completely. + enabled: "" # -- Set --btf option to explicitly specify an absolute path to a btf file. For advanced users only. btf: "" # -- Override the command. For advanced users only. @@ -241,7 +250,7 @@ tetragon: # -- The port at which to expose metrics. port: 2112 # -- Comma-separated list of enabled metrics labels. - # The configurable labels are: namespace, workload, pod, binary. Unkown labels will be ignored. + # The configurable labels are: namespace, workload, pod, binary. Unknown labels will be ignored. # Removing some labels from the list might help reduce the metrics cardinality if needed. metricsLabelFilter: "namespace,workload,pod,binary" serviceMonitor: @@ -252,7 +261,7 @@ tetragon: # -- Extra labels to be added on the Tetragon ServiceMonitor. extraLabels: {} # -- Interval at which metrics should be scraped. If not specified, Prometheus' global scrape interval is used. - scrapeInterval: "10s" + scrapeInterval: 60s grpc: # -- Whether to enable exposing Tetragon gRPC. enabled: true @@ -274,6 +283,8 @@ tetragon: port: 6060 # -- Enable policy filter. This is required for K8s namespace and pod-label filtering. enablePolicyFilter: True + # -- Enable policy filter cgroup map. + enablePolicyFilterCgroupMap: false # -- Enable policy filter debug messages. 
enablePolicyFilterDebug: false # -- Enable latency monitoring in message handling @@ -289,25 +300,6 @@ tetragon: # host, the path is /proc. Exceptions to this are environments like kind, where the runtime itself # does not run on the host. hostProcPath: "/proc" - # -- Configure tetragon's init container for setting up tetragon-oci-hook on the host - # NOTE: This is deprecated, please use .rthooks - ociHookSetup: - # -- enable init container to setup tetragon-oci-hook - enabled: false - # -- interface specifices how the hook is configured. There is only one avaialble value for now: - # "oci-hooks" (https://github.com/containers/common/blob/main/pkg/hooks/docs/oci-hooks.5.md). - interface: "oci-hooks" - installDir: "/opt/tetragon" - # -- Comma-separated list of namespaces to allow Pod creation for, in case tetragon-oci-hook fails to reach Tetragon agent. - # The namespace Tetragon is deployed in is always added as an exception and must not be added again. - failAllowNamespaces: "" - # -- Security context for oci-hook-setup init container - securityContext: - privileged: true - # -- Extra volume mounts to add to the oci-hook-setup init container - extraVolumeMounts: [] - # -- resources for the the oci-hook-setup init container - resources: {} # -- Configure the number of retries in tetragon's event cache. eventCacheRetries: 15 # -- Configure the delay (in seconds) between retires in tetragon's event cache. @@ -316,10 +308,36 @@ tetragon: enableKeepSensorsOnExit: false # -- Configure the interval (suffixed with s for seconds, m for minutes, etc) for the process cache garbage collector. processCacheGCInterval: 30s + # -- Configure tetragon pod so that it can contact the CRI running on the host + cri: + enabled: false + # -- path of the CRI socket on the host. This will typically be + # "/run/containerd/containerd.sock" for containerd or "/var/run/crio/crio.sock" for crio. 
+ socketHostPath: "" + # -- Enabling cgidmap instructs the Tetragon agent to use cgroup ids (instead of cgroup names) for + # pod association. This feature depends on cri being enabled. + cgidmap: + enabled: false # Tetragon Operator settings tetragonOperator: # -- Enables the Tetragon Operator. enabled: false + # -- The name of the Tetragon Operator deployment. + nameOverride: "" + # -- Number of replicas to run for the tetragon-operator deployment + replicas: 1 + # -- Lease handling for an automated failover when running multiple replicas + failoverLease: + # -- Enable lease failover functionality + enabled: false + # -- Kubernetes Namespace in which the Lease resource is created. Defaults to the namespace where Tetragon is deployed in, if it's empty. + namespace: "" + # -- If a lease is not renewed for X duration, the current leader is considered dead, a new leader is picked + leaseDuration: 15s + # -- The interval at which the leader will renew the lease + leaseRenewDeadline: 5s + # -- The timeout between retries if renewal fails + leaseRetryPeriod: 2s # -- Annotations for the Tetragon Operator Deployment. annotations: {} # -- Annotations for the Tetragon Operator Deployment Pods. @@ -336,9 +354,9 @@ tetragonOperator: annotations: {} name: "" # -- securityContext for the Tetragon Operator Deployment Pods. - securityContext: {} + podSecurityContext: {} # -- securityContext for the Tetragon Operator Deployment Pod container. - podSecurityContext: + securityContext: allowPrivilegeEscalation: false capabilities: drop: @@ -352,17 +370,28 @@ tetragonOperator: cpu: 10m memory: 64Mi # -- resources for the Tetragon Operator Deployment update strategy - strategy: {} + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 # -- Steer the Tetragon Operator Deployment Pod placement via nodeSelector, tolerations and affinity rules. 
nodeSelector: {} - tolerations: - - operator: Exists - affinity: {} + tolerations: [] + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + topologyKey: kubernetes.io/hostname + labelSelector: + matchLabels: + app.kubernetes.io/name: tetragon-operator # -- tetragon-operator image. image: override: ~ repository: quay.io/cilium/tetragon-operator - tag: v1.3.0 + tag: v1.5.0 pullPolicy: IfNotPresent # -- Extra volumes for the Tetragon Operator Deployment. extraVolumes: [] @@ -390,7 +419,7 @@ tetragonOperator: # -- Extra labels to be added on the Tetragon Operator ServiceMonitor. extraLabels: {} # -- Interval at which metrics should be scraped. If not specified, Prometheus' global scrape interval is used. - scrapeInterval: "10s" + scrapeInterval: 60s # -- Tetragon events export settings export: # "stdout". "" to disable. @@ -427,7 +456,7 @@ export: image: override: ~ repository: quay.io/cilium/hubble-export-stdout - tag: v1.0.4 + tag: v1.1.0 crds: # -- Method for installing CRDs. Supported values are: "operator", "helm" and "none". # The "operator" method allows for fine-grained control over which CRDs are installed and by @@ -446,11 +475,12 @@ rthooks: # Add an apppriate file to "/usr/share/containers/oci/hooks.d". Use this with CRI-O. # See https://github.com/containers/common/blob/main/pkg/hooks/docs/oci-hooks.5.md # for more details. - # Specific configuration for this interface can be found under "OciHooks". + # Specific configuration for this interface can be found under "ociHooks". # # "nri-hook": # Install the hook via NRI. Use this with containerd. Requires NRI being enabled. # see: https://github.com/containerd/containerd/blob/main/docs/NRI.md. + # Specific configuration for this interface can be found under "nriHook". 
# interface: "" # -- Annotations for the Tetragon rthooks daemonset @@ -486,7 +516,7 @@ rthooks: image: override: ~ repository: quay.io/cilium/tetragon-rthooks - tag: v0.4 + tag: v0.5 # -- rthooks service account. serviceAccount: name: "" diff --git a/runtime-monitor/Dockerfile b/runtime-monitor/Dockerfile index d31a8d9e..fbcf4691 100644 --- a/runtime-monitor/Dockerfile +++ b/runtime-monitor/Dockerfile @@ -1,8 +1,10 @@ -FROM golang:1.25.0 AS tools +ARG GO_VERSION=1.26.0 + +FROM golang:${GO_VERSION} AS tools RUN CGO_ENABLED=0 GOBIN=/usr/bin go install github.com/go-task/task/v3/cmd/task@v3.38.0 -FROM golang:1.25.0 AS builder +FROM golang:${GO_VERSION} AS builder ARG BUILD_RELEASE ARG BUILD_BRANCH @@ -14,14 +16,16 @@ COPY vendor/ vendor/ COPY pkg/tools/tools.go pkg/tools/ ENV GOCACHE=/root/.cache/go-build + RUN --mount=type=cache,target="/root/.cache/go-build" \ mkdir -p bin && \ GOBIN=/go/src/repo/bin go install github.com/google/gops COPY --from=tools /usr/bin/task /usr/bin -COPY . /go/src/repo +COPY runtime-monitor/ runtime-monitor/ +COPY lib/ lib/ -RUN task build +RUN --mount=type=cache,target="/root/.cache/go-build" task -d runtime-monitor build FROM debian:12.0-slim AS runner @@ -31,8 +35,8 @@ LABEL org.opencontainers.image.licenses="Apache-2.0" ARG APP_NAME=runtime-monitor -COPY --from=builder /go/src/repo/cmd/${APP_NAME}/${APP_NAME} /app -COPY --from=builder /go/src/repo/cmd/${APP_NAME}/*.pem / +COPY --from=builder /go/src/repo/${APP_NAME}/cmd/${APP_NAME}/${APP_NAME} /app +COPY --from=builder /go/src/repo/${APP_NAME}/cmd/${APP_NAME}/*.pem / COPY --from=builder /go/src/repo/bin/gops /usr/bin/gops EXPOSE 8000 9000 diff --git a/runtime-monitor/Taskfile.yml b/runtime-monitor/Taskfile.yml index bf6fc48f..54340a03 100644 --- a/runtime-monitor/Taskfile.yml +++ b/runtime-monitor/Taskfile.yml @@ -12,21 +12,19 @@ env: tasks: proto: deps: [protoc-plugins] - vars: - PROTO_DIR: api cmds: - | PATH="{{.TOOLS_BIN}}{{if eq OS "windows"}};{{else}}:{{end}}$PATH" protoc -I api \ 
--go_opt=paths=source_relative \ - --go_out={{.PROTO_DIR}} \ + --go_out=api \ --go-grpc_opt=paths=source_relative \ - --go-grpc_out={{.PROTO_DIR}} \ + --go-grpc_out=api \ --grpc-gateway_opt=paths=source_relative \ --grpc-gateway_opt=logtostderr=true \ - --grpc-gateway_out={{.PROTO_DIR}} \ + --grpc-gateway_out=api \ --openapiv2_opt=logtostderr=true \ - --openapiv2_out={{.PROTO_DIR}}/openapiv2 \ + --openapiv2_out=api/openapiv2 \ \ --go_opt=Mtetragon/bpf.proto="github.com/cilium/tetragon/api/v1/tetragon" \ --go_opt=Mtetragon/capabilities.proto="github.com/cilium/tetragon/api/v1/tetragon" \ @@ -56,12 +54,12 @@ tasks: --openapiv2_opt=Mtetragon/stack.proto="github.com/cilium/tetragon/api/v1/tetragon" \ --openapiv2_opt=Mtetragon/tetragon.proto="github.com/cilium/tetragon/api/v1/tetragon" \ \ - {{.PROTO_DIR}}/*.proto + api/*.proto sources: - - "{{.PROTO_DIR}}/**/*.proto" + - "api/**/*.proto" - "go.mod" generates: - - "{{.PROTO_DIR}}/**/*pb*.go" + - "api/**/*pb*.go" build: vars: @@ -73,9 +71,9 @@ tasks: sh: echo "${BUILD_COMMIT:-$(git rev-parse --short HEAD)}" BUILD_DATE: '{{now.Format "2006-01-02_15:04:05"}}' MODULE_NAME: - sh: go list -m + sh: 'echo "$(go list -m)/{{.APP_NAME}}"' cmds: - - | + - | CGO_ENABLED=0 go build -ldflags "-X {{.MODULE_NAME}}/pkg/build.Release={{.BUILD_RELEASE}} -X {{.MODULE_NAME}}/pkg/build.Branch={{.BUILD_BRANCH}} -X {{.MODULE_NAME}}/pkg/build.Commit={{.BUILD_COMMIT}} -X {{.MODULE_NAME}}/pkg/build.Date={{.BUILD_DATE}}" \ -o cmd/{{.APP_NAME}}/ ./cmd/{{.APP_NAME}} @@ -85,10 +83,14 @@ tasks: - go test -race -count=1 -vet=off ./cmd/{{.APP_NAME}} - go test -race -count=1 ./pkg/... 
+ test-docker-cleanup: + cmds: + - docker compose -f docker-compose.test.yml down -v + test-docker: cmds: + - defer: {task: test-docker-cleanup} - docker compose -f docker-compose.test.yml up --build --abort-on-container-exit test - - defer: docker compose -f docker-compose.test.yml down lint: deps: [tools] @@ -106,17 +108,16 @@ tasks: clean: cmds: - # - rm -f api/*.pb.go - rm -f bin/* - rm -f cmd/{{.APP_NAME}}/{{.APP_NAME}}{{exeExt}} tidy: cmds: - - go mod tidy + - cd .. && go mod tidy vendor: cmds: - - go mod vendor + - cd .. && go mod vendor generate: cmds: @@ -129,6 +130,7 @@ tasks: - cp cmd/{{.APP_NAME}}/cert.pem cmd/{{.APP_NAME}}/ca.pem docker-build: + dir: .. vars: BUILD_RELEASE: sh: git describe --tags 2> /dev/null || echo "v0.0.0" @@ -136,8 +138,15 @@ tasks: sh: git rev-parse --abbrev-ref HEAD BUILD_COMMIT: sh: git rev-parse --short HEAD + sources: + - go.mod + - go.sum + - lib/** + - "{{.APP_NAME}}/**" + generates: + - "{{.APP_NAME}}/.build" cmds: - - docker build --build-arg BUILD_RELEASE={{.BUILD_RELEASE}} --build-arg BUILD_BRANCH={{.BUILD_BRANCH}} --build-arg BUILD_COMMIT={{.BUILD_COMMIT}} --tag {{.DOCKER_IMAGE}} . + - docker build --iidfile {{.APP_NAME}}/.build -f {{.APP_NAME}}/Dockerfile --build-arg BUILD_RELEASE={{.BUILD_RELEASE}} --build-arg BUILD_BRANCH={{.BUILD_BRANCH}} --build-arg BUILD_COMMIT={{.BUILD_COMMIT}} --tag {{.DOCKER_IMAGE}} . 
docker-push: vars: @@ -159,9 +168,6 @@ tasks: - GOBIN={{.TOOLS_BIN}} go install golang.org/x/vuln/cmd/govulncheck protoc-plugins: - sources: - - "pkg/tools/tools.go" - - "go.mod" generates: - '{{.TOOLS_BIN}}/protoc-gen-grpc-gateway{{exeExt}}' - '{{.TOOLS_BIN}}/protoc-gen-openapiv2{{exeExt}}' @@ -178,6 +184,7 @@ tasks: - vendor/modules.txt - go.mod - go.sum + - "*.go" - cmd/**/*.go - pkg/**/*.go - internal/**/*.go diff --git a/runtime-monitor/api/config.pb.go b/runtime-monitor/api/config.pb.go index 6ac894ce..ba59cf81 100644 --- a/runtime-monitor/api/config.pb.go +++ b/runtime-monitor/api/config.pb.go @@ -1,7 +1,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: -// protoc-gen-go v1.36.6 -// protoc v5.29.3 +// protoc-gen-go v1.36.11 +// protoc v7.34.1 // source: config.proto // Package is called "runtime_config" in order to distinguish it from future more generic "config" package @@ -148,6 +148,74 @@ func (x *TracingPolicy) GetEnabled() bool { return false } +type ConfigStatus struct { + state protoimpl.MessageState `protogen:"open.v1"` + Default bool `protobuf:"varint,1,opt,name=default,proto3" json:"default,omitempty"` + DefaultTracingPolicies bool `protobuf:"varint,2,opt,name=default_tracing_policies,json=defaultTracingPolicies,proto3" json:"default_tracing_policies,omitempty"` + LastInitError string `protobuf:"bytes,3,opt,name=last_init_error,json=lastInitError,proto3" json:"last_init_error,omitempty"` + NodeName string `protobuf:"bytes,4,opt,name=node_name,json=nodeName,proto3" json:"node_name,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *ConfigStatus) Reset() { + *x = ConfigStatus{} + mi := &file_config_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *ConfigStatus) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ConfigStatus) ProtoMessage() {} + +func (x *ConfigStatus) ProtoReflect() protoreflect.Message 
{ + mi := &file_config_proto_msgTypes[2] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ConfigStatus.ProtoReflect.Descriptor instead. +func (*ConfigStatus) Descriptor() ([]byte, []int) { + return file_config_proto_rawDescGZIP(), []int{2} +} + +func (x *ConfigStatus) GetDefault() bool { + if x != nil { + return x.Default + } + return false +} + +func (x *ConfigStatus) GetDefaultTracingPolicies() bool { + if x != nil { + return x.DefaultTracingPolicies + } + return false +} + +func (x *ConfigStatus) GetLastInitError() string { + if x != nil { + return x.LastInitError + } + return "" +} + +func (x *ConfigStatus) GetNodeName() string { + if x != nil { + return x.NodeName + } + return "" +} + type Config_ConfigJSON struct { state protoimpl.MessageState `protogen:"open.v1"` Version string `protobuf:"bytes,1,opt,name=version,proto3" json:"version,omitempty"` @@ -175,7 +243,7 @@ type Config_ConfigJSON struct { func (x *Config_ConfigJSON) Reset() { *x = Config_ConfigJSON{} - mi := &file_config_proto_msgTypes[2] + mi := &file_config_proto_msgTypes[3] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -187,7 +255,7 @@ func (x *Config_ConfigJSON) String() string { func (*Config_ConfigJSON) ProtoMessage() {} func (x *Config_ConfigJSON) ProtoReflect() protoreflect.Message { - mi := &file_config_proto_msgTypes[2] + mi := &file_config_proto_msgTypes[3] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -261,10 +329,17 @@ const file_config_proto_rawDesc = "" + "\x04name\x18\x01 \x01(\tR\x04name\x12 \n" + "\vdescription\x18\x02 \x01(\tR\vdescription\x12\x12\n" + "\x04yaml\x18\x03 \x01(\tR\x04yaml\x12\x18\n" + - "\aenabled\x18\x04 \x01(\bR\aenabled2\xd4\x01\n" + + "\aenabled\x18\x04 \x01(\bR\aenabled\"\xa7\x01\n" + + "\fConfigStatus\x12\x18\n" 
+ + "\adefault\x18\x01 \x01(\bR\adefault\x128\n" + + "\x18default_tracing_policies\x18\x02 \x01(\bR\x16defaultTracingPolicies\x12&\n" + + "\x0flast_init_error\x18\x03 \x01(\tR\rlastInitError\x12\x1b\n" + + "\tnode_name\x18\x04 \x01(\tR\bnodeName2\xbe\x03\n" + "\x10ConfigController\x12`\n" + "\x03Add\x12\x16.runtime_config.Config\x1a\x16.google.protobuf.Empty\")\x82\xd3\xe4\x93\x02#:\x01*\"\x1e/api/v1/config/runtime-monitor\x12^\n" + - "\x04Read\x12\x16.google.protobuf.Empty\x1a\x16.runtime_config.Config\"&\x82\xd3\xe4\x93\x02 \x12\x1e/api/v1/config/runtime-monitorB(\x92A\x1e\x12\x1c\n" + + "\x04Read\x12\x16.google.protobuf.Empty\x1a\x16.runtime_config.Config\"&\x82\xd3\xe4\x93\x02 \x12\x1e/api/v1/config/runtime-monitor\x12y\n" + + "\x0eResetToDefault\x12\x16.google.protobuf.Empty\x1a\x16.google.protobuf.Empty\"7\x82\xd3\xe4\x93\x021\x12//api/v1/config/runtime-monitor/reset-to-default\x12m\n" + + "\x06Status\x12\x16.google.protobuf.Empty\x1a\x1c.runtime_config.ConfigStatus\"-\x82\xd3\xe4\x93\x02'\x12%/api/v1/config/runtime-monitor/statusB(\x92A\x1e\x12\x1c\n" + "\x1aRuntime Monitor Config APIZ\x05./apib\x06proto3" var ( @@ -279,32 +354,37 @@ func file_config_proto_rawDescGZIP() []byte { return file_config_proto_rawDescData } -var file_config_proto_msgTypes = make([]protoimpl.MessageInfo, 4) +var file_config_proto_msgTypes = make([]protoimpl.MessageInfo, 5) var file_config_proto_goTypes = []any{ (*Config)(nil), // 0: runtime_config.Config (*TracingPolicy)(nil), // 1: runtime_config.TracingPolicy - (*Config_ConfigJSON)(nil), // 2: runtime_config.Config.ConfigJSON - nil, // 3: runtime_config.Config.ConfigJSON.TracingPoliciesEntry - (*tetragon.Filter)(nil), // 4: tetragon.Filter - (*tetragon.AggregationOptions)(nil), // 5: tetragon.AggregationOptions - (*emptypb.Empty)(nil), // 6: google.protobuf.Empty + (*ConfigStatus)(nil), // 2: runtime_config.ConfigStatus + (*Config_ConfigJSON)(nil), // 3: runtime_config.Config.ConfigJSON + nil, // 4: 
runtime_config.Config.ConfigJSON.TracingPoliciesEntry + (*tetragon.Filter)(nil), // 5: tetragon.Filter + (*tetragon.AggregationOptions)(nil), // 6: tetragon.AggregationOptions + (*emptypb.Empty)(nil), // 7: google.protobuf.Empty } var file_config_proto_depIdxs = []int32{ - 2, // 0: runtime_config.Config.config:type_name -> runtime_config.Config.ConfigJSON - 3, // 1: runtime_config.Config.ConfigJSON.tracing_policies:type_name -> runtime_config.Config.ConfigJSON.TracingPoliciesEntry - 4, // 2: runtime_config.Config.ConfigJSON.allow_list:type_name -> tetragon.Filter - 4, // 3: runtime_config.Config.ConfigJSON.deny_list:type_name -> tetragon.Filter - 5, // 4: runtime_config.Config.ConfigJSON.aggregation_options:type_name -> tetragon.AggregationOptions - 1, // 5: runtime_config.Config.ConfigJSON.TracingPoliciesEntry.value:type_name -> runtime_config.TracingPolicy - 0, // 6: runtime_config.ConfigController.Add:input_type -> runtime_config.Config - 6, // 7: runtime_config.ConfigController.Read:input_type -> google.protobuf.Empty - 6, // 8: runtime_config.ConfigController.Add:output_type -> google.protobuf.Empty - 0, // 9: runtime_config.ConfigController.Read:output_type -> runtime_config.Config - 8, // [8:10] is the sub-list for method output_type - 6, // [6:8] is the sub-list for method input_type - 6, // [6:6] is the sub-list for extension type_name - 6, // [6:6] is the sub-list for extension extendee - 0, // [0:6] is the sub-list for field type_name + 3, // 0: runtime_config.Config.config:type_name -> runtime_config.Config.ConfigJSON + 4, // 1: runtime_config.Config.ConfigJSON.tracing_policies:type_name -> runtime_config.Config.ConfigJSON.TracingPoliciesEntry + 5, // 2: runtime_config.Config.ConfigJSON.allow_list:type_name -> tetragon.Filter + 5, // 3: runtime_config.Config.ConfigJSON.deny_list:type_name -> tetragon.Filter + 6, // 4: runtime_config.Config.ConfigJSON.aggregation_options:type_name -> tetragon.AggregationOptions + 1, // 5: 
runtime_config.Config.ConfigJSON.TracingPoliciesEntry.value:type_name -> runtime_config.TracingPolicy + 0, // 6: runtime_config.ConfigController.Add:input_type -> runtime_config.Config + 7, // 7: runtime_config.ConfigController.Read:input_type -> google.protobuf.Empty + 7, // 8: runtime_config.ConfigController.ResetToDefault:input_type -> google.protobuf.Empty + 7, // 9: runtime_config.ConfigController.Status:input_type -> google.protobuf.Empty + 7, // 10: runtime_config.ConfigController.Add:output_type -> google.protobuf.Empty + 0, // 11: runtime_config.ConfigController.Read:output_type -> runtime_config.Config + 7, // 12: runtime_config.ConfigController.ResetToDefault:output_type -> google.protobuf.Empty + 2, // 13: runtime_config.ConfigController.Status:output_type -> runtime_config.ConfigStatus + 10, // [10:14] is the sub-list for method output_type + 6, // [6:10] is the sub-list for method input_type + 6, // [6:6] is the sub-list for extension type_name + 6, // [6:6] is the sub-list for extension extendee + 0, // [0:6] is the sub-list for field type_name } func init() { file_config_proto_init() } @@ -318,7 +398,7 @@ func file_config_proto_init() { GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: unsafe.Slice(unsafe.StringData(file_config_proto_rawDesc), len(file_config_proto_rawDesc)), NumEnums: 0, - NumMessages: 4, + NumMessages: 5, NumExtensions: 0, NumServices: 1, }, diff --git a/runtime-monitor/api/config.pb.gw.go b/runtime-monitor/api/config.pb.gw.go index d5a05aa0..5c3ae3a0 100644 --- a/runtime-monitor/api/config.pb.gw.go +++ b/runtime-monitor/api/config.pb.gw.go @@ -10,6 +10,7 @@ package api import ( "context" + "errors" "io" "net/http" @@ -25,72 +26,119 @@ import ( ) // Suppress "imported and not used" errors -var _ codes.Code -var _ io.Reader -var _ status.Status -var _ = runtime.String -var _ = utilities.NewDoubleArray -var _ = metadata.Join +var ( + _ codes.Code + _ io.Reader + _ status.Status + _ = errors.New + _ = runtime.String + _ = 
utilities.NewDoubleArray + _ = metadata.Join +) func request_ConfigController_Add_0(ctx context.Context, marshaler runtime.Marshaler, client ConfigControllerClient, req *http.Request, pathParams map[string]string) (proto.Message, runtime.ServerMetadata, error) { - var protoReq Config - var metadata runtime.ServerMetadata - - if err := marshaler.NewDecoder(req.Body).Decode(&protoReq); err != nil && err != io.EOF { + var ( + protoReq Config + metadata runtime.ServerMetadata + ) + if err := marshaler.NewDecoder(req.Body).Decode(&protoReq); err != nil && !errors.Is(err, io.EOF) { return nil, metadata, status.Errorf(codes.InvalidArgument, "%v", err) } - + if req.Body != nil { + _, _ = io.Copy(io.Discard, req.Body) + } msg, err := client.Add(ctx, &protoReq, grpc.Header(&metadata.HeaderMD), grpc.Trailer(&metadata.TrailerMD)) return msg, metadata, err - } func local_request_ConfigController_Add_0(ctx context.Context, marshaler runtime.Marshaler, server ConfigControllerServer, req *http.Request, pathParams map[string]string) (proto.Message, runtime.ServerMetadata, error) { - var protoReq Config - var metadata runtime.ServerMetadata - - if err := marshaler.NewDecoder(req.Body).Decode(&protoReq); err != nil && err != io.EOF { + var ( + protoReq Config + metadata runtime.ServerMetadata + ) + if err := marshaler.NewDecoder(req.Body).Decode(&protoReq); err != nil && !errors.Is(err, io.EOF) { return nil, metadata, status.Errorf(codes.InvalidArgument, "%v", err) } - msg, err := server.Add(ctx, &protoReq) return msg, metadata, err - } func request_ConfigController_Read_0(ctx context.Context, marshaler runtime.Marshaler, client ConfigControllerClient, req *http.Request, pathParams map[string]string) (proto.Message, runtime.ServerMetadata, error) { - var protoReq emptypb.Empty - var metadata runtime.ServerMetadata - + var ( + protoReq emptypb.Empty + metadata runtime.ServerMetadata + ) + if req.Body != nil { + _, _ = io.Copy(io.Discard, req.Body) + } msg, err := client.Read(ctx, 
&protoReq, grpc.Header(&metadata.HeaderMD), grpc.Trailer(&metadata.TrailerMD)) return msg, metadata, err - } func local_request_ConfigController_Read_0(ctx context.Context, marshaler runtime.Marshaler, server ConfigControllerServer, req *http.Request, pathParams map[string]string) (proto.Message, runtime.ServerMetadata, error) { - var protoReq emptypb.Empty - var metadata runtime.ServerMetadata - + var ( + protoReq emptypb.Empty + metadata runtime.ServerMetadata + ) msg, err := server.Read(ctx, &protoReq) return msg, metadata, err +} + +func request_ConfigController_ResetToDefault_0(ctx context.Context, marshaler runtime.Marshaler, client ConfigControllerClient, req *http.Request, pathParams map[string]string) (proto.Message, runtime.ServerMetadata, error) { + var ( + protoReq emptypb.Empty + metadata runtime.ServerMetadata + ) + if req.Body != nil { + _, _ = io.Copy(io.Discard, req.Body) + } + msg, err := client.ResetToDefault(ctx, &protoReq, grpc.Header(&metadata.HeaderMD), grpc.Trailer(&metadata.TrailerMD)) + return msg, metadata, err +} + +func local_request_ConfigController_ResetToDefault_0(ctx context.Context, marshaler runtime.Marshaler, server ConfigControllerServer, req *http.Request, pathParams map[string]string) (proto.Message, runtime.ServerMetadata, error) { + var ( + protoReq emptypb.Empty + metadata runtime.ServerMetadata + ) + msg, err := server.ResetToDefault(ctx, &protoReq) + return msg, metadata, err +} + +func request_ConfigController_Status_0(ctx context.Context, marshaler runtime.Marshaler, client ConfigControllerClient, req *http.Request, pathParams map[string]string) (proto.Message, runtime.ServerMetadata, error) { + var ( + protoReq emptypb.Empty + metadata runtime.ServerMetadata + ) + if req.Body != nil { + _, _ = io.Copy(io.Discard, req.Body) + } + msg, err := client.Status(ctx, &protoReq, grpc.Header(&metadata.HeaderMD), grpc.Trailer(&metadata.TrailerMD)) + return msg, metadata, err +} +func local_request_ConfigController_Status_0(ctx 
context.Context, marshaler runtime.Marshaler, server ConfigControllerServer, req *http.Request, pathParams map[string]string) (proto.Message, runtime.ServerMetadata, error) { + var ( + protoReq emptypb.Empty + metadata runtime.ServerMetadata + ) + msg, err := server.Status(ctx, &protoReq) + return msg, metadata, err } // RegisterConfigControllerHandlerServer registers the http handlers for service ConfigController to "mux". // UnaryRPC :call ConfigControllerServer directly. // StreamingRPC :currently unsupported pending https://github.com/grpc/grpc-go/issues/906. // Note that using this registration option will cause many gRPC library features to stop working. Consider using RegisterConfigControllerHandlerFromEndpoint instead. +// GRPC interceptors will not work for this type of registration. To use interceptors, you must use the "runtime.WithMiddlewares" option in the "runtime.NewServeMux" call. func RegisterConfigControllerHandlerServer(ctx context.Context, mux *runtime.ServeMux, server ConfigControllerServer) error { - - mux.Handle("POST", pattern_ConfigController_Add_0, func(w http.ResponseWriter, req *http.Request, pathParams map[string]string) { + mux.Handle(http.MethodPost, pattern_ConfigController_Add_0, func(w http.ResponseWriter, req *http.Request, pathParams map[string]string) { ctx, cancel := context.WithCancel(req.Context()) defer cancel() var stream runtime.ServerTransportStream ctx = grpc.NewContextWithServerTransportStream(ctx, &stream) inboundMarshaler, outboundMarshaler := runtime.MarshalerForRequest(mux, req) - var err error - var annotatedContext context.Context - annotatedContext, err = runtime.AnnotateIncomingContext(ctx, mux, req, "/runtime_config.ConfigController/Add", runtime.WithHTTPPathPattern("/api/v1/config/runtime-monitor")) + annotatedContext, err := runtime.AnnotateIncomingContext(ctx, mux, req, "/runtime_config.ConfigController/Add", runtime.WithHTTPPathPattern("/api/v1/config/runtime-monitor")) if err != nil { 
runtime.HTTPError(ctx, mux, outboundMarshaler, w, req, err) return @@ -102,20 +150,15 @@ func RegisterConfigControllerHandlerServer(ctx context.Context, mux *runtime.Ser runtime.HTTPError(annotatedContext, mux, outboundMarshaler, w, req, err) return } - forward_ConfigController_Add_0(annotatedContext, mux, outboundMarshaler, w, req, resp, mux.GetForwardResponseOptions()...) - }) - - mux.Handle("GET", pattern_ConfigController_Read_0, func(w http.ResponseWriter, req *http.Request, pathParams map[string]string) { + mux.Handle(http.MethodGet, pattern_ConfigController_Read_0, func(w http.ResponseWriter, req *http.Request, pathParams map[string]string) { ctx, cancel := context.WithCancel(req.Context()) defer cancel() var stream runtime.ServerTransportStream ctx = grpc.NewContextWithServerTransportStream(ctx, &stream) inboundMarshaler, outboundMarshaler := runtime.MarshalerForRequest(mux, req) - var err error - var annotatedContext context.Context - annotatedContext, err = runtime.AnnotateIncomingContext(ctx, mux, req, "/runtime_config.ConfigController/Read", runtime.WithHTTPPathPattern("/api/v1/config/runtime-monitor")) + annotatedContext, err := runtime.AnnotateIncomingContext(ctx, mux, req, "/runtime_config.ConfigController/Read", runtime.WithHTTPPathPattern("/api/v1/config/runtime-monitor")) if err != nil { runtime.HTTPError(ctx, mux, outboundMarshaler, w, req, err) return @@ -127,9 +170,47 @@ func RegisterConfigControllerHandlerServer(ctx context.Context, mux *runtime.Ser runtime.HTTPError(annotatedContext, mux, outboundMarshaler, w, req, err) return } - forward_ConfigController_Read_0(annotatedContext, mux, outboundMarshaler, w, req, resp, mux.GetForwardResponseOptions()...) 
- + }) + mux.Handle(http.MethodGet, pattern_ConfigController_ResetToDefault_0, func(w http.ResponseWriter, req *http.Request, pathParams map[string]string) { + ctx, cancel := context.WithCancel(req.Context()) + defer cancel() + var stream runtime.ServerTransportStream + ctx = grpc.NewContextWithServerTransportStream(ctx, &stream) + inboundMarshaler, outboundMarshaler := runtime.MarshalerForRequest(mux, req) + annotatedContext, err := runtime.AnnotateIncomingContext(ctx, mux, req, "/runtime_config.ConfigController/ResetToDefault", runtime.WithHTTPPathPattern("/api/v1/config/runtime-monitor/reset-to-default")) + if err != nil { + runtime.HTTPError(ctx, mux, outboundMarshaler, w, req, err) + return + } + resp, md, err := local_request_ConfigController_ResetToDefault_0(annotatedContext, inboundMarshaler, server, req, pathParams) + md.HeaderMD, md.TrailerMD = metadata.Join(md.HeaderMD, stream.Header()), metadata.Join(md.TrailerMD, stream.Trailer()) + annotatedContext = runtime.NewServerMetadataContext(annotatedContext, md) + if err != nil { + runtime.HTTPError(annotatedContext, mux, outboundMarshaler, w, req, err) + return + } + forward_ConfigController_ResetToDefault_0(annotatedContext, mux, outboundMarshaler, w, req, resp, mux.GetForwardResponseOptions()...) 
+ }) + mux.Handle(http.MethodGet, pattern_ConfigController_Status_0, func(w http.ResponseWriter, req *http.Request, pathParams map[string]string) { + ctx, cancel := context.WithCancel(req.Context()) + defer cancel() + var stream runtime.ServerTransportStream + ctx = grpc.NewContextWithServerTransportStream(ctx, &stream) + inboundMarshaler, outboundMarshaler := runtime.MarshalerForRequest(mux, req) + annotatedContext, err := runtime.AnnotateIncomingContext(ctx, mux, req, "/runtime_config.ConfigController/Status", runtime.WithHTTPPathPattern("/api/v1/config/runtime-monitor/status")) + if err != nil { + runtime.HTTPError(ctx, mux, outboundMarshaler, w, req, err) + return + } + resp, md, err := local_request_ConfigController_Status_0(annotatedContext, inboundMarshaler, server, req, pathParams) + md.HeaderMD, md.TrailerMD = metadata.Join(md.HeaderMD, stream.Header()), metadata.Join(md.TrailerMD, stream.Trailer()) + annotatedContext = runtime.NewServerMetadataContext(annotatedContext, md) + if err != nil { + runtime.HTTPError(annotatedContext, mux, outboundMarshaler, w, req, err) + return + } + forward_ConfigController_Status_0(annotatedContext, mux, outboundMarshaler, w, req, resp, mux.GetForwardResponseOptions()...) }) return nil @@ -156,7 +237,6 @@ func RegisterConfigControllerHandlerFromEndpoint(ctx context.Context, mux *runti } }() }() - return RegisterConfigControllerHandler(ctx, mux, conn) } @@ -170,16 +250,13 @@ func RegisterConfigControllerHandler(ctx context.Context, mux *runtime.ServeMux, // to "mux". The handlers forward requests to the grpc endpoint over the given implementation of "ConfigControllerClient". // Note: the gRPC framework executes interceptors within the gRPC handler. If the passed in "ConfigControllerClient" // doesn't go through the normal gRPC flow (creating a gRPC client etc.) then it will be up to the passed in -// "ConfigControllerClient" to call the correct interceptors. +// "ConfigControllerClient" to call the correct interceptors. 
This client ignores the HTTP middlewares. func RegisterConfigControllerHandlerClient(ctx context.Context, mux *runtime.ServeMux, client ConfigControllerClient) error { - - mux.Handle("POST", pattern_ConfigController_Add_0, func(w http.ResponseWriter, req *http.Request, pathParams map[string]string) { + mux.Handle(http.MethodPost, pattern_ConfigController_Add_0, func(w http.ResponseWriter, req *http.Request, pathParams map[string]string) { ctx, cancel := context.WithCancel(req.Context()) defer cancel() inboundMarshaler, outboundMarshaler := runtime.MarshalerForRequest(mux, req) - var err error - var annotatedContext context.Context - annotatedContext, err = runtime.AnnotateContext(ctx, mux, req, "/runtime_config.ConfigController/Add", runtime.WithHTTPPathPattern("/api/v1/config/runtime-monitor")) + annotatedContext, err := runtime.AnnotateContext(ctx, mux, req, "/runtime_config.ConfigController/Add", runtime.WithHTTPPathPattern("/api/v1/config/runtime-monitor")) if err != nil { runtime.HTTPError(ctx, mux, outboundMarshaler, w, req, err) return @@ -190,18 +267,13 @@ func RegisterConfigControllerHandlerClient(ctx context.Context, mux *runtime.Ser runtime.HTTPError(annotatedContext, mux, outboundMarshaler, w, req, err) return } - forward_ConfigController_Add_0(annotatedContext, mux, outboundMarshaler, w, req, resp, mux.GetForwardResponseOptions()...) 
- }) - - mux.Handle("GET", pattern_ConfigController_Read_0, func(w http.ResponseWriter, req *http.Request, pathParams map[string]string) { + mux.Handle(http.MethodGet, pattern_ConfigController_Read_0, func(w http.ResponseWriter, req *http.Request, pathParams map[string]string) { ctx, cancel := context.WithCancel(req.Context()) defer cancel() inboundMarshaler, outboundMarshaler := runtime.MarshalerForRequest(mux, req) - var err error - var annotatedContext context.Context - annotatedContext, err = runtime.AnnotateContext(ctx, mux, req, "/runtime_config.ConfigController/Read", runtime.WithHTTPPathPattern("/api/v1/config/runtime-monitor")) + annotatedContext, err := runtime.AnnotateContext(ctx, mux, req, "/runtime_config.ConfigController/Read", runtime.WithHTTPPathPattern("/api/v1/config/runtime-monitor")) if err != nil { runtime.HTTPError(ctx, mux, outboundMarshaler, w, req, err) return @@ -212,22 +284,55 @@ func RegisterConfigControllerHandlerClient(ctx context.Context, mux *runtime.Ser runtime.HTTPError(annotatedContext, mux, outboundMarshaler, w, req, err) return } - forward_ConfigController_Read_0(annotatedContext, mux, outboundMarshaler, w, req, resp, mux.GetForwardResponseOptions()...) 
- }) - + mux.Handle(http.MethodGet, pattern_ConfigController_ResetToDefault_0, func(w http.ResponseWriter, req *http.Request, pathParams map[string]string) { + ctx, cancel := context.WithCancel(req.Context()) + defer cancel() + inboundMarshaler, outboundMarshaler := runtime.MarshalerForRequest(mux, req) + annotatedContext, err := runtime.AnnotateContext(ctx, mux, req, "/runtime_config.ConfigController/ResetToDefault", runtime.WithHTTPPathPattern("/api/v1/config/runtime-monitor/reset-to-default")) + if err != nil { + runtime.HTTPError(ctx, mux, outboundMarshaler, w, req, err) + return + } + resp, md, err := request_ConfigController_ResetToDefault_0(annotatedContext, inboundMarshaler, client, req, pathParams) + annotatedContext = runtime.NewServerMetadataContext(annotatedContext, md) + if err != nil { + runtime.HTTPError(annotatedContext, mux, outboundMarshaler, w, req, err) + return + } + forward_ConfigController_ResetToDefault_0(annotatedContext, mux, outboundMarshaler, w, req, resp, mux.GetForwardResponseOptions()...) 
+ }) + mux.Handle(http.MethodGet, pattern_ConfigController_Status_0, func(w http.ResponseWriter, req *http.Request, pathParams map[string]string) { + ctx, cancel := context.WithCancel(req.Context()) + defer cancel() + inboundMarshaler, outboundMarshaler := runtime.MarshalerForRequest(mux, req) + annotatedContext, err := runtime.AnnotateContext(ctx, mux, req, "/runtime_config.ConfigController/Status", runtime.WithHTTPPathPattern("/api/v1/config/runtime-monitor/status")) + if err != nil { + runtime.HTTPError(ctx, mux, outboundMarshaler, w, req, err) + return + } + resp, md, err := request_ConfigController_Status_0(annotatedContext, inboundMarshaler, client, req, pathParams) + annotatedContext = runtime.NewServerMetadataContext(annotatedContext, md) + if err != nil { + runtime.HTTPError(annotatedContext, mux, outboundMarshaler, w, req, err) + return + } + forward_ConfigController_Status_0(annotatedContext, mux, outboundMarshaler, w, req, resp, mux.GetForwardResponseOptions()...) + }) return nil } var ( - pattern_ConfigController_Add_0 = runtime.MustPattern(runtime.NewPattern(1, []int{2, 0, 2, 1, 2, 2, 2, 3}, []string{"api", "v1", "config", "runtime-monitor"}, "")) - - pattern_ConfigController_Read_0 = runtime.MustPattern(runtime.NewPattern(1, []int{2, 0, 2, 1, 2, 2, 2, 3}, []string{"api", "v1", "config", "runtime-monitor"}, "")) + pattern_ConfigController_Add_0 = runtime.MustPattern(runtime.NewPattern(1, []int{2, 0, 2, 1, 2, 2, 2, 3}, []string{"api", "v1", "config", "runtime-monitor"}, "")) + pattern_ConfigController_Read_0 = runtime.MustPattern(runtime.NewPattern(1, []int{2, 0, 2, 1, 2, 2, 2, 3}, []string{"api", "v1", "config", "runtime-monitor"}, "")) + pattern_ConfigController_ResetToDefault_0 = runtime.MustPattern(runtime.NewPattern(1, []int{2, 0, 2, 1, 2, 2, 2, 3, 2, 4}, []string{"api", "v1", "config", "runtime-monitor", "reset-to-default"}, "")) + pattern_ConfigController_Status_0 = runtime.MustPattern(runtime.NewPattern(1, []int{2, 0, 2, 1, 2, 2, 2, 3, 2, 4}, 
[]string{"api", "v1", "config", "runtime-monitor", "status"}, "")) ) var ( - forward_ConfigController_Add_0 = runtime.ForwardResponseMessage - - forward_ConfigController_Read_0 = runtime.ForwardResponseMessage + forward_ConfigController_Add_0 = runtime.ForwardResponseMessage + forward_ConfigController_Read_0 = runtime.ForwardResponseMessage + forward_ConfigController_ResetToDefault_0 = runtime.ForwardResponseMessage + forward_ConfigController_Status_0 = runtime.ForwardResponseMessage ) diff --git a/runtime-monitor/api/config.proto b/runtime-monitor/api/config.proto index 5c15e5db..f5970ce6 100644 --- a/runtime-monitor/api/config.proto +++ b/runtime-monitor/api/config.proto @@ -31,6 +31,16 @@ service ConfigController { get: "/api/v1/config/runtime-monitor" }; } + rpc ResetToDefault(google.protobuf.Empty) returns (google.protobuf.Empty) { + option (google.api.http) = { + get: "/api/v1/config/runtime-monitor/reset-to-default" + }; + } + rpc Status(google.protobuf.Empty) returns (ConfigStatus) { + option (google.api.http) = { + get: "/api/v1/config/runtime-monitor/status" + }; + } } message Config { @@ -69,3 +79,10 @@ message TracingPolicy { string yaml = 3; bool enabled = 4; } + +message ConfigStatus { + bool default = 1; + bool default_tracing_policies = 2; + string last_init_error = 3; + string node_name = 4; +} diff --git a/runtime-monitor/api/config_grpc.pb.go b/runtime-monitor/api/config_grpc.pb.go index ccebe6c1..0b5d3c3a 100644 --- a/runtime-monitor/api/config_grpc.pb.go +++ b/runtime-monitor/api/config_grpc.pb.go @@ -1,7 +1,7 @@ // Code generated by protoc-gen-go-grpc. DO NOT EDIT. 
// versions: -// - protoc-gen-go-grpc v1.3.0 -// - protoc v5.29.3 +// - protoc-gen-go-grpc v1.5.1 +// - protoc v7.34.1 // source: config.proto // Package is called "runtime_config" in order to distinguish it from future more generic "config" package @@ -19,20 +19,26 @@ import ( // This is a compile-time assertion to ensure that this generated file // is compatible with the grpc package it is being compiled against. -// Requires gRPC-Go v1.32.0 or later. -const _ = grpc.SupportPackageIsVersion7 +// Requires gRPC-Go v1.64.0 or later. +const _ = grpc.SupportPackageIsVersion9 const ( - ConfigController_Add_FullMethodName = "/runtime_config.ConfigController/Add" - ConfigController_Read_FullMethodName = "/runtime_config.ConfigController/Read" + ConfigController_Add_FullMethodName = "/runtime_config.ConfigController/Add" + ConfigController_Read_FullMethodName = "/runtime_config.ConfigController/Read" + ConfigController_ResetToDefault_FullMethodName = "/runtime_config.ConfigController/ResetToDefault" + ConfigController_Status_FullMethodName = "/runtime_config.ConfigController/Status" ) // ConfigControllerClient is the client API for ConfigController service. // // For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. +// +// The ConfigController service definition. 
type ConfigControllerClient interface { Add(ctx context.Context, in *Config, opts ...grpc.CallOption) (*emptypb.Empty, error) Read(ctx context.Context, in *emptypb.Empty, opts ...grpc.CallOption) (*Config, error) + ResetToDefault(ctx context.Context, in *emptypb.Empty, opts ...grpc.CallOption) (*emptypb.Empty, error) + Status(ctx context.Context, in *emptypb.Empty, opts ...grpc.CallOption) (*ConfigStatus, error) } type configControllerClient struct { @@ -44,8 +50,9 @@ func NewConfigControllerClient(cc grpc.ClientConnInterface) ConfigControllerClie } func (c *configControllerClient) Add(ctx context.Context, in *Config, opts ...grpc.CallOption) (*emptypb.Empty, error) { + cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) out := new(emptypb.Empty) - err := c.cc.Invoke(ctx, ConfigController_Add_FullMethodName, in, out, opts...) + err := c.cc.Invoke(ctx, ConfigController_Add_FullMethodName, in, out, cOpts...) if err != nil { return nil, err } @@ -53,8 +60,29 @@ func (c *configControllerClient) Add(ctx context.Context, in *Config, opts ...gr } func (c *configControllerClient) Read(ctx context.Context, in *emptypb.Empty, opts ...grpc.CallOption) (*Config, error) { + cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) out := new(Config) - err := c.cc.Invoke(ctx, ConfigController_Read_FullMethodName, in, out, opts...) + err := c.cc.Invoke(ctx, ConfigController_Read_FullMethodName, in, out, cOpts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *configControllerClient) ResetToDefault(ctx context.Context, in *emptypb.Empty, opts ...grpc.CallOption) (*emptypb.Empty, error) { + cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) + out := new(emptypb.Empty) + err := c.cc.Invoke(ctx, ConfigController_ResetToDefault_FullMethodName, in, out, cOpts...) 
+ if err != nil { + return nil, err + } + return out, nil +} + +func (c *configControllerClient) Status(ctx context.Context, in *emptypb.Empty, opts ...grpc.CallOption) (*ConfigStatus, error) { + cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) + out := new(ConfigStatus) + err := c.cc.Invoke(ctx, ConfigController_Status_FullMethodName, in, out, cOpts...) if err != nil { return nil, err } @@ -63,16 +91,23 @@ func (c *configControllerClient) Read(ctx context.Context, in *emptypb.Empty, op // ConfigControllerServer is the server API for ConfigController service. // All implementations must embed UnimplementedConfigControllerServer -// for forward compatibility +// for forward compatibility. +// +// The ConfigController service definition. type ConfigControllerServer interface { Add(context.Context, *Config) (*emptypb.Empty, error) Read(context.Context, *emptypb.Empty) (*Config, error) + ResetToDefault(context.Context, *emptypb.Empty) (*emptypb.Empty, error) + Status(context.Context, *emptypb.Empty) (*ConfigStatus, error) mustEmbedUnimplementedConfigControllerServer() } -// UnimplementedConfigControllerServer must be embedded to have forward compatible implementations. -type UnimplementedConfigControllerServer struct { -} +// UnimplementedConfigControllerServer must be embedded to have +// forward compatible implementations. +// +// NOTE: this should be embedded by value instead of pointer to avoid a nil +// pointer dereference when methods are called. 
+type UnimplementedConfigControllerServer struct{} func (UnimplementedConfigControllerServer) Add(context.Context, *Config) (*emptypb.Empty, error) { return nil, status.Errorf(codes.Unimplemented, "method Add not implemented") @@ -80,7 +115,14 @@ func (UnimplementedConfigControllerServer) Add(context.Context, *Config) (*empty func (UnimplementedConfigControllerServer) Read(context.Context, *emptypb.Empty) (*Config, error) { return nil, status.Errorf(codes.Unimplemented, "method Read not implemented") } +func (UnimplementedConfigControllerServer) ResetToDefault(context.Context, *emptypb.Empty) (*emptypb.Empty, error) { + return nil, status.Errorf(codes.Unimplemented, "method ResetToDefault not implemented") +} +func (UnimplementedConfigControllerServer) Status(context.Context, *emptypb.Empty) (*ConfigStatus, error) { + return nil, status.Errorf(codes.Unimplemented, "method Status not implemented") +} func (UnimplementedConfigControllerServer) mustEmbedUnimplementedConfigControllerServer() {} +func (UnimplementedConfigControllerServer) testEmbeddedByValue() {} // UnsafeConfigControllerServer may be embedded to opt out of forward compatibility for this service. // Use of this interface is not recommended, as added methods to ConfigControllerServer will @@ -90,6 +132,13 @@ type UnsafeConfigControllerServer interface { } func RegisterConfigControllerServer(s grpc.ServiceRegistrar, srv ConfigControllerServer) { + // If the following call pancis, it indicates UnimplementedConfigControllerServer was + // embedded by pointer and is nil. This will cause panics if an + // unimplemented method is ever invoked, so we test this at initialization + // time to prevent it from happening at runtime later due to I/O. 
+ if t, ok := srv.(interface{ testEmbeddedByValue() }); ok { + t.testEmbeddedByValue() + } s.RegisterService(&ConfigController_ServiceDesc, srv) } @@ -129,6 +178,42 @@ func _ConfigController_Read_Handler(srv interface{}, ctx context.Context, dec fu return interceptor(ctx, in, info, handler) } +func _ConfigController_ResetToDefault_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(emptypb.Empty) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(ConfigControllerServer).ResetToDefault(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: ConfigController_ResetToDefault_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(ConfigControllerServer).ResetToDefault(ctx, req.(*emptypb.Empty)) + } + return interceptor(ctx, in, info, handler) +} + +func _ConfigController_Status_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(emptypb.Empty) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(ConfigControllerServer).Status(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: ConfigController_Status_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(ConfigControllerServer).Status(ctx, req.(*emptypb.Empty)) + } + return interceptor(ctx, in, info, handler) +} + // ConfigController_ServiceDesc is the grpc.ServiceDesc for ConfigController service. 
// It's only intended for direct use with grpc.RegisterService, // and not to be introspected or modified (even as a copy) @@ -144,6 +229,14 @@ var ConfigController_ServiceDesc = grpc.ServiceDesc{ MethodName: "Read", Handler: _ConfigController_Read_Handler, }, + { + MethodName: "ResetToDefault", + Handler: _ConfigController_ResetToDefault_Handler, + }, + { + MethodName: "Status", + Handler: _ConfigController_Status_Handler, + }, }, Streams: []grpc.StreamDesc{}, Metadata: "config.proto", diff --git a/runtime-monitor/api/openapiv2/config.swagger.json b/runtime-monitor/api/openapiv2/config.swagger.json index c11d8247..c38634e0 100644 --- a/runtime-monitor/api/openapiv2/config.swagger.json +++ b/runtime-monitor/api/openapiv2/config.swagger.json @@ -30,7 +30,7 @@ "default": { "description": "An unexpected error response.", "schema": { - "$ref": "#/definitions/rpcStatus" + "$ref": "#/definitions/googlerpcStatus" } } }, @@ -51,7 +51,7 @@ "default": { "description": "An unexpected error response.", "schema": { - "$ref": "#/definitions/rpcStatus" + "$ref": "#/definitions/googlerpcStatus" } } }, @@ -69,6 +69,51 @@ "ConfigController" ] } + }, + "/api/v1/config/runtime-monitor/reset-to-default": { + "get": { + "operationId": "ConfigController_ResetToDefault", + "responses": { + "200": { + "description": "A successful response.", + "schema": { + "type": "object", + "properties": {} + } + }, + "default": { + "description": "An unexpected error response.", + "schema": { + "$ref": "#/definitions/googlerpcStatus" + } + } + }, + "tags": [ + "ConfigController" + ] + } + }, + "/api/v1/config/runtime-monitor/status": { + "get": { + "operationId": "ConfigController_Status", + "responses": { + "200": { + "description": "A successful response.", + "schema": { + "$ref": "#/definitions/runtime_configConfigStatus" + } + }, + "default": { + "description": "An unexpected error response.", + "schema": { + "$ref": "#/definitions/googlerpcStatus" + } + } + }, + "tags": [ + "ConfigController" + ] 
+ } } }, "definitions": { @@ -106,16 +151,7 @@ } } }, - "protobufAny": { - "type": "object", - "properties": { - "@type": { - "type": "string" - } - }, - "additionalProperties": {} - }, - "rpcStatus": { + "googlerpcStatus": { "type": "object", "properties": { "code": { @@ -134,6 +170,15 @@ } } }, + "protobufAny": { + "type": "object", + "properties": { + "@type": { + "type": "string" + } + }, + "additionalProperties": {} + }, "runtime_configConfig": { "type": "object", "properties": { @@ -145,6 +190,23 @@ } } }, + "runtime_configConfigStatus": { + "type": "object", + "properties": { + "default": { + "type": "boolean" + }, + "defaultTracingPolicies": { + "type": "boolean" + }, + "lastInitError": { + "type": "string" + }, + "nodeName": { + "type": "string" + } + } + }, "runtime_configTracingPolicy": { "type": "object", "properties": { @@ -397,6 +459,13 @@ "inInitTree": { "type": "boolean", "description": "Filter containerized processes based on whether they are descendants of\nthe container's init process. This can be used, for example, to watch\nfor processes injected into a container via docker exec, kubectl exec, or\nsimilar mechanisms." + }, + "ancestorBinaryRegex": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Filter ancestor processes' binaries using RE2 regular expression syntax." } } } diff --git a/runtime-monitor/api/tetragon/bpf.proto b/runtime-monitor/api/tetragon/bpf.proto index e74c4f3b..486c4ed3 100644 --- a/runtime-monitor/api/tetragon/bpf.proto +++ b/runtime-monitor/api/tetragon/bpf.proto @@ -5,180 +5,182 @@ syntax = "proto3"; package tetragon; +option go_package = "github.com/cilium/tetragon/api/v1/tetragon"; + enum BpfCmd { - /* Create a map and return a file descriptor that refers to the - map. */ - BPF_MAP_CREATE = 0; + /* Create a map and return a file descriptor that refers to the + map. */ + BPF_MAP_CREATE = 0; - /* Look up an element with a given key in the map referred to - by the file descriptor map_fd. 
*/ - BPF_MAP_LOOKUP_ELEM = 1; + /* Look up an element with a given key in the map referred to + by the file descriptor map_fd. */ + BPF_MAP_LOOKUP_ELEM = 1; - /* Create or update an element (key/value pair) in a specified map. */ - BPF_MAP_UPDATE_ELEM = 2; + /* Create or update an element (key/value pair) in a specified map. */ + BPF_MAP_UPDATE_ELEM = 2; - /* Look up and delete an element by key in a specified map. */ - BPF_MAP_DELETE_ELEM = 3; + /* Look up and delete an element by key in a specified map. */ + BPF_MAP_DELETE_ELEM = 3; - /* Look up an element by key in a specified map and return the key - of the next element. Can be used to iterate over all elements - in the map. */ - BPF_MAP_GET_NEXT_KEY = 4; + /* Look up an element by key in a specified map and return the key + of the next element. Can be used to iterate over all elements + in the map. */ + BPF_MAP_GET_NEXT_KEY = 4; - /* Verify and load an eBPF program, returning a new file descriptor - associated with the program. */ - BPF_PROG_LOAD = 5; + /* Verify and load an eBPF program, returning a new file descriptor + associated with the program. */ + BPF_PROG_LOAD = 5; - /* Pin an eBPF program or map referred by the specified bpf_fd - to the provided pathname on the filesystem. */ - BPF_OBJ_PIN = 6; + /* Pin an eBPF program or map referred by the specified bpf_fd + to the provided pathname on the filesystem. */ + BPF_OBJ_PIN = 6; - /* Open a file descriptor for the eBPF object pinned to the - specified pathname. */ - BPF_OBJ_GET = 7; + /* Open a file descriptor for the eBPF object pinned to the + specified pathname. */ + BPF_OBJ_GET = 7; - /* Attach an eBPF program to a target_fd at the specified - attach_type hook. */ - BPF_PROG_ATTACH = 8; + /* Attach an eBPF program to a target_fd at the specified + attach_type hook. */ + BPF_PROG_ATTACH = 8; - /* Detach the eBPF program associated with the target_fd at the - hook specified by attach_type. 
*/ - BPF_PROG_DETACH = 9; + /* Detach the eBPF program associated with the target_fd at the + hook specified by attach_type. */ + BPF_PROG_DETACH = 9; - /* Run the eBPF program associated with the prog_fd a repeat - number of times against a provided program context ctx_in and - data data_in, and return the modified program context - ctx_out, data_out (for example, packet data), result of the - execution retval, and duration of the test run. */ - BPF_PROG_TEST_RUN = 10; + /* Run the eBPF program associated with the prog_fd a repeat + number of times against a provided program context ctx_in and + data data_in, and return the modified program context + ctx_out, data_out (for example, packet data), result of the + execution retval, and duration of the test run. */ + BPF_PROG_TEST_RUN = 10; - /* Fetch the next eBPF program currently loaded into the kernel. */ - BPF_PROG_GET_NEXT_ID = 11; + /* Fetch the next eBPF program currently loaded into the kernel. */ + BPF_PROG_GET_NEXT_ID = 11; - /* Fetch the next eBPF map currently loaded into the kernel. */ - BPF_MAP_GET_NEXT_ID = 12; + /* Fetch the next eBPF map currently loaded into the kernel. */ + BPF_MAP_GET_NEXT_ID = 12; - /* Open a file descriptor for the eBPF program corresponding to prog_id. */ - BPF_PROG_GET_FD_BY_ID = 13; + /* Open a file descriptor for the eBPF program corresponding to prog_id. */ + BPF_PROG_GET_FD_BY_ID = 13; - /* Open a file descriptor for the eBPF map corresponding to map_id. */ - BPF_MAP_GET_FD_BY_ID = 14; + /* Open a file descriptor for the eBPF map corresponding to map_id. */ + BPF_MAP_GET_FD_BY_ID = 14; - /* Obtain information about the eBPF object corresponding to bpf_fd. */ - BPF_OBJ_GET_INFO_BY_FD = 15; + /* Obtain information about the eBPF object corresponding to bpf_fd. */ + BPF_OBJ_GET_INFO_BY_FD = 15; - /* Obtain information about eBPF programs associated with the specified - attach_type hook. 
*/ - BPF_PROG_QUERY = 16; + /* Obtain information about eBPF programs associated with the specified + attach_type hook. */ + BPF_PROG_QUERY = 16; - /* Attach an eBPF program to a tracepoint *name* to access kernel - internal arguments of the tracepoint in their raw form. */ - BPF_RAW_TRACEPOINT_OPEN = 17; + /* Attach an eBPF program to a tracepoint *name* to access kernel + internal arguments of the tracepoint in their raw form. */ + BPF_RAW_TRACEPOINT_OPEN = 17; - /* Verify and load BPF Type Format (BTF) metadata into the kernel, - returning a new file descriptor associated with the metadata. */ - BPF_BTF_LOAD = 18; + /* Verify and load BPF Type Format (BTF) metadata into the kernel, + returning a new file descriptor associated with the metadata. */ + BPF_BTF_LOAD = 18; - /* Open a file descriptor for the BPF Type Format (BTF) - corresponding to btf_id. */ - BPF_BTF_GET_FD_BY_ID = 19; + /* Open a file descriptor for the BPF Type Format (BTF) + corresponding to btf_id. */ + BPF_BTF_GET_FD_BY_ID = 19; - /* Obtain information about eBPF programs associated with the target - process identified by pid and fd. */ - BPF_TASK_FD_QUERY = 20; + /* Obtain information about eBPF programs associated with the target + process identified by pid and fd. */ + BPF_TASK_FD_QUERY = 20; - /* Look up an element with the given key in the map referred to - by the file descriptor fd, and if found, delete the element. */ - BPF_MAP_LOOKUP_AND_DELETE_ELEM = 21; + /* Look up an element with the given key in the map referred to + by the file descriptor fd, and if found, delete the element. */ + BPF_MAP_LOOKUP_AND_DELETE_ELEM = 21; - /* Freeze the permissions of the specified map. */ - BPF_MAP_FREEZE = 22; + /* Freeze the permissions of the specified map. */ + BPF_MAP_FREEZE = 22; - /* Fetch the next BPF Type Format (BTF) object currently loaded into - the kernel. */ - BPF_BTF_GET_NEXT_ID = 23; + /* Fetch the next BPF Type Format (BTF) object currently loaded into + the kernel. 
*/ + BPF_BTF_GET_NEXT_ID = 23; - /* Iterate and fetch multiple elements in a map. */ - BPF_MAP_LOOKUP_BATCH = 24; + /* Iterate and fetch multiple elements in a map. */ + BPF_MAP_LOOKUP_BATCH = 24; - /* Iterate and delete all elements in a map. */ - BPF_MAP_LOOKUP_AND_DELETE_BATCH = 25; + /* Iterate and delete all elements in a map. */ + BPF_MAP_LOOKUP_AND_DELETE_BATCH = 25; - /* Update multiple elements in a map by key. */ - BPF_MAP_UPDATE_BATCH = 26; + /* Update multiple elements in a map by key. */ + BPF_MAP_UPDATE_BATCH = 26; - /* Delete multiple elements in a map by key. */ - BPF_MAP_DELETE_BATCH = 27; + /* Delete multiple elements in a map by key. */ + BPF_MAP_DELETE_BATCH = 27; - /* Attach an eBPF program to a target_fd at the specified - attach_type hook and return a file descriptor handle for - managing the link. */ - BPF_LINK_CREATE = 28; + /* Attach an eBPF program to a target_fd at the specified + attach_type hook and return a file descriptor handle for + managing the link. */ + BPF_LINK_CREATE = 28; - /* Update the eBPF program in the specified link_fd to - new_prog_fd. */ - BPF_LINK_UPDATE = 29; + /* Update the eBPF program in the specified link_fd to + new_prog_fd. */ + BPF_LINK_UPDATE = 29; - /* Open a file descriptor for the eBPF Link corresponding to - link_id. */ - BPF_LINK_GET_FD_BY_ID = 30; + /* Open a file descriptor for the eBPF Link corresponding to + link_id. */ + BPF_LINK_GET_FD_BY_ID = 30; - /* Fetch the next eBPF link currently loaded into the kernel. */ - BPF_LINK_GET_NEXT_ID = 31; + /* Fetch the next eBPF link currently loaded into the kernel. */ + BPF_LINK_GET_NEXT_ID = 31; - /* Enable eBPF runtime statistics gathering. */ - BPF_ENABLE_STATS = 32; + /* Enable eBPF runtime statistics gathering. */ + BPF_ENABLE_STATS = 32; - /* Create an iterator on top of the specified link_fd (as - previously created using BPF_LINK_CREATE) and return a - file descriptor that can be used to trigger the iteration. 
*/ - BPF_ITER_CREATE = 33; + /* Create an iterator on top of the specified link_fd (as + previously created using BPF_LINK_CREATE) and return a + file descriptor that can be used to trigger the iteration. */ + BPF_ITER_CREATE = 33; - /* Forcefully detach the specified link_fd from its corresponding - attachment point. */ - BPF_LINK_DETACH = 34; + /* Forcefully detach the specified link_fd from its corresponding + attachment point. */ + BPF_LINK_DETACH = 34; - /* Bind a map to the lifetime of an eBPF program. */ - BPF_PROG_BIND_MAP = 35; + /* Bind a map to the lifetime of an eBPF program. */ + BPF_PROG_BIND_MAP = 35; - /* Create BPF token with embedded information about what can be - passed as an extra parameter to various bpf() syscall commands - to grant BPF subsystem functionality to unprivileged processes. */ - BPF_TOKEN_CREATE = 36; + /* Create BPF token with embedded information about what can be + passed as an extra parameter to various bpf() syscall commands + to grant BPF subsystem functionality to unprivileged processes. 
*/ + BPF_TOKEN_CREATE = 36; } enum BpfProgramType { - BPF_PROG_TYPE_UNSPEC = 0; - BPF_PROG_TYPE_SOCKET_FILTER = 1; - BPF_PROG_TYPE_KPROBE = 2; - BPF_PROG_TYPE_SCHED_CLS = 3; - BPF_PROG_TYPE_SCHED_ACT = 4; - BPF_PROG_TYPE_TRACEPOINT = 5; - BPF_PROG_TYPE_XDP = 6; - BPF_PROG_TYPE_PERF_EVENT = 7; - BPF_PROG_TYPE_CGROUP_SKB = 8; - BPF_PROG_TYPE_CGROUP_SOCK = 9; - BPF_PROG_TYPE_LWT_IN = 10; - BPF_PROG_TYPE_LWT_OUT = 11; - BPF_PROG_TYPE_LWT_XMIT = 12; - BPF_PROG_TYPE_SOCK_OPS = 13; - BPF_PROG_TYPE_SK_SKB = 14; - BPF_PROG_TYPE_CGROUP_DEVICE = 15; - BPF_PROG_TYPE_SK_MSG = 16; - BPF_PROG_TYPE_RAW_TRACEPOINT = 17; - BPF_PROG_TYPE_CGROUP_SOCK_ADDR = 18; - BPF_PROG_TYPE_LWT_SEG6LOCAL = 19; - BPF_PROG_TYPE_LIRC_MODE2 = 20; - BPF_PROG_TYPE_SK_REUSEPORT = 21; - BPF_PROG_TYPE_FLOW_DISSECTOR = 22; - BPF_PROG_TYPE_CGROUP_SYSCTL = 23; - BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE = 24; - BPF_PROG_TYPE_CGROUP_SOCKOPT = 25; - BPF_PROG_TYPE_TRACING = 26; - BPF_PROG_TYPE_STRUCT_OPS = 27; - BPF_PROG_TYPE_EXT = 28; - BPF_PROG_TYPE_LSM = 29; - BPF_PROG_TYPE_SK_LOOKUP = 30; - BPF_PROG_TYPE_SYSCALL = 31; - BPF_PROG_TYPE_NETFILTER = 32; -}; + BPF_PROG_TYPE_UNSPEC = 0; + BPF_PROG_TYPE_SOCKET_FILTER = 1; + BPF_PROG_TYPE_KPROBE = 2; + BPF_PROG_TYPE_SCHED_CLS = 3; + BPF_PROG_TYPE_SCHED_ACT = 4; + BPF_PROG_TYPE_TRACEPOINT = 5; + BPF_PROG_TYPE_XDP = 6; + BPF_PROG_TYPE_PERF_EVENT = 7; + BPF_PROG_TYPE_CGROUP_SKB = 8; + BPF_PROG_TYPE_CGROUP_SOCK = 9; + BPF_PROG_TYPE_LWT_IN = 10; + BPF_PROG_TYPE_LWT_OUT = 11; + BPF_PROG_TYPE_LWT_XMIT = 12; + BPF_PROG_TYPE_SOCK_OPS = 13; + BPF_PROG_TYPE_SK_SKB = 14; + BPF_PROG_TYPE_CGROUP_DEVICE = 15; + BPF_PROG_TYPE_SK_MSG = 16; + BPF_PROG_TYPE_RAW_TRACEPOINT = 17; + BPF_PROG_TYPE_CGROUP_SOCK_ADDR = 18; + BPF_PROG_TYPE_LWT_SEG6LOCAL = 19; + BPF_PROG_TYPE_LIRC_MODE2 = 20; + BPF_PROG_TYPE_SK_REUSEPORT = 21; + BPF_PROG_TYPE_FLOW_DISSECTOR = 22; + BPF_PROG_TYPE_CGROUP_SYSCTL = 23; + BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE = 24; + BPF_PROG_TYPE_CGROUP_SOCKOPT = 25; + 
BPF_PROG_TYPE_TRACING = 26; + BPF_PROG_TYPE_STRUCT_OPS = 27; + BPF_PROG_TYPE_EXT = 28; + BPF_PROG_TYPE_LSM = 29; + BPF_PROG_TYPE_SK_LOOKUP = 30; + BPF_PROG_TYPE_SYSCALL = 31; + BPF_PROG_TYPE_NETFILTER = 32; +} diff --git a/runtime-monitor/api/tetragon/capabilities.proto b/runtime-monitor/api/tetragon/capabilities.proto index c453a534..032e16c8 100644 --- a/runtime-monitor/api/tetragon/capabilities.proto +++ b/runtime-monitor/api/tetragon/capabilities.proto @@ -5,345 +5,347 @@ syntax = "proto3"; package tetragon; +option go_package = "github.com/cilium/tetragon/api/v1/tetragon"; + enum CapabilitiesType { - /* In a system with the [_POSIX_CHOWN_RESTRICTED] option defined, this - overrides the restriction of changing file ownership and group - ownership. */ - CAP_CHOWN = 0; - /* Override all DAC access, including ACL execute access if - [_POSIX_ACL] is defined. Excluding DAC access covered by - CAP_LINUX_IMMUTABLE. */ - DAC_OVERRIDE = 1; - - /* Overrides all DAC restrictions regarding read and search on files - and directories, including ACL restrictions if [_POSIX_ACL] is - defined. Excluding DAC access covered by "$1"_LINUX_IMMUTABLE. */ - CAP_DAC_READ_SEARCH = 2; - - /* Overrides all restrictions about allowed operations on files, where - file owner ID must be equal to the user ID, except where CAP_FSETID - is applicable. It doesn't override MAC and DAC restrictions. */ - CAP_FOWNER = 3; - - /* Overrides the following restrictions that the effective user ID - shall match the file owner ID when setting the S_ISUID and S_ISGID - bits on that file; that the effective group ID (or one of the - supplementary group IDs) shall match the file owner ID when setting - the S_ISGID bit on that file; that the S_ISUID and S_ISGID bits are - cleared on successful return from chown(2) (not implemented). 
*/ - CAP_FSETID = 4; - - /* Overrides the restriction that the real or effective user ID of a - process sending a signal must match the real or effective user ID - of the process receiving the signal. */ - CAP_KILL = 5; - - /* Allows setgid(2) manipulation */ - /* Allows setgroups(2) */ - /* Allows forged gids on socket credentials passing. */ - CAP_SETGID = 6; - - /* Allows set*uid(2) manipulation (including fsuid). */ - /* Allows forged pids on socket credentials passing. */ - CAP_SETUID = 7; - - /** - ** Linux-specific capabilities - **/ - - /* Without VFS support for capabilities: - * Transfer any capability in your permitted set to any pid, - * remove any capability in your permitted set from any pid - * With VFS support for capabilities (neither of above, but) - * Add any capability from current's capability bounding set - * to the current process' inheritable set - * Allow taking bits out of capability bounding set - * Allow modification of the securebits for a process - */ - CAP_SETPCAP = 8; - - /* Allow modification of S_IMMUTABLE and S_APPEND file attributes */ - CAP_LINUX_IMMUTABLE = 9; - - /* Allows binding to TCP/UDP sockets below 1024 */ - /* Allows binding to ATM VCIs below 32 */ - CAP_NET_BIND_SERVICE = 10; - - /* Allow broadcasting, listen to multicast */ - CAP_NET_BROADCAST = 11; - - /* Allow interface configuration */ - /* Allow administration of IP firewall, masquerading and accounting */ - /* Allow setting debug option on sockets */ - /* Allow modification of routing tables */ - /* Allow setting arbitrary process / process group ownership on - sockets */ - /* Allow binding to any address for transparent proxying (also via NET_RAW) */ - /* Allow setting TOS (type of service) */ - /* Allow setting promiscuous mode */ - /* Allow clearing driver statistics */ - /* Allow multicasting */ - /* Allow read/write of device-specific registers */ - /* Allow activation of ATM control sockets */ - CAP_NET_ADMIN = 12; - - /* Allow use of RAW sockets */ - /* 
Allow use of PACKET sockets */ - /* Allow binding to any address for transparent proxying (also via NET_ADMIN) */ - CAP_NET_RAW = 13; - - /* Allow locking of shared memory segments */ - /* Allow mlock and mlockall (which doesn't really have anything to do - with IPC) */ - CAP_IPC_LOCK = 14; - - /* Override IPC ownership checks */ - CAP_IPC_OWNER = 15; - - /* Insert and remove kernel modules - modify kernel without limit */ - CAP_SYS_MODULE = 16; - - /* Allow ioperm/iopl access */ - /* Allow sending USB messages to any device via /dev/bus/usb */ - CAP_SYS_RAWIO = 17; - - /* Allow use of chroot() */ - CAP_SYS_CHROOT = 18; - - /* Allow ptrace() of any process */ - CAP_SYS_PTRACE = 19; - /* Allow configuration of process accounting */ - CAP_SYS_PACCT = 20; - - /* Allow configuration of the secure attention key */ - /* Allow administration of the random device */ - /* Allow examination and configuration of disk quotas */ - /* Allow setting the domainname */ - /* Allow setting the hostname */ - /* Allow calling bdflush() */ - /* Allow mount() and umount(), setting up new smb connection */ - /* Allow some autofs root ioctls */ - /* Allow nfsservctl */ - /* Allow VM86_REQUEST_IRQ */ - /* Allow to read/write pci config on alpha */ - /* Allow irix_prctl on mips (setstacksize) */ - /* Allow flushing all cache on m68k (sys_cacheflush) */ - /* Allow removing semaphores */ - /* Used instead of CAP_CHOWN to "chown" IPC message queues, semaphores and shared memory */ - /* Allow locking/unlocking of shared memory segment */ - /* Allow turning swap on/off */ - /* Allow forged pids on socket credentials passing */ - /* Allow setting readahead and flushing buffers on block devices */ - /* Allow setting geometry in floppy driver */ - /* Allow turning DMA on/off in xd driver */ - /* Allow administration of md devices (mostly the above, but some extra ioctls) */ - /* Allow tuning the ide driver */ - /* Allow access to the nvram device */ - /* Allow administration of apm_bios, serial and 
bttv (TV) device */ - /* Allow manufacturer commands in isdn CAPI support driver */ - /* Allow reading non-standardized portions of pci configuration space */ - /* Allow DDI debug ioctl on sbpcd driver */ - /* Allow setting up serial ports */ - /* Allow sending raw qic-117 commands */ - /* Allow enabling/disabling tagged queuing on SCSI controllers and sending - arbitrary SCSI commands */ - /* Allow setting encryption key on loopback filesystem */ - /* Allow setting zone reclaim policy */ - /* Allow everything under CAP_BPF and CAP_PERFMON for backward compatibility */ - CAP_SYS_ADMIN = 21; - - /* Allow use of reboot() */ - CAP_SYS_BOOT = 22; - - /* Allow raising priority and setting priority on other (different - UID) processes */ - /* Allow use of FIFO and round-robin (realtime) scheduling on own - processes and setting the scheduling algorithm used by another - process. */ - /* Allow setting cpu affinity on other processes */ - CAP_SYS_NICE = 23; - - /* Override resource limits. Set resource limits. */ - /* Override quota limits. 
*/ - /* Override reserved space on ext2 filesystem */ - /* Modify data journaling mode on ext3 filesystem (uses journaling - resources) */ - /* ext2 honors fsuid when checking for resource overrides, so - you can override using fsuid too */ - /* Override size restrictions on IPC message queues */ - /* Allow more than 64hz interrupts from the real-time clock */ - /* Override max number of consoles on console allocation */ - /* Override max number of keymaps */ - /* Control memory reclaim behavior */ - CAP_SYS_RESOURCE = 24; - - /* Allow manipulation of system clock */ - /* Allow irix_stime on mips */ - /* Allow setting the real-time clock */ - CAP_SYS_TIME = 25; - - /* Allow configuration of tty devices */ - /* Allow vhangup() of tty */ - CAP_SYS_TTY_CONFIG = 26; - - /* Allow the privileged aspects of mknod() */ - CAP_MKNOD = 27; - - /* Allow taking of leases on files */ - CAP_LEASE = 28; - - /* Allow writing the audit log via unicast netlink socket */ - CAP_AUDIT_WRITE = 29; - - /* Allow configuration of audit via unicast netlink socket */ - CAP_AUDIT_CONTROL = 30; - - /* Set or remove capabilities on files */ - CAP_SETFCAP = 31; - - /* Override MAC access. - The base kernel enforces no MAC policy. - An LSM may enforce a MAC policy, and if it does and it chooses - to implement capability based overrides of that policy, this is - the capability it should use to do so. */ - CAP_MAC_OVERRIDE = 32; - - /* Allow MAC configuration or state changes. - The base kernel requires no MAC configuration. - An LSM may enforce a MAC policy, and if it does and it chooses - to implement capability based checks on modifications to that - policy or the data required to maintain it, this is the - capability it should use to do so. 
*/ - CAP_MAC_ADMIN = 33; - - /* Allow configuring the kernel's syslog (printk behaviour) */ - CAP_SYSLOG = 34; - - /* Allow triggering something that will wake the system */ - CAP_WAKE_ALARM = 35; - - /* Allow preventing system suspends */ - CAP_BLOCK_SUSPEND = 36; - - /* Allow reading the audit log via multicast netlink socket */ - CAP_AUDIT_READ = 37; - - /* - * Allow system performance and observability privileged operations - * using perf_events, i915_perf and other kernel subsystems - */ - CAP_PERFMON = 38; - - /* - * CAP_BPF allows the following BPF operations: - * - Creating all types of BPF maps - * - Advanced verifier features - * - Indirect variable access - * - Bounded loops - * - BPF to BPF function calls - * - Scalar precision tracking - * - Larger complexity limits - * - Dead code elimination - * - And potentially other features - * - Loading BPF Type Format (BTF) data - * - Retrieve xlated and JITed code of BPF programs - * - Use bpf_spin_lock() helper - * CAP_PERFMON relaxes the verifier checks further: - * - BPF progs can use of pointer-to-integer conversions - * - speculation attack hardening measures are bypassed - * - bpf_probe_read to read arbitrary kernel memory is allowed - * - bpf_trace_printk to print kernel memory is allowed - * CAP_SYS_ADMIN is required to use bpf_probe_write_user. - * CAP_SYS_ADMIN is required to iterate system wide loaded - * programs, maps, links, BTFs and convert their IDs to file descriptors. - * CAP_PERFMON and CAP_BPF are required to load tracing programs. - * CAP_NET_ADMIN and CAP_BPF are required to load networking programs. - */ - CAP_BPF = 39; - - /* Allow checkpoint/restore related operations */ - /* Allow PID selection during clone3() */ - /* Allow writing to ns_last_pid */ - CAP_CHECKPOINT_RESTORE = 40; + /* In a system with the [_POSIX_CHOWN_RESTRICTED] option defined, this + overrides the restriction of changing file ownership and group + ownership. 
*/ + CAP_CHOWN = 0; + /* Override all DAC access, including ACL execute access if + [_POSIX_ACL] is defined. Excluding DAC access covered by + CAP_LINUX_IMMUTABLE. */ + DAC_OVERRIDE = 1; + + /* Overrides all DAC restrictions regarding read and search on files + and directories, including ACL restrictions if [_POSIX_ACL] is + defined. Excluding DAC access covered by CAP_LINUX_IMMUTABLE. */ + CAP_DAC_READ_SEARCH = 2; + + /* Overrides all restrictions about allowed operations on files, where + file owner ID must be equal to the user ID, except where CAP_FSETID + is applicable. It doesn't override MAC and DAC restrictions. */ + CAP_FOWNER = 3; + + /* Overrides the following restrictions that the effective user ID + shall match the file owner ID when setting the S_ISUID and S_ISGID + bits on that file; that the effective group ID (or one of the + supplementary group IDs) shall match the file owner ID when setting + the S_ISGID bit on that file; that the S_ISUID and S_ISGID bits are + cleared on successful return from chown(2) (not implemented). */ + CAP_FSETID = 4; + + /* Overrides the restriction that the real or effective user ID of a + process sending a signal must match the real or effective user ID + of the process receiving the signal. */ + CAP_KILL = 5; + + /* Allows setgid(2) manipulation */ + /* Allows setgroups(2) */ + /* Allows forged gids on socket credentials passing. */ + CAP_SETGID = 6; + + /* Allows set*uid(2) manipulation (including fsuid). */ + /* Allows forged pids on socket credentials passing.
*/ + CAP_SETUID = 7; + + /** + ** Linux-specific capabilities + **/ + + /* Without VFS support for capabilities: + * Transfer any capability in your permitted set to any pid, + * remove any capability in your permitted set from any pid + * With VFS support for capabilities (neither of above, but) + * Add any capability from current's capability bounding set + * to the current process' inheritable set + * Allow taking bits out of capability bounding set + * Allow modification of the securebits for a process + */ + CAP_SETPCAP = 8; + + /* Allow modification of S_IMMUTABLE and S_APPEND file attributes */ + CAP_LINUX_IMMUTABLE = 9; + + /* Allows binding to TCP/UDP sockets below 1024 */ + /* Allows binding to ATM VCIs below 32 */ + CAP_NET_BIND_SERVICE = 10; + + /* Allow broadcasting, listen to multicast */ + CAP_NET_BROADCAST = 11; + + /* Allow interface configuration */ + /* Allow administration of IP firewall, masquerading and accounting */ + /* Allow setting debug option on sockets */ + /* Allow modification of routing tables */ + /* Allow setting arbitrary process / process group ownership on + sockets */ + /* Allow binding to any address for transparent proxying (also via NET_RAW) */ + /* Allow setting TOS (type of service) */ + /* Allow setting promiscuous mode */ + /* Allow clearing driver statistics */ + /* Allow multicasting */ + /* Allow read/write of device-specific registers */ + /* Allow activation of ATM control sockets */ + CAP_NET_ADMIN = 12; + + /* Allow use of RAW sockets */ + /* Allow use of PACKET sockets */ + /* Allow binding to any address for transparent proxying (also via NET_ADMIN) */ + CAP_NET_RAW = 13; + + /* Allow locking of shared memory segments */ + /* Allow mlock and mlockall (which doesn't really have anything to do + with IPC) */ + CAP_IPC_LOCK = 14; + + /* Override IPC ownership checks */ + CAP_IPC_OWNER = 15; + + /* Insert and remove kernel modules - modify kernel without limit */ + CAP_SYS_MODULE = 16; + + /* Allow ioperm/iopl 
access */ + /* Allow sending USB messages to any device via /dev/bus/usb */ + CAP_SYS_RAWIO = 17; + + /* Allow use of chroot() */ + CAP_SYS_CHROOT = 18; + + /* Allow ptrace() of any process */ + CAP_SYS_PTRACE = 19; + /* Allow configuration of process accounting */ + CAP_SYS_PACCT = 20; + + /* Allow configuration of the secure attention key */ + /* Allow administration of the random device */ + /* Allow examination and configuration of disk quotas */ + /* Allow setting the domainname */ + /* Allow setting the hostname */ + /* Allow calling bdflush() */ + /* Allow mount() and umount(), setting up new smb connection */ + /* Allow some autofs root ioctls */ + /* Allow nfsservctl */ + /* Allow VM86_REQUEST_IRQ */ + /* Allow to read/write pci config on alpha */ + /* Allow irix_prctl on mips (setstacksize) */ + /* Allow flushing all cache on m68k (sys_cacheflush) */ + /* Allow removing semaphores */ + /* Used instead of CAP_CHOWN to "chown" IPC message queues, semaphores and shared memory */ + /* Allow locking/unlocking of shared memory segment */ + /* Allow turning swap on/off */ + /* Allow forged pids on socket credentials passing */ + /* Allow setting readahead and flushing buffers on block devices */ + /* Allow setting geometry in floppy driver */ + /* Allow turning DMA on/off in xd driver */ + /* Allow administration of md devices (mostly the above, but some extra ioctls) */ + /* Allow tuning the ide driver */ + /* Allow access to the nvram device */ + /* Allow administration of apm_bios, serial and bttv (TV) device */ + /* Allow manufacturer commands in isdn CAPI support driver */ + /* Allow reading non-standardized portions of pci configuration space */ + /* Allow DDI debug ioctl on sbpcd driver */ + /* Allow setting up serial ports */ + /* Allow sending raw qic-117 commands */ + /* Allow enabling/disabling tagged queuing on SCSI controllers and sending + arbitrary SCSI commands */ + /* Allow setting encryption key on loopback filesystem */ + /* Allow setting zone 
reclaim policy */ + /* Allow everything under CAP_BPF and CAP_PERFMON for backward compatibility */ + CAP_SYS_ADMIN = 21; + + /* Allow use of reboot() */ + CAP_SYS_BOOT = 22; + + /* Allow raising priority and setting priority on other (different + UID) processes */ + /* Allow use of FIFO and round-robin (realtime) scheduling on own + processes and setting the scheduling algorithm used by another + process. */ + /* Allow setting cpu affinity on other processes */ + CAP_SYS_NICE = 23; + + /* Override resource limits. Set resource limits. */ + /* Override quota limits. */ + /* Override reserved space on ext2 filesystem */ + /* Modify data journaling mode on ext3 filesystem (uses journaling + resources) */ + /* ext2 honors fsuid when checking for resource overrides, so + you can override using fsuid too */ + /* Override size restrictions on IPC message queues */ + /* Allow more than 64hz interrupts from the real-time clock */ + /* Override max number of consoles on console allocation */ + /* Override max number of keymaps */ + /* Control memory reclaim behavior */ + CAP_SYS_RESOURCE = 24; + + /* Allow manipulation of system clock */ + /* Allow irix_stime on mips */ + /* Allow setting the real-time clock */ + CAP_SYS_TIME = 25; + + /* Allow configuration of tty devices */ + /* Allow vhangup() of tty */ + CAP_SYS_TTY_CONFIG = 26; + + /* Allow the privileged aspects of mknod() */ + CAP_MKNOD = 27; + + /* Allow taking of leases on files */ + CAP_LEASE = 28; + + /* Allow writing the audit log via unicast netlink socket */ + CAP_AUDIT_WRITE = 29; + + /* Allow configuration of audit via unicast netlink socket */ + CAP_AUDIT_CONTROL = 30; + + /* Set or remove capabilities on files */ + CAP_SETFCAP = 31; + + /* Override MAC access. + The base kernel enforces no MAC policy. + An LSM may enforce a MAC policy, and if it does and it chooses + to implement capability based overrides of that policy, this is + the capability it should use to do so. 
*/ + CAP_MAC_OVERRIDE = 32; + + /* Allow MAC configuration or state changes. + The base kernel requires no MAC configuration. + An LSM may enforce a MAC policy, and if it does and it chooses + to implement capability based checks on modifications to that + policy or the data required to maintain it, this is the + capability it should use to do so. */ + CAP_MAC_ADMIN = 33; + + /* Allow configuring the kernel's syslog (printk behaviour) */ + CAP_SYSLOG = 34; + + /* Allow triggering something that will wake the system */ + CAP_WAKE_ALARM = 35; + + /* Allow preventing system suspends */ + CAP_BLOCK_SUSPEND = 36; + + /* Allow reading the audit log via multicast netlink socket */ + CAP_AUDIT_READ = 37; + + /* + * Allow system performance and observability privileged operations + * using perf_events, i915_perf and other kernel subsystems + */ + CAP_PERFMON = 38; + + /* + * CAP_BPF allows the following BPF operations: + * - Creating all types of BPF maps + * - Advanced verifier features + * - Indirect variable access + * - Bounded loops + * - BPF to BPF function calls + * - Scalar precision tracking + * - Larger complexity limits + * - Dead code elimination + * - And potentially other features + * - Loading BPF Type Format (BTF) data + * - Retrieve xlated and JITed code of BPF programs + * - Use bpf_spin_lock() helper + * CAP_PERFMON relaxes the verifier checks further: + * - BPF progs can use of pointer-to-integer conversions + * - speculation attack hardening measures are bypassed + * - bpf_probe_read to read arbitrary kernel memory is allowed + * - bpf_trace_printk to print kernel memory is allowed + * CAP_SYS_ADMIN is required to use bpf_probe_write_user. + * CAP_SYS_ADMIN is required to iterate system wide loaded + * programs, maps, links, BTFs and convert their IDs to file descriptors. + * CAP_PERFMON and CAP_BPF are required to load tracing programs. + * CAP_NET_ADMIN and CAP_BPF are required to load networking programs. 
+ */ + CAP_BPF = 39; + + /* Allow checkpoint/restore related operations */ + /* Allow PID selection during clone3() */ + /* Allow writing to ns_last_pid */ + CAP_CHECKPOINT_RESTORE = 40; } enum SecureBitsType { - SecBitNotSet = 0; - - /* When set UID 0 has no special privileges. When unset, inheritance - of root-permissions and suid-root executable under compatibility mode - is supported. If the effective uid of the new process is 0 then - the effective and inheritable bitmasks of the executable file is raised. - If the real uid is 0, the effective (legacy) bit of the executable file - is raised. */ - SecBitNoRoot = 1; - - /* Make bit-0 SecBitNoRoot immutable */ - SecBitNoRootLocked = 2; - - /* When set, setuid to/from uid 0 does not trigger capability-"fixup". - When unset, to provide compatiblility with old programs relying on - set*uid to gain/lose privilege, transitions to/from uid 0 cause - capabilities to be gained/lost. */ - SecBitNoSetUidFixup = 4; - - /* Make bit-2 SecBitNoSetUidFixup immutable */ - SecBitNoSetUidFixupLocked = 8; - - /* When set, a process can retain its capabilities even after - transitioning to a non-root user (the set-uid fixup suppressed by - bit 2). Bit-4 is cleared when a process calls exec(); setting both - bit 4 and 5 will create a barrier through exec that no exec()'d - child can use this feature again. */ - SecBitKeepCaps = 16; - - /* Make bit-4 SecBitKeepCaps immutable */ - SecBitKeepCapsLocked = 32; - - /* When set, a process cannot add new capabilities to its ambient set. */ - SecBitNoCapAmbientRaise = 64; - - /* Make bit-6 SecBitNoCapAmbientRaise immutable */ - SecBitNoCapAmbientRaiseLocked = 128; + SecBitNotSet = 0; + + /* When set UID 0 has no special privileges. When unset, inheritance + of root-permissions and suid-root executable under compatibility mode + is supported. If the effective uid of the new process is 0 then + the effective and inheritable bitmasks of the executable file is raised. 
+ If the real uid is 0, the effective (legacy) bit of the executable file + is raised. */ + SecBitNoRoot = 1; + + /* Make bit-0 SecBitNoRoot immutable */ + SecBitNoRootLocked = 2; + + /* When set, setuid to/from uid 0 does not trigger capability-"fixup". + When unset, to provide compatibility with old programs relying on + set*uid to gain/lose privilege, transitions to/from uid 0 cause + capabilities to be gained/lost. */ + SecBitNoSetUidFixup = 4; + + /* Make bit-2 SecBitNoSetUidFixup immutable */ + SecBitNoSetUidFixupLocked = 8; + + /* When set, a process can retain its capabilities even after + transitioning to a non-root user (the set-uid fixup suppressed by + bit 2). Bit-4 is cleared when a process calls exec(); setting both + bit 4 and 5 will create a barrier through exec that no exec()'d + child can use this feature again. */ + SecBitKeepCaps = 16; + + /* Make bit-4 SecBitKeepCaps immutable */ + SecBitKeepCapsLocked = 32; + + /* When set, a process cannot add new capabilities to its ambient set. */ + SecBitNoCapAmbientRaise = 64; + + /* Make bit-6 SecBitNoCapAmbientRaise immutable */ + SecBitNoCapAmbientRaiseLocked = 128; } // Reasons of why the process privileges changed. enum ProcessPrivilegesChanged { - PRIVILEGES_CHANGED_UNSET = 0; - - // A privilege elevation happened due to the execution of a binary with file capability sets. - // The kernel supports associating capability sets with an executable file using `setcap` command. - // The file capability sets are stored in an extended attribute (see https://man7.org/linux/man-pages/man7/xattr.7.html) - // named `security.capability`. The file capability sets, in conjunction with the capability sets - // of the process, determine the process capabilities and privileges after the `execve` system call.
- // For further reference, please check sections `File capability extended attribute versioning` and - // `Namespaced file capabilities` of the capabilities man pages: https://man7.org/linux/man-pages/man7/capabilities.7.html. - // The new granted capabilities can be listed inside the `process` object. - PRIVILEGES_RAISED_EXEC_FILE_CAP = 1; - - // A privilege elevation happened due to the execution of a binary with set-user-ID to root. - // When a process with nonzero UIDs executes a binary with a set-user-ID to root also - // known as suid-root executable, then the kernel switches the effective user ID to 0 (root) which - // is a privilege elevation operation since it grants access to resources owned by the root user. - // The effective user ID is listed inside the `process_credentials` part of the `process` object. - // For further reading, section `Capabilities and execution of programs by root` of https://man7.org/linux/man-pages/man7/capabilities.7.html. - // Afterward the kernel recalculates the capability sets of the process and grants all capabilities - // in the permitted and effective capability sets, except those masked out by the capability bounding set. - // If the binary also have file capability sets then these bits are honored and the process gains just - // the capabilities granted by the file capability sets (i.e., not all capabilities, as it would occur - // when executing a set-user-ID to root binary that does not have any associated file capabilities). This - // is described in section `Set-user-ID-root programs that have file capabilities` of https://man7.org/linux/man-pages/man7/capabilities.7.html. - // The new granted capabilities can be listed inside the `process` object. - // There is one exception for the special treatments of set-user-ID to root execution receiving all - // capabilities, if the `SecBitNoRoot` bit of the Secure bits is set, then the kernel does not grant - // any capability. 
Please check section: `The securebits flags: establishing a capabilities-only environment` - // of the capabilities man pages: https://man7.org/linux/man-pages/man7/capabilities.7.html - PRIVILEGES_RAISED_EXEC_FILE_SETUID = 2; - - // A privilege elevation happened due to the execution of a binary with set-group-ID to root. - // When a process with nonzero GIDs executes a binary with a set-group-ID to root, the kernel switches - // the effective group ID to 0 (root) which is a privilege elevation operation since it grants access to - // resources owned by the root group. - // The effective group ID is listed inside the `process_credentials` part of the `process` object. - PRIVILEGES_RAISED_EXEC_FILE_SETGID = 3; + PRIVILEGES_CHANGED_UNSET = 0; + + // A privilege elevation happened due to the execution of a binary with file capability sets. + // The kernel supports associating capability sets with an executable file using `setcap` command. + // The file capability sets are stored in an extended attribute (see https://man7.org/linux/man-pages/man7/xattr.7.html) + // named `security.capability`. The file capability sets, in conjunction with the capability sets + // of the process, determine the process capabilities and privileges after the `execve` system call. + // For further reference, please check sections `File capability extended attribute versioning` and + // `Namespaced file capabilities` of the capabilities man pages: https://man7.org/linux/man-pages/man7/capabilities.7.html. + // The new granted capabilities can be listed inside the `process` object. + PRIVILEGES_RAISED_EXEC_FILE_CAP = 1; + + // A privilege elevation happened due to the execution of a binary with set-user-ID to root. 
+ // When a process with nonzero UIDs executes a binary with a set-user-ID to root also + // known as suid-root executable, then the kernel switches the effective user ID to 0 (root) which + // is a privilege elevation operation since it grants access to resources owned by the root user. + // The effective user ID is listed inside the `process_credentials` part of the `process` object. + // For further reading, section `Capabilities and execution of programs by root` of https://man7.org/linux/man-pages/man7/capabilities.7.html. + // Afterward the kernel recalculates the capability sets of the process and grants all capabilities + // in the permitted and effective capability sets, except those masked out by the capability bounding set. + // If the binary also have file capability sets then these bits are honored and the process gains just + // the capabilities granted by the file capability sets (i.e., not all capabilities, as it would occur + // when executing a set-user-ID to root binary that does not have any associated file capabilities). This + // is described in section `Set-user-ID-root programs that have file capabilities` of https://man7.org/linux/man-pages/man7/capabilities.7.html. + // The new granted capabilities can be listed inside the `process` object. + // There is one exception for the special treatments of set-user-ID to root execution receiving all + // capabilities, if the `SecBitNoRoot` bit of the Secure bits is set, then the kernel does not grant + // any capability. Please check section: `The securebits flags: establishing a capabilities-only environment` + // of the capabilities man pages: https://man7.org/linux/man-pages/man7/capabilities.7.html + PRIVILEGES_RAISED_EXEC_FILE_SETUID = 2; + + // A privilege elevation happened due to the execution of a binary with set-group-ID to root. 
+ // When a process with nonzero GIDs executes a binary with a set-group-ID to root, the kernel switches + // the effective group ID to 0 (root) which is a privilege elevation operation since it grants access to + // resources owned by the root group. + // The effective group ID is listed inside the `process_credentials` part of the `process` object. + PRIVILEGES_RAISED_EXEC_FILE_SETGID = 3; } diff --git a/runtime-monitor/api/tetragon/events.proto b/runtime-monitor/api/tetragon/events.proto index c6ed6f69..7d484b36 100644 --- a/runtime-monitor/api/tetragon/events.proto +++ b/runtime-monitor/api/tetragon/events.proto @@ -5,76 +5,80 @@ syntax = "proto3"; package tetragon; -import "tetragon/tetragon.proto"; -import "tetragon/capabilities.proto"; import "google/protobuf/duration.proto"; -import "google/protobuf/wrappers.proto"; -import "google/protobuf/timestamp.proto"; import "google/protobuf/field_mask.proto"; +import "google/protobuf/timestamp.proto"; +import "google/protobuf/wrappers.proto"; +import "tetragon/capabilities.proto"; +import "tetragon/tetragon.proto"; + +option go_package = "github.com/cilium/tetragon/api/v1/tetragon"; // Represents the type of a Tetragon event. // // NOTE: EventType constants must be in sync with the numbers used in the // GetEventsResponse event oneof. 
enum EventType { - reserved 2 to 4, 6 to 8, 13 to 26; - UNDEF = 0; - - PROCESS_EXEC = 1; - PROCESS_EXIT = 5; - PROCESS_KPROBE = 9; - PROCESS_TRACEPOINT = 10; - PROCESS_LOADER = 11; - PROCESS_UPROBE = 12; - PROCESS_THROTTLE = 27; - PROCESS_LSM = 28; - - TEST = 40000; - RATE_LIMIT_INFO = 40001; + reserved 2 to 4, 6 to 8, 13 to 26; + UNDEF = 0; + + PROCESS_EXEC = 1; + PROCESS_EXIT = 5; + PROCESS_KPROBE = 9; + PROCESS_TRACEPOINT = 10; + PROCESS_LOADER = 11; + PROCESS_UPROBE = 12; + PROCESS_THROTTLE = 27; + PROCESS_LSM = 28; + + TEST = 40000; + RATE_LIMIT_INFO = 40001; } message Filter { - repeated string binary_regex = 1; - repeated string namespace = 2; - google.protobuf.BoolValue health_check = 3; - repeated uint32 pid = 4; - // Filter by the PID of a process and any of its descendants. Note that this filter is - // intended for testing and development purposes only and should not be used in - // production. In particular, PID cycling in the OS over longer periods of time may - // cause unexpected events to pass this filter. - repeated uint32 pid_set = 5; - repeated EventType event_set = 6; - // Filter by process.pod.name field using RE2 regular expression syntax: - // https://github.com/google/re2/wiki/Syntax - repeated string pod_regex = 7; - // Filter by process.arguments field using RE2 regular expression syntax: - // https://github.com/google/re2/wiki/Syntax - repeated string arguments_regex = 8; - // Filter events by pod labels using Kubernetes label selector syntax: - // https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors - // Note that this filter never matches events without the pod field (i.e. - // host process events). - repeated string labels = 9; - // Filter events by tracing policy names - repeated string policy_names = 10; - // Filter events by Linux process capability - CapFilter capabilities = 11; - // Filter parent process' binary using RE2 regular expression syntax. 
- repeated string parent_binary_regex = 12; - // Filter using CEL expressions. CEL filters support IP and CIDR notiation extensions from the k8s project. - // See https://pkg.go.dev/k8s.io/apiserver/pkg/cel/library#IP and https://pkg.go.dev/k8s.io/apiserver/pkg/cel/library#CIDR for details. - repeated string cel_expression = 13; - // Filter by process.parent.arguments field using RE2 regular expression syntax: - // https://github.com/google/re2/wiki/Syntax - repeated string parent_arguments_regex = 14; - // Filter by the container ID in the process.docker field using RE2 regular expression syntax: - // https://github.com/google/re2/wiki/Syntax - repeated string container_id = 15; - // Filter containerized processes based on whether they are descendants of - // the container's init process. This can be used, for example, to watch - // for processes injected into a container via docker exec, kubectl exec, or - // similar mechanisms. - google.protobuf.BoolValue in_init_tree = 16; + repeated string binary_regex = 1; + repeated string namespace = 2; + google.protobuf.BoolValue health_check = 3; + repeated uint32 pid = 4; + // Filter by the PID of a process and any of its descendants. Note that this filter is + // intended for testing and development purposes only and should not be used in + // production. In particular, PID cycling in the OS over longer periods of time may + // cause unexpected events to pass this filter. 
+ repeated uint32 pid_set = 5; + repeated EventType event_set = 6; + // Filter by process.pod.name field using RE2 regular expression syntax: + // https://github.com/google/re2/wiki/Syntax + repeated string pod_regex = 7; + // Filter by process.arguments field using RE2 regular expression syntax: + // https://github.com/google/re2/wiki/Syntax + repeated string arguments_regex = 8; + // Filter events by pod labels using Kubernetes label selector syntax: + // https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors + // Note that this filter never matches events without the pod field (i.e. + // host process events). + repeated string labels = 9; + // Filter events by tracing policy names + repeated string policy_names = 10; + // Filter events by Linux process capability + CapFilter capabilities = 11; + // Filter parent process' binary using RE2 regular expression syntax. + repeated string parent_binary_regex = 12; + // Filter using CEL expressions. CEL filters support IP and CIDR notation extensions from the k8s project. + // See https://pkg.go.dev/k8s.io/apiserver/pkg/cel/library#IP and https://pkg.go.dev/k8s.io/apiserver/pkg/cel/library#CIDR for details. + repeated string cel_expression = 13; + // Filter by process.parent.arguments field using RE2 regular expression syntax: + // https://github.com/google/re2/wiki/Syntax + repeated string parent_arguments_regex = 14; + // Filter by the container ID in the process.docker field using RE2 regular expression syntax: + // https://github.com/google/re2/wiki/Syntax + repeated string container_id = 15; + // Filter containerized processes based on whether they are descendants of + // the container's init process. This can be used, for example, to watch + // for processes injected into a container via docker exec, kubectl exec, or + // similar mechanisms. + google.protobuf.BoolValue in_init_tree = 16; + // Filter ancestor processes' binaries using RE2 regular expression syntax.
+ repeated string ancestor_binary_regex = 17; } // Filter over a set of Linux process capabilities. See `message Capabilities` @@ -82,142 +86,144 @@ message Filter { // permitted filter matches, but the effective filter does not, the filter will // NOT match. message CapFilter { - // Filter over the set of permitted capabilities. - CapFilterSet permitted = 1; - // Filter over the set of effective capabilities. - CapFilterSet effective = 2; - // Filter over the set of inheritable capabilities. - CapFilterSet inheritable = 3; + // Filter over the set of permitted capabilities. + CapFilterSet permitted = 1; + // Filter over the set of effective capabilities. + CapFilterSet effective = 2; + // Filter over the set of inheritable capabilities. + CapFilterSet inheritable = 3; } // Capability set to filter over. NOTE: you may specify only ONE set here. message CapFilterSet { - // Match if the capability set contains any of the capabilities defined in this filter. - repeated CapabilitiesType any = 1; - // Match if the capability set contains all of the capabilities defined in this filter. - repeated CapabilitiesType all = 2; - // Match if the capability set exactly matches all of the capabilities defined in this filter. - repeated CapabilitiesType exactly = 3; - // Match if the capability set contains none of the capabilities defined in this filter. - repeated CapabilitiesType none = 4; + // Match if the capability set contains any of the capabilities defined in this filter. + repeated CapabilitiesType any = 1; + // Match if the capability set contains all of the capabilities defined in this filter. + repeated CapabilitiesType all = 2; + // Match if the capability set exactly matches all of the capabilities defined in this filter. + repeated CapabilitiesType exactly = 3; + // Match if the capability set contains none of the capabilities defined in this filter. + repeated CapabilitiesType none = 4; } message RedactionFilter { - // Deprecated, do not use. 
- repeated Filter match = 1 [deprecated=true]; - // RE2 regular expressions to use for redaction. Strings inside capture groups are redacted. - repeated string redact = 2; - // RE2 regular expression to match binary name. If supplied, redactions will only be applied to matching processes. - repeated string binary_regex = 3; + // Deprecated, do not use. + repeated Filter match = 1 [deprecated = true]; + // RE2 regular expressions to use for redaction. Strings inside capture groups are redacted. + repeated string redact = 2; + // RE2 regular expression to match binary name. If supplied, redactions will only be applied to matching processes. + repeated string binary_regex = 3; } // Determines the behavior of a field filter enum FieldFilterAction { - INCLUDE = 0; - EXCLUDE = 1; + INCLUDE = 0; + EXCLUDE = 1; } message FieldFilter { - // Event types to filter or undefined to filter over all event types. - repeated EventType event_set = 1; - // Fields to include or exclude. - google.protobuf.FieldMask fields = 2; - // Whether to include or exclude fields. - FieldFilterAction action = 3; - // Whether or not the event set filter should be inverted. - google.protobuf.BoolValue invert_event_set = 4; + // Event types to filter or undefined to filter over all event types. + repeated EventType event_set = 1; + // Fields to include or exclude. + google.protobuf.FieldMask fields = 2; + // Whether to include or exclude fields. + FieldFilterAction action = 3; + // Whether or not the event set filter should be inverted. + google.protobuf.BoolValue invert_event_set = 4; } message GetEventsRequest { - // allow_list specifies a list of filters to apply to only return certain - // events. If multiple filters are specified, at least one of them has to - // match for an event to be included in the results. - repeated Filter allow_list = 1; - // deny_list specifies a list of filters to apply to exclude certain events - // from the results. 
If multiple filters are specified, at least one of - // them has to match for an event to be excluded. - // If both allow_list and deny_list are specified, the results contain the - // set difference allow_list - deny_list. - repeated Filter deny_list = 2; - // aggregation_options configures aggregation options for this request. - // If this field is not set, responses will not be aggregated. - // Note that currently only process_accept and process_connect events are - // aggregated. Other events remain unaggregated. - AggregationOptions aggregation_options = 3; - // Fields to include or exclude for events in the GetEventsResponse. Omitting this - // field implies that all fields will be included. Exclusion always takes precedence - // over inclusion in the case of conflicts. - repeated FieldFilter field_filters = 4; + // allow_list specifies a list of filters to apply to only return certain + // events. If multiple filters are specified, at least one of them has to + // match for an event to be included in the results. + repeated Filter allow_list = 1; + // deny_list specifies a list of filters to apply to exclude certain events + // from the results. If multiple filters are specified, at least one of + // them has to match for an event to be excluded. + // If both allow_list and deny_list are specified, the results contain the + // set difference allow_list - deny_list. + repeated Filter deny_list = 2; + // aggregation_options configures aggregation options for this request. + // If this field is not set, responses will not be aggregated. + // Note that currently only process_accept and process_connect events are + // aggregated. Other events remain unaggregated. + AggregationOptions aggregation_options = 3; + // Fields to include or exclude for events in the GetEventsResponse. Omitting this + // field implies that all fields will be included. Exclusion always takes precedence + // over inclusion in the case of conflicts. 
+ repeated FieldFilter field_filters = 4; } // AggregationOptions defines configuration options for aggregating events. message AggregationOptions { - // Aggregation window size. Defaults to 15 seconds if this field is not set. - google.protobuf.Duration window_size = 1; - // Size of the buffer for the aggregator to receive incoming events. If the - // buffer becomes full, the aggregator will log a warning and start dropping - // incoming events. - uint64 channel_buffer_size = 2; + // Aggregation window size. Defaults to 15 seconds if this field is not set. + google.protobuf.Duration window_size = 1; + // Size of the buffer for the aggregator to receive incoming events. If the + // buffer becomes full, the aggregator will log a warning and start dropping + // incoming events. + uint64 channel_buffer_size = 2; } // AggregationInfo contains information about aggregation results. message AggregationInfo { - // Total count of events in this aggregation time window. - uint64 count = 1; + // Total count of events in this aggregation time window. + uint64 count = 1; } message RateLimitInfo { - uint64 number_of_dropped_process_events = 1; + uint64 number_of_dropped_process_events = 1; } enum ThrottleType { - THROTTLE_UNKNOWN = 0; - THROTTLE_START = 1; - THROTTLE_STOP = 2; + THROTTLE_UNKNOWN = 0; + THROTTLE_START = 1; + THROTTLE_STOP = 2; } message ProcessThrottle { - // Throttle type - ThrottleType type = 1; - // Cgroup name - string cgroup = 2; + // Throttle type + ThrottleType type = 1; + // Cgroup name + string cgroup = 2; } message GetEventsResponse { - reserved 2 to 4, 6 to 8, 13 to 26; - // The type-specific fields of an event. - // - // NOTE: Numbers must stay in sync with enum EventType. - oneof event { - // ProcessExec event includes information about the execution of - // binaries and other related process metadata. - ProcessExec process_exec = 1; - // ProcessExit event indicates how and when a process terminates. 
- ProcessExit process_exit = 5; - // ProcessKprobe event contains information about the pre-defined - // functions and the process that invoked them. - ProcessKprobe process_kprobe = 9; - // ProcessTracepoint contains information about the pre-defined - // tracepoint and the process that invoked them. - ProcessTracepoint process_tracepoint = 10; - ProcessLoader process_loader = 11; - ProcessUprobe process_uprobe = 12; - ProcessThrottle process_throttle = 27; - ProcessLsm process_lsm = 28; - - Test test = 40000; - RateLimitInfo rate_limit_info = 40001; - } - // Name of the node where this event was observed. - string node_name = 1000; - // Timestamp at which this event was observed. - // For an aggregated response, this field to set to the timestamp at which - // the event was observed for the first time in a given aggregation time window. - google.protobuf.Timestamp time = 1001; - - // aggregation_info contains information about aggregation results. This field - // is set only for aggregated responses. - AggregationInfo aggregation_info = 1002; - // Name of the cluster where this event was observed. - string cluster_name = 1003; + reserved 2 to 4, 6 to 8, 13 to 26; + // The type-specific fields of an event. + // + // NOTE: Numbers must stay in sync with enum EventType. + oneof event { + // ProcessExec event includes information about the execution of + // binaries and other related process metadata. + ProcessExec process_exec = 1; + // ProcessExit event indicates how and when a process terminates. + ProcessExit process_exit = 5; + // ProcessKprobe event contains information about the pre-defined + // functions and the process that invoked them. + ProcessKprobe process_kprobe = 9; + // ProcessTracepoint contains information about the pre-defined + // tracepoint and the process that invoked them. 
+ ProcessTracepoint process_tracepoint = 10; + ProcessLoader process_loader = 11; + ProcessUprobe process_uprobe = 12; + ProcessThrottle process_throttle = 27; + ProcessLsm process_lsm = 28; + + Test test = 40000; + RateLimitInfo rate_limit_info = 40001; + } + // Name of the node where this event was observed. + string node_name = 1000; + // Timestamp at which this event was observed. + // For an aggregated response, this field is set to the timestamp at which + // the event was observed for the first time in a given aggregation time window. + google.protobuf.Timestamp time = 1001; + + // aggregation_info contains information about aggregation results. This field + // is set only for aggregated responses. + AggregationInfo aggregation_info = 1002; + // Name of the cluster where this event was observed. + string cluster_name = 1003; + // Labels associated with the node where this event was observed. + map<string, string> node_labels = 1004; } diff --git a/runtime-monitor/api/tetragon/sensors.proto b/runtime-monitor/api/tetragon/sensors.proto index 9bd13b2b..ce99ce30 100644 --- a/runtime-monitor/api/tetragon/sensors.proto +++ b/runtime-monitor/api/tetragon/sensors.proto @@ -3,226 +3,260 @@ syntax = "proto3"; -import "google/protobuf/wrappers.proto"; - package tetragon; -import "tetragon/tetragon.proto"; -import "tetragon/stack.proto"; +import "google/protobuf/wrappers.proto"; import "tetragon/events.proto"; +import "tetragon/stack.proto"; +import "tetragon/tetragon.proto"; + +option go_package = "github.com/cilium/tetragon/api/v1/tetragon"; /** * Sensors */ -message ListSensorsRequest { } +message ListSensorsRequest {} message SensorStatus { - // name is the name of the sensor - string name = 1; - // enabled marks whether the sensor is enabled - bool enabled = 2; - // collection is the collection the sensor belongs to (typically a tracing policy) - string collection = 3; + // name is the name of the sensor + string name = 1; + // enabled marks whether the sensor is enabled + bool
enabled = 2; + // collection is the collection the sensor belongs to (typically a tracing policy) + string collection = 3; } message ListSensorsResponse { - repeated SensorStatus sensors = 1; + repeated SensorStatus sensors = 1; } -message ListTracingPoliciesRequest { } +message ListTracingPoliciesRequest {} enum TracingPolicyState { - // unknown state - TP_STATE_UNKNOWN = 0; - // loaded and enabled - TP_STATE_ENABLED = 1; - // loaded but disabled - TP_STATE_DISABLED = 2; - // failed to load - TP_STATE_LOAD_ERROR = 3; - // failed during lifetime - TP_STATE_ERROR = 4; - // in the process of loading - TP_STATE_LOADING = 5; - // in the process of unloading - TP_STATE_UNLOADING = 6; + // unknown state + TP_STATE_UNKNOWN = 0; + // loaded and enabled + TP_STATE_ENABLED = 1; + // loaded but disabled + TP_STATE_DISABLED = 2; + // failed to load + TP_STATE_LOAD_ERROR = 3; + // failed during lifetime + TP_STATE_ERROR = 4; + // in the process of loading + TP_STATE_LOADING = 5; + // in the process of unloading + TP_STATE_UNLOADING = 6; +} + +enum TracingPolicyMode { + TP_MODE_UNKNOWN = 0; + TP_MODE_ENFORCE = 1; + TP_MODE_MONITOR = 2; } message TracingPolicyStatus { - // id is the id of the policy - uint64 id = 1; - // name is the name of the policy - string name = 2; - // namespace is the namespace of the policy (or empty of the policy is global) - string namespace = 3; - // info is additional information about the policy - string info = 4; - // sensors loaded in the scope of this policy - repeated string sensors = 5; - // indicating if the policy is enabled. Deprecated: use 'state' instead. 
- bool enabled = 6 [deprecated = true]; - // filter ID of the policy used for k8s filtering - uint64 filter_id = 7; - // potential error of the policy - string error = 8; - // current state of the tracing policy - TracingPolicyState state = 9; - // the amount of kernel memory in bytes used by policy's sensors non-shared BPF maps (memlock) - uint64 kernel_memory_bytes = 10; + // id is the id of the policy + uint64 id = 1; + // name is the name of the policy + string name = 2; + // namespace is the namespace of the policy (or empty if the policy is global) + string namespace = 3; + // info is additional information about the policy + string info = 4; + // sensors loaded in the scope of this policy + repeated string sensors = 5; + // indicating if the policy is enabled. Deprecated: use 'state' instead. + bool enabled = 6 [deprecated = true]; + // filter ID of the policy used for k8s filtering + uint64 filter_id = 7; + // potential error of the policy + string error = 8; + // current state of the tracing policy + TracingPolicyState state = 9; + // the amount of kernel memory in bytes used by policy's sensors non-shared BPF maps (memlock) + uint64 kernel_memory_bytes = 10; + // current mode of the tracing policy + TracingPolicyMode mode = 11; } message ListTracingPoliciesResponse { - repeated TracingPolicyStatus policies = 1; + repeated TracingPolicyStatus policies = 1; } message AddTracingPolicyRequest { - string yaml = 1; + string yaml = 1; } message AddTracingPolicyResponse {} message DeleteTracingPolicyRequest { - string name = 1; - string namespace = 2; + string name = 1; + string namespace = 2; } message DeleteTracingPolicyResponse {} message EnableTracingPolicyRequest { - string name = 1; - string namespace = 2; + string name = 1; + string namespace = 2; } message EnableTracingPolicyResponse {} message DisableTracingPolicyRequest { - string name = 1; - string namespace = 2; + string name = 1; + string namespace = 2; } message DisableTracingPolicyResponse {}
+message ConfigureTracingPolicyRequest { + string name = 1; + string namespace = 2; + + optional bool enable = 3; + optional TracingPolicyMode mode = 4; +} + +message ConfigureTracingPolicyResponse {} + message RemoveSensorRequest { - string name = 1; + string name = 1; } message RemoveSensorResponse {} - message EnableSensorRequest { - string name = 1; + string name = 1; } message EnableSensorResponse {} message DisableSensorRequest { - string name = 1; + string name = 1; } -message DisableSensorResponse { } +message DisableSensorResponse {} message GetStackTraceTreeRequest { - string name = 1; + string name = 1; } message GetStackTraceTreeResponse { - StackTraceNode root = 1; + StackTraceNode root = 1; } -message GetVersionRequest{} -message GetVersionResponse{ - string version = 1; +message GetVersionRequest {} +message GetVersionResponse { + string version = 1; } // For now, we only want to support debug-related config flags to be configurable. enum ConfigFlag { - CONFIG_FLAG_LOG_LEVEL = 0; - CONFIG_FLAG_DUMP_PROCESS_CACHE = 1; + CONFIG_FLAG_LOG_LEVEL = 0; + CONFIG_FLAG_DUMP_PROCESS_CACHE = 1; } enum LogLevel { - LOG_LEVEL_PANIC = 0; - LOG_LEVEL_FATAL = 1; - LOG_LEVEL_ERROR = 2; - LOG_LEVEL_WARN = 3; - LOG_LEVEL_INFO = 4; - LOG_LEVEL_DEBUG = 5; - LOG_LEVEL_TRACE = 6; + LOG_LEVEL_PANIC = 0; + LOG_LEVEL_FATAL = 1; + LOG_LEVEL_ERROR = 2; + LOG_LEVEL_WARN = 3; + LOG_LEVEL_INFO = 4; + LOG_LEVEL_DEBUG = 5; + LOG_LEVEL_TRACE = 6; } message DumpProcessCacheReqArgs { - bool skip_zero_refcnt = 1; - bool exclude_execve_map_processes = 2; + bool skip_zero_refcnt = 1; + bool exclude_execve_map_processes = 2; } message ProcessInternal { - Process process = 1; - string color = 2; - google.protobuf.UInt32Value refcnt = 3; - // refcnt_ops is a map of operations to refcnt change - // keys can be: - // - "process++": process increased refcnt (i.e. this process starts) - // - "process--": process decreased refcnt (i.e. 
this process exits) - // - "parent++": parent increased refcnt (i.e. a process starts that has this process as a parent) - // - "parent--": parent decreased refcnt (i.e. a process exits that has this process as a parent) - map refcnt_ops = 4; + Process process = 1; + string color = 2; + google.protobuf.UInt32Value refcnt = 3; + // refcnt_ops is a map of operations to refcnt change + // keys can be: + // - "process++": process increased refcnt (i.e. this process starts) + // - "process--": process decreased refcnt (i.e. this process exits) + // - "parent++": parent increased refcnt (i.e. a process starts that has this process as a parent) + // - "parent--": parent decreased refcnt (i.e. a process exits that has this process as a parent) + // - "ancestor++": ancestor increased refcnt (i.e. a process starts that has this process as an ancestor) + // - "ancestor--": ancestor decreased refcnt (i.e. a process exits that has this process as an ancestor) + map refcnt_ops = 4; } message DumpProcessCacheResArgs { - repeated ProcessInternal processes = 1; + repeated ProcessInternal processes = 1; } -message GetDebugRequest{ - ConfigFlag flag = 1; - oneof arg { - DumpProcessCacheReqArgs dump = 2; - } +message GetDebugRequest { + ConfigFlag flag = 1; + oneof arg { + DumpProcessCacheReqArgs dump = 2; + } } -message GetDebugResponse{ - ConfigFlag flag = 1; - oneof arg { - LogLevel level = 2; - DumpProcessCacheResArgs processes = 3; - } +message GetDebugResponse { + ConfigFlag flag = 1; + oneof arg { + LogLevel level = 2; + DumpProcessCacheResArgs processes = 3; + } } -message SetDebugRequest{ - ConfigFlag flag = 1; - oneof arg { - LogLevel level = 2; - } +message SetDebugRequest { + ConfigFlag flag = 1; + oneof arg { + LogLevel level = 2; + } } -message SetDebugResponse{ - ConfigFlag flag = 1; - oneof arg { - LogLevel level = 2; - } +message SetDebugResponse { + ConfigFlag flag = 1; + oneof arg { + LogLevel level = 2; + } } service FineGuidanceSensors { - rpc 
GetEvents(GetEventsRequest) returns (stream GetEventsResponse) {} - rpc GetHealth(GetHealthStatusRequest) returns (GetHealthStatusResponse) {} - - rpc AddTracingPolicy(AddTracingPolicyRequest) returns (AddTracingPolicyResponse) {} - rpc DeleteTracingPolicy(DeleteTracingPolicyRequest) returns (DeleteTracingPolicyResponse) {} - rpc ListTracingPolicies(ListTracingPoliciesRequest) returns (ListTracingPoliciesResponse) {} - rpc EnableTracingPolicy(EnableTracingPolicyRequest) returns (EnableTracingPolicyResponse) {} - rpc DisableTracingPolicy(DisableTracingPolicyRequest) returns (DisableTracingPolicyResponse) {} - - rpc ListSensors(ListSensorsRequest) returns (ListSensorsResponse) { - option deprecated = true; - } - rpc EnableSensor(EnableSensorRequest) returns (EnableSensorResponse) { - option deprecated = true; - } - rpc DisableSensor(DisableSensorRequest) returns (DisableSensorResponse) { - option deprecated = true; - } - rpc RemoveSensor(RemoveSensorRequest) returns (RemoveSensorResponse) { - option deprecated = true; - } - - rpc GetStackTraceTree(GetStackTraceTreeRequest) returns (GetStackTraceTreeResponse) {} - - rpc GetVersion(GetVersionRequest) returns (GetVersionResponse) {} - - rpc RuntimeHook(RuntimeHookRequest) returns (RuntimeHookResponse) {} - - rpc GetDebug(GetDebugRequest) returns (GetDebugResponse) {} - rpc SetDebug(SetDebugRequest) returns (SetDebugResponse) {} + rpc GetEvents(GetEventsRequest) returns (stream GetEventsResponse) {} + rpc GetHealth(GetHealthStatusRequest) returns (GetHealthStatusResponse) {} + + rpc AddTracingPolicy(AddTracingPolicyRequest) returns (AddTracingPolicyResponse) {} + rpc DeleteTracingPolicy(DeleteTracingPolicyRequest) returns (DeleteTracingPolicyResponse) {} + rpc ListTracingPolicies(ListTracingPoliciesRequest) returns (ListTracingPoliciesResponse) {} + // ConfigureTracingPolicy can be used to configure a loaded tracing policy. 
+ // It can be used to: + // - enable/disable it + // - change its mode (enforcement vs monitoring) + // If multiple changes are requested and an error is encountered, the resulting state might have + // partial updates applied. In other words, configuring a tracing policy is not atomic. + rpc ConfigureTracingPolicy(ConfigureTracingPolicyRequest) returns (ConfigureTracingPolicyResponse) {} + + rpc EnableTracingPolicy(EnableTracingPolicyRequest) returns (EnableTracingPolicyResponse) { + // use ConfigureTracingPolicy instead + option deprecated = true; + } + rpc DisableTracingPolicy(DisableTracingPolicyRequest) returns (DisableTracingPolicyResponse) { + // use ConfigureTracingPolicy instead + option deprecated = true; + } + + rpc ListSensors(ListSensorsRequest) returns (ListSensorsResponse) { + option deprecated = true; + } + rpc EnableSensor(EnableSensorRequest) returns (EnableSensorResponse) { + option deprecated = true; + } + rpc DisableSensor(DisableSensorRequest) returns (DisableSensorResponse) { + option deprecated = true; + } + rpc RemoveSensor(RemoveSensorRequest) returns (RemoveSensorResponse) { + option deprecated = true; + } + + rpc GetStackTraceTree(GetStackTraceTreeRequest) returns (GetStackTraceTreeResponse) {} + + rpc GetVersion(GetVersionRequest) returns (GetVersionResponse) {} + + rpc RuntimeHook(RuntimeHookRequest) returns (RuntimeHookResponse) {} + + rpc GetDebug(GetDebugRequest) returns (GetDebugResponse) {} + rpc SetDebug(SetDebugRequest) returns (SetDebugResponse) {} } diff --git a/runtime-monitor/api/tetragon/stack.proto b/runtime-monitor/api/tetragon/stack.proto index e73a7d6e..f0f90bef 100644 --- a/runtime-monitor/api/tetragon/stack.proto +++ b/runtime-monitor/api/tetragon/stack.proto @@ -5,27 +5,29 @@ syntax = "proto3"; package tetragon; +option go_package = "github.com/cilium/tetragon/api/v1/tetragon"; + /** * Stack traces */ message StackAddress { - uint64 address = 1; - string symbol = 2; + uint64 address = 1; + string symbol = 2; }
message StackTrace { - repeated StackAddress addresses = 1; + repeated StackAddress addresses = 1; } message StackTraceLabel { - string key = 1; - uint64 count = 2; + string key = 1; + uint64 count = 2; } message StackTraceNode { - StackAddress address = 1; - uint64 count = 2; - repeated StackTraceLabel labels = 3; - repeated StackTraceNode children = 4; + StackAddress address = 1; + uint64 count = 2; + repeated StackTraceLabel labels = 3; + repeated StackTraceNode children = 4; } diff --git a/runtime-monitor/api/tetragon/tetragon.proto b/runtime-monitor/api/tetragon/tetragon.proto index 0ce9e5ae..dc5ad629 100644 --- a/runtime-monitor/api/tetragon/tetragon.proto +++ b/runtime-monitor/api/tetragon/tetragon.proto @@ -3,652 +3,685 @@ syntax = "proto3"; -import "google/protobuf/timestamp.proto"; -import "google/protobuf/wrappers.proto"; - package tetragon; -import "tetragon/capabilities.proto"; +import "google/protobuf/timestamp.proto"; +import "google/protobuf/wrappers.proto"; import "tetragon/bpf.proto"; +import "tetragon/capabilities.proto"; + +option go_package = "github.com/cilium/tetragon/api/v1/tetragon"; message Image { - // Identifier of the container image composed of the registry path and the - // sha256. - string id = 1; - // Name of the container image composed of the registry path and the tag. - string name = 2; + // Identifier of the container image composed of the registry path and the + // sha256. + string id = 1; + // Name of the container image composed of the registry path and the tag. + string name = 2; +} + +message SecurityContext { + // True if this container is privileged. + bool privileged = 1; } message Container { - // Identifier of the container. - string id = 1; - // Name of the container. - string name = 2; - // Image of the container. - Image image = 3; - // Start time of the container. - google.protobuf.Timestamp start_time = 4; - // Process identifier in the container namespace.
- google.protobuf.UInt32Value pid = 5; - // If this is set true, it means that the process might have been originated from - // a Kubernetes exec probe. For this field to be true, the following must be true: - // 1. The binary field matches the first element of the exec command list for either - // liveness or readiness probe excluding the basename. For example, "/bin/ls" - // and "ls" are considered a match. - // 2. The arguments field exactly matches the rest of the exec command list. - bool maybe_exec_probe = 13; + // Identifier of the container. + string id = 1; + // Name of the container. + string name = 2; + // Image of the container. + Image image = 3; + // Start time of the container. + google.protobuf.Timestamp start_time = 4; + // Process identifier in the container namespace. + google.protobuf.UInt32Value pid = 5; + // If this is set true, it means that the process might have been originated from + // a Kubernetes exec probe. For this field to be true, the following must be true: + // 1. The binary field matches the first element of the exec command list for either + // liveness or readiness probe excluding the basename. For example, "/bin/ls" + // and "ls" are considered a match. + // 2. The arguments field exactly matches the rest of the exec command list. + bool maybe_exec_probe = 13; + // The security context of the container + SecurityContext security_context = 14; } message Pod { - // Kubernetes namespace of the Pod. - string namespace = 1; - // Name of the Pod. - string name = 2; - // Container of the Pod from which the process that triggered the event - // originates. - Container container = 4; - // Contains all the labels of the pod. - map pod_labels = 5; - // Kubernetes workload of the Pod. - string workload = 6; - // Kubernetes workload kind (e.g. "Deployment", "DaemonSet") of the Pod. - string workload_kind = 7; + // Kubernetes namespace of the Pod. + string namespace = 1; + // Name of the Pod. 
+ string name = 2; + // Container of the Pod from which the process that triggered the event + // originates. + Container container = 4; + // Contains all the labels of the pod. + map pod_labels = 5; + // Kubernetes workload of the Pod. + string workload = 6; + // Kubernetes workload kind (e.g. "Deployment", "DaemonSet") of the Pod. + string workload_kind = 7; + // Contains all the annotations of the pod. + map pod_annotations = 8; } message Capabilities { - // Permitted set indicates what capabilities the process can use. This is a - // limiting superset for the effective capabilities that the thread may - // assume. It is also a limiting superset for the capabilities that may be - // added to the inheritable set by a thread without the CAP_SETPCAP in its - // effective set. - repeated CapabilitiesType permitted = 1; - // Effective set indicates what capabilities are active in a process. This - // is the set used by the kernel to perform permission checks for the - // thread. - repeated CapabilitiesType effective = 2; - // Inheritable set indicates which capabilities will be inherited by the - // current process when running as a root user. - repeated CapabilitiesType inheritable = 3; + // Permitted set indicates what capabilities the process can use. This is a + // limiting superset for the effective capabilities that the thread may + // assume. It is also a limiting superset for the capabilities that may be + // added to the inheritable set by a thread without the CAP_SETPCAP in its + // effective set. + repeated CapabilitiesType permitted = 1; + // Effective set indicates what capabilities are active in a process. This + // is the set used by the kernel to perform permission checks for the + // thread. + repeated CapabilitiesType effective = 2; + // Inheritable set indicates which capabilities will be inherited by the + // current process when running as a root user. 
+ repeated CapabilitiesType inheritable = 3; } message Namespace { - // Inode number of the namespace. - uint32 inum = 1; - // Indicates if namespace belongs to host. - bool is_host = 2; + // Inode number of the namespace. + uint32 inum = 1; + // Indicates if namespace belongs to host. + bool is_host = 2; } message Namespaces { - // Hostname and NIS domain name. - Namespace uts = 1; - // System V IPC, POSIX message queues. - Namespace ipc = 2; - // Mount points. - Namespace mnt = 3; - // Process IDs. - Namespace pid = 4; - // Process IDs for children processes. - Namespace pid_for_children = 5; - // Network devices, stacks, ports, etc. - Namespace net = 6; - // Boot and monotonic clocks. - Namespace time = 7; - // Boot and monotonic clocks for children processes. - Namespace time_for_children = 8; - // Cgroup root directory. - Namespace cgroup = 9; - // User and group IDs. - Namespace user = 10; + // Hostname and NIS domain name. + Namespace uts = 1; + // System V IPC, POSIX message queues. + Namespace ipc = 2; + // Mount points. + Namespace mnt = 3; + // Process IDs. + Namespace pid = 4; + // Process IDs for children processes. + Namespace pid_for_children = 5; + // Network devices, stacks, ports, etc. + Namespace net = 6; + // Boot and monotonic clocks. + Namespace time = 7; + // Boot and monotonic clocks for children processes. + Namespace time_for_children = 8; + // Cgroup root directory. + Namespace cgroup = 9; + // User and group IDs. + Namespace user = 10; } message UserNamespace { - // Nested level of the user namespace. Init or host user namespace is at level 0. - google.protobuf.Int32Value level = 1; - // The owner user ID of the namespace - google.protobuf.UInt32Value uid = 2; - // The owner group ID of the namepace. - google.protobuf.UInt32Value gid = 3; - // The user namespace details that include the inode number of the namespace. - Namespace ns = 4; + // Nested level of the user namespace. Init or host user namespace is at level 0. 
+ google.protobuf.Int32Value level = 1; + // The owner user ID of the namespace + google.protobuf.UInt32Value uid = 2; + // The owner group ID of the namespace. + google.protobuf.UInt32Value gid = 3; + // The user namespace details that include the inode number of the namespace. + Namespace ns = 4; } message ProcessCredentials { - // The real user ID of the process' owner. - google.protobuf.UInt32Value uid = 1; - // The real group ID of the process' owner. - google.protobuf.UInt32Value gid = 2; - // The effective user ID used for permission checks. - google.protobuf.UInt32Value euid = 3; - // The effective group ID used for permission checks. - google.protobuf.UInt32Value egid = 4; - // The saved user ID. - google.protobuf.UInt32Value suid = 5; - // The saved group ID. - google.protobuf.UInt32Value sgid = 6; - // the filesystem user ID used for filesystem access checks. Usually equals the euid. - google.protobuf.UInt32Value fsuid = 7; - // The filesystem group ID used for filesystem access checks. Usually equals the egid. - google.protobuf.UInt32Value fsgid = 8; - // Secure management flags - repeated SecureBitsType securebits = 9; - // Set of capabilities that define the permissions the process can execute with. - Capabilities caps = 10; - // User namespace where the UIDs, GIDs and capabilities are relative to. - UserNamespace user_ns = 11; + // The real user ID of the process' owner. + google.protobuf.UInt32Value uid = 1; + // The real group ID of the process' owner. + google.protobuf.UInt32Value gid = 2; + // The effective user ID used for permission checks. + google.protobuf.UInt32Value euid = 3; + // The effective group ID used for permission checks. + google.protobuf.UInt32Value egid = 4; + // The saved user ID. + google.protobuf.UInt32Value suid = 5; + // The saved group ID. + google.protobuf.UInt32Value sgid = 6; + // The filesystem user ID used for filesystem access checks. Usually equals the euid.
+ google.protobuf.UInt32Value fsuid = 7; + // The filesystem group ID used for filesystem access checks. Usually equals the egid. + google.protobuf.UInt32Value fsgid = 8; + // Secure management flags + repeated SecureBitsType securebits = 9; + // Set of capabilities that define the permissions the process can execute with. + Capabilities caps = 10; + // User namespace where the UIDs, GIDs and capabilities are relative to. + UserNamespace user_ns = 11; } message InodeProperties { - // The inode number - uint64 number = 1; - // The inode links on the file system. If zero means the file is only in memory - google.protobuf.UInt32Value links = 2; + // The inode number + uint64 number = 1; + // The inode links on the file system. If zero means the file is only in memory + google.protobuf.UInt32Value links = 2; } message FileProperties { - // Inode of the file - InodeProperties inode = 1; - // Path of the file - string path = 2; + // Inode of the file + InodeProperties inode = 1; + // Path of the file + string path = 2; } message BinaryProperties { - // If set then this is the set user ID used for execution - google.protobuf.UInt32Value setuid = 1; - // If set then this is the set group ID used for execution - google.protobuf.UInt32Value setgid = 2; - // The reasons why this binary execution changed privileges. Usually this happens when the process executes - // a binary with the set-user-ID to root or file capability sets. - // The final granted privileges can be listed inside the `process_credentials` or capabilities fields part of of the `process` object. - repeated ProcessPrivilegesChanged privileges_changed = 3; - // File properties in case the executed binary is: - // 1. An anonymous shared memory file https://man7.org/linux/man-pages/man7/shm_overview.7.html. - // 2. An anonymous file obtained with memfd API https://man7.org/linux/man-pages/man2/memfd_create.2.html. - // 3. Or it was deleted from the file system. 
- FileProperties file = 4; + // If set then this is the set user ID used for execution + google.protobuf.UInt32Value setuid = 1; + // If set then this is the set group ID used for execution + google.protobuf.UInt32Value setgid = 2; + // The reasons why this binary execution changed privileges. Usually this happens when the process executes + // a binary with the set-user-ID to root or file capability sets. + // The final granted privileges can be listed inside the `process_credentials` or capabilities fields part of of the `process` object. + repeated ProcessPrivilegesChanged privileges_changed = 3; + // File properties in case the executed binary is: + // 1. An anonymous shared memory file https://man7.org/linux/man-pages/man7/shm_overview.7.html. + // 2. An anonymous file obtained with memfd API https://man7.org/linux/man-pages/man2/memfd_create.2.html. + // 3. Or it was deleted from the file system. + FileProperties file = 4; } // User records message UserRecord { - // The UNIX username for this record. Corresponds to `pw_name` field of [struct passwd](https://man7.org/linux/man-pages/man3/getpwnam.3.html) - // and the `sp_namp` field of [struct spwd](https://man7.org/linux/man-pages/man3/getspnam.3.html). - string name = 1; + // The UNIX username for this record. Corresponds to `pw_name` field of [struct passwd](https://man7.org/linux/man-pages/man3/getpwnam.3.html) + // and the `sp_namp` field of [struct spwd](https://man7.org/linux/man-pages/man3/getspnam.3.html). + string name = 1; } message Process { - // Exec ID uniquely identifies the process over time across all the nodes in the cluster. - string exec_id = 1; - // Process identifier from host PID namespace. - google.protobuf.UInt32Value pid = 2; - // The effective User identifier used for permission checks. This field maps to the - // 'ProcessCredentials.euid' field. Run with the `--enable-process-cred` flag to - // enable 'ProcessCredentials' and get all the User and Group identifiers. 
- google.protobuf.UInt32Value uid = 3; - // Current working directory of the process. - string cwd = 4; - // Absolute path of the executed binary. - string binary = 5; - // Arguments passed to the binary at execution. - string arguments = 6; - // Flags are for debugging purposes only and should not be considered a - // reliable source of information. They hold various information about - // which syscalls generated events, use of internal Tetragon buffers, - // errors and more. - // - `execve` This event is generated by an execve syscall for a new - // process. See procFs for the other option. A correctly formatted event - // should either set execve or procFS (described next). - // - `procFS` This event is generated from a proc interface. This happens - // at Tetragon init when existing processes are being loaded into Tetragon - // event buffer. All events should have either execve or procFS set. - // - `truncFilename` Indicates a truncated processes filename because the - // buffer size is too small to contain the process filename. Consider - // increasing buffer size to avoid this. - // - `truncArgs` Indicates truncated the processes arguments because the - // buffer size was too small to contain all exec args. Consider increasing - // buffer size to avoid this. - // - `taskWalk` Primarily useful for debugging. Indicates a walked process - // hierarchy to find a parent process in the Tetragon buffer. This may - // happen when we did not receive an exec event for the immediate parent of - // a process. Typically means we are looking at a fork that in turn did - // another fork we don't currently track fork events exactly and instead - // push an event with the original parent exec data. This flag can provide - // this insight into the event if needed. - // - `miss` An error flag indicating we could not find parent info in the - // Tetragon event buffer. If this is set it should be reported to Tetragon - // developers for debugging. 
Tetragon will do its best to recover - // information about the process from available kernel data structures - // instead of using cached info in this case. However, args will not be - // available. - // - `needsAUID` An internal flag for Tetragon to indicate the audit has - // not yet been resolved. The BPF hooks look at this flag to determine if - // probing the audit system is necessary. - // - `errorFilename` An error flag indicating an error happened while - // reading the filename. If this is set it should be reported to Tetragon - // developers for debugging. - // - `errorArgs` An error flag indicating an error happened while reading - // the process args. If this is set it should be reported to Tetragon - // developers for debugging - // - `needsCWD` An internal flag for Tetragon to indicate the current - // working directory has not yet been resolved. The Tetragon hooks look at - // this flag to determine if probing the CWD is necessary. - // - `noCWDSupport` Indicates that CWD is removed from the event because - // the buffer size is too small. Consider increasing buffer size to avoid - // this. - // - `rootCWD` Indicates that CWD is the root directory. This is necessary - // to inform readers the CWD is not in the event buffer and is '/' instead. - // - `errorCWD` An error flag indicating an error occurred while reading - // the CWD of a process. If this is set it should be reported to Tetragon - // developers for debugging. - // - `clone` Indicates the process issued a clone before exec*. This is the - // general flow to exec* a new process, however its possible to replace the - // current process with a new process by doing an exec* without a clone. In - // this case the flag will be omitted and the same PID will be used by the - // kernel for both the old process and the newly exec'd process. - string flags = 7; - // Start time of the execution. 
- google.protobuf.Timestamp start_time = 8; - // Audit user ID, this ID is assigned to a user upon login and is inherited - // by every process even when the user's identity changes. For example, by - // switching user accounts with su - john. - google.protobuf.UInt32Value auid = 9; - // Information about the the Kubernetes Pod where the event originated. - Pod pod = 10; - // The 15 first digits of the container ID. - string docker = 11; - // Exec ID of the parent process. - string parent_exec_id = 12; - // Reference counter from the Tetragon process cache. - uint32 refcnt = 13; - // Set of capabilities that define the permissions the process can execute with. - Capabilities cap = 14; - // Linux namespaces of the process, disabled by default, can be enabled by - // the `--enable-process-ns` flag. - Namespaces ns = 15; - // Thread ID, note that for the thread group leader, tid is equal to pid. - google.protobuf.UInt32Value tid = 16; - // Process credentials, disabled by default, can be enabled by the - // `--enable-process-cred` flag. - ProcessCredentials process_credentials = 17; - // Executed binary properties. This field is only available on ProcessExec events. - BinaryProperties binary_properties = 18; - // UserRecord contains user information about the event. - // It is only supported when i) Tetragon is running as a systemd service or directly on the host, and - // ii) when the flag `--username-metadata` is set to "unix". In this case, the information is retrieved from - // the traditional user database `/etc/passwd` and no name services lookups are performed. - // The resolution will only be attempted for processes in the host namespace. - // Note that this resolution happens in user-space, which means that mapping might have changed - // between the in-kernel BPF hook being executed and the username resolution. 
- UserRecord user = 19; - // If set to true, this process is containerized and is a member of the - // process tree rooted at pid=1 in its PID namespace. This is useful if, - // for example, you wish to discern whether a process was spawned using a - // tool like nsenter or kubectl exec. - google.protobuf.BoolValue in_init_tree = 20; + // Exec ID uniquely identifies the process over time across all the nodes in the cluster. + string exec_id = 1; + // Process identifier from host PID namespace. + google.protobuf.UInt32Value pid = 2; + // The effective User identifier used for permission checks. This field maps to the + // 'ProcessCredentials.euid' field. Run with the `--enable-process-cred` flag to + // enable 'ProcessCredentials' and get all the User and Group identifiers. + google.protobuf.UInt32Value uid = 3; + // Current working directory of the process. + string cwd = 4; + // Absolute path of the executed binary. + string binary = 5; + // Arguments passed to the binary at execution. + string arguments = 6; + // Flags are for debugging purposes only and should not be considered a + // reliable source of information. They hold various information about + // which syscalls generated events, use of internal Tetragon buffers, + // errors and more. + // - `execve` This event is generated by an execve syscall for a new + // process. See procFs for the other option. A correctly formatted event + // should either set execve or procFS (described next). + // - `procFS` This event is generated from a proc interface. This happens + // at Tetragon init when existing processes are being loaded into Tetragon + // event buffer. All events should have either execve or procFS set. + // - `truncFilename` Indicates a truncated processes filename because the + // buffer size is too small to contain the process filename. Consider + // increasing buffer size to avoid this. 
+ // - `truncArgs` Indicates truncated the processes arguments because the + // buffer size was too small to contain all exec args. Consider increasing + // buffer size to avoid this. + // - `taskWalk` Primarily useful for debugging. Indicates a walked process + // hierarchy to find a parent process in the Tetragon buffer. This may + // happen when we did not receive an exec event for the immediate parent of + // a process. Typically means we are looking at a fork that in turn did + // another fork we don't currently track fork events exactly and instead + // push an event with the original parent exec data. This flag can provide + // this insight into the event if needed. + // - `miss` An error flag indicating we could not find parent info in the + // Tetragon event buffer. If this is set it should be reported to Tetragon + // developers for debugging. Tetragon will do its best to recover + // information about the process from available kernel data structures + // instead of using cached info in this case. However, args will not be + // available. + // - `needsAUID` An internal flag for Tetragon to indicate the audit has + // not yet been resolved. The BPF hooks look at this flag to determine if + // probing the audit system is necessary. + // - `errorFilename` An error flag indicating an error happened while + // reading the filename. If this is set it should be reported to Tetragon + // developers for debugging. + // - `errorArgs` An error flag indicating an error happened while reading + // the process args. If this is set it should be reported to Tetragon + // developers for debugging + // - `needsCWD` An internal flag for Tetragon to indicate the current + // working directory has not yet been resolved. The Tetragon hooks look at + // this flag to determine if probing the CWD is necessary. + // - `noCWDSupport` Indicates that CWD is removed from the event because + // the buffer size is too small. Consider increasing buffer size to avoid + // this. 
+ // - `rootCWD` Indicates that CWD is the root directory. This is necessary + // to inform readers the CWD is not in the event buffer and is '/' instead. + // - `errorCWD` An error flag indicating an error occurred while reading + // the CWD of a process. If this is set it should be reported to Tetragon + // developers for debugging. + // - `clone` Indicates the process issued a clone before exec*. This is the + // general flow to exec* a new process, however it's possible to replace the + // current process with a new process by doing an exec* without a clone. In + // this case the flag will be omitted and the same PID will be used by the + // kernel for both the old process and the newly exec'd process. + // - `unknown` Indicates the process was not found in the process cache + // and contains just pid and start time. + string flags = 7; + // Start time of the execution. + google.protobuf.Timestamp start_time = 8; + // Audit user ID, this ID is assigned to a user upon login and is inherited + // by every process even when the user's identity changes. For example, by + // switching user accounts with su - john. + google.protobuf.UInt32Value auid = 9; + // Information about the Kubernetes Pod where the event originated. + Pod pod = 10; + // The 15 first digits of the container ID. + string docker = 11; + // Exec ID of the parent process. + string parent_exec_id = 12; + // Reference counter from the Tetragon process cache. + uint32 refcnt = 13; + // Set of capabilities that define the permissions the process can execute with. + Capabilities cap = 14; + // Linux namespaces of the process, disabled by default, can be enabled by + // the `--enable-process-ns` flag. + Namespaces ns = 15; + // Thread ID, note that for the thread group leader, tid is equal to pid. + google.protobuf.UInt32Value tid = 16; + // Process credentials, disabled by default, can be enabled by the + // `--enable-process-cred` flag.
+ ProcessCredentials process_credentials = 17; + // Executed binary properties. This field is only available on ProcessExec events. + BinaryProperties binary_properties = 18; + // UserRecord contains user information about the event. + // It is only supported when i) Tetragon is running as a systemd service or directly on the host, and + // ii) when the flag `--username-metadata` is set to "unix". In this case, the information is retrieved from + // the traditional user database `/etc/passwd` and no name services lookups are performed. + // The resolution will only be attempted for processes in the host namespace. + // Note that this resolution happens in user-space, which means that mapping might have changed + // between the in-kernel BPF hook being executed and the username resolution. + UserRecord user = 19; + // If set to true, this process is containerized and is a member of the + // process tree rooted at pid=1 in its PID namespace. This is useful if, + // for example, you wish to discern whether a process was spawned using a + // tool like nsenter or kubectl exec. + google.protobuf.BoolValue in_init_tree = 20; } message ProcessExec { - // Process that triggered the exec. - Process process = 1; - // Immediate parent of the process. - Process parent = 2; - // Ancestors of the process beyond the immediate parent. - repeated Process ancestors = 3; + // Process that triggered the exec. + Process process = 1; + // Immediate parent of the process. + Process parent = 2; + // Ancestors of the process beyond the immediate parent. + repeated Process ancestors = 3; } message ProcessExit { - // Process that triggered the exit. - Process process = 1; - // Immediate parent of the process. - Process parent = 2; - // Signal that the process received when it exited, for example SIGKILL or - // SIGTERM (list all signal names with `kill -l`). 
If there is no signal - // handler implemented for a specific process, we report the exit status - // code that can be found in the status field. - string signal = 3; - // Status code on process exit. For example, the status code can indicate - // if an error was encountered or the program exited successfully. - uint32 status = 4; - // Date and time of the event. - google.protobuf.Timestamp time = 5; + // Process that triggered the exit. + Process process = 1; + // Immediate parent of the process. + Process parent = 2; + // Signal that the process received when it exited, for example SIGKILL or + // SIGTERM (list all signal names with `kill -l`). If there is no signal + // handler implemented for a specific process, we report the exit status + // code that can be found in the status field. + string signal = 3; + // Status code on process exit. For example, the status code can indicate + // if an error was encountered or the program exited successfully. + uint32 status = 4; + // Date and time of the event. + google.protobuf.Timestamp time = 5; + // Ancestors of the process beyond the immediate parent. 
+ repeated Process ancestors = 6; } message KprobeSock { - string family = 1; - string type = 2; - string protocol = 3; - uint32 mark = 4; - uint32 priority = 5; - string saddr = 6; - string daddr = 7; - uint32 sport = 8; - uint32 dport = 9; - uint64 cookie = 10; - string state = 11; + string family = 1; + string type = 2; + string protocol = 3; + uint32 mark = 4; + uint32 priority = 5; + string saddr = 6; + string daddr = 7; + uint32 sport = 8; + uint32 dport = 9; + uint64 cookie = 10; + string state = 11; } message KprobeSkb { - uint32 hash = 1; - uint32 len = 2; - uint32 priority = 3; - uint32 mark = 4; - string saddr = 5; - string daddr = 6; - uint32 sport = 7; - uint32 dport = 8; - uint32 proto = 9; - uint32 sec_path_len = 10; - uint32 sec_path_olen = 11; - string protocol = 12; - string family = 13; + uint32 hash = 1; + uint32 len = 2; + uint32 priority = 3; + uint32 mark = 4; + string saddr = 5; + string daddr = 6; + uint32 sport = 7; + uint32 dport = 8; + uint32 proto = 9; + uint32 sec_path_len = 10; + uint32 sec_path_olen = 11; + string protocol = 12; + string family = 13; +} + +message KprobeSockaddr { + string family = 1; + string addr = 2; + uint32 port = 3; } message KprobeNetDev { - string name = 1; + string name = 1; } message KprobePath { - string mount = 1; - string path = 2; - string flags = 3; - string permission = 4; + string mount = 1; + string path = 2; + string flags = 3; + string permission = 4; } message KprobeFile { - string mount = 1; - string path = 2; - string flags = 3; - string permission = 4; + string mount = 1; + string path = 2; + string flags = 3; + string permission = 4; } message KprobeTruncatedBytes { - bytes bytes_arg = 1; - uint64 orig_size = 2; + bytes bytes_arg = 1; + uint64 orig_size = 2; } message KprobeCred { - repeated CapabilitiesType permitted = 1; - repeated CapabilitiesType effective = 2; - repeated CapabilitiesType inheritable = 3; + repeated CapabilitiesType permitted = 1; + repeated CapabilitiesType effective = 
2; + repeated CapabilitiesType inheritable = 3; } message KprobeLinuxBinprm { - string path = 1; - string flags = 2; - string permission = 3; + string path = 1; + string flags = 2; + string permission = 3; } message KprobeCapability { - google.protobuf.Int32Value value = 1; - string name = 2; + google.protobuf.Int32Value value = 1; + string name = 2; } message KprobeUserNamespace { - google.protobuf.Int32Value level = 1; - google.protobuf.UInt32Value owner = 2; - google.protobuf.UInt32Value group = 3; - Namespace ns = 4; + google.protobuf.Int32Value level = 1; + google.protobuf.UInt32Value owner = 2; + google.protobuf.UInt32Value group = 3; + Namespace ns = 4; } message KprobeBpfAttr { - string ProgType = 1; - uint32 InsnCnt = 2; - string ProgName = 3; + string ProgType = 1; + uint32 InsnCnt = 2; + string ProgName = 3; } message KprobePerfEvent { - string KprobeFunc = 1; - string Type = 2; - uint64 Config = 3; - uint64 ProbeOffset = 4; + string KprobeFunc = 1; + string Type = 2; + uint64 Config = 3; + uint64 ProbeOffset = 4; } message KprobeBpfMap { - string MapType = 1; - uint32 KeySize = 2; - uint32 ValueSize = 3; - uint32 MaxEntries = 4; - string MapName = 5; + string MapType = 1; + uint32 KeySize = 2; + uint32 ValueSize = 3; + uint32 MaxEntries = 4; + string MapName = 5; } message SyscallId { - uint32 id = 1; - string abi = 2; + uint32 id = 1; + string abi = 2; } message KprobeArgument { - oneof arg { - string string_arg = 1; - int32 int_arg = 2; - KprobeSkb skb_arg = 3; - uint64 size_arg = 4; - bytes bytes_arg = 5; - KprobePath path_arg = 6; - KprobeFile file_arg = 7; - KprobeTruncatedBytes truncated_bytes_arg = 8; - KprobeSock sock_arg = 9; - KprobeCred cred_arg = 10; - int64 long_arg = 11; - KprobeBpfAttr bpf_attr_arg = 12; - KprobePerfEvent perf_event_arg = 13; - KprobeBpfMap bpf_map_arg = 14; - uint32 uint_arg = 15; - KprobeUserNamespace user_namespace_arg = 16 [deprecated = true]; - KprobeCapability capability_arg = 17; - ProcessCredentials 
process_credentials_arg = 19; - UserNamespace user_ns_arg = 20; - KernelModule module_arg = 21; - string kernel_cap_t_arg = 22; // Capabilities in hexadecimal format. - string cap_inheritable_arg = 23; // Capabilities inherited by a forked process in hexadecimal format. - string cap_permitted_arg = 24; // Capabilities that are currently permitted in hexadecimal format. - string cap_effective_arg = 25; // Capabilities that are actually used in hexadecimal format. - KprobeLinuxBinprm linux_binprm_arg = 26; - KprobeNetDev net_dev_arg = 27; - BpfCmd bpf_cmd_arg = 28; - SyscallId syscall_id = 29; - } - string label = 18; + oneof arg { + string string_arg = 1; + int32 int_arg = 2; + KprobeSkb skb_arg = 3; + uint64 size_arg = 4; + bytes bytes_arg = 5; + KprobePath path_arg = 6; + KprobeFile file_arg = 7; + KprobeTruncatedBytes truncated_bytes_arg = 8; + KprobeSock sock_arg = 9; + KprobeCred cred_arg = 10; + int64 long_arg = 11; + KprobeBpfAttr bpf_attr_arg = 12; + KprobePerfEvent perf_event_arg = 13; + KprobeBpfMap bpf_map_arg = 14; + uint32 uint_arg = 15; + KprobeUserNamespace user_namespace_arg = 16 [deprecated = true]; + KprobeCapability capability_arg = 17; + ProcessCredentials process_credentials_arg = 19; + UserNamespace user_ns_arg = 20; + KernelModule module_arg = 21; + string kernel_cap_t_arg = 22; // Capabilities in hexadecimal format. + string cap_inheritable_arg = 23; // Capabilities inherited by a forked process in hexadecimal format. + string cap_permitted_arg = 24; // Capabilities that are currently permitted in hexadecimal format. + string cap_effective_arg = 25; // Capabilities that are actually used in hexadecimal format. + KprobeLinuxBinprm linux_binprm_arg = 26; + KprobeNetDev net_dev_arg = 27; + BpfCmd bpf_cmd_arg = 28; + SyscallId syscall_id = 29; + KprobeSockaddr sockaddr_arg = 30; + } + string label = 18; } enum KprobeAction { - // Unknown action - KPROBE_ACTION_UNKNOWN = 0; - // Post action creates an event (default action). 
- KPROBE_ACTION_POST = 1; - // Post action creates a mapping between file descriptors and file names. - KPROBE_ACTION_FOLLOWFD = 2; - // Sigkill action synchronously terminates the process. - KPROBE_ACTION_SIGKILL = 3; - // Post action removes a mapping between file descriptors and file names. - KPROBE_ACTION_UNFOLLOWFD = 4; - // Override action modifies the return value of the call. - KPROBE_ACTION_OVERRIDE = 5; - // Post action dupplicates a mapping between file descriptors and file - // names. - KPROBE_ACTION_COPYFD = 6; - // GetURL action issue an HTTP Get request against an URL from userspace. - KPROBE_ACTION_GETURL = 7; - // GetURL action issue a DNS lookup against an URL from userspace. - KPROBE_ACTION_DNSLOOKUP = 8; - // NoPost action suppresses the transmission of the event to userspace. - KPROBE_ACTION_NOPOST = 9; - // Signal action sends specified signal to the process. - KPROBE_ACTION_SIGNAL = 10; - // TrackSock action tracks socket. - KPROBE_ACTION_TRACKSOCK = 11; - // UntrackSock action un-tracks socket. - KPROBE_ACTION_UNTRACKSOCK = 12; - // NotifyEnforcer action notifies enforcer sensor. - KPROBE_ACTION_NOTIFYENFORCER = 13; - // CleanupEnforcerNotification action cleanups any state left by NotifyEnforcer - KPROBE_ACTION_CLEANUPENFORCERNOTIFICATION = 14; + // Unknown action + KPROBE_ACTION_UNKNOWN = 0; + // Post action creates an event (default action). + KPROBE_ACTION_POST = 1; + // Post action creates a mapping between file descriptors and file names. + KPROBE_ACTION_FOLLOWFD = 2; + // Sigkill action synchronously terminates the process. + KPROBE_ACTION_SIGKILL = 3; + // Post action removes a mapping between file descriptors and file names. + KPROBE_ACTION_UNFOLLOWFD = 4; + // Override action modifies the return value of the call. + KPROBE_ACTION_OVERRIDE = 5; + // Post action dupplicates a mapping between file descriptors and file + // names. + KPROBE_ACTION_COPYFD = 6; + // GetURL action issue an HTTP Get request against an URL from userspace. 
+ KPROBE_ACTION_GETURL = 7; + // GetURL action issue a DNS lookup against an URL from userspace. + KPROBE_ACTION_DNSLOOKUP = 8; + // NoPost action suppresses the transmission of the event to userspace. + KPROBE_ACTION_NOPOST = 9; + // Signal action sends specified signal to the process. + KPROBE_ACTION_SIGNAL = 10; + // TrackSock action tracks socket. + KPROBE_ACTION_TRACKSOCK = 11; + // UntrackSock action un-tracks socket. + KPROBE_ACTION_UNTRACKSOCK = 12; + // NotifyEnforcer action notifies enforcer sensor. + KPROBE_ACTION_NOTIFYENFORCER = 13; + // CleanupEnforcerNotification action cleanups any state left by NotifyEnforcer + KPROBE_ACTION_CLEANUPENFORCERNOTIFICATION = 14; } message ProcessKprobe { - // Process that triggered the kprobe. - Process process = 1; - // Immediate parent of the process. - Process parent = 2; - // Symbol on which the kprobe was attached. - string function_name = 3; - // Arguments definition of the observed kprobe. - repeated KprobeArgument args = 4; - // Return value definition of the observed kprobe. - KprobeArgument return = 5; - // Action performed when the kprobe matched. - KprobeAction action = 6; - // Kernel stack trace to the call. - repeated StackTraceEntry kernel_stack_trace = 7; - // Name of the Tracing Policy that created that kprobe. - string policy_name = 8; - // Action performed when the return kprobe executed. - KprobeAction return_action = 9; - // Short message of the Tracing Policy to inform users what is going on. - string message = 10; - // Tags of the Tracing Policy to categorize the event. - repeated string tags = 11; - // User-mode stack trace to the call. - repeated StackTraceEntry user_stack_trace = 12; + // Process that triggered the kprobe. + Process process = 1; + // Immediate parent of the process. + Process parent = 2; + // Symbol on which the kprobe was attached. + string function_name = 3; + // Arguments definition of the observed kprobe. 
+ repeated KprobeArgument args = 4; + // Return value definition of the observed kprobe. + KprobeArgument return = 5; + // Action performed when the kprobe matched. + KprobeAction action = 6; + // Kernel stack trace to the call. + repeated StackTraceEntry kernel_stack_trace = 7; + // Name of the Tracing Policy that created that kprobe. + string policy_name = 8; + // Action performed when the return kprobe executed. + KprobeAction return_action = 9; + // Short message of the Tracing Policy to inform users what is going on. + string message = 10; + // Tags of the Tracing Policy to categorize the event. + repeated string tags = 11; + // User-mode stack trace to the call. + repeated StackTraceEntry user_stack_trace = 12; + // Ancestors of the process beyond the immediate parent. + repeated Process ancestors = 13; } message ProcessTracepoint { - // Process that triggered the tracepoint. - Process process = 1; - // Immediate parent of the process. - Process parent = 2; - // Subsystem of the tracepoint. - string subsys = 4; - // Event of the subsystem. - string event = 5; - // Arguments definition of the observed tracepoint. - // TODO: once we implement all we want, rename KprobeArgument to GenericArgument - repeated KprobeArgument args = 6; - // Name of the policy that created that tracepoint. - string policy_name = 7; - // Action performed when the tracepoint matched. - KprobeAction action = 8; - // Short message of the Tracing Policy to inform users what is going on. - string message = 9; - // Tags of the Tracing Policy to categorize the event. - repeated string tags = 10; + // Process that triggered the tracepoint. + Process process = 1; + // Immediate parent of the process. + Process parent = 2; + // Subsystem of the tracepoint. + string subsys = 4; + // Event of the subsystem. + string event = 5; + // Arguments definition of the observed tracepoint. 
+ // TODO: once we implement all we want, rename KprobeArgument to GenericArgument + repeated KprobeArgument args = 6; + // Name of the policy that created that tracepoint. + string policy_name = 7; + // Action performed when the tracepoint matched. + KprobeAction action = 8; + // Short message of the Tracing Policy to inform users what is going on. + string message = 9; + // Tags of the Tracing Policy to categorize the event. + repeated string tags = 10; + // Ancestors of the process beyond the immediate parent. + repeated Process ancestors = 11; } message ProcessUprobe { - Process process = 1; - Process parent = 2; - string path = 3; - string symbol = 4; - // Name of the policy that created that uprobe. - string policy_name = 5; - // Short message of the Tracing Policy to inform users what is going on. - string message = 6; - // Arguments definition of the observed uprobe. - repeated KprobeArgument args = 7; - // Tags of the Tracing Policy to categorize the event. - repeated string tags = 8; + Process process = 1; + Process parent = 2; + string path = 3; + string symbol = 4; + // Name of the policy that created that uprobe. + string policy_name = 5; + // Short message of the Tracing Policy to inform users what is going on. + string message = 6; + // Arguments definition of the observed uprobe. + repeated KprobeArgument args = 7; + // Tags of the Tracing Policy to categorize the event. + repeated string tags = 8; + // Ancestors of the process beyond the immediate parent. + repeated Process ancestors = 9; + // uprobe offset (mutually exclusive with symbol) + uint64 offset = 10; + // uprobe ref_ctr_offset + uint64 ref_ctr_offset = 11; } message ProcessLsm { - Process process = 1; - Process parent = 2; - // LSM hook name. - string function_name = 3; - // Name of the policy that created that LSM hook. - string policy_name = 5; - // Short message of the Tracing Policy to inform users what is going on.
- string message = 6; - // Arguments definition of the observed LSM hook. - repeated KprobeArgument args = 7; - // Action performed when the LSM hook matched. - KprobeAction action = 8; - // Tags of the Tracing Policy to categorize the event. - repeated string tags = 9; - // IMA file hash. Format algorithm:value. - string ima_hash = 11; + Process process = 1; + Process parent = 2; + // LSM hook name. + string function_name = 3; + // Name of the policy that created that LSM hook. + string policy_name = 5; + // Short message of the Tracing Policy to inform users what is going on. + string message = 6; + // Arguments definition of the observed LSM hook. + repeated KprobeArgument args = 7; + // Action performed when the LSM hook matched. + KprobeAction action = 8; + // Tags of the Tracing Policy to categorize the event. + repeated string tags = 9; + // Ancestors of the process beyond the immediate parent. + repeated Process ancestors = 10; + // IMA file hash. Format algorithm:value. + string ima_hash = 11; } message KernelModule { - // Kernel module name - string name = 1; - // If true the module signature was verified successfully. Depends on kernels compiled with - // CONFIG_MODULE_SIG option, for details please read: https://www.kernel.org/doc/Documentation/admin-guide/module-signing.rst - google.protobuf.BoolValue signature_ok = 2; - // The module tainted flags that will be applied on the kernel. For further details please read: https://docs.kernel.org/admin-guide/tainted-kernels.html - repeated TaintedBitsType tainted = 3; + // Kernel module name + string name = 1; + // If true the module signature was verified successfully. Depends on kernels compiled with + // CONFIG_MODULE_SIG option, for details please read: https://www.kernel.org/doc/Documentation/admin-guide/module-signing.rst + google.protobuf.BoolValue signature_ok = 2; + // The module tainted flags that will be applied on the kernel. 
For further details please read: https://docs.kernel.org/admin-guide/tainted-kernels.html + repeated TaintedBitsType tainted = 3; } message Test { - uint64 arg0 = 1; - uint64 arg1 = 2; - uint64 arg2 = 3; - uint64 arg3 = 4; + uint64 arg0 = 1; + uint64 arg1 = 2; + uint64 arg2 = 3; + uint64 arg3 = 4; } enum HealthStatusType { - HEALTH_STATUS_TYPE_UNDEF = 0; - HEALTH_STATUS_TYPE_STATUS = 1; + HEALTH_STATUS_TYPE_UNDEF = 0; + HEALTH_STATUS_TYPE_STATUS = 1; } enum HealthStatusResult { - HEALTH_STATUS_UNDEF = 0; - HEALTH_STATUS_RUNNING = 1; - HEALTH_STATUS_STOPPED = 2; - HEALTH_STATUS_ERROR = 3; + HEALTH_STATUS_UNDEF = 0; + HEALTH_STATUS_RUNNING = 1; + HEALTH_STATUS_STOPPED = 2; + HEALTH_STATUS_ERROR = 3; } message GetHealthStatusRequest { - repeated HealthStatusType event_set = 1; + repeated HealthStatusType event_set = 1; } // Tainted bits to indicate if the kernel was tainted. For further details: https://docs.kernel.org/admin-guide/tainted-kernels.html enum TaintedBitsType { - TAINT_UNSET = 0; + TAINT_UNSET = 0; - /* A proprietary module was loaded. */ - TAINT_PROPRIETARY_MODULE = 1; + /* A proprietary module was loaded. */ + TAINT_PROPRIETARY_MODULE = 1; - /* A module was force loaded. */ - TAINT_FORCED_MODULE = 2; + /* A module was force loaded. */ + TAINT_FORCED_MODULE = 2; - /* A module was force unloaded. */ - TAINT_FORCED_UNLOAD_MODULE = 4; + /* A module was force unloaded. */ + TAINT_FORCED_UNLOAD_MODULE = 4; - /* A staging driver was loaded. */ - TAINT_STAGED_MODULE = 1024; + /* A staging driver was loaded. */ + TAINT_STAGED_MODULE = 1024; - /* An out of tree module was loaded. */ - TAINT_OUT_OF_TREE_MODULE = 4096; + /* An out of tree module was loaded. */ + TAINT_OUT_OF_TREE_MODULE = 4096; - /* An unsigned module was loaded. Supported only on kernels built with CONFIG_MODULE_SIG option. */ - TAINT_UNSIGNED_MODULE = 8192; + /* An unsigned module was loaded. Supported only on kernels built with CONFIG_MODULE_SIG option. 
*/ + TAINT_UNSIGNED_MODULE = 8192; - /* The kernel has been live patched. */ - TAINT_KERNEL_LIVE_PATCH_MODULE = 32768; + /* The kernel has been live patched. */ + TAINT_KERNEL_LIVE_PATCH_MODULE = 32768; - /* Loading a test module. */ - TAINT_TEST_MODULE = 262144; + /* Loading a test module. */ + TAINT_TEST_MODULE = 262144; } message HealthStatus { - HealthStatusType event = 1; - HealthStatusResult status = 2; - string details = 3; + HealthStatusType event = 1; + HealthStatusResult status = 2; + string details = 3; } message GetHealthStatusResponse { - repeated HealthStatus health_status = 1; + repeated HealthStatus health_status = 1; } // loader sensor event triggered for loaded binary/library message ProcessLoader { - Process process = 1; - string path = 2; - bytes buildid = 3; + Process process = 1; + string path = 2; + bytes buildid = 3; } // RuntimeHookRequest synchronously propagates information to the agent about run-time state. message RuntimeHookRequest { - oneof event { - CreateContainer createContainer = 1; - } + oneof event { + CreateContainer createContainer = 1; + } } message RuntimeHookResponse {} @@ -662,34 +695,36 @@ message RuntimeHookResponse {} // annotations as a convenience, and may be left empty if the corresponding annotations are not // found. message CreateContainer { - // cgroupsPath is the cgroups path for the container. The path is expected to be relative to the - // cgroups mountpoint. See: https://github.com/opencontainers/runtime-spec/blob/58ec43f9fc39e0db229b653ae98295bfde74aeab/specs-go/config.go#L174 - string cgroupsPath = 1; - // rootDir is the absolute path of the root directory of the container. 
- // See: https://github.com/opencontainers/runtime-spec/blob/main/specs-go/config.go#L174 - string rootDir = 2; - // annotations are the run-time annotations for the container - // see https://github.com/opencontainers/runtime-spec/blob/main/config.md#annotations - map<string, string> annotations = 3; - // containerName is the name of the container - string containerName = 4; - // containerID is the id of the container - string containerID = 5; - // podName is the pod name - string podName = 6; - // podUID is the pod uid - string podUID = 7; - // podNamespace is the namespace of the pod - string podNamespace = 8; + // cgroupsPath is the cgroups path for the container. The path is expected to be relative to the + // cgroups mountpoint. See: https://github.com/opencontainers/runtime-spec/blob/58ec43f9fc39e0db229b653ae98295bfde74aeab/specs-go/config.go#L174 + string cgroupsPath = 1; + // rootDir is the absolute path of the root directory of the container. + // See: https://github.com/opencontainers/runtime-spec/blob/main/specs-go/config.go#L174 + string rootDir = 2; + // annotations are the run-time annotations for the container + // see https://github.com/opencontainers/runtime-spec/blob/main/config.md#annotations + map<string, string> annotations = 3; + // containerName is the name of the container + string containerName = 4; + // containerID is the id of the container + string containerID = 5; + // podName is the pod name + string podName = 6; + // podUID is the pod uid + string podUID = 7; + // podNamespace is the namespace of the pod + string podNamespace = 8; + // containerImage is the full image location (repo + image) + string containerImage = 9; } message StackTraceEntry { - // linear address of the function in kernel or user space. - uint64 address = 1; - // offset is the offset into the native instructions for the function. - uint64 offset = 2; - // symbol is the symbol name of the function. - string symbol = 3; - // module path for user space addresses.
- string module = 4; + // linear address of the function in kernel or user space. + uint64 address = 1; + // offset is the offset into the native instructions for the function. + uint64 offset = 2; + // symbol is the symbol name of the function. + string symbol = 3; + // module path for user space addresses. + string module = 4; } diff --git a/runtime-monitor/cmd/runtime-monitor/main.go b/runtime-monitor/cmd/runtime-monitor/main.go index 8be9ba89..9ab3779c 100644 --- a/runtime-monitor/cmd/runtime-monitor/main.go +++ b/runtime-monitor/cmd/runtime-monitor/main.go @@ -12,6 +12,7 @@ import ( "time" "github.com/google/gops/agent" + "github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus" "github.com/rs/zerolog/log" "github.com/runtime-radar/runtime-radar/lib/logger" "github.com/runtime-radar/runtime-radar/lib/rabbit" @@ -23,6 +24,7 @@ import ( "github.com/runtime-radar/runtime-radar/runtime-monitor/pkg/build" "github.com/runtime-radar/runtime-radar/runtime-monitor/pkg/config" "github.com/runtime-radar/runtime-radar/runtime-monitor/pkg/database" + "github.com/runtime-radar/runtime-radar/runtime-monitor/pkg/metrics" "github.com/runtime-radar/runtime-radar/runtime-monitor/pkg/monitor" "github.com/runtime-radar/runtime-radar/runtime-monitor/pkg/monitor/publisher" "github.com/runtime-radar/runtime-radar/runtime-monitor/pkg/monitor/updater" @@ -98,8 +100,14 @@ func main() { log.Fatal().Msgf("### Failed to migrate DB: %v", err) } + grpcMetrics := prometheus.NewServerMetrics(prometheus.WithServerHandlingTimeHistogram()) + opts := []grpc.ServerOption{ - grpc.ChainUnaryInterceptor(interceptor.Recovery, interceptor.Correlation), + grpc.ChainUnaryInterceptor( + interceptor.Recovery, + interceptor.Correlation, + grpcMetrics.UnaryServerInterceptor(), + ), grpc.MaxRecvMsgSize(server.MaxRecvMsgSize), } @@ -120,22 +128,27 @@ func main() { defer closeTetra() log.Info().Msgf("Connected to tetragon version %s at %s", tetra.Version, cfg.TetragonAddr) - mb, err := 
rabbit.NewMessageBroker(cfg.RabbitAddr, cfg.RabbitUser, cfg.RabbitPassword, cfg.RabbitQueue) + mb, err := rabbit.NewMessageBroker( + cfg.RabbitAddr, + cfg.RabbitUser, + cfg.RabbitPassword, + cfg.RabbitQueue, + rabbit.WithStateReporter(metrics.RabbitStateReporter(cfg.RabbitQueue, false)), + ) if err != nil { log.Fatal().Msgf("### Failed to initialize Message Broker: %v", err) } defer mb.Close() - grpcSrv := grpc.NewServer(opts...) - configSvc := composeServices(db, tetra, verifier, cfg.Auth) - - api.RegisterConfigControllerServer(grpcSrv, configSvc) + // Initialize metrics + m, err := metrics.PrepareRegistry(build.AppName, cfg.OwnCSURL, grpcMetrics) + if err != nil { + log.Fatal().Msgf("### Failed to initialize metrics: %v", err) + } - // Register reflection service on gRPC server - reflection.Register(grpcSrv) + iSrv := server.NewInstrumentation(cfg.InstrumentationAddr, m) - // Create and Run the instrumentation HTTP server for probes, etc. - iSrv := server.NewInstrumentation(cfg.InstrumentationAddr) + // Run the instrumentation HTTP server for metrics, probes, etc. go func() { if err := iSrv.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) { log.Fatal().Msgf("### Can't serve instrumentation HTTP requests: %v", err) @@ -143,6 +156,24 @@ func main() { }() log.Info().Msgf("Instrumentation HTTP server listening at %v", cfg.InstrumentationAddr) + grpcSrv := grpc.NewServer(opts...) 
+ + nodeName := cfg.NodeName + if nodeName == "" { + h, err := os.Hostname() + if err != nil { + log.Fatal().Msgf("### Failed to get hostname: %v", err) + } + nodeName = h + } + + configSvc := composeServices(db, tetra, verifier, cfg.Auth, nodeName) + + api.RegisterConfigControllerServer(grpcSrv, configSvc) + + // Register reflection service on gRPC server + reflection.Register(grpcSrv) + // Run gRPC server go func() { if err := grpcSrv.Serve(lis); err != nil { @@ -244,10 +275,11 @@ func eventsPublisher(tetra *monitor.Tetra, mb *rabbit.MessageBroker) { p.Run(shutdown) } -func composeServices(db *gorm.DB, monitor monitor.Monitor, verifier jwt.Verifier, isAuth bool) (configSvc api.ConfigControllerServer) { +func composeServices(db *gorm.DB, monitor monitor.Monitor, verifier jwt.Verifier, isAuth bool, nodeName string) (configSvc api.ConfigControllerServer) { configSvc = &service.ConfigGeneric{ ConfigRepository: &database.ConfigDatabase{db}, Monitor: monitor, + NodeName: nodeName, } if isAuth { @@ -257,7 +289,7 @@ func composeServices(db *gorm.DB, monitor monitor.Monitor, verifier jwt.Verifier } } - configSvc = &service.ConfigLogging{configSvc} + configSvc = &service.ConfigLogging{&service.ConfigAudit{configSvc}} return } diff --git a/runtime-monitor/docker-compose.test.yml b/runtime-monitor/docker-compose.test.yml index c59784f0..7e7606eb 100644 --- a/runtime-monitor/docker-compose.test.yml +++ b/runtime-monitor/docker-compose.test.yml @@ -1,8 +1,11 @@ services: test: - image: golang:1.25 - working_dir: /go/src/repo - volumes: - - .:/go/src/repo + extends: + file: ../docker-compose.test.yml + service: test + depends_on: {} + environment: + - COMPONENT=runtime-monitor - command: "go test -race -count=1 -vet=off ./..." 
+networks: + local: diff --git a/runtime-monitor/pkg/config/config.go b/runtime-monitor/pkg/config/config.go index 245918b5..b05afcd2 100644 --- a/runtime-monitor/pkg/config/config.go +++ b/runtime-monitor/pkg/config/config.go @@ -20,7 +20,7 @@ type Config struct { LogFile string // path to log file ListenGRPCAddr string // address "[host]:port" that server should be listening on ListenHTTPAddr string // address "[host]:port" that server should be listening for health checks - InstrumentationAddr string // address "[host]:port" that instrumentation server should be listening for health checks and metrics + InstrumentationAddr string // address "[host]:port" that instrumentation server should be listening on TLS bool // is TLS enabled? TokenKey string // key for jwt token Auth bool // is auth enabled? @@ -31,7 +31,9 @@ type Config struct { RabbitUser string // RabbitMQ user RabbitPassword string // RabbitMQ password RabbitQueue string // RabbitMQ queue name to publish events to + OwnCSURL string // URL of current CS (http(s)://host[:port]). GopsAddr string // gops listen address + NodeName string // name of node the component is running on } // New reads config from environment and returns pointer to a new Config. @@ -60,7 +62,9 @@ func New() *Config { flag.StringVar(&c.RabbitPassword, "rabbitPassword", config.LookupEnvString("RABBIT_PASSWORD", "guest"), "Set RabbitMQ password.") flag.StringVar(&c.RabbitQueue, "rabbitQueue", config.LookupEnvString("RABBIT_QUEUE", "runtime_events"), "Set RabbitMQ queue name to publish events to.") flag.StringVar(&c.GopsAddr, "listenGopsAddr", config.LookupEnvString("LISTEN_GOPS_ADDR", "127.0.0.1:7000"), `Address in form of "[host]:port" that gops agent should be listening on. 
It's not safe to listen to interfaces other than loopback in production.`) - flag.StringVar(&c.InstrumentationAddr, "listenInstrumentationAddr", config.LookupEnvString("LISTEN_INSTRUMENTATION_ADDR", ":9090"), `Address in form of "[host]:port" that instrumentation HTTP server should be listening on.`) + flag.StringVar(&c.OwnCSURL, "ownCSURL", config.LookupEnvString("OWN_CS_URL", ""), "URL of current CS (http(s)://host[:port]).") + flag.StringVar(&c.InstrumentationAddr, "listenInstrumentationAddr", config.LookupEnvString("LISTEN_INSTRUMENTATION_ADDR", ":9090"), `Address in form of "[host]:port" that instrumentation (metrics, probes...) HTTP server should be listening on.`) + flag.StringVar(&c.NodeName, "nodeName", config.LookupEnvString("NODE_NAME", ""), `Name of node the component is running on.`) flag.Parse() diff --git a/runtime-monitor/pkg/database/config.go b/runtime-monitor/pkg/database/config.go index 124fab56..3d8c2804 100644 --- a/runtime-monitor/pkg/database/config.go +++ b/runtime-monitor/pkg/database/config.go @@ -13,6 +13,7 @@ type ConfigRepository interface { Add(ctx context.Context, ls ...*model.Config) error GetLast(ctx context.Context, preloadData bool) (*model.Config, error) Delete(ctx context.Context, id uuid.UUID) error + UpdateWithMap(ctx context.Context, id uuid.UUID, m map[string]any) error } type ConfigDatabase struct { @@ -33,7 +34,7 @@ func (cd *ConfigDatabase) GetLast(ctx context.Context, preloadData bool) (*model c := &model.Config{} err := cd.preloadData(ctx, preloadData). - Order("created_at desc"). + Order("updated_at desc"). Take(&c). Error @@ -46,6 +47,14 @@ func (cd *ConfigDatabase) Delete(ctx context.Context, id uuid.UUID) error { Error } +// UpdateWithMap updates record in DB by setting of provided key-value (even zeroed) map entries, where key can either be DB column, or struct field name. +func (cd *ConfigDatabase) UpdateWithMap(ctx context.Context, id uuid.UUID, m map[string]any) error { + return cd.WithContext(ctx). 
+ Model(&model.Config{Base: model.Base{ID: id}}). + Updates(m). + Error +} + func (cd *ConfigDatabase) preloadData(ctx context.Context, preloadData bool) *gorm.DB { if preloadData { return cd.WithContext(ctx). diff --git a/runtime-monitor/pkg/database/database.go b/runtime-monitor/pkg/database/database.go index 0cb52ebe..3bb96422 100644 --- a/runtime-monitor/pkg/database/database.go +++ b/runtime-monitor/pkg/database/database.go @@ -8,6 +8,7 @@ import ( "time" "github.com/rs/zerolog/log" + "github.com/runtime-radar/runtime-radar/lib/logger" "github.com/runtime-radar/runtime-radar/runtime-monitor/pkg/model" "gorm.io/driver/postgres" "gorm.io/gorm" @@ -39,7 +40,7 @@ func New(address, database, user, password string, sslMode, sslCheckCert bool) ( ll = gorm_logger.Info } gormLogger := gorm_logger.New( - &GORMLogger{&log.Logger}, + &logger.GORM{&log.Logger}, gorm_logger.Config{ SlowThreshold: 100 * time.Millisecond, // Slow SQL threshold Colorful: false, // Disable color diff --git a/runtime-monitor/pkg/metrics/metrics.go b/runtime-monitor/pkg/metrics/metrics.go new file mode 100644 index 00000000..c97c8216 --- /dev/null +++ b/runtime-monitor/pkg/metrics/metrics.go @@ -0,0 +1,70 @@ +package metrics + +import ( + "strconv" + + "github.com/prometheus/client_golang/prometheus" + "github.com/runtime-radar/runtime-radar/lib/metrics" +) + +const ( + queueLabel = "queue" + isConsumerLabel = "is_consumer" +) + +var ( + commonMetrics []prometheus.Collector + + RabbitAddEventsSuccessCount = addToCommon(prometheus.NewCounter(prometheus.CounterOpts{ + Name: "rabbit_add_events_success_count", + Help: "Number of events successfully added to the rabbitmq queue", + })) + RabbitAddEventsFailureCount = addToCommon(prometheus.NewCounter(prometheus.CounterOpts{ + Name: "rabbit_add_events_failure_count", + Help: "Number of events that failed to be added to the rabbitmq queue", + })) + TetragonEventsBufferSizeGauge = addToCommon(prometheus.NewGauge(prometheus.GaugeOpts{ + Name: 
"tetragon_events_buffer_size_gauge", + Help: "Number of events in the buffer received from the tetragon", + })) + TetragonEventsDroppedCount = addToCommon(prometheus.NewCounter(prometheus.CounterOpts{ + Name: "tetragon_events_dropped_count", + Help: "Number of tetragon events dropped", + })) + RabbitBrokerConnectionStateGauge = addToCommon(prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: "rabbit_broker_connection_state_gauge", + Help: "Availability of broker connection to rabbit (1 - available, 0 - not available)", + }, []string{queueLabel, isConsumerLabel})) + + // Example addToCommon definition: + // + // ProxySuccessCount = addToCommon(prometheus.NewCounterVec(prometheus.CounterOpts{ + // Name: "proxy_success_count", + // Help: "The number of successfully processed messages on proxy service", + // }, []string{myLabel})) +) + +// PrepareRegistry creates and registers service global metrics +func PrepareRegistry(service, cluster string, m ...prometheus.Collector) (*metrics.Registry, error) { + var mm []prometheus.Collector + mm = append(mm, commonMetrics...) + mm = append(mm, m...) + + return metrics.NewRegistry(service, cluster, mm...) 
+} + +func RabbitStateReporter(queue string, isConsumer bool) func(bool) { + return func(isAlive bool) { + var value float64 + if isAlive { + value = 1 + } + + RabbitBrokerConnectionStateGauge.With(prometheus.Labels{queueLabel: queue, isConsumerLabel: strconv.FormatBool(isConsumer)}).Set(value) + } +} + +func addToCommon[T prometheus.Collector](metric T) T { + commonMetrics = append(commonMetrics, metric) + return metric +} diff --git a/runtime-monitor/pkg/model/config.go b/runtime-monitor/pkg/model/config.go index 2d2a8e68..0ca8956b 100644 --- a/runtime-monitor/pkg/model/config.go +++ b/runtime-monitor/pkg/model/config.go @@ -18,8 +18,8 @@ var ( //go:embed tracingpolicy/connect.yaml connect string - //go:embed tracingpolicy/process-credentials.yaml - processCredentials string + //go:embed tracingpolicy/permissions.yaml + permissions string //go:embed tracingpolicy/file-monitoring.yaml fileMonitoring string @@ -36,11 +36,17 @@ var ( //go:embed tracingpolicy/listen-socket.yaml listenSocket string - //go:embed tracingpolicy/dup.yaml - dup string + //go:embed tracingpolicy/io-streams.yaml + ioStreams string //go:embed tracingpolicy/io-uring.yml ioUring string + + //go:embed tracingpolicy/umh.yaml + umh string + + //go:embed tracingpolicy/rootkit.yml + rootkit string ) var ( @@ -55,10 +61,10 @@ var ( Yaml: connect, Enabled: false, }, - "process-credentials": { - Name: "Privilege escalation", - Description: "This source tracks the commit_creds function allowing detection of privilege escalation, including superuser (root) privileges.", - Yaml: processCredentials, + "permissions": { + Name: "File and process access rights actions", + Description: "The source monitors calls to the Linux kernel function `commit_creds()`, which may indicate an attacker's attempts to escalate process privileges, including gaining superuser (root) rights. 
Additionally, the source monitors calls to the LSM function `security_path_chmod()` with permission sets that include execute rights.", + Yaml: permissions, Enabled: false, }, "file-monitoring": { @@ -91,10 +97,10 @@ var ( Yaml: listenSocket, Enabled: false, }, - "dup": { - Name: "Copying file descriptors", - Description: "This source monitors calls to functions that perform file descriptor copying. Currently, it tracks the copying of the standard input (Stdin) file descriptor, which may indicate an attempt to build a pipe required for the operation of various hacking tools.", - Yaml: dup, + "io-streams": { + Name: "Standard I/O stream actions", + Description: "The source monitors calls to the Linux kernel function `do_dup2()`, which duplicates the standard input (STDIN) file descriptor, as well as the creation of named pipe files (S_IFIFO) via the LSM function `security_path_mknod()`. Such actions often indicate that an attacker is attempting to launch a reverse shell, a covert communication channel, or another attack tool.", + Yaml: ioStreams, Enabled: false, }, "io-uring": { @@ -103,6 +109,18 @@ var ( Yaml: ioUring, Enabled: false, }, + "umh": { + Name: "Usermode helper API usage", + Description: "The source monitors calls to the Linux kernel functions call_usermodehelper_setup() and call_usermodehelper_exec(), which indicate the setup and launch of processes via the usermode helper API. This helps detect several different attacks that abuse this interface to launch processes at the host OS level.", + Yaml: umh, + Enabled: false, + }, + "rootkit": { + Name: "Rootkit loading monitoring", + Description: "The source monitors calls to the Linux kernel function kallsyms_lookup_name() used to obtain the address of the system call table, which may indicate rootkit activity on the target system. 
Additionally, to detect rootkits that use eBPF, the source monitors the loading of eBPF programs via the bpf_check() function.", + Yaml: rootkit, + Enabled: false, + }, }, AllowList: []*tetragon.Filter{ {PodRegex: []string{"deathstar"}}, diff --git a/runtime-monitor/pkg/model/tracingpolicy/file-monitoring.yaml b/runtime-monitor/pkg/model/tracingpolicy/file-monitoring.yaml index ea26cfa2..7f35d500 100644 --- a/runtime-monitor/pkg/model/tracingpolicy/file-monitoring.yaml +++ b/runtime-monitor/pkg/model/tracingpolicy/file-monitoring.yaml @@ -95,7 +95,7 @@ spec: values: - "host_ns" # WRITE/MODIFY - matchArgs: + matchArgs: - index: 0 operator: "Prefix" values: @@ -127,11 +127,11 @@ spec: values: - "/notify_on_release" - "/release_agent" + - "/core_pattern" - index: 1 operator: "Equal" values: - "2" # MAY_WRITE - # SYNOPSIS # int security_mmap_file(struct file *file, unsigned long prot, unsigned long flags) # @file: file @@ -139,13 +139,13 @@ spec: # - PROT_READ: memory pages with mapped @file may be read # - PROT_WRITE: memory pages with mapped @file may be written # @flags: mapping visibility to other processes (bitwise OR) - # + # # DESCRIPTION # Check permissions for a mmap operation. - # + # # RETURN VALUE # Returns 0 if permission is granted. (https://elixir.bootlin.com/linux/v6.8-rc7/source/security/security.c#L2792) - # + # # SYSCALLS # shmat(), ipc(), mmap_pgoff(), old_mmap(), mmap() # @@ -183,7 +183,7 @@ spec: - "/etc/csh.cshrc" - "/etc/csh.login" - "/etc/kubernetes/pki/" - - "/etc/security/pwquality.conf" + - "/etc/security/pwquality.conf" - "/run/secrets/" - "/proc/" - "/tmp/nginx/client-body/" @@ -254,6 +254,7 @@ spec: values: - "/notify_on_release" - "/release_agent" + - "/core_pattern" - index: 1 operator: "Mask" values: @@ -267,10 +268,10 @@ spec: # # RETURN VALUE # Returns 0 if permission is granted. 
(https://elixir.bootlin.com/linux/v6.8-rc7/source/security/security.c#L1928) - # + # # SYSCALLS # truncate() - # + # - call: "security_path_truncate" syscall: false return: true @@ -315,3 +316,131 @@ spec: values: - "/notify_on_release" - "/release_agent" + - "/core_pattern" + # SYNOPSIS + # int security_path_link(struct dentry *old_dentry, const struct path *new_dir, struct dentry *new_dentry) + # @old_dentry: existing file + # @new_dir: new parent directory + # @new_dentry: new hard link + # + # DESCRIPTION + # Check if creating a hard link is allowed. + # + # RETURN VALUE + # Returns 0 if permission is granted. (https://elixir.bootlin.com/linux/v6.16-rc5/source/security/security.c#L1987) + # + # SYSCALLS + # link, linkat + # + - call: "security_path_link" + syscall: false + return: true + args: + - index: 0 + type: "dentry" + label: "existing file" + - index: 2 + type: "dentry" + label: "new hard link" + returnArg: + index: 0 + type: "int" + selectors: + - matchNamespaces: + - namespace: Pid + operator: NotIn + values: + - "host_ns" + matchArgs: + - index: 0 + operator: "Prefix" + values: + - "/etc/" + - "/boot/" + - "/root/" + - "/run/secrets/" + - "/lib/" + - "/bin/" + - "/sbin/" + - "/usr/lib/" + - "/usr/local/lib/" + - "/usr/local/sbin/" + - "/usr/local/bin/" + - "/usr/bin/" + - "/usr/sbin/" + - "/home/" + - matchNamespaces: + - namespace: Pid + operator: NotIn + values: + - "host_ns" + matchArgs: + - index: 0 + operator: "Postfix" + values: + - ".bashrc" + - ".bash_profile" + - ".bash_login" + - ".bash_logout" + - ".cshrc" + - ".cshdirs" + - ".profile" + - ".login" + - ".logout" + - ".history" + - "-release" + # SYNOPSIS + # int security_path_rename(const struct path *old_dir, struct dentry *old_dentry, + # const struct path *new_dir, struct dentry *new_dentry, + # unsigned int flags) + # @old_dir: parent directory of the old file + # @old_dentry: the old file + # @new_dir: parent directory of the new file + # @new_dentry: the new file + # @flags: flags + # 
+ # DESCRIPTION + # Check for permission to rename a file or directory. + # + # RETURN VALUE + # Return: Returns 0 if permission is granted. (https://elixir.bootlin.com/linux/v5.4/source/security/security.c#L1064) + # + # SYSCALLS + # rename, renameat, renameat2 + # + - call: "security_path_rename" + syscall: false + return: true + args: + - index: 1 + type: "dentry" + label: "the old file" + - index: 3 + type: "dentry" + label: "the new file" + returnArg: + index: 0 + type: "int" + selectors: + - matchNamespaces: + - namespace: Pid + operator: NotIn + values: + - "host_ns" + matchArgs: + - index: 3 + operator: "Prefix" + values: + - "/etc/" + - "/boot/" + - "/lib" + - "/bin/" + - "/sbin/" + - "/usr/lib" + - "/usr/local/lib" + - "/usr/local/sbin/" + - "/usr/local/bin/" + - "/usr/bin/" + - "/usr/sbin/" + - "/root/" + - "/home/" diff --git a/runtime-monitor/pkg/model/tracingpolicy/io-streams.yaml b/runtime-monitor/pkg/model/tracingpolicy/io-streams.yaml new file mode 100644 index 00000000..a5f05bcc --- /dev/null +++ b/runtime-monitor/pkg/model/tracingpolicy/io-streams.yaml @@ -0,0 +1,72 @@ +apiVersion: cilium.io/v1alpha1 +kind: TracingPolicy +metadata: + name: "io-streams" +spec: + kprobes: + # SYNOPSIS + # int do_dup2(struct files_struct *files, struct file *file, unsigned fd, unsigned flags) + # @files: files related to the new fd (fd that will become a copy) + # @file: file related to the old fd (fd being copied) + # @fd: file descriptor which needs to be copied + # @flags: flags for tweaking function behavior + # + # DESCRIPTION + # This function creates a copy of the file descriptor (fd). + # + # RETURN VALUE + # On success, this function returns the new descriptor. If there is an error, the error code is returned. 
+ # + # SYSCALLS + # dup2(), dup3() + # + - call: "do_dup2" + syscall: false + return: true + args: + - index: 1 + type: "file" + - index: 2 + type: "int" + label: "fd" + returnArg: + index: 0 + type: "int" + selectors: + - matchArgs: + - index: 2 + operator: "Equal" + values: + - "0" # stdin + # SYNOPSIS + # int security_path_mknod(const struct path *dir, struct dentry *dentry, umode_t mode, unsigned int dev) + # @dir: parent directory path + # @dentry: new file path + # @mode: new file mode + # @dev: device number + # + # DESCRIPTION + # Check permissions when creating a file (regular and special ones). + # + # RETURN VALUE + # Returns 0 if permission is granted + # + # SYSCALLS + # mknod(), mknodat() + # + - call: "security_path_mknod" + syscall: false + return: false + args: + - index: 1 + type: "dentry" + label: "new file path" + - index: 2 + type: "uint32" # WA for broken uint16 argument type in actual releases + label: "new file mode" + selectors: + - matchArgs: + - index: 2 + operator: "Mask" + values: + - "010000" # S_IFIFO file type mask in octal form diff --git a/runtime-monitor/pkg/model/tracingpolicy/permissions.yaml b/runtime-monitor/pkg/model/tracingpolicy/permissions.yaml new file mode 100644 index 00000000..c5d3b9ca --- /dev/null +++ b/runtime-monitor/pkg/model/tracingpolicy/permissions.yaml @@ -0,0 +1,71 @@ +apiVersion: cilium.io/v1alpha1 +kind: TracingPolicy +metadata: + name: "permissions" +spec: + kprobes: + # SYNOPSIS + # int commit_creds(struct cred *new) + # @new: The credentials to be assigned + # + # DESCRIPTION + # Install a new set of credentials to the current task, using RCU to replace + # the old set. Both the objective and the subjective credentials pointers are + # updated. This function may not be called if the subjective credentials are + # in an overridden state. + # + # This function eats the caller's reference to the new credentials.
+ # + # RETURN VALUE + # Always returns 0 thus allowing this function to be tail-called at the end of, say, sys_setgid(). + # + - call: "commit_creds" + syscall: false + return: false + args: + - index: 0 # The new credentials to apply + type: "cred" + label: "the new credentials to apply" + selectors: + - matchNamespaces: + - namespace: Pid + operator: NotIn + values: + - "host_ns" + matchActions: + - action: Post + rateLimit: "1m" + # SYNOPSIS + # int security_path_chmod(const struct path *path, umode_t mode) - Check if changing the file's mode is allowed + # @path: file path + # @mode: file mode + # + # DESCRIPTION + # Check for permission to change a mode of the file @path. The new mode is + # specified in @mode which is a bitmask of constants from + # <include/uapi/linux/stat.h>. + # + # RETURN VALUE + # Returns 0 if permission is granted. + # + - call: "security_path_chmod" + syscall: false + return: false + args: + - index: 0 + type: "path" + label: "file path" + - index: 1 + type: "uint32" # WA for broken uint16 in Tetragon + label: "new file mode" + selectors: + - matchNamespaces: + - namespace: Pid + operator: NotIn + values: + - "host_ns" + matchArgs: + - index: 1 + operator: "Mask" + values: + - "0111" # equivalent of symbolic --x--x--x \ No newline at end of file diff --git a/runtime-monitor/pkg/model/tracingpolicy/rootkit.yml b/runtime-monitor/pkg/model/tracingpolicy/rootkit.yml new file mode 100644 index 00000000..e220f65b --- /dev/null +++ b/runtime-monitor/pkg/model/tracingpolicy/rootkit.yml @@ -0,0 +1,45 @@ +apiVersion: cilium.io/v1alpha1 +kind: TracingPolicy +metadata: + name: "rootkit" +spec: + kprobes: + # SYNOPSIS + # int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size); + # @prog: bpf program definition + # @attr: bpf command attributes + # @uattr: userspace pointer + # @uattr_size: userspace union size + # + # DESCRIPTION + # Run eBPF verifier. + # + # RETURN VALUE + # Returns 0 if eBPF-program is valid.
+ # + - call: "bpf_check" + syscall: false + return: true + args: + - index: 1 + type: "bpf_attr" + returnArg: + index: 0 + type: "int" + # SYNOPSIS + # unsigned long kallsyms_lookup_name(const char *name); + # @name: function symbol name + # + # DESCRIPTION + # Lookup the address for a symbol. + # + # RETURN VALUE + # Returns address of function symbol name. + # Returns 0 if not found. + # + - call: "kallsyms_lookup_name" + syscall: false + return: false + args: + - index: 0 + type: "string" \ No newline at end of file diff --git a/runtime-monitor/pkg/model/tracingpolicy/umh.yaml b/runtime-monitor/pkg/model/tracingpolicy/umh.yaml new file mode 100644 index 00000000..8a0989b3 --- /dev/null +++ b/runtime-monitor/pkg/model/tracingpolicy/umh.yaml @@ -0,0 +1,61 @@ +apiVersion: cilium.io/v1alpha1 +kind: TracingPolicy +metadata: + name: "umh" +spec: + kprobes: + # SYNOPSIS + # struct subprocess_info *call_usermodehelper_setup(const char *path, char **argv, + # char **envp, gfp_t gfp_mask, + # int (*init)(struct subprocess_info *info, struct cred *new), + # void (*cleanup)(struct subprocess_info *info), + # void *data) + # + # @path: path to usermode executable + # @argv: arg vector for process + # @envp: environment for process + # @gfp_mask: gfp mask for memory allocation + # @cleanup: a cleanup function + # @init: an init function + # @data: arbitrary context sensitive data + # + # DESCRIPTION + # Prepare to call a usermode helper. + # + # RETURN VALUE + # Returns either %NULL on allocation failure, or a subprocess_info + # structure. This should be passed to call_usermodehelper_exec to + # exec the process and free the structure. + # + - call: "call_usermodehelper_setup" + syscall: false + return: false + args: + - index: 0 + type: "string" + label: "path" + # SYNOPSIS + # int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait) + # + # @sub_info: information about the subprocess + # @wait: wait for the application to finish and return status.
+ # when UMH_NO_WAIT don't wait at all, but you get no useful error back + # when the program couldn't be exec'ed. This makes it safe to call + # from interrupt context. + # + # DESCRIPTION + # Runs a user-space application. The application is started + # asynchronously if wait is not set, and runs as a child of system workqueues. + # (ie. it runs with full root capabilities and optimized affinity). + # + # RETURN VALUE + # Returns 0 if success. + # + - call: "call_usermodehelper_exec" + syscall: false + return: false + args: + - index: 0 + type: "string" + label: "path" + resolve: "path" diff --git a/runtime-monitor/pkg/monitor/config/config.go b/runtime-monitor/pkg/monitor/config/config.go index cdecb089..89dd5eae 100644 --- a/runtime-monitor/pkg/monitor/config/config.go +++ b/runtime-monitor/pkg/monitor/config/config.go @@ -11,11 +11,6 @@ type Selector struct { EventsClient, TracingPolicies, TracingPolicyStates bool } -type InitTetra struct { - Selector Selector - Config *model.Config -} - func Diff(oldCfg, newCfg *model.Config) (sel Selector, changed bool) { // TODO: disabled for debugging purposes, anyways it's a small optimization // if !newCfg.CreatedAt.After(oldCfg.CreatedAt) { diff --git a/runtime-monitor/pkg/monitor/monitor.go b/runtime-monitor/pkg/monitor/monitor.go index ce3b000d..157e6b25 100644 --- a/runtime-monitor/pkg/monitor/monitor.go +++ b/runtime-monitor/pkg/monitor/monitor.go @@ -10,6 +10,7 @@ import ( "github.com/cilium/tetragon/api/v1/tetragon" "github.com/rs/zerolog/log" "github.com/runtime-radar/runtime-radar/lib/security" + "github.com/runtime-radar/runtime-radar/runtime-monitor/pkg/metrics" "github.com/runtime-radar/runtime-radar/runtime-monitor/pkg/model" "github.com/runtime-radar/runtime-radar/runtime-monitor/pkg/monitor/config" "google.golang.org/grpc" @@ -19,6 +20,7 @@ import ( const ( connectTimeout = time.Second dispatchTimeout = time.Second + metricsInterval = 15 * time.Second ) var ( @@ -27,10 +29,11 @@ var ( // Monitor is 
interface of Tetra monitoring instance. type Monitor interface { - Config() *model.Config - SetConfig(cfg *model.Config) + Config() (sel config.Selector, cfg *model.Config) + SetConfig(sel config.Selector, cfg *model.Config) Init(ctx context.Context, cfg *model.Config) error - Reinit(sel config.Selector, cfg *model.Config) + Update(sel config.Selector, cfg *model.Config) + LastInitErr() error Run(stop <-chan struct{}) error Events() <-chan *tetragon.GetEventsResponse } @@ -46,11 +49,15 @@ type Tetra struct { eventsCancelCause context.CancelCauseFunc config *model.Config + selector config.Selector configMu sync.RWMutex ready chan struct{} - reinit chan config.InitTetra + update chan bool events chan *tetragon.GetEventsResponse + + lastInitErr error // err occurred during last config initialization + errMu sync.RWMutex } // NewTetra creates new Tetra instance. It returns any possible error and closing function which is supposed to be put in defer statement in main. @@ -75,7 +82,7 @@ func NewTetra(address string, bufferSize int) (*Tetra, func() error, error) { sensorsClient: sensors, ready: make(chan struct{}), - reinit: make(chan config.InitTetra), + update: make(chan bool, 1), events: make(chan *tetragon.GetEventsResponse, bufferSize), } @@ -83,18 +90,19 @@ func NewTetra(address string, bufferSize int) (*Tetra, func() error, error) { } // Config returns current Tetra config. It's safe for concurrent use. -func (t *Tetra) Config() *model.Config { +func (t *Tetra) Config() (sel config.Selector, cfg *model.Config) { t.configMu.RLock() defer t.configMu.RUnlock() - return t.config + return t.selector, t.config } // SetConfig sets new Tetra config (but does not apply it). It's safe for concurrent use. 
-func (t *Tetra) SetConfig(cfg *model.Config) { +func (t *Tetra) SetConfig(sel config.Selector, cfg *model.Config) { t.configMu.Lock() defer t.configMu.Unlock() + t.selector = sel t.config = cfg } @@ -103,11 +111,19 @@ func (t *Tetra) Events() <-chan *tetragon.GetEventsResponse { return t.events } -// Reinit reinitializes Tetra based on config.Selector and given config. -func (t *Tetra) Reinit(sel config.Selector, cfg *model.Config) { - t.reinit <- config.InitTetra{ - sel, - cfg, +// Update reinitializes Tetra based on config.Selector and given config. +func (t *Tetra) Update(sel config.Selector, cfg *model.Config) { + t.configMu.Lock() + defer t.configMu.Unlock() + + // Or t.SetConfig + t.config = cfg + t.selector = sel + + select { + case t.update <- true: + default: + // Do nothing. If sending blocked, there is already update pending. } } @@ -115,35 +131,55 @@ func (t *Tetra) Reinit(sel config.Selector, cfg *model.Config) { // an ctx argument, which can be configured for cancellation on init phase. Cancellation or expiration of ctx // does not affect further stream processing. 
func (t *Tetra) Init(ctx context.Context, cfg *model.Config) error { - defer close(t.ready) + if cfg == nil { + return errors.New("nil Tetra config") + } + + sel := config.Selector{true, true, true} - return t.initBySelector(ctx, config.Selector{true, true, true}, cfg) + t.config = cfg + t.selector = sel + + if err := t.initBySelector(ctx, sel, cfg); err != nil { + return fmt.Errorf("can't init Tetra config: %w", err) + } + + close(t.ready) + + return nil } -func (t *Tetra) initBySelector(ctx context.Context, c config.Selector, cfg *model.Config) error { - t.configMu.Lock() - defer t.configMu.Unlock() +func (t *Tetra) initBySelector(ctx context.Context, sel config.Selector, cfg *model.Config) (err error) { + defer func() { + t.errMu.Lock() + t.lastInitErr = err + t.errMu.Unlock() + }() + + t.errMu.RLock() + if t.lastInitErr != nil { + sel = config.Selector{true, true, true} + } + t.errMu.RUnlock() - if c.EventsClient { + if sel.EventsClient { if err := t.initEventsClient(ctx, cfg); err != nil { return err } } - if c.TracingPolicies { + if sel.TracingPolicies { if err := t.initTracingPolicies(ctx, cfg); err != nil { return err } if err := t.initTracingPolicyStates(ctx, cfg); err != nil { return err } - } else if c.TracingPolicyStates { + } else if sel.TracingPolicyStates { if err := t.initTracingPolicyStates(ctx, cfg); err != nil { return err } } - t.config = cfg - return nil } @@ -265,11 +301,14 @@ func (t *Tetra) Run(stop <-chan struct{}) error { defer wg.Wait() defer t.eventsCancelCause(errClientContextCanceled) + go t.runMetrics(metricsInterval, stop) + for { select { - case it := <-t.reinit: - if err := t.initBySelector(context.Background(), it.Selector, it.Config); err != nil { - log.Error().Err(err).Interface("selector", it.Selector).Interface("config", it.Config).Msgf("Can't init tetra") + case <-t.update: + sel, cfg := t.Config() + if err := t.initBySelector(context.Background(), sel, cfg); err != nil { + log.Error().Err(err).Interface("selector", 
sel).Interface("config", cfg).Msgf("Can't init tetra") // Just log, don't break the monitor loop } case <-worker: @@ -298,6 +337,13 @@ func (t *Tetra) Run(stop <-chan struct{}) error { } } +func (t *Tetra) LastInitErr() error { + t.errMu.RLock() + defer t.errMu.RUnlock() + + return t.lastInitErr +} + // processStream processes the events stream, it intercepts any possible panic and converts it to error. // eventsCtx is a client context which will be checked in case of an error. As a result It lets returning // specific error and distinguishing it from generic status error with codes.Canceled. @@ -334,6 +380,7 @@ func (t *Tetra) processStream(eventsCtx context.Context) (err error) { select { case t.events <- resp: case <-timer.C: + metrics.TetragonEventsDroppedCount.Inc() log.Error().Interface("event", resp.GetEvent()).Msgf("Timeout dispatching event") } @@ -341,6 +388,21 @@ func (t *Tetra) processStream(eventsCtx context.Context) (err error) { } } +func (t *Tetra) runMetrics(interval time.Duration, stop <-chan struct{}) { + ticker := time.NewTicker(interval) + defer ticker.Stop() + + for { + select { + case <-stop: + return + + case <-ticker.C: + metrics.TetragonEventsBufferSizeGauge.Set(float64(len(t.events))) + } + } +} + func isTracingPolicyEnabled(tps *tetragon.TracingPolicyStatus) bool { if tps.GetState() == tetragon.TracingPolicyState_TP_STATE_ENABLED { return true diff --git a/runtime-monitor/pkg/monitor/publisher/publisher.go b/runtime-monitor/pkg/monitor/publisher/publisher.go index ae145e8d..03026224 100644 --- a/runtime-monitor/pkg/monitor/publisher/publisher.go +++ b/runtime-monitor/pkg/monitor/publisher/publisher.go @@ -5,6 +5,7 @@ import ( "github.com/rs/zerolog/log" "github.com/runtime-radar/runtime-radar/lib/rabbit" + "github.com/runtime-radar/runtime-radar/runtime-monitor/pkg/metrics" "github.com/runtime-radar/runtime-radar/runtime-monitor/pkg/monitor" ) @@ -25,7 +26,12 @@ func (p *Publisher) Run(stop <-chan struct{}) { case ev := <-events: if err 
:= p.PublishConsumer.Publish(ctx, ev); err != nil { log.Error().Err(err).Msgf("Can't publish event") + metrics.RabbitAddEventsFailureCount.Inc() + + continue } + metrics.RabbitAddEventsSuccessCount.Inc() + case <-stop: return } diff --git a/runtime-monitor/pkg/monitor/updater/updater.go b/runtime-monitor/pkg/monitor/updater/updater.go index 72f75f22..03978481 100644 --- a/runtime-monitor/pkg/monitor/updater/updater.go +++ b/runtime-monitor/pkg/monitor/updater/updater.go @@ -33,7 +33,7 @@ func (u *Updater) Run(stop <-chan struct{}) { continue } - oldCfg := u.Monitor.Config() + _, oldCfg := u.Monitor.Config() log.Debug().Interface("old_config", oldCfg).Msgf("Old monitor config") log.Debug().Interface("new_config", cfg).Msgf("New monitor config") @@ -45,7 +45,7 @@ func (u *Updater) Run(stop <-chan struct{}) { Interface("selector", sel). Msgf("Monitor config changed, re-initializing") - u.Monitor.Reinit(sel, cfg) + u.Monitor.Update(sel, cfg) } else { log.Debug().Msgf("Monitor config didn't change") } diff --git a/runtime-monitor/pkg/server/server.go b/runtime-monitor/pkg/server/server.go index 3c5c6541..fdd3c06a 100644 --- a/runtime-monitor/pkg/server/server.go +++ b/runtime-monitor/pkg/server/server.go @@ -8,6 +8,8 @@ import ( "github.com/grpc-ecosystem/grpc-gateway/v2/runtime" "github.com/justinas/alice" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/rs/cors" "github.com/runtime-radar/runtime-radar/lib/server/healthcheck" "github.com/runtime-radar/runtime-radar/lib/server/middleware" @@ -19,8 +21,8 @@ import ( ) const ( - readTimeout = 2 * time.Minute - writeTimeout = 2 * time.Minute + readTimeout = 30 * time.Second + writeTimeout = 30 * time.Second // Maximum message size for grpc request MaxRecvMsgSize = 10 * 1024 * 1024 // 10MB ) @@ -46,12 +48,15 @@ func New(httpAddr, grpcAddr string, tlsConfig *tls.Config) (*http.Server, error) return s, nil } -func NewInstrumentation(listenAddress 
string) *http.Server { +func NewInstrumentation(listenAddress string, gatherer prometheus.Gatherer) *http.Server { mux := http.NewServeMux() mux.HandleFunc("/ready", healthcheck.ReadyHandler) mux.HandleFunc("/live", healthcheck.LiveHandler) + handler := promhttp.HandlerFor(gatherer, promhttp.HandlerOpts{}) + mux.Handle("/metrics", handler) + h := alice.New( middleware.Log, middleware.Recovery, diff --git a/runtime-monitor/pkg/service/config_audit.go b/runtime-monitor/pkg/service/config_audit.go new file mode 100644 index 00000000..af6d5975 --- /dev/null +++ b/runtime-monitor/pkg/service/config_audit.go @@ -0,0 +1,65 @@ +package service + +import ( + "context" + "time" + + "github.com/rs/zerolog/log" + "github.com/runtime-radar/runtime-radar/lib/security/jwt" + lib_context "github.com/runtime-radar/runtime-radar/lib/security/jwt/context" + "github.com/runtime-radar/runtime-radar/lib/server/interceptor" + "github.com/runtime-radar/runtime-radar/runtime-monitor/api" + "google.golang.org/protobuf/types/known/emptypb" +) + +type ConfigAudit struct { + api.ConfigControllerServer +} + +func extractIDs(ctx context.Context) (string, string, bool) { + corrID, _ := interceptor.CorrelationIDFromContext(ctx) + token, _ := jwt.UnverifiedTokenFromContext(ctx) + tokenUserID := token.GetUserID() + userID, _ := lib_context.GetUserID(ctx) + authorized := userID != "" && userID == tokenUserID + + return corrID.String(), tokenUserID, authorized +} + +func (cl *ConfigAudit) Add(ctx context.Context, req *api.Config) (resp *emptypb.Empty, err error) { + ctx = lib_context.WithEmptyUserID(ctx) + + defer func(t0 time.Time) { + corrID, userID, authorized := extractIDs(ctx) + + conf := req.GetConfig() + tp := make(map[string]*api.TracingPolicy, len(conf.GetTracingPolicies())) + for name, policy := range conf.GetTracingPolicies() { + tp[name] = &api.TracingPolicy{ + Name: policy.GetName(), + Enabled: policy.GetEnabled(), + } + } + + log.Err(err). + Str("delay", time.Since(t0).String()). 
+ Bool("audit", true). + Bool("authorized", authorized). + Str("user_id", userID). + Str("correlation_id", corrID). + Str("object", "runtime_monitor_config"). + Str("operation", "update"). + Interface("args", &api.Config{ + Id: req.GetId(), + Config: &api.Config_ConfigJSON{ + Version: conf.GetVersion(), + TracingPolicies: tp, + }, + }). + Interface("result", resp). + Send() + }(time.Now()) + + resp, err = cl.ConfigControllerServer.Add(ctx, req) + return +} diff --git a/runtime-monitor/pkg/service/config_auth.go b/runtime-monitor/pkg/service/config_auth.go index 88ecc4de..aa8abe8d 100644 --- a/runtime-monitor/pkg/service/config_auth.go +++ b/runtime-monitor/pkg/service/config_auth.go @@ -5,6 +5,7 @@ import ( "github.com/runtime-radar/runtime-radar/lib/errcommon" "github.com/runtime-radar/runtime-radar/lib/security/jwt" + lib_context "github.com/runtime-radar/runtime-radar/lib/security/jwt/context" "github.com/runtime-radar/runtime-radar/runtime-monitor/api" "google.golang.org/protobuf/types/known/emptypb" ) @@ -26,6 +27,8 @@ func (ca *ConfigAuth) Add(ctx context.Context, req *api.Config) (resp *emptypb.E return nil, errcommon.PermissionErrorToStatus(err) } + lib_context.SetUserID(ctx) + resp, err = ca.ConfigControllerServer.Add(ctx, req) return } @@ -35,6 +38,30 @@ func (ca *ConfigAuth) Read(ctx context.Context, req *emptypb.Empty) (resp *api.C return nil, errcommon.PermissionErrorToStatus(err) } + lib_context.SetUserID(ctx) + resp, err = ca.ConfigControllerServer.Read(ctx, req) return } + +func (ca *ConfigAuth) ResetToDefault(ctx context.Context, req *emptypb.Empty) (resp *emptypb.Empty, err error) { + if err := ca.Verifier.VerifyPermission(ctx, jwt.PermissionSystemSettings, jwt.ActionUpdate); err != nil { + return nil, errcommon.PermissionErrorToStatus(err) + } + + lib_context.SetUserID(ctx) + + resp, err = ca.ConfigControllerServer.ResetToDefault(ctx, req) + return +} + +func (ca *ConfigAuth) Status(ctx context.Context, req *emptypb.Empty) (resp 
*api.ConfigStatus, err error) { + if err := ca.Verifier.VerifyPermission(ctx, jwt.PermissionSystemSettings, jwt.ActionRead); err != nil { + return nil, errcommon.PermissionErrorToStatus(err) + } + + lib_context.SetUserID(ctx) + + resp, err = ca.ConfigControllerServer.Status(ctx, req) + return +} diff --git a/runtime-monitor/pkg/service/config_generic.go b/runtime-monitor/pkg/service/config_generic.go index 1279cbf0..c835a2d5 100644 --- a/runtime-monitor/pkg/service/config_generic.go +++ b/runtime-monitor/pkg/service/config_generic.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "regexp" + "time" "github.com/cilium/tetragon/api/v1/tetragon" "github.com/cilium/tetragon/pkg/filters" @@ -30,6 +31,7 @@ type ConfigGeneric struct { ConfigRepository database.ConfigRepository Monitor monitor.Monitor + NodeName string } func (cg *ConfigGeneric) Add(ctx context.Context, req *api.Config) (*emptypb.Empty, error) { @@ -59,7 +61,7 @@ func (cg *ConfigGeneric) Add(ctx context.Context, req *api.Config) (*emptypb.Emp // Changes applied instantly when requested. However, there can be multiple nodes in cluster, // and consequently multiple runtime-monitor instances, each of which can update config. // To handle this there will be background worker doing same check periodically and calling Reinit when needed. - oldCfg := cg.Monitor.Config() + _, oldCfg := cg.Monitor.Config() log.Debug().Interface("old_config", oldCfg).Msgf("Old monitor config") log.Debug().Interface("new_config", cfg).Msgf("New monitor config") @@ -71,7 +73,7 @@ func (cg *ConfigGeneric) Add(ctx context.Context, req *api.Config) (*emptypb.Emp Interface("selector", sel). 
Msgf("Monitor config changed, re-initializing") - cg.Monitor.Reinit(sel, cfg) + cg.Monitor.Update(sel, cfg) } else { log.Debug().Msgf("Monitor config didn't change") } @@ -97,6 +99,44 @@ func (cg *ConfigGeneric) Read(ctx context.Context, _ *emptypb.Empty) (*api.Confi return resp, nil } +func (cg *ConfigGeneric) ResetToDefault(ctx context.Context, _ *emptypb.Empty) (*emptypb.Empty, error) { + m := map[string]any{ + "updated_at": time.Now(), + } + if err := cg.ConfigRepository.UpdateWithMap(ctx, model.DefaultConfig.ID, m); err != nil { + return nil, status.Errorf(codes.Internal, "can't update config: %v", err) + } + + resp := &emptypb.Empty{} + + return resp, nil +} + +func (cg *ConfigGeneric) Status(ctx context.Context, _ *emptypb.Empty) (*api.ConfigStatus, error) { + cfg, err := cg.ConfigRepository.GetLast(ctx, false) + if errors.Is(err, gorm.ErrRecordNotFound) { + return nil, status.Errorf(codes.NotFound, "config not found") + } else if err != nil { + return nil, status.Errorf(codes.Internal, "can't read config: %v", err) + } + + sel, changed := config.Diff(model.DefaultConfig, cfg) + + lastInitErr := "" + if err := cg.Monitor.LastInitErr(); err != nil { + lastInitErr = err.Error() + } + + resp := &api.ConfigStatus{ + Default: cfg.ID == model.DefaultConfig.ID, + DefaultTracingPolicies: !changed || !sel.TracingPolicies, + LastInitError: lastInitErr, + NodeName: cg.NodeName, + } + + return resp, nil +} + func (cg *ConfigGeneric) validateConfig(req *api.Config) (string, bool) { if req.Config == nil { return "no config", false @@ -108,10 +148,6 @@ func (cg *ConfigGeneric) validateConfig(req *api.Config) (string, bool) { return fmt.Sprintf("config version mismatch: expected %s, got %s", model.ConfigVersion, ver), false } - if len(req.Config.GetTracingPolicies()) == 0 { - return "no tracing policies", false - } - for i, f := range req.Config.GetAllowList() { if reason, ok := cg.validateFilter(f); !ok { return fmt.Sprintf("AllowList[%d] is invalid: %s", i, reason), false 
diff --git a/runtime-monitor/pkg/service/config_logging.go b/runtime-monitor/pkg/service/config_logging.go index 90f73e7c..4dfd0ebe 100644 --- a/runtime-monitor/pkg/service/config_logging.go +++ b/runtime-monitor/pkg/service/config_logging.go @@ -23,7 +23,7 @@ func (cl *ConfigLogging) Add(ctx context.Context, req *api.Config) (resp *emptyp Interface("args", req). Interface("result", resp). Stringer("correlation_id", corrID). - Msg("ConfigControllerServer.Add") + Msg("Called ConfigControllerServer.Add") }(time.Now()) resp, err = cl.ConfigControllerServer.Add(ctx, req) @@ -44,3 +44,33 @@ func (cl *ConfigLogging) Read(ctx context.Context, req *emptypb.Empty) (resp *ap resp, err = cl.ConfigControllerServer.Read(ctx, req) return } + +func (cl *ConfigLogging) ResetToDefault(ctx context.Context, req *emptypb.Empty) (resp *emptypb.Empty, err error) { + defer func(t0 time.Time) { + corrID, _ := interceptor.CorrelationIDFromContext(ctx) + + log.Err(err).Str("delay", time.Since(t0).String()). + Interface("args", req). + Interface("result", resp). + Stringer("correlation_id", corrID). + Msg("Called ConfigControllerServer.ResetToDefault") + }(time.Now()) + + resp, err = cl.ConfigControllerServer.ResetToDefault(ctx, req) + return +} + +func (cl *ConfigLogging) Status(ctx context.Context, req *emptypb.Empty) (resp *api.ConfigStatus, err error) { + defer func(t0 time.Time) { + corrID, _ := interceptor.CorrelationIDFromContext(ctx) + + log.Err(err).Str("delay", time.Since(t0).String()). + Interface("args", req). + Interface("result", resp). + Stringer("correlation_id", corrID). 
+ Msg("Called ConfigControllerServer.Status") + }(time.Now()) + + resp, err = cl.ConfigControllerServer.Status(ctx, req) + return +} From a40ea8d88fe15006637fed87b18bd13f9cb83613 Mon Sep 17 00:00:00 2001 From: Alexey Olshanskiy <234377865+avlllo@users.noreply.github.com> Date: Tue, 19 May 2026 17:26:50 +0300 Subject: [PATCH 02/17] feat: update reverse-proxy to 0.2 --- reverse-proxy/.helm/Chart.yaml | 2 +- reverse-proxy/.helm/templates/configmap.yaml | 6 ++++++ reverse-proxy/.helm/values.yaml | 4 ++-- reverse-proxy/Caddyfile | 16 ++++++++++++---- reverse-proxy/Dockerfile | 6 ++++-- reverse-proxy/Taskfile.yml | 2 +- 6 files changed, 26 insertions(+), 10 deletions(-) diff --git a/reverse-proxy/.helm/Chart.yaml b/reverse-proxy/.helm/Chart.yaml index c62bfca1..230d0864 100644 --- a/reverse-proxy/.helm/Chart.yaml +++ b/reverse-proxy/.helm/Chart.yaml @@ -1,7 +1,7 @@ apiVersion: v2 name: reverse-proxy description: CS web-ui and reverse proxy component -version: v0.0.1 +version: 0.0.1 dependencies: - name: common repository: file://../../install/helm/charts/common diff --git a/reverse-proxy/.helm/templates/configmap.yaml b/reverse-proxy/.helm/templates/configmap.yaml index 2740ba28..4df8a976 100644 --- a/reverse-proxy/.helm/templates/configmap.yaml +++ b/reverse-proxy/.helm/templates/configmap.yaml @@ -2,6 +2,12 @@ apiVersion: v1 kind: ConfigMap metadata: name: {{ include "common.name" . }} + labels: + {{- include "common.labels" . | nindent 4 }} + {{- with (include "common.annotations" .) }} + annotations: + {{- . | nindent 4 }} + {{- end }} data: {{- $isChildCluster := eq (include "common.cs.isChildCluster" .) 
"true" }} env.js: | diff --git a/reverse-proxy/.helm/values.yaml b/reverse-proxy/.helm/values.yaml index f34012e9..f9ae8ee8 100644 --- a/reverse-proxy/.helm/values.yaml +++ b/reverse-proxy/.helm/values.yaml @@ -7,7 +7,7 @@ image: containerPorts: http: 9000 grpc: 8000 - health: 9001 + health: 9090 service: type: ClusterIP annotations: {} @@ -15,7 +15,7 @@ service: ports: http: 9000 grpc: 8000 - health: 9001 + health: 9090 nodePorts: {} containerSecurityContext: enabled: true diff --git a/reverse-proxy/Caddyfile b/reverse-proxy/Caddyfile index 9bd20085..8c45eb06 100644 --- a/reverse-proxy/Caddyfile +++ b/reverse-proxy/Caddyfile @@ -4,7 +4,11 @@ @grafana-redirect `{env.GRAFANA_REDIRECT} == "true"` redir @grafana-redirect {$GRAFANA} 301 - reverse_proxy {$GRAFANA} + reverse_proxy {$GRAFANA} { + transport http { + tls_insecure_skip_verify + } + } } handle /api/* { @@ -103,9 +107,11 @@ # admission-controller reverse_proxy /api/v1/detector/admission-controller/* {args[0]}://admission-controller:9000 + reverse_proxy /api/v1/admission-controller/* {args[0]}://admission-controller:9000 # runtime-monitor reverse_proxy /api/v1/config/runtime-monitor {args[0]}://runtime-monitor:9000 + reverse_proxy /api/v1/config/runtime-monitor/* {args[0]}://runtime-monitor:9000 # event-processor reverse_proxy /api/v1/config/event-processor {args[0]}://event-processor:9000 @@ -158,6 +164,7 @@ { servers { protocols h1 h2c h2 + metrics } log default { level {$LOG_LEVEL:INFO} @@ -178,11 +185,12 @@ # cs routes :8000 :9000 { log - import TLS.{$TLS:false} + import TLS.{$TLS:true} } -# health api -:9001 { +# health api & metrics +:9090 { + metrics /metrics respond /health "Caddy is up" 200 { close } diff --git a/reverse-proxy/Dockerfile b/reverse-proxy/Dockerfile index c4d7d822..2e1a01ce 100644 --- a/reverse-proxy/Dockerfile +++ b/reverse-proxy/Dockerfile @@ -1,8 +1,10 @@ -FROM golang:1.25.0 AS tools +ARG GO_VERSION=1.26.0 + +FROM golang:${GO_VERSION} AS tools RUN CGO_ENABLED=0 GOBIN=/usr/bin go 
install github.com/go-task/task/v3/cmd/task@v3.38.0 -FROM golang:1.25.0 AS builder +FROM golang:${GO_VERSION} AS builder COPY --from=tools /usr/bin/task /usr/bin COPY . /app diff --git a/reverse-proxy/Taskfile.yml b/reverse-proxy/Taskfile.yml index 708828c0..e01b551a 100644 --- a/reverse-proxy/Taskfile.yml +++ b/reverse-proxy/Taskfile.yml @@ -36,7 +36,7 @@ tasks: docker-build: deps: [radar-ui:build] cmds: - - docker build --tag {{.DOCKER_IMAGE}} . + - docker build --iidfile .build --tag {{.DOCKER_IMAGE}} . docker-push: vars: From dcb316024ea64640ba03fafd57ec5b24cfb0259c Mon Sep 17 00:00:00 2001 From: Alexey Olshanskiy <234377865+avlllo@users.noreply.github.com> Date: Sun, 24 May 2026 01:12:39 +0300 Subject: [PATCH 03/17] feat: update chart to 0.2 --- .github/workflows/docker-build-push.yaml | 2 +- .github/workflows/golangci-lint.yaml | 2 +- .github/workflows/test-lib.yaml | 2 +- auth-center/.helm/templates/secret.yaml | 16 - auth-center/.helm/values.yaml | 18 +- cluster-manager/.helm/values.yaml | 19 +- cluster-manager/pkg/helm/values.go | 63 +- cluster-manager/pkg/helm/values_test.go | 38 +- .../pkg/service/cluster_generic.go | 6 +- cs-manager/.helm/values.yaml | 5 - event-processor/.helm/values.yaml | 3 - .../wasm/CS_RT_BIN_PERM_RAISE/main.go | 2 +- history-api/.helm/values.yaml | 5 - install/helm/Chart.yaml | 34 +- install/helm/README.md | 431 ++-- install/helm/charts/clickhouse/README.md | 28 +- .../charts/clickhouse/templates/_helpers.tpl | 2 + .../charts/clickhouse/templates/secret.yaml | 4 +- .../clickhouse/templates/statefulset.yaml | 26 +- .../helm/charts/clickhouse/values.schema.json | 35 +- install/helm/charts/clickhouse/values.yaml | 13 +- .../charts/common/templates/_annotations.tpl | 4 +- .../helm/charts/common/templates/_auth.tpl | 27 + .../helm/charts/common/templates/_labels.tpl | 6 +- .../helm/charts/common/templates/_names.tpl | 12 +- install/helm/charts/common/templates/_tls.tpl | 2 +- .../helm/charts/common/templates/cs/_auth.tpl | 73 + 
.../common/templates/cs/_container_app.tpl | 101 +- .../templates/cs/_container_gateway.tpl | 47 + .../common/templates/cs/_container_logger.tpl | 72 + .../common/templates/cs/_deployment.yaml | 26 +- .../charts/common/templates/cs/_images.tpl | 2 +- .../charts/common/templates/cs/_names.tpl | 88 +- .../helm/charts/common/templates/cs/_tls.tpl | 61 +- .../helm/charts/common/templates/cs/_util.tpl | 183 +- install/helm/charts/grafana/Chart.yaml | 10 + install/helm/charts/grafana/README.md | 146 ++ .../charts/grafana/templates/_helpers.tpl | 47 + .../grafana/templates/dashboard-provider.yaml | 38 + .../charts/grafana/templates/deployment.yaml | 209 ++ .../grafana/templates/networkpolicy.yaml | 41 + .../helm/charts/grafana/templates/pvc.yaml | 29 + .../helm/charts/grafana/templates/secret.yaml | 19 + .../charts/grafana/templates/service.yaml | 20 + .../grafana/templates/serviceaccount.yaml | 13 + .../charts/grafana/templates/tls-secret.yaml | 17 + .../helm/charts/grafana/values.schema.json | 670 ++++++ install/helm/charts/grafana/values.yaml | 372 +++ install/helm/charts/postgresql/README.md | 58 +- .../postgresql/templates/backup/cronjob.yaml | 10 +- .../charts/postgresql/templates/secret.yaml | 4 +- .../postgresql/templates/statefulset.yaml | 50 +- .../helm/charts/postgresql/values.schema.json | 95 +- install/helm/charts/postgresql/values.yaml | 48 +- install/helm/charts/prometheus/Chart.yaml | 11 + install/helm/charts/prometheus/README.md | 152 ++ .../charts/prometheus/templates/_helpers.tpl | 6 + .../prometheus/templates/clusterrole.yaml | 48 + .../templates/clusterrolebinding.yaml | 22 + .../prometheus/templates/configmap.yaml | 118 + .../prometheus/templates/deployment.yaml | 146 ++ .../prometheus/templates/networkpolicy.yaml | 40 + .../helm/charts/prometheus/templates/pvc.yaml | 29 + .../templates/service-headless.yaml | 21 + .../charts/prometheus/templates/service.yaml | 19 + .../prometheus/templates/serviceaccount.yaml | 13 + 
.../helm/charts/prometheus/values.schema.json | 702 ++++++ install/helm/charts/prometheus/values.yaml | 373 +++ install/helm/charts/rabbitmq/README.md | 16 +- .../rabbitmq/templates/networkpolicy.yaml | 2 - .../charts/rabbitmq/templates/secret.yaml | 4 +- .../rabbitmq/templates/statefulset.yaml | 22 +- .../helm/charts/rabbitmq/values.schema.json | 40 +- install/helm/charts/rabbitmq/values.yaml | 15 +- install/helm/charts/redis/README.md | 53 +- .../{statefulset.yaml => deployment.yaml} | 90 +- install/helm/charts/redis/templates/pvc.yaml | 29 + .../helm/charts/redis/templates/secret.yaml | 6 +- install/helm/charts/redis/values.schema.json | 75 +- install/helm/charts/redis/values.yaml | 38 +- install/helm/dashboards/cluster-status.json | 1041 +++++++++ install/helm/dashboards/go-app.json | 692 ++++++ install/helm/dashboards/grpc.json | 1198 ++++++++++ install/helm/dashboards/public-api.json | 829 +++++++ install/helm/dashboards/reverse-proxy.json | 1055 +++++++++ install/helm/dashboards/runtime.json | 2072 +++++++++++++++++ install/helm/dashboards/tetragon-runtime.json | 600 +++++ install/helm/templates/_validations.tpl | 12 +- install/helm/templates/configmap.yaml | 61 + .../helm/templates/grafana-datasources.yaml | 26 + install/helm/templates/metrics-configmap.yaml | 196 ++ install/helm/templates/secret.yaml | 56 +- install/helm/templates/tls-secret.yaml | 21 +- install/helm/values.schema.json | 276 ++- install/helm/values.yaml | 166 +- notifier/.helm/values.yaml | 5 - policy-enforcer/.helm/values.yaml | 5 - public-api/.helm/values.yaml | 11 +- reverse-proxy/.helm/values.yaml | 4 - runtime-monitor/.helm/values.yaml | 3 - 100 files changed, 12820 insertions(+), 953 deletions(-) delete mode 100644 auth-center/.helm/templates/secret.yaml create mode 100644 install/helm/charts/common/templates/_auth.tpl create mode 100644 install/helm/charts/common/templates/cs/_auth.tpl create mode 100644 install/helm/charts/common/templates/cs/_container_gateway.tpl create mode 
100644 install/helm/charts/common/templates/cs/_container_logger.tpl create mode 100644 install/helm/charts/grafana/Chart.yaml create mode 100644 install/helm/charts/grafana/README.md create mode 100644 install/helm/charts/grafana/templates/_helpers.tpl create mode 100644 install/helm/charts/grafana/templates/dashboard-provider.yaml create mode 100644 install/helm/charts/grafana/templates/deployment.yaml create mode 100644 install/helm/charts/grafana/templates/networkpolicy.yaml create mode 100644 install/helm/charts/grafana/templates/pvc.yaml create mode 100644 install/helm/charts/grafana/templates/secret.yaml create mode 100644 install/helm/charts/grafana/templates/service.yaml create mode 100644 install/helm/charts/grafana/templates/serviceaccount.yaml create mode 100644 install/helm/charts/grafana/templates/tls-secret.yaml create mode 100644 install/helm/charts/grafana/values.schema.json create mode 100644 install/helm/charts/grafana/values.yaml create mode 100644 install/helm/charts/prometheus/Chart.yaml create mode 100644 install/helm/charts/prometheus/README.md create mode 100644 install/helm/charts/prometheus/templates/_helpers.tpl create mode 100644 install/helm/charts/prometheus/templates/clusterrole.yaml create mode 100644 install/helm/charts/prometheus/templates/clusterrolebinding.yaml create mode 100644 install/helm/charts/prometheus/templates/configmap.yaml create mode 100644 install/helm/charts/prometheus/templates/deployment.yaml create mode 100644 install/helm/charts/prometheus/templates/networkpolicy.yaml create mode 100644 install/helm/charts/prometheus/templates/pvc.yaml create mode 100644 install/helm/charts/prometheus/templates/service-headless.yaml create mode 100644 install/helm/charts/prometheus/templates/service.yaml create mode 100644 install/helm/charts/prometheus/templates/serviceaccount.yaml create mode 100644 install/helm/charts/prometheus/values.schema.json create mode 100644 install/helm/charts/prometheus/values.yaml rename 
install/helm/charts/redis/templates/{statefulset.yaml => deployment.yaml} (62%) create mode 100644 install/helm/charts/redis/templates/pvc.yaml create mode 100644 install/helm/dashboards/cluster-status.json create mode 100644 install/helm/dashboards/go-app.json create mode 100644 install/helm/dashboards/grpc.json create mode 100644 install/helm/dashboards/public-api.json create mode 100644 install/helm/dashboards/reverse-proxy.json create mode 100644 install/helm/dashboards/runtime.json create mode 100644 install/helm/dashboards/tetragon-runtime.json create mode 100644 install/helm/templates/grafana-datasources.yaml create mode 100644 install/helm/templates/metrics-configmap.yaml diff --git a/.github/workflows/docker-build-push.yaml b/.github/workflows/docker-build-push.yaml index fa48b9ed..735da88c 100644 --- a/.github/workflows/docker-build-push.yaml +++ b/.github/workflows/docker-build-push.yaml @@ -10,7 +10,7 @@ env: TASK_VERSION: '3.38.0' REGISTRY: ghcr.io REPO: runtime-radar - GO_VERSION: '1.25' + GO_VERSION: '1.26' jobs: build-and-push: diff --git a/.github/workflows/golangci-lint.yaml b/.github/workflows/golangci-lint.yaml index d97c2228..fc14a34d 100644 --- a/.github/workflows/golangci-lint.yaml +++ b/.github/workflows/golangci-lint.yaml @@ -16,7 +16,7 @@ on: - '.golangci.yml' env: - GO_VERSION: '1.25' + GO_VERSION: '1.26' TASK_VERSION: '3.38.0' jobs: diff --git a/.github/workflows/test-lib.yaml b/.github/workflows/test-lib.yaml index 38eb4a50..25aa7560 100644 --- a/.github/workflows/test-lib.yaml +++ b/.github/workflows/test-lib.yaml @@ -11,7 +11,7 @@ on: - main env: - GO_VERSION: '1.25' + GO_VERSION: '1.26' TASK_VERSION: '3.38.0' jobs: diff --git a/auth-center/.helm/templates/secret.yaml b/auth-center/.helm/templates/secret.yaml deleted file mode 100644 index 959c0ffe..00000000 --- a/auth-center/.helm/templates/secret.yaml +++ /dev/null @@ -1,16 +0,0 @@ -{{- if ne (include "common.cs.isChildCluster" .) 
"true" }} -apiVersion: v1 -kind: Secret -metadata: - name: auth-center-account - labels: - {{- include "common.labels" . | nindent 4 }} - {{- with (include "common.annotations" .) }} - annotations: - {{- . | nindent 4 }} - {{- end }} -type: Opaque -data: - username: {{ required "administrator username is required!" .Values.administrator.username | b64enc | quote }} - password: {{ required "administrator password is required!" .Values.administrator.password | b64enc | quote }} -{{- end }} diff --git a/auth-center/.helm/values.yaml b/auth-center/.helm/values.yaml index 4f3c79c0..6df51d2d 100644 --- a/auth-center/.helm/values.yaml +++ b/auth-center/.helm/values.yaml @@ -13,10 +13,8 @@ service: http: 9000 grpc: 8000 administrator: - username: "" - password: "" + existingSecret: cs-account containerSecurityContext: - enabled: true seLinuxOptions: {} privileged: false allowPrivilegeEscalation: false @@ -24,21 +22,10 @@ containerSecurityContext: seccompProfile: type: "RuntimeDefault" podSecurityContext: - enabled: true fsGroupChangePolicy: Always sysctls: [] supplementalGroups: [] env: - - name: ADMINISTRATOR_USERNAME - valueFrom: - secretKeyRef: - name: auth-center-account - key: username - - name: ADMINISTRATOR_PASSWORD - valueFrom: - secretKeyRef: - name: auth-center-account - key: password postgresql: enabled: true serviceAccount: @@ -58,7 +45,6 @@ resources: memory: 128Mi ephemeral-storage: 1Mi livenessProbe: - enabled: true httpGet: path: /live scheme: HTTP @@ -67,7 +53,6 @@ livenessProbe: successThreshold: 1 failureThreshold: 2 readinessProbe: - enabled: true httpGet: path: /ready scheme: HTTP @@ -76,7 +61,6 @@ readinessProbe: successThreshold: 1 failureThreshold: 3 startupProbe: - enabled: true httpGet: path: /ready scheme: HTTP diff --git a/cluster-manager/.helm/values.yaml b/cluster-manager/.helm/values.yaml index 18ab9c5e..95b3dc02 100644 --- a/cluster-manager/.helm/values.yaml +++ b/cluster-manager/.helm/values.yaml @@ -13,7 +13,6 @@ service: grpc: 8000 http: 
9000 containerSecurityContext: - enabled: true seLinuxOptions: {} privileged: false allowPrivilegeEscalation: false @@ -21,26 +20,17 @@ containerSecurityContext: seccompProfile: type: "RuntimeDefault" podSecurityContext: - enabled: true fsGroupChangePolicy: Always sysctls: [] supplementalGroups: [] env: - - name: ADMINISTRATOR_USERNAME - valueFrom: - secretKeyRef: - name: auth-center-account - key: username - - name: ADMINISTRATOR_PASSWORD - valueFrom: - secretKeyRef: - name: auth-center-account - key: password - name: PUBLIC_ACCESS_TOKEN_SALT_KEY valueFrom: secretKeyRef: - name: cs-keys + name: '{{ include "common.cs.keys.secretName" . }}' key: publicAccessTokenSalt +administrator: + existingSecret: cs-account encryption: enabled: true postgresql: @@ -63,7 +53,6 @@ resources: {} # memory: 512Mi # ephemeral-storage: 1Gi livenessProbe: - enabled: true httpGet: path: /live scheme: HTTP @@ -72,7 +61,6 @@ livenessProbe: successThreshold: 1 failureThreshold: 2 readinessProbe: - enabled: true httpGet: path: /ready scheme: HTTP @@ -81,7 +69,6 @@ readinessProbe: successThreshold: 1 failureThreshold: 3 startupProbe: - enabled: true httpGet: path: /ready scheme: HTTP diff --git a/cluster-manager/pkg/helm/values.go b/cluster-manager/pkg/helm/values.go index fe27feac..254fa1eb 100644 --- a/cluster-manager/pkg/helm/values.go +++ b/cluster-manager/pkg/helm/values.go @@ -43,11 +43,17 @@ type Values struct { PublicAccessTokenSalt string `json:"publicAccessTokenSalt,omitempty"` } `json:"keys,omitzero"` - Postgresql TLSGlobal `json:"postgresql"` - Redis TLSGlobal `json:"redis"` - Clickhouse TLSGlobal `json:"clickhouse"` - Grafana TLSGlobal `json:"grafana,omitzero"` - Loki TLSGlobal `json:"loki,omitzero"` + Administrator struct { + Username string `json:"username,omitempty"` + Password string `json:"password,omitempty"` + } `json:"administrator,omitzero"` + + Postgresql AuthTLSGlobal `json:"postgresql"` + Redis AuthTLSGlobal `json:"redis"` + Rabbitmq AuthGlobal 
`json:"rabbitmq,omitzero"` + Clickhouse AuthTLSGlobal `json:"clickhouse"` + Grafana TLSGlobal `json:"grafana,omitzero"` + Loki TLSGlobal `json:"loki,omitzero"` } `json:"global,omitzero"` TLS struct { @@ -57,13 +63,6 @@ type Values struct { CertKey string `json:"certKey,omitempty"` } `json:"tls,omitzero"` - AuthAPI struct { - Administrator struct { - Username string `json:"username,omitempty"` - Password string `json:"password,omitempty"` - } `json:"administrator,omitzero"` - } `json:"auth-center,omitzero"` - ImagePullSecret struct { Username string `json:"username,omitempty"` Password string `json:"password,omitempty"` @@ -248,20 +247,40 @@ type TLSGlobal struct { } `json:"tls"` } -// buildHelmArgs recursively converts a value to Helm command-line arguments. -// It traverses the value and generates the appropriate --set or --set-string arguments -// based on the value's kind. +// AuthTLSGlobal is the global-section block for services that share both an +// existing auth secret name and TLS settings across consumer sub-charts +// (postgresql, redis, clickhouse). +type AuthTLSGlobal struct { + Auth struct { + ExistingSecret string `json:"existingSecret,omitempty"` + } `json:"auth,omitzero"` + TLS struct { + Enabled bool `json:"enabled"` + Verify bool `json:"verify"` + } `json:"tls"` +} + +// AuthGlobal is the global-section block for services that share only an +// existing auth secret name across consumer sub-charts (rabbitmq has no TLS +// knobs at the global level). +type AuthGlobal struct { + Auth struct { + ExistingSecret string `json:"existingSecret,omitempty"` + } `json:"auth,omitzero"` +} + +// buildHelmArgs recursively converts a struct's fields to Helm command-line arguments. +// It traverses the struct and generates the appropriate --set or --set-string arguments +// based on field types. 
// -// Each value is processed according to its kind: +// Each field is processed according to its kind: // - Strings use --set-string // - Bool, numeric types use --set -// - Arrays/Slices are expanded into per-element args using the `name[i]` form -// - Structs are processed recursively over their fields +// - Arrays/Slices are JSON marshaled and use --set +// - Structs are processed recursively // -// Struct fields with `json:"-"` are skipped. -// Struct fields with `json:",omitempty"` or `json:",omitzero"` are skipped if they -// contain zero values; this is communicated through the `hasOmit` argument when -// recursing. +// Fields with `json:"-"` are skipped. +// Fields with `json:",omitempty"` or `json:",omitzero"` are skipped if they contain zero values. // // Returns an error if JSON marshaling fails for array/slice fields. func buildHelmArgs(v any, prefix string, hasOmit bool) ([]string, error) { diff --git a/cluster-manager/pkg/helm/values_test.go b/cluster-manager/pkg/helm/values_test.go index 7e246b5c..12ade6f7 100644 --- a/cluster-manager/pkg/helm/values_test.go +++ b/cluster-manager/pkg/helm/values_test.go @@ -61,11 +61,11 @@ func TestBuildHelmArgs(t *testing.T) { Nested struct { Field1 string `json:"field1"` Field2 int `json:"field2"` - } `json:"nested"` + } `json:"nested,omitempty"` NestedEmpty struct { Field1 string `json:"field1"` Field2 int `json:"field2"` - } `json:"nestedEmpty,omitzero"` + } `json:"nestedEmpty,omitempty"` NestedZero struct { Field1 string `json:"field1"` Field2 int `json:"field2"` @@ -225,6 +225,10 @@ func generateValues() Values { values.Global.ImageShortNames = false values.Global.Keys.Encryption = "encryption-key" values.Global.Keys.Token = "token-key" + values.Global.Postgresql.Auth.ExistingSecret = "pg-secret" + values.Global.Redis.Auth.ExistingSecret = "redis-secret" + values.Global.Rabbitmq.Auth.ExistingSecret = "rabbitmq-secret" + values.Global.Clickhouse.Auth.ExistingSecret = "ch-secret" values.TLS.CertCA = 
"ca-cert-data" values.TLS.Cert = "cert-data" @@ -275,9 +279,9 @@ func generateValues() Values { // CS Manager values.CSManager.RegistrationToken = "registration-token" - // AuthAPI - values.AuthAPI.Administrator.Username = "user" - values.AuthAPI.Administrator.Password = "pass" + // Administrator + values.Global.Administrator.Username = "user" + values.Global.Administrator.Password = "pass" return values } @@ -311,6 +315,7 @@ func TestValuesToHelmArgs(t *testing.T) { "--set-string 'postgresql.auth.username=postgres'", "--set-string 'postgresql.auth.password=postgres-password'", "--set 'postgresql.persistence.enabled=true'", + "--set-string 'global.postgresql.auth.existingSecret=pg-secret'", "--set 'global.postgresql.tls.enabled=true'", "--set 'global.postgresql.tls.verify=true'", "--set-string 'postgresql.persistence.storageClass=standard-1'", @@ -318,8 +323,11 @@ func TestValuesToHelmArgs(t *testing.T) { "--set-string 'redis.auth.username=redis'", "--set-string 'redis.auth.password=redis-password'", "--set 'redis.persistence.enabled=false'", + "--set-string 'global.redis.auth.existingSecret=redis-secret'", "--set 'global.redis.tls.enabled=false'", "--set 'global.redis.tls.verify=false'", + "--set-string 'global.rabbitmq.auth.existingSecret=rabbitmq-secret'", + "--set-string 'global.clickhouse.auth.existingSecret=ch-secret'", "--set 'metrics.enabled=false'", "--set 'rabbitmq.deploy=true'", "--set-string 'rabbitmq.auth.username=rabbitmq'", @@ -341,8 +349,8 @@ func TestValuesToHelmArgs(t *testing.T) { "--set-string 'notifier.env[1].name=HTTPS_PROXY'", "--set-string 'notifier.env[1].value=https://proxy.local'", "--set-string 'cs-manager.registrationToken=registration-token'", - "--set-string 'auth-center.administrator.username=user'", - "--set-string 'auth-center.administrator.password=pass'", + "--set-string 'global.administrator.username=user'", + "--set-string 'global.administrator.password=pass'", } if diff := cmp.Diff(args, expectedArgs, @@ -367,10 +375,6 @@ func 
TestValuesToYaml(t *testing.T) { lines := strings.Split(yaml, "\n") expectedLines := []string{ - "auth-center:", - " administrator:", - " password: pass", - " username: user", "clickhouse:", " deploy: false", " externalHost: clickhouse.local", @@ -379,8 +383,13 @@ func TestValuesToYaml(t *testing.T) { "cs-manager:", " registrationToken: registration-token", "global:", + " administrator:", + " password: pass", + " username: user", " centralCsUrl: https://central-cs.local", " clickhouse:", + " auth:", + " existingSecret: ch-secret", " tls:", " enabled: false", " verify: false", @@ -392,10 +401,17 @@ func TestValuesToYaml(t *testing.T) { " token: token-key", " ownCsUrl: https://cs.local", " postgresql:", + " auth:", + " existingSecret: pg-secret", " tls:", " enabled: true", " verify: true", + " rabbitmq:", + " auth:", + " existingSecret: rabbitmq-secret", " redis:", + " auth:", + " existingSecret: redis-secret", " tls:", " enabled: false", " verify: false", diff --git a/cluster-manager/pkg/service/cluster_generic.go b/cluster-manager/pkg/service/cluster_generic.go index 3aeaf5e8..fa073116 100644 --- a/cluster-manager/pkg/service/cluster_generic.go +++ b/cluster-manager/pkg/service/cluster_generic.go @@ -519,9 +519,9 @@ func (cg *ClusterGeneric) buildValues(cfg *model.ClusterConfig, token string) *h } } - // AuthAPI - v.AuthAPI.Administrator.Username = cg.AdministratorUsername - v.AuthAPI.Administrator.Password = cg.AdministratorPassword + // Administrator + v.Global.Administrator.Username = cg.AdministratorUsername + v.Global.Administrator.Password = cg.AdministratorPassword return &v } diff --git a/cs-manager/.helm/values.yaml b/cs-manager/.helm/values.yaml index b35cdcbe..6da1c7de 100644 --- a/cs-manager/.helm/values.yaml +++ b/cs-manager/.helm/values.yaml @@ -17,7 +17,6 @@ ownCsUrl: "" centralCsUrl: "" registrationToken: "" containerSecurityContext: - enabled: true seLinuxOptions: {} privileged: false allowPrivilegeEscalation: false @@ -25,7 +24,6 @@ 
containerSecurityContext: seccompProfile: type: "RuntimeDefault" podSecurityContext: - enabled: true fsGroupChangePolicy: Always sysctls: [] supplementalGroups: [] @@ -53,7 +51,6 @@ resources: memory: 512Mi ephemeral-storage: 1Gi livenessProbe: - enabled: true httpGet: path: /live scheme: HTTP @@ -62,7 +59,6 @@ livenessProbe: successThreshold: 1 failureThreshold: 2 readinessProbe: - enabled: true httpGet: path: /ready scheme: HTTP @@ -71,7 +67,6 @@ readinessProbe: successThreshold: 1 failureThreshold: 3 startupProbe: - enabled: true httpGet: path: /ready scheme: HTTP diff --git a/event-processor/.helm/values.yaml b/event-processor/.helm/values.yaml index 3f9b6f78..3b5795cb 100644 --- a/event-processor/.helm/values.yaml +++ b/event-processor/.helm/values.yaml @@ -45,7 +45,6 @@ resources: memory: 512Mi ephemeral-storage: 1Mi livenessProbe: - enabled: true httpGet: path: /live scheme: HTTP @@ -54,7 +53,6 @@ livenessProbe: successThreshold: 1 failureThreshold: 2 readinessProbe: - enabled: true httpGet: path: /ready scheme: HTTP @@ -63,7 +61,6 @@ readinessProbe: successThreshold: 1 failureThreshold: 3 startupProbe: - enabled: true httpGet: path: /ready scheme: HTTP diff --git a/event-processor/detector/wasm/CS_RT_BIN_PERM_RAISE/main.go b/event-processor/detector/wasm/CS_RT_BIN_PERM_RAISE/main.go index 90f1f348..7aa60373 100644 --- a/event-processor/detector/wasm/CS_RT_BIN_PERM_RAISE/main.go +++ b/event-processor/detector/wasm/CS_RT_BIN_PERM_RAISE/main.go @@ -415,7 +415,7 @@ func (d Detector) Detect(ctx context.Context, req *api.DetectReq) (*api.DetectRe }, "ancestors": [] }, - "node_name": "ptcs-master-node", + "node_name": "cs-master-node", "time": "2024-11-13T10:15:07.462452981Z", "aggregation_info": null } diff --git a/history-api/.helm/values.yaml b/history-api/.helm/values.yaml index c4794440..c4bcfd11 100644 --- a/history-api/.helm/values.yaml +++ b/history-api/.helm/values.yaml @@ -13,7 +13,6 @@ service: grpc: 8000 http: 9000 containerSecurityContext: - enabled: 
true seLinuxOptions: {} privileged: false allowPrivilegeEscalation: false @@ -21,7 +20,6 @@ containerSecurityContext: seccompProfile: type: "RuntimeDefault" podSecurityContext: - enabled: true fsGroupChangePolicy: Always sysctls: [] supplementalGroups: [] @@ -58,7 +56,6 @@ resources: memory: 128Mi ephemeral-storage: 1Mi livenessProbe: - enabled: true httpGet: path: /live scheme: HTTP @@ -67,7 +64,6 @@ livenessProbe: successThreshold: 1 failureThreshold: 2 readinessProbe: - enabled: true httpGet: path: /ready scheme: HTTP @@ -76,7 +72,6 @@ readinessProbe: successThreshold: 1 failureThreshold: 3 startupProbe: - enabled: true httpGet: path: /ready scheme: HTTP diff --git a/install/helm/Chart.yaml b/install/helm/Chart.yaml index cc5f2fe6..8646376a 100644 --- a/install/helm/Chart.yaml +++ b/install/helm/Chart.yaml @@ -2,46 +2,46 @@ apiVersion: v2 name: runtime-radar description: A Helm chart for Kubernetes type: application -version: 'v0.1.0' +version: 'v0.2.0' dependencies: - name: auth-center - version: v0.x.x + version: 0.x.x repository: file://../../auth-center/.helm tags: [cs-app, auth-center] - name: cluster-manager - version: v0.x.x + version: 0.x.x repository: file://../../cluster-manager/.helm tags: [cs-app, cluster-manager] - name: event-processor - version: v0.x.x + version: 0.x.x repository: file://../../event-processor/.helm tags: [cs-app, event-processor, runtime] - name: history-api - version: v0.x.x + version: 0.x.x repository: file://../../history-api/.helm tags: [cs-app, history-api] - name: notifier - version: v0.x.x + version: 0.x.x repository: file://../../notifier/.helm tags: [cs-app, notifier] - name: policy-enforcer - version: v0.x.x + version: 0.x.x repository: file://../../policy-enforcer/.helm tags: [cs-app, policy-enforcer] - name: cs-manager - version: v0.x.x + version: 0.x.x repository: file://../../cs-manager/.helm tags: [cs-app, cs-manager] - name: reverse-proxy - version: v0.x.x + version: 0.x.x repository: 
file://../../reverse-proxy/.helm tags: [cs-app, reverse-proxy] - name: runtime-monitor - version: v0.x.x + version: 0.x.x repository: file://../../runtime-monitor/.helm tags: [cs-app, runtime-monitor, 3rd-party, runtime] - name: public-api - version: v0.x.x + version: 0.x.x repository: file://../../public-api/.helm tags: [cs-app, public-api] - name: postgresql @@ -60,16 +60,24 @@ dependencies: version: 0.x.x condition: clickhouse.deploy tags: [3rd-party, clickhouse] + - name: grafana + version: 0.x.x + condition: grafana.deploy + tags: [3rd-party, grafana, monitoring] + - name: prometheus + version: 0.x.x + condition: prometheus.deploy + tags: [3rd-party, prometheus, monitoring] - name: common version: 0.x.x tags: [cs-lib] annotations: - runtime-monitor.tetragon: quay.io/cilium/tetragon:v1.3.0 + runtime-monitor.tetragon: quay.io/cilium/tetragon:v1.5.0 postgresql: postgres:17.5 postgresql.metrics: quay.io/prometheuscommunity/postgres-exporter:v0.17.1 redis: redis:8.0.2 rabbitmq: rabbitmq:4.1.1-management clickhouse: clickhouse/clickhouse-server:25.9.4 grafana: grafana/grafana:12.0.2 - prometheus.server: prom/prometheus:v3.4.1 + prometheus: prom/prometheus:v3.4.1 diff --git a/install/helm/README.md b/install/helm/README.md index f300cf6a..af1bd8fb 100644 --- a/install/helm/README.md +++ b/install/helm/README.md @@ -58,8 +58,8 @@ helm install runtime-radar -n runtime-radar ./install/helm \ --set global.keys.encryption= \ --set global.keys.publicAccessTokenSalt= \ --set global.ownCsUrl=https://your-domain.com \ - --set auth-center.administrator.username=admin \ - --set auth-center.administrator.password= \ + --set global.administrator.username=admin \ + --set global.administrator.password= \ --create-namespace ``` @@ -73,20 +73,26 @@ To use external databases instead of deploying them in the cluster: helm install runtime-radar -n runtime-radar ./install/helm \ --set postgresql.deploy=false \ --set postgresql.externalHost=postgres.example.com \ + --set 
global.postgresql.auth.existingSecret=my-postgresql-secret \ --set redis.deploy=false \ --set redis.externalHost=redis.example.com \ + --set global.redis.auth.existingSecret=my-redis-secret \ --set rabbitmq.deploy=false \ --set rabbitmq.externalHost=rabbitmq.example.com \ + --set global.rabbitmq.auth.existingSecret=my-rabbitmq-secret \ --set clickhouse.deploy=false \ --set clickhouse.externalHost=clickhouse.example.com \ + --set global.clickhouse.auth.existingSecret=my-clickhouse-secret \ --set global.keys.encryption= \ --set global.keys.publicAccessTokenSalt= \ --set global.ownCsUrl=https://your-domain.com \ - --set auth-center.administrator.username=admin \ - --set auth-center.administrator.password= \ + --set global.administrator.username=admin \ + --set global.administrator.password= \ --create-namespace ``` +Each `global.<service>.auth.existingSecret` must point to an operator-managed Secret carrying AUTH credentials only (`<PREFIX>_USER`, `<PREFIX>_PASSWORD`, and `<PREFIX>_DB` for postgresql/clickhouse). Connection metadata (`<PREFIX>_ADDR`/`<PREFIX>_SSL_*`/`<PREFIX>_TLS_*`) is supplied by the chart-owned `cs-<service>-config` ConfigMaps and must not be placed in the Secret. 
+ ### Installing with Custom Values File Create a `custom-values.yaml` file: @@ -97,11 +103,11 @@ global: encryption: "" publicAccessTokenSalt: "" ownCsUrl: "https://your-domain.com" - -auth-center: administrator: username: admin password: "" + +auth-center: replicas: 2 reverse-proxy: @@ -142,6 +148,20 @@ To upgrade an existing installation: helm upgrade runtime-radar -n runtime-radar ./install/helm -f custom-values.yaml ``` +### Breaking changes + +The following breaking changes apply when upgrading from earlier versions: + +- **Administrator credentials moved to `global.administrator.*`.** The chart values `auth-api.administrator.username` / `auth-api.administrator.password` / `auth-api.administrator.existingSecret` (the last of which was a never-wired no-op) and the never-wired `auth-center.administrator.*` have been replaced by `global.administrator.username` / `global.administrator.password` / `global.administrator.existingSecret`. There is no auto-migration shim — update your environment overlays and `--set` flags before upgrading, or `helm upgrade` will fail with a `required` validation error. +- **Single `cs-account` Secret replaces `auth-api-account` and `auth-center-account`.** The chart now creates one umbrella-owned Secret named `cs-account` (or uses the operator-supplied Secret named in `global.administrator.existingSecret`). After a `helm upgrade`, the previously created `auth-api-account` and `auth-center-account` Secrets become orphans — they are no longer referenced by any workload and can be deleted safely: + + ```bash + kubectl delete secret auth-api-account auth-center-account -n runtime-radar --ignore-not-found + ``` +- **Connection metadata moved from Secrets to ConfigMaps.** The chart-owned Secrets `postgresql`/`redis`/`rabbitmq`/`clickhouse` no longer carry `_ADDR`/`_SSL_*`/`_TLS_*` keys — those moved to new ConfigMaps `cs-postgresql-config`/`cs-redis-config`/`cs-rabbitmq-config`/`cs-clickhouse-config`. 
Operators supplying their own auth Secret via `<service>.auth.existingSecret` or the new `global.<service>.auth.existingSecret` must scope it to AUTH credentials only (`<PREFIX>_USER`, `<PREFIX>_PASSWORD`, `<PREFIX>_DB` where applicable). Including ADDR/SSL keys in the Secret no longer has any effect; consumers read those from the ConfigMaps. +- **Cross-subchart secret-name propagation requires `global.<service>.auth.existingSecret`.** Setting only the top-level `postgresql.auth.existingSecret` propagates to the sub-chart but NOT to consumer deployments (auth-api, history-api, etc.). To make consumer pods read from your external Secret, use `global.postgresql.auth.existingSecret` (and the corresponding global knobs for redis/rabbitmq/clickhouse). For external services where the subchart isn't deployed (`<service>.deploy=false`), only the global knob is needed. +- **`global.keys.existingSecret` now honored by every consumer.** `cluster-manager` and `public-api` previously hardcoded `cs-keys` for `PUBLIC_ACCESS_TOKEN_SALT_KEY` / `ACCESS_TOKEN_SALT` and silently ignored `global.keys.existingSecret`. They now resolve the keys-secret name through the `common.cs.keys.secretName` helper. Operators using a custom `global.keys.existingSecret` must ensure the Secret contains the `publicAccessTokenSalt` key (in addition to `encryption` and `token`). 
+ ## Uninstalling To uninstall/delete the `runtime-radar` deployment: @@ -200,8 +220,8 @@ helm install runtime-radar -n runtime-radar ./install/helm \ --set global.keys.encryption=INIT-DO-NOT-USE \ --set global.keys.publicAccessTokenSalt=INIT-DO-NOT-USE \ --set global.ownCsUrl=https://your-domain.com \ - --set auth-center.administrator.username=admin \ - --set auth-center.administrator.password= \ + --set global.administrator.username=admin \ + --set global.administrator.password= \ --create-namespace ``` @@ -338,25 +358,130 @@ public-api: replicas: 3 ``` -### Monitoring and Metrics +### Monitoring (Metrics, Prometheus & Grafana) + +Runtime Radar ships an optional, self-contained observability stack. It has three layers that work together: + +1. **Metrics exposure** — make components expose Prometheus-format `/metrics`. +2. **Prometheus** — scrape and store those metrics. +3. **Grafana** — visualize them with the bundled dashboards. + +Each layer can be enabled independently and can also point at infrastructure you already run. -Enable metrics collection: +#### Quick start: full in-cluster stack + +To deploy the complete bundled stack (metrics + Prometheus + Grafana), set: ```yaml +# 1. Expose application + infrastructure metrics metrics: enabled: true - postgresql: metrics: enabled: true - rabbitmq: metrics: enabled: true - clickhouse: metrics: enabled: true + +# 2. Deploy Prometheus to scrape them +prometheus: + deploy: true + +# 3. Deploy Grafana to visualize them +grafana: + deploy: true + auth: + username: admin + password: "" +``` + +Once running, Grafana is exposed through the reverse-proxy at `https://<your-domain>/grafana` (see [Accessing Grafana](#accessing-grafana)). 
+ +> **How the wiring works:** the umbrella chart auto-generates a `cs-metrics` ConfigMap (Prometheus scrape config) **only when Prometheus is deployed and at least one metrics flag is enabled**, and a `grafana-datasources` Secret that points Grafana's `Prometheus` datasource at the in-cluster (or external) Prometheus. You do not need to write scrape configs or datasources by hand. + +#### Enabling metrics + +Metrics are **off by default**. Turning a flag on only makes a component *expose* metrics — something still has to scrape them (an in-cluster or external Prometheus). + +| Flag | What it does | +| ---------------------------- | ------------------------------------------------------------------------------------------------------------------------ | +| `metrics.enabled` | Runtime Radar application services expose `/metrics`. Also wires the web UI's "open in Grafana" links (`GRAFANA_URL`). | +| `global.metrics.enabled` | Same as above; useful when sharing one value across child clusters. | +| `postgresql.metrics.enabled` | Starts a PostgreSQL Prometheus exporter (scraped on port `9187`). | +| `rabbitmq.metrics.enabled` | Exposes RabbitMQ metrics (scraped on port `9419`). | +| `clickhouse.metrics.enabled` | Exposes ClickHouse metrics (scraped on port `8001`). | + +For an **external** datastore (`.deploy=false`), set `.metrics.externalHost` so Prometheus scrapes the right target, e.g.: + +```yaml +postgresql: + deploy: false + externalHost: postgres.example.com + metrics: + enabled: true + externalHost: postgres-exporter.example.com:9187 +``` + +The generated scrape config also collects Tetragon runtime metrics (`runtime-monitor`, port `2112`) and reverse-proxy/Caddy metrics (port `9090`) automatically. 
+ +#### Prometheus + +```yaml +prometheus: + deploy: true + replicaCount: 1 + persistence: + enabled: true + size: 20Gi + storageClass: fast-ssd +``` + +- The scrape configuration is read from the ConfigMap named in `prometheus.scrapeConfigmap` (default `cs-metrics`), which the chart renders for you from the metrics flags above. Enabling Prometheus without any metrics flag produces an empty scrape config and nothing to graph. +- Adjust retention/storage with `prometheus.persistence.*`. + +**Use an existing Prometheus** instead of deploying one — leave `prometheus.deploy=false` and point the stack at it. Grafana's datasource is configured from this value: + +```yaml +prometheus: + deploy: false + externalHost: https://prometheus.monitoring.svc.cluster.local:9090 +``` + +#### Grafana + +```yaml +grafana: + deploy: true + auth: + username: admin + password: "" + persistence: + enabled: true + size: 5Gi +``` + +- **Dashboards** — the chart packs every dashboard under `install/helm/dashboards/*.json` (runtime, Tetragon runtime, gRPC, Go app, cluster status, public-api, reverse-proxy) into the `cs-dashboards` ConfigMap and loads them automatically through the dashboard provider (`grafana.dashboardsProvider.enabled`, on by default). Add your own by listing extra ConfigMaps in `grafana.dashboardsConfigMaps`. +- **Datasource** — the `Prometheus` datasource is wired automatically to the deployed (or external) Prometheus; no manual setup needed. + +##### Accessing Grafana + +By default `grafana.subPath: grafana` serves Grafana behind the reverse-proxy, so it is reachable at: + +``` +https://<your-domain>/grafana +``` + +The Runtime Radar web UI links here directly when `metrics.enabled` is set. + +**Use an existing Grafana** instead — leave `grafana.deploy=false` and set `grafana.externalHost`. 
The UI's Grafana links then redirect to that host: + +```yaml +grafana: + deploy: false + externalHost: https://grafana.example.com ``` ### Node Affinity @@ -381,34 +506,41 @@ clickhouse: ### Global parameters -| Name | Description | Value | -| -------------------------------------- | --------------------------------------------------------------------------------------------------- | ------------- | -| `global.imageRegistry` | Global Docker image registry | `""` | -| `global.imageTag` | Global Docker image tag to use for RR components | `""` | -| `global.logLevel` | Logging level for components | `INFO` | -| `global.tls.enabled` | Enable TLS for RR components | `true` | -| `global.tls.existingSecret` | Name of the existing secret with TLS certificates | `""` | -| `global.auth.enabled` | Enable authentication for RR components | `true` | -| `global.keys.existingSecret` | Existing secret name with keys `encryption` and `token` | `""` | -| `global.keys.encryption` | Encryption key for secrets stored in database. Must be a 64-character hexadecimal string (32 bytes) | `""` | -| `global.keys.token` | Encryption key for authentication tokens. Must be a 64-character hexadecimal string (32 bytes) | `""` | -| `global.keys.publicAccessTokenSalt` | Salt for public API tokens. 
Must be a 128-character hexadecimal string (64 bytes) | `""` | -| `global.postgresql.tls.enabled` | Enable TLS traffic support (overrides `tls.enabled`) | `true` | -| `global.postgresql.tls.verify` | Verify TLS connection to the service (overrides `tls.verify`) | `true` | -| `global.postgresql.tls.existingSecret` | Name of an existing secret that contains the certificates (overrides `tls.existingSecret`) | `""` | -| `global.redis.tls.enabled` | Enable TLS traffic support (overrides `tls.enabled`) | `true` | -| `global.redis.tls.verify` | Verify TLS connection to the service (overrides `tls.verify`) | `true` | -| `global.redis.tls.existingSecret` | Name of an existing secret that contains the certificates (overrides `tls.existingSecret`) | `""` | -| `global.clickhouse.tls.enabled` | Enable TLS traffic support (overrides `tls.enabled`) | `true` | -| `global.clickhouse.tls.verify` | Verify TLS connection to the service (overrides `tls.verify`) | `true` | -| `global.clickhouse.tls.existingSecret` | Name of an existing secret that contains the certificates (overrides `tls.existingSecret`) | `""` | -| `global.grafana.tls.enabled` | Enable TLS traffic support (overrides `tls.enabled`) | `true` | -| `global.grafana.tls.verify` | Verify TLS connection to the service (overrides `tls.verify`) | `true` | -| `global.grafana.tls.existingSecret` | Name of an existing secret that contains the certificates (overrides `tls.existingSecret`) | `""` | -| `global.imagePullSecrets` | Names of the secrets of the global container registry as an array | `["regcred"]` | -| `global.ownCsUrl` | URL of primary installation | `""` | -| `global.centralCsUrl` | URL of primary installation | `""` | -| `global.isChildCluster` | Is this a child cluster | `false` | +| Name | Description | Value | +| --------------------------------------- | 
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------- | +| `global.imageRegistry` | Global Docker image registry | `""` | +| `global.imageTag` | Global Docker image tag to use for RR components | `""` | +| `global.logLevel` | Logging level for components | `INFO` | +| `global.tls.enabled` | Enable TLS for RR components | `true` | +| `global.tls.existingSecret` | Name of the existing secret with TLS certificates | `""` | +| `global.auth.enabled` | Enable authentication for RR components | `true` | +| `global.keys.existingSecret` | Existing secret name with keys `encryption`, `token`, and `publicAccessTokenSalt` | `""` | +| `global.keys.encryption` | Encryption key for secrets stored in database. Must be a 64-character hexadecimal string (32 bytes) | `""` | +| `global.keys.token` | Encryption key for authentication tokens. Must be a 64-character hexadecimal string (32 bytes) | `""` | +| `global.keys.publicAccessTokenSalt` | Salt for public API tokens. Must be a 128-character hexadecimal string (64 bytes) | `""` | +| `global.administrator.existingSecret` | Name of an existing secret with administrator credentials (must contain keys `username` and `password`). When empty, the chart creates a Secret named `cs-account` from `username` and `password` below. | `""` | +| `global.administrator.username` | Administrator username. Ignored when `existingSecret` is set. | `""` | +| `global.administrator.password` | Administrator password. Ignored when `existingSecret` is set. 
| `""` | +| `global.postgresql.auth.existingSecret` | Name of an existing secret with PostgreSQL auth credentials (must contain `POSTGRES_USER`, `POSTGRES_PASSWORD`, `POSTGRES_DB`). When empty, the chart creates a Secret named `postgresql`. NOTE: when `postgresql.deploy=true` (default), you must also set `postgresql.auth.existingSecret` to the same value so the sub-chart reads from it. For external PostgreSQL, prefer `postgresql.deploy=false`. | `""` | +| `global.postgresql.tls.enabled` | Enable TLS traffic support (overrides `tls.enabled`) | `true` | +| `global.postgresql.tls.verify` | Verify TLS connection to the service (overrides `tls.verify`) | `true` | +| `global.postgresql.tls.existingSecret` | Name of an existing secret that contains the certificates (overrides `tls.existingSecret`) | `""` | +| `global.redis.auth.existingSecret` | Name of an existing secret with Redis auth credentials (must contain `REDIS_USER`, `REDIS_PASSWORD`). When empty, the chart creates a Secret named `redis`. NOTE: when `redis.deploy=true` (default), you must also set `redis.auth.existingSecret` to the same value so the sub-chart reads from it. | `""` | +| `global.redis.tls.enabled` | Enable TLS traffic support (overrides `tls.enabled`) | `true` | +| `global.redis.tls.verify` | Verify TLS connection to the service (overrides `tls.verify`) | `true` | +| `global.redis.tls.existingSecret` | Name of an existing secret that contains the certificates (overrides `tls.existingSecret`) | `""` | +| `global.rabbitmq.auth.existingSecret` | Name of an existing secret with RabbitMQ auth credentials (must contain `RABBIT_USER`, `RABBIT_PASSWORD`). When empty, the chart creates a Secret named `rabbitmq`. NOTE: when `rabbitmq.deploy=true` (default), you must also set `rabbitmq.auth.existingSecret` to the same value so the sub-chart reads from it. 
| `""` | +| `global.clickhouse.auth.existingSecret` | Name of an existing secret with ClickHouse auth credentials (must contain `CLICKHOUSE_USER`, `CLICKHOUSE_PASSWORD`, `CLICKHOUSE_DB`). When empty, the chart creates a Secret named `clickhouse`. NOTE: when `clickhouse.deploy=true` (default), you must also set `clickhouse.auth.existingSecret` to the same value so the sub-chart reads from it. For external ClickHouse, prefer `clickhouse.deploy=false`. | `""` | +| `global.clickhouse.tls.enabled` | Enable TLS traffic support (overrides `tls.enabled`) | `true` | +| `global.clickhouse.tls.verify` | Verify TLS connection to the service (overrides `tls.verify`) | `true` | +| `global.clickhouse.tls.existingSecret` | Name of an existing secret that contains the certificates (overrides `tls.existingSecret`) | `""` | +| `global.grafana.tls.enabled` | Enable TLS traffic support (overrides `tls.enabled`) | `true` | +| `global.grafana.tls.verify` | Verify TLS connection to the service (overrides `tls.verify`) | `true` | +| `global.grafana.tls.existingSecret` | Name of an existing secret that contains the certificates (overrides `tls.existingSecret`) | `""` | +| `global.imagePullSecrets` | Names of the secrets of the global container registry as an array | `["regcred"]` | +| `global.ownCsUrl` | URL of primary installation | `""` | +| `global.centralCsUrl` | URL of primary installation | `""` | +| `global.isChildCluster` | Is this a child cluster | `false` | ### Common RR parameters @@ -425,16 +557,14 @@ clickhouse: | `tls.cert` | TLS certificate | `""` | | `tls.certKey` | TLS certificate key | `""` | | `tls.certCA` | TLS certificate CA | `""` | +| `metrics.enabled` | Enable CS metrics | `false` | ### Auth-center component parameters -| Name | Description | Value | -| ------------------------------------------ | ---------------------------------------------------------- | ----- | -| `auth-center.nodeSelector` | Template to specify the labels of nodes for pod assignment | `{}` | -| 
`auth-center.replicas` | Number of replicas for the auth-center component | `2` | -| `auth-center.administrator.existingSecret` | Name of the existing secret with administrator credentials | `""` | -| `auth-center.administrator.username` | Administrator name | `""` | -| `auth-center.administrator.password` | Administrator password | `""` | +| Name | Description | Value | +| -------------------------- | ---------------------------------------------------------- | ----- | +| `auth-center.nodeSelector` | Template to specify the labels of nodes for pod assignment | `{}` | +| `auth-center.replicas` | Number of replicas for the auth-center component | `2` | ### Policy-enforcer component parameters @@ -530,94 +660,135 @@ clickhouse: ### Postgresql installation configuration -| Name | Description | Value | -| ------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------- | -| `postgresql.externalHost` | External host with PostgreSQL. Requires setting `postgresql.deploy` to `false`. | `""` | -| `postgresql.fullnameOverride` | String to fully override common.names.fullname template | `postgresql` | -| `postgresql.tls.autoGenerated` | Generate automatically self-signed TLS certificates if nothing is provided | `true` | -| `postgresql.tls.cert` | Certificate value | `""` | -| `postgresql.tls.certKey` | Certificate key value | `""` | -| `postgresql.tls.certCA` | CA Certificate value | `""` | -| `postgresql.auth.existingSecret` | Name of the existing secret with PostgreSQL credentials. The `auth.postgresPassword`, `auth.password`, and `auth.replicationPassword` values will be ignored and taken from this secret. 
The secret might also contain the `ldap-password` key if LDAP is enabled. If so, the `ldap.bind_password` value will be ignored and taken from this secret. | `postgresql` | -| `postgresql.auth.username` | Name of custom user to be created | `runtime-radar` | -| `postgresql.auth.password` | Password of custom user to be created. Ignored if `auth.existingSecret` is set. | `""` | -| `postgresql.auth.database` | Name of custom database to be created | `runtime-radar` | -| `postgresql.auth.existingSecretPasswordKey` | Name of the key in the existing secret with PostgreSQL credentials. Only used if `auth.existingSecret` is set. | `POSTGRES_PASSWORD` | -| `postgresql.nodeSelector` | Labels of nodes for primary PostgreSQL pod assignment | `{}` | -| `postgresql.resources` | Resource configuration for PostgreSQL container | `{}` | -| `postgresql.persistence.enabled` | Enable data persistence for primary PostgreSQL using PVC | `true` | -| `postgresql.persistence.storageClass` | Persistent volume storage class for primary PostgreSQL | `""` | -| `postgresql.persistence.size` | Persistent volume size for PostgreSQL | `1Gi` | -| `postgresql.persistence.existingClaim` | Name of an existing PVC | `""` | -| `postgresql.persistence.selector` | Template to specify an existing persistent volume | `{}` | -| `postgresql.metrics.enabled` | Start a prometheus exporter | `false` | -| `postgresql.metrics.externalHost` | PostgreSQL metrics external host | `""` | +| Name | Description | Value | +| ------------------------------------------- | 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------- | +| `postgresql.deploy` | Deploy component | `true` | +| `postgresql.externalHost` | External host with PostgreSQL. Requires setting `postgresql.deploy` to `false`. | `""` | +| `postgresql.fullnameOverride` | String to fully override common.names.fullname template | `postgresql` | +| `postgresql.tls.autoGenerated` | Generate automatically self-signed TLS certificates if nothing is provided | `true` | +| `postgresql.tls.cert` | Certificate value | `""` | +| `postgresql.tls.certKey` | Certificate key value | `""` | +| `postgresql.tls.certCA` | CA Certificate value | `""` | +| `postgresql.auth.existingSecret` | Name of an existing secret with PostgreSQL credentials, read by the sub-chart when `postgresql.deploy=true`. The `auth.postgresPassword`, `auth.password`, and `auth.replicationPassword` values will be ignored and taken from this secret. The secret must carry AUTH credentials only — `POSTGRES_USER`, `POSTGRES_PASSWORD`, `POSTGRES_DB` (connection metadata `POSTGRES_ADDR`/`POSTGRES_SSL_*` lives in the always-created `cs-postgresql-config` ConfigMap and must not be placed in this Secret). 
NOTE: To make the same secret visible to all consumer services (auth-api, history-api, etc.), set `global.postgresql.auth.existingSecret` to the same value. The secret might also contain the `ldap-password` key if LDAP is enabled. | `postgresql` | +| `postgresql.auth.username` | Name of custom user to be created | `runtime-radar` | +| `postgresql.auth.password` | Password of custom user to be created. Ignored if `auth.existingSecret` is set. | `""` | +| `postgresql.auth.database` | Name of custom database to be created | `runtime-radar` | +| `postgresql.auth.existingSecretPasswordKey` | Name of the key in the existing secret with PostgreSQL credentials. Only used if `auth.existingSecret` is set. | `POSTGRES_PASSWORD` | +| `postgresql.nodeSelector` | Labels of nodes for primary PostgreSQL pod assignment | `{}` | +| `postgresql.resources` | Resource configuration for PostgreSQL container | `{}` | +| `postgresql.persistence.enabled` | Enable data persistence for primary PostgreSQL using PVC | `true` | +| `postgresql.persistence.storageClass` | Persistent volume storage class for primary PostgreSQL | `""` | +| `postgresql.persistence.size` | Persistent volume size for PostgreSQL | `1Gi` | +| `postgresql.persistence.existingClaim` | Name of an existing PVC | `""` | +| `postgresql.persistence.selector` | Template to specify an existing persistent volume | `{}` | +| `postgresql.metrics.enabled` | Start a prometheus exporter | `false` | +| `postgresql.metrics.externalHost` | PostgreSQL metrics external host | `""` | ### Redis installation configuration -| Name | Description | Value | -| -------------------------------------- | ------------------------------------------------------------------------------------------ | ---------------- | -| `redis.externalHost` | External host with Redis. Requires setting `redis.deploy` to `false`. 
| `""` | -| `redis.fullnameOverride` | String to fully override common.names.fullname | `redis` | -| `redis.tls.autoGenerated` | Generate automatically self-signed TLS certificates if nothing is provided | `true` | -| `redis.tls.cert` | Certificate value | `""` | -| `redis.tls.certKey` | Certificate key value | `""` | -| `redis.tls.certCA` | CA Certificate value | `""` | -| `redis.auth.existingSecret` | Name of the existing secret with Redis credentials | `redis` | -| `redis.auth.username` | Redis username | `runtime-radar` | -| `redis.auth.password` | Redis password | `""` | -| `redis.auth.existingSecretPasswordKey` | Password key to retrieve from the existing secret | `REDIS_PASSWORD` | -| `redis.replicaCount` | Number of Redis master instances to deploy (experimental, requires additional configuring) | `1` | -| `redis.nodeSelector` | Labels of nodes for Redis master pod assignment | `{}` | -| `redis.resources` | Resource configuration for Redis container | `{}` | -| `redis.persistence.enabled` | Enable persistence for Redis master nodes using PVC | `false` | -| `redis.persistence.storageClass` | Persistent volume storage class | `""` | -| `redis.persistence.size` | Persistent volume size | `1Gi` | -| `redis.persistence.existingClaim` | Use an existing PVC created manually | `""` | -| `redis.persistence.selector` | Template to specify additional labels for PVC | `{}` | +| Name | Description | Value | +| -------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 
---------------- | +| `redis.deploy` | Deploy component | `true` | +| `redis.externalHost` | External host with Redis. Requires setting `redis.deploy` to `false`. | `""` | +| `redis.fullnameOverride` | String to fully override common.names.fullname | `redis` | +| `redis.tls.autoGenerated` | Generate automatically self-signed TLS certificates if nothing is provided | `true` | +| `redis.tls.cert` | Certificate value | `""` | +| `redis.tls.certKey` | Certificate key value | `""` | +| `redis.tls.certCA` | CA Certificate value | `""` | +| `redis.auth.existingSecret` | Name of an existing secret with Redis credentials, read by the sub-chart when `redis.deploy=true`. When set, the `auth.password` parameter is ignored. The secret must carry AUTH credentials only — `REDIS_USER`, `REDIS_PASSWORD` (connection metadata `REDIS_ADDR`/`REDIS_TLS_*` lives in the always-created `cs-redis-config` ConfigMap and must not be placed in this Secret). NOTE: To make the same secret visible to all consumer services (auth-api, history-api, etc.), set `global.redis.auth.existingSecret` to the same value. 
| `redis` | +| `redis.auth.username` | Redis username | `runtime-radar` | +| `redis.auth.password` | Redis password | `""` | +| `redis.auth.existingSecretPasswordKey` | Password key to retrieve from the existing secret | `REDIS_PASSWORD` | +| `redis.replicaCount` | Number of Redis master instances to deploy (experimental, requires additional configuring) | `1` | +| `redis.nodeSelector` | Labels of nodes for Redis master pod assignment | `{}` | +| `redis.resources` | Resource configuration for Redis container | `{}` | +| `redis.persistence.enabled` | Enable persistence for Redis master nodes using PVC | `false` | +| `redis.persistence.storageClass` | Persistent volume storage class | `""` | +| `redis.persistence.size` | Persistent volume size | `1Gi` | +| `redis.persistence.existingClaim` | Use an existing PVC created manually | `""` | +| `redis.persistence.selector` | Template to specify additional labels for PVC | `{}` | ### RabbitMQ installation configuration -| Name | Description | Value | -| ----------------------------------------- | ---------------------------------------------------------------------------------------------------- | ----------------- | -| `rabbitmq.externalHost` | External host with RabbitMQ | `""` | -| `rabbitmq.fullnameOverride` | String to fully override rabbitmq.fullname template | `rabbitmq` | -| `rabbitmq.auth.username` | RabbitMQ application username | `runtime-radar` | -| `rabbitmq.auth.password` | RabbitMQ application password | `""` | -| `rabbitmq.auth.existingSecret` | Existing secret with RabbitMQ credentials (must contain value for the `rabbitmq-password` parameter) | `rabbitmq` | -| `rabbitmq.auth.existingSecretPasswordKey` | Password key to be retrieved from existing secret | `RABBIT_PASSWORD` | -| `rabbitmq.nodeSelector` | Template to specify the labels of nodes for pod assignment | `{}` | -| `rabbitmq.resources` | Resource configuration for RabbitMQ container | `{}` | -| `rabbitmq.persistence.enabled` | Enable RabbitMQ data 
persistence using PVC | `true` | -| `rabbitmq.persistence.storageClass` | Persistent volume storage class for RabbitMQ | `""` | -| `rabbitmq.persistence.size` | Persistent volume size for RabbitMQ | `1Gi` | -| `rabbitmq.persistence.existingClaim` | Name of an existing PVC | `""` | -| `rabbitmq.persistence.selector` | Template to specify an existing persistent volume | `{}` | -| `rabbitmq.metrics.enabled` | Enable exposing RabbitMQ metrics to be gathered by Prometheus | `false` | -| `rabbitmq.metrics.externalHost` | RabbitMQ metrics external host | `""` | +| Name | Description | Value | +| ----------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------- | +| `rabbitmq.deploy` | Deploy component | `true` | +| `rabbitmq.externalHost` | External host with RabbitMQ | `""` | +| `rabbitmq.fullnameOverride` | String to fully override rabbitmq.fullname template | `rabbitmq` | +| `rabbitmq.auth.username` | RabbitMQ application username | `runtime-radar` | +| `rabbitmq.auth.password` | RabbitMQ application password | `""` | +| `rabbitmq.auth.existingSecret` | Name of an existing secret with RabbitMQ credentials, read by the sub-chart when `rabbitmq.deploy=true`. When set, the `auth.password` parameter is ignored. The secret must carry AUTH credentials only — `RABBIT_USER`, `RABBIT_PASSWORD` (connection metadata `RABBIT_ADDR` lives in the always-created `cs-rabbitmq-config` ConfigMap and must not be placed in this Secret). 
NOTE: To make the same secret visible to all consumer services (event-processor, history-api, etc.), set `global.rabbitmq.auth.existingSecret` to the same value. | `rabbitmq` | +| `rabbitmq.auth.existingSecretPasswordKey` | Password key to be retrieved from existing secret | `RABBIT_PASSWORD` | +| `rabbitmq.nodeSelector` | Template to specify the labels of nodes for pod assignment | `{}` | +| `rabbitmq.resources` | Resource configuration for RabbitMQ container | `{}` | +| `rabbitmq.persistence.enabled` | Enable RabbitMQ data persistence using PVC | `true` | +| `rabbitmq.persistence.storageClass` | Persistent volume storage class for RabbitMQ | `""` | +| `rabbitmq.persistence.size` | Persistent volume size for RabbitMQ | `1Gi` | +| `rabbitmq.persistence.existingClaim` | Name of an existing PVC | `""` | +| `rabbitmq.persistence.selector` | Template to specify an existing persistent volume | `{}` | +| `rabbitmq.metrics.enabled` | Enable exposing RabbitMQ metrics to be gathered by Prometheus | `false` | +| `rabbitmq.metrics.externalHost` | RabbitMQ metrics external host | `""` | ### Clickhouse installation configuration -| Name | Description | Value | -| ------------------------------------------- | ------------------------------------------------------------------------------- | --------------------- | -| `clickhouse.externalHost` | External host with ClickHouse. Requires setting `clickhouse.deploy` to `false`. 
| `""` | -| `clickhouse.fullnameOverride` | String to fully override common.names.fullname | `clickhouse` | -| `clickhouse.nodeSelector` | Labels of nodes for ClickHouse pod assignment | `{}` | -| `clickhouse.replicaCount` | Number of ClickHouse replicas to deploy per shard | `1` | -| `clickhouse.resources` | Resource configuration for Clickhouse container | `{}` | -| `clickhouse.persistence.enabled` | Enable persistence using PVC | `true` | -| `clickhouse.persistence.storageClass` | Persistent volume storage class | `""` | -| `clickhouse.persistence.size` | Data volume size | `5Gi` | -| `clickhouse.persistence.existingClaim` | Name of an existing PVC | `""` | -| `clickhouse.persistence.selector` | Template to specify an existing persistent volume | `{}` | -| `clickhouse.tls.autoGenerated` | Generate automatically self-signed TLS certificates if nothing is provided | `true` | -| `clickhouse.tls.cert` | Certificate value | `""` | -| `clickhouse.tls.certKey` | Certificate key value | `""` | -| `clickhouse.tls.certCA` | CA Certificate value | `""` | -| `clickhouse.auth.username` | ClickHouse administrator name | `runtime-radar` | -| `clickhouse.auth.password` | ClickHouse administartor password | `""` | -| `clickhouse.auth.existingSecret` | Name of the secret with the administrator password | `clickhouse` | -| `clickhouse.auth.existingSecretPasswordKey` | Name of the key stored in the existing secret | `CLICKHOUSE_PASSWORD` | -| `clickhouse.auth.database` | Name of the ClickHouse database | `runtime-radar` | -| `clickhouse.metrics.enabled` | Enable the export of Prometheus metrics | `false` | -| `clickhouse.metrics.externalHost` | ClickHouse metrics external host | `""` | +| Name | Description | Value | +| ------------------------------------------- | 
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------- | +| `clickhouse.deploy` | Deploy component | `true` | +| `clickhouse.externalHost` | External host with ClickHouse. Requires setting `clickhouse.deploy` to `false`. | `""` | +| `clickhouse.fullnameOverride` | String to fully override common.names.fullname | `clickhouse` | +| `clickhouse.nodeSelector` | Labels of nodes for ClickHouse pod assignment | `{}` | +| `clickhouse.replicaCount` | Number of ClickHouse replicas to deploy per shard | `1` | +| `clickhouse.resources` | Resource configuration for Clickhouse container | `{}` | +| `clickhouse.persistence.enabled` | Enable persistence using PVC | `true` | +| `clickhouse.persistence.storageClass` | Persistent volume storage class | `""` | +| `clickhouse.persistence.size` | Data volume size | `5Gi` | +| `clickhouse.persistence.existingClaim` | Name of an existing PVC | `""` | +| `clickhouse.persistence.selector` | Template to specify an existing persistent volume | `{}` | +| `clickhouse.tls.autoGenerated` | Generate automatically self-signed TLS certificates if nothing is provided | `true` | +| `clickhouse.tls.cert` | Certificate value | `""` | +| `clickhouse.tls.certKey` | Certificate key value | `""` | +| `clickhouse.tls.certCA` | CA Certificate value | `""` | +| `clickhouse.auth.username` | ClickHouse administrator name | `runtime-radar` | +| `clickhouse.auth.password` | ClickHouse administartor password | `""` | 
+| `clickhouse.auth.existingSecret` | Name of an existing secret with ClickHouse credentials, read by the sub-chart when `clickhouse.deploy=true`. When set, the `auth.password` parameter is ignored. The secret must carry AUTH credentials only — `CLICKHOUSE_USER`, `CLICKHOUSE_PASSWORD`, `CLICKHOUSE_DB` (connection metadata `CLICKHOUSE_ADDR`/`CLICKHOUSE_SSL_*` lives in the always-created `cs-clickhouse-config` ConfigMap and must not be placed in this Secret). NOTE: To make the same secret visible to all consumer services (history-api, event-processor, etc.), set `global.clickhouse.auth.existingSecret` to the same value. | `clickhouse` | +| `clickhouse.auth.existingSecretPasswordKey` | Name of the key stored in the existing secret | `CLICKHOUSE_PASSWORD` | +| `clickhouse.auth.database` | Name of the ClickHouse database | `runtime-radar` | +| `clickhouse.metrics.enabled` | Enable the export of Prometheus metrics | `false` | +| `clickhouse.metrics.externalHost` | ClickHouse metrics external host | `""` | + +### Grafana installation configuration + +| Name | Description | Value | +| ------------------------------------ | ---------------------------------------------------------------------------- | --------------------- | +| `grafana.deploy` | Deploy component | `false` | +| `grafana.externalHost` | External host with Grafana. Requires setting `grafana.deploy` to `false`. 
| `""` | +| `grafana.fullnameOverride` | String to fully override common.names.fullname | `grafana` | +| `grafana.nodeSelector` | Node labels for pod assignment | `{}` | +| `grafana.replicaCount` | Number of Grafana nodes | `1` | +| `grafana.resources` | Resource configuration for Grafana container | `{}` | +| `grafana.tls.autoGenerated` | Generate automatically self-signed TLS certificates if nothing is provided | `true` | +| `grafana.tls.cert` | Certificate value | `""` | +| `grafana.tls.certKey` | Certificate key value | `""` | +| `grafana.tls.certCA` | CA Certificate value | `""` | +| `grafana.auth.username` | Grafana administrator name | `runtime-radar` | +| `grafana.auth.password` | Grafana administrator password | `""` | +| `grafana.datasourcesSecretName` | The name of an externally-managed secret containing custom datasource files. | `grafana-datasources` | +| `grafana.dashboardsProvider.enabled` | Enable the use of a Grafana dashboard provider | `true` | +| `grafana.dashboardsConfigMaps` | Array with the names of a series of ConfigMaps containing dashboards files | `{}` | +| `grafana.subPath` | Use sub path for grafana for exposing it via reverse proxy | `grafana` | + +### Prometheus installation configuration + +| Name | Description | Value | +| -------------------------------------- | ------------------------------------------------------------------------------- | ------------ | +| `prometheus.deploy` | Deploy component | `false` | +| `prometheus.externalHost` | External host with Prometheus. Requires setting `prometheus.deploy` to `false`.
| `""` | +| `prometheus.fullnameOverride` | String to fully override common.names.fullname | `prometheus` | +| `prometheus.replicaCount` | Number of Prometheus replicas to deploy | `1` | +| `prometheus.resources` | Resource configuration for Prometheus container | `{}` | +| `prometheus.persistence.enabled` | Enable persistence using PVC | `true` | +| `prometheus.persistence.storageClass` | Persistent volume storage class | `""` | +| `prometheus.persistence.size` | Data volume size | `5Gi` | +| `prometheus.persistence.existingClaim` | Name of an existing PVC | `""` | +| `prometheus.persistence.selector` | Template to specify an existing persistent volume | `{}` | +| `prometheus.scrapeConfigmap` | ConfigMap which contains scrape config files | `cs-metrics` | diff --git a/install/helm/charts/clickhouse/README.md b/install/helm/charts/clickhouse/README.md index a32c1d00..1c0b2007 100644 --- a/install/helm/charts/clickhouse/README.md +++ b/install/helm/charts/clickhouse/README.md @@ -4,17 +4,18 @@ ### Global parameters -| Name | Description | Value | -| -------------------------------------- | ------------------------------------------------------------------------------------------- | ----- | -| `global.imageRegistry` | Global Docker image registry | `""` | -| `global.imagePullSecrets` | Global Docker registry secret names as an array | `[]` | -| `global.storageClass` | Global StorageClass for Persistent Volume(s) | `""` | -| `global.clickhouse.tls.enabled` | Enable TLS traffic support (overrides `tls.enabled`) | `nil` | -| `global.clickhouse.tls.autoGenerated` | Generate automatically self-signed TLS certificates (overrides `tls.autoGenerated`) | `nil` | -| `global.clickhouse.tls.existingSecret` | Name of an existing secret that contains the certificates (overrides `tls.existingSecret`) | `""` | -| `global.clickhouse.tls.cert` | Certificate value.
Requires `tls.autoGenerated` to be `false` (overrides `tls.cert`) | `""` | -| `global.clickhouse.tls.certKey` | Certificate key value. Requires `tls.autoGenerated` to be `false` (overrides `tls.certKey`) | `""` | -| `global.clickhouse.tls.certCA` | CA Certificate value. Requires `tls.autoGenerated` to be `false` (overrides `tls.certCA`) | `""` | +| Name | Description | Value | +| --------------------------------------- | ------------------------------------------------------------------------------------------- | ----- | +| `global.imageRegistry` | Global Docker image registry | `""` | +| `global.imagePullSecrets` | Global Docker registry secret names as an array | `[]` | +| `global.storageClass` | Global StorageClass for Persistent Volume(s) | `""` | +| `global.clickhouse.auth.existingSecret` | Name of an existing secret that contains the credentials (overrides `auth.existingSecret`) | `""` | +| `global.clickhouse.tls.enabled` | Enable TLS traffic support (overrides `tls.enabled`) | `nil` | +| `global.clickhouse.tls.autoGenerated` | Generate automatically self-signed TLS certificates (overrides `tls.autoGenerated`) | `nil` | +| `global.clickhouse.tls.existingSecret` | Name of an existing secret that contains the certificates (overrides `tls.existingSecret`) | `""` | +| `global.clickhouse.tls.cert` | Certificate value. Requires `tls.autoGenerated` to be `false` (overrides `tls.cert`) | `""` | +| `global.clickhouse.tls.certKey` | Certificate key value. Requires `tls.autoGenerated` to be `false` (overrides `tls.certKey`) | `""` | +| `global.clickhouse.tls.certCA` | CA Certificate value.
Requires `tls.autoGenerated` to be `false` (overrides `tls.certCA`) | `""` | ### Common parameters @@ -81,12 +82,10 @@ | `containerPorts.postgresql` | ClickHouse PostgreSQL container port | `9005` | | `containerPorts.interserver` | ClickHouse Interserver container port | `9009` | | `containerPorts.metrics` | ClickHouse metrics container port | `8001` | -| `podSecurityContext.enabled` | Enabled ClickHouse pods' Security Context | `true` | | `podSecurityContext.fsGroupChangePolicy` | Set filesystem group change policy | `Always` | | `podSecurityContext.sysctls` | Set kernel settings using the sysctl interface | `[]` | | `podSecurityContext.supplementalGroups` | Set filesystem extra groups | `[]` | | `podSecurityContext.fsGroup` | Set ClickHouse pod's Security Context fsGroup | `1001` | -| `containerSecurityContext.enabled` | Enable containers' Security Context | `true` | | `containerSecurityContext.seLinuxOptions` | Set SELinux options in container | `{}` | | `containerSecurityContext.runAsUser` | Set containers' Security Context runAsUser | `1001` | | `containerSecurityContext.runAsGroup` | Set containers' Security Context runAsGroup | `1001` | @@ -96,19 +95,16 @@ | `containerSecurityContext.readOnlyRootFilesystem` | Set contraller container's Security Context allowPrivilegeEscalation | `true` | | `containerSecurityContext.capabilities.drop` | List of capabilities to be droppedn | `["ALL"]` | | `containerSecurityContext.seccompProfile.type` | Set container's Security Context seccomp profile | `RuntimeDefault` | -| `livenessProbe.enabled` | Enable livenessProbe on ClickHouse containers | `true` | | `livenessProbe.failureThreshold` | Failure threshold for livenessProbe | `3` | | `livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `10` | | `livenessProbe.periodSeconds` | Period seconds for livenessProbe | `10` | | `livenessProbe.successThreshold` | Success threshold for livenessProbe | `1` | | `livenessProbe.timeoutSeconds` | Timeout seconds 
for livenessProbe | `1` | -| `readinessProbe.enabled` | Enable readinessProbe on ClickHouse containers | `true` | | `readinessProbe.failureThreshold` | Failure threshold for readinessProbe | `3` | | `readinessProbe.initialDelaySeconds` | Initial delay seconds for readinessProbe | `10` | | `readinessProbe.periodSeconds` | Period seconds for readinessProbe | `10` | | `readinessProbe.successThreshold` | Success threshold for readinessProbe | `1` | | `readinessProbe.timeoutSeconds` | Timeout seconds for readinessProbe | `1` | -| `startupProbe.enabled` | Enable startupProbe on ClickHouse containers | `false` | | `startupProbe.failureThreshold` | Failure threshold for startupProbe | `3` | | `startupProbe.initialDelaySeconds` | Initial delay seconds for startupProbe | `10` | | `startupProbe.periodSeconds` | Period seconds for startupProbe | `10` | diff --git a/install/helm/charts/clickhouse/templates/_helpers.tpl b/install/helm/charts/clickhouse/templates/_helpers.tpl index 218cd01f..6214b756 100644 --- a/install/helm/charts/clickhouse/templates/_helpers.tpl +++ b/install/helm/charts/clickhouse/templates/_helpers.tpl @@ -43,5 +43,7 @@ Fix ClickHouse database name if it contains special symbols {{- define "clickhouse.database" -}} {{- if regexMatch "^[a-zA-Z0-9_]*$" .Values.auth.database | not -}} {{- printf "`%s`" .Values.auth.database -}} +{{- else }} + {{- .Values.auth.database -}} {{- end -}} {{- end -}} diff --git a/install/helm/charts/clickhouse/templates/secret.yaml b/install/helm/charts/clickhouse/templates/secret.yaml index dc1dcdf9..36cd9630 100644 --- a/install/helm/charts/clickhouse/templates/secret.yaml +++ b/install/helm/charts/clickhouse/templates/secret.yaml @@ -1,5 +1,5 @@ -{{- if not .Values.auth.existingSecret }} -{{- $secretName := include "common.authSecretName" . }} +{{- if and (not .Values.auth.existingSecret) (not ((.Values.global.clickhouse).auth).existingSecret) }} +{{- $secretName := include "common.auth.secretName" . 
}} {{- $key := include "clickhouse.secretPasswordKey" . }} {{- $password := include "common.secrets.password" (dict "context" . "secret" $secretName "key" $key "defaultValue" .Values.auth.password) }} apiVersion: v1 diff --git a/install/helm/charts/clickhouse/templates/statefulset.yaml b/install/helm/charts/clickhouse/templates/statefulset.yaml index 821f56eb..93986e72 100644 --- a/install/helm/charts/clickhouse/templates/statefulset.yaml +++ b/install/helm/charts/clickhouse/templates/statefulset.yaml @@ -26,9 +26,9 @@ spec: spec: serviceAccountName: {{ include "common.serviceAccountName" . }} {{- include "common.imagePullSecrets" . | nindent 6 }} - {{- if (.Values.podSecurityContext).enabled }} + {{- with .Values.podSecurityContext }} securityContext: - {{- omit .Values.podSecurityContext "enabled" | toYaml | nindent 8 }} + {{- toYaml . | nindent 8 }} {{- end }} {{- if .Values.terminationGracePeriodSeconds }} terminationGracePeriodSeconds: {{ .Values.terminationGracePeriodSeconds }} @@ -42,9 +42,9 @@ spec: resources: {{- toYaml . | nindent 12 }} {{- end }} - {{- if (.Values.containerSecurityContext).enabled }} + {{- with .Values.containerSecurityContext }} securityContext: - {{- omit .Values.containerSecurityContext "enabled" | toYaml | nindent 12 }} + {{- toYaml . | nindent 12 }} {{- end }} command: - /bin/sh @@ -65,9 +65,9 @@ spec: - name: clickhouse image: {{ include "clickhouse.image" . }} imagePullPolicy: {{ .Values.image.pullPolicy | quote }} - {{- if (.Values.containerSecurityContext).enabled }} + {{- with .Values.containerSecurityContext }} securityContext: - {{- omit .Values.containerSecurityContext "enabled" | toYaml | nindent 12 }} + {{- toYaml . 
| nindent 12 }} {{- end }} env: - name: CLICKHOUSE_HTTP_PORT @@ -107,7 +107,7 @@ spec: - name: CLICKHOUSE_PASSWORD valueFrom: secretKeyRef: - name: {{ include "common.authSecretName" $ }} + name: {{ include "common.auth.secretName" $ }} key: {{ include "clickhouse.secretPasswordKey" $ }} {{- if .Values.auth.database }} - name: CLICKHOUSE_DB @@ -150,19 +150,19 @@ spec: - name: http-metrics containerPort: {{ $.Values.containerPorts.metrics }} {{- end }} - {{- if .Values.startupProbe.enabled }} - startupProbe: {{- omit .Values.startupProbe "enabled" | toYaml | nindent 12 }} + {{- with .Values.startupProbe }} + startupProbe: {{- toYaml . | nindent 12 }} httpGet: path: /ping port: http {{- end }} - {{- if .Values.livenessProbe.enabled }} - livenessProbe: {{- omit .Values.livenessProbe "enabled" | toYaml | nindent 12 }} + {{- with .Values.livenessProbe }} + livenessProbe: {{- toYaml . | nindent 12 }} tcpSocket: port: http {{- end }} - {{- if .Values.readinessProbe.enabled }} - readinessProbe: {{- omit .Values.readinessProbe "enabled" | toYaml | nindent 12 }} + {{- with .Values.readinessProbe }} + readinessProbe: {{- toYaml . 
| nindent 12 }} httpGet: path: /ping port: http diff --git a/install/helm/charts/clickhouse/values.schema.json b/install/helm/charts/clickhouse/values.schema.json index 28eca858..4e53abf2 100644 --- a/install/helm/charts/clickhouse/values.schema.json +++ b/install/helm/charts/clickhouse/values.schema.json @@ -24,6 +24,16 @@ "clickhouse": { "type": "object", "properties": { + "auth": { + "type": "object", + "properties": { + "existingSecret": { + "type": "string", + "description": "Name of an existing secret that contains the credentials (overrides `auth.existingSecret`)", + "default": "" + } + } + }, "tls": { "type": "object", "properties": { @@ -354,11 +364,6 @@ "podSecurityContext": { "type": "object", "properties": { - "enabled": { - "type": "boolean", - "description": "Enabled ClickHouse pods' Security Context", - "default": true - }, "fsGroupChangePolicy": { "type": "string", "description": "Set filesystem group change policy", @@ -386,11 +391,6 @@ "containerSecurityContext": { "type": "object", "properties": { - "enabled": { - "type": "boolean", - "description": "Enable containers' Security Context", - "default": true - }, "runAsUser": { "type": "number", "description": "Set containers' Security Context runAsUser", @@ -451,11 +451,6 @@ "livenessProbe": { "type": "object", "properties": { - "enabled": { - "type": "boolean", - "description": "Enable livenessProbe on ClickHouse containers", - "default": true - }, "failureThreshold": { "type": "number", "description": "Failure threshold for livenessProbe", @@ -486,11 +481,6 @@ "readinessProbe": { "type": "object", "properties": { - "enabled": { - "type": "boolean", - "description": "Enable readinessProbe on ClickHouse containers", - "default": true - }, "failureThreshold": { "type": "number", "description": "Failure threshold for readinessProbe", @@ -521,11 +511,6 @@ "startupProbe": { "type": "object", "properties": { - "enabled": { - "type": "boolean", - "description": "Enable startupProbe on ClickHouse
containers", - "default": false - }, "failureThreshold": { "type": "number", "description": "Failure threshold for startupProbe", diff --git a/install/helm/charts/clickhouse/values.yaml b/install/helm/charts/clickhouse/values.yaml index 88ae4b85..c1e5865d 100644 --- a/install/helm/charts/clickhouse/values.yaml +++ b/install/helm/charts/clickhouse/values.yaml @@ -11,6 +11,9 @@ global: storageClass: "" clickhouse: + auth: + # -- Name of an existing secret that contains the credentials (overrides `auth.existingSecret`) + existingSecret: "" tls: # -- [boolean, nullable] Enable TLS traffic support (overrides `tls.enabled`) enabled: null @@ -173,8 +176,6 @@ containerPorts: # Configure Pods Security Context # ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod podSecurityContext: - # -- Enabled ClickHouse pods' Security Context - enabled: true # -- Set filesystem group change policy fsGroupChangePolicy: Always # -- Set kernel settings using the sysctl interface @@ -186,8 +187,6 @@ podSecurityContext: # Configure Container Security Context # ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container containerSecurityContext: - # -- Enable containers' Security Context - enabled: true # -- [object,nullable] Set SELinux options in container seLinuxOptions: {} # -- Set containers' Security Context runAsUser @@ -211,8 +210,6 @@ containerSecurityContext: # Configure extra options for ClickHouse containers' liveness and readiness probes # ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes livenessProbe: - # -- Enable livenessProbe on ClickHouse containers - enabled: true # -- Failure threshold for livenessProbe failureThreshold: 3 # -- Initial delay seconds for livenessProbe @@ -224,8 +221,6 @@ livenessProbe: # -- Timeout seconds for livenessProbe timeoutSeconds: 1 readinessProbe: - # --
Enable readinessProbe on ClickHouse containers - enabled: true # -- Failure threshold for readinessProbe failureThreshold: 3 # -- Initial delay seconds for readinessProbe @@ -237,8 +232,6 @@ readinessProbe: # -- Timeout seconds for readinessProbe timeoutSeconds: 1 startupProbe: - # -- Enable startupProbe on ClickHouse containers - enabled: false # -- Failure threshold for startupProbe failureThreshold: 3 # -- Initial delay seconds for startupProbe diff --git a/install/helm/charts/common/templates/_annotations.tpl b/install/helm/charts/common/templates/_annotations.tpl index b0b7ce68..b629b3c8 100644 --- a/install/helm/charts/common/templates/_annotations.tpl +++ b/install/helm/charts/common/templates/_annotations.tpl @@ -13,11 +13,11 @@ Pod annotations {{- define "common.podAnnotations" -}} {{- include "common.annotations" . }} {{- with .Values.podAnnotations }} -{{- tpl (toYaml .) $ }} +{{ tpl (toYaml .) $ }} {{- end }} {{- if eq (include "common.metrics.enabled" .) "true" }} {{- with (.Values.metrics).podAnnotations }} -{{- tpl (toYaml .) $ }} +{{ tpl (toYaml .) $ }} {{- end }} {{- end }} {{- end }} diff --git a/install/helm/charts/common/templates/_auth.tpl b/install/helm/charts/common/templates/_auth.tpl new file mode 100644 index 00000000..16c42b9d --- /dev/null +++ b/install/helm/charts/common/templates/_auth.tpl @@ -0,0 +1,27 @@ +{{/* +Return auth secret name helper +*/}} +{{- define "common.auth._existingSecret" -}} +{{- $global := (get (.context.Values.global | default dict) .name | default dict) }} +{{- with default (.context.Values.auth).existingSecret ($global.auth).existingSecret -}} + {{- printf "%s" (tpl . $.context) -}} +{{- end -}} +{{- end -}} + +{{/* +Return auth secret name +*/}} +{{- define "common.auth.existingSecret" -}} +{{- include "common.auth._existingSecret" (dict "context" . 
"name" .Chart.Name) }} +{{- end -}} + +{{/* +Return the secret containing auth info +*/}} +{{- define "common.auth.secretName" -}} +{{- with include "common.auth.existingSecret" . -}} + {{- printf "%s" . -}} +{{- else -}} + {{- printf "%s" (include "common.fullname" .) -}} +{{- end -}} +{{- end -}} diff --git a/install/helm/charts/common/templates/_labels.tpl b/install/helm/charts/common/templates/_labels.tpl index 2888819f..20fb624d 100644 --- a/install/helm/charts/common/templates/_labels.tpl +++ b/install/helm/charts/common/templates/_labels.tpl @@ -14,12 +14,16 @@ app.kubernetes.io/managed-by: {{ .Release.Service }} Selector labels */}} {{- define "common.selectorLabels" -}} +{{- with .Values.selectorLabels }} +{{- tpl (toYaml .) $ }} +{{- else }} app.kubernetes.io/name: {{ include "common.name" . }} app.kubernetes.io/instance: {{ .Release.Name }} {{- with .Values.labels }} {{- toYaml . }} {{- end }} {{- end }} +{{- end }} {{/* Pod labels @@ -27,7 +31,7 @@ Pod labels {{- define "common.podLabels" -}} {{- include "common.labels" . }} {{- with .Values.podLabels }} -{{- tpl (toYaml .) $ }} +{{ tpl (toYaml .) $ }} {{- end }} {{- end }} diff --git a/install/helm/charts/common/templates/_names.tpl b/install/helm/charts/common/templates/_names.tpl index 6badcaf4..b177a6bf 100644 --- a/install/helm/charts/common/templates/_names.tpl +++ b/install/helm/charts/common/templates/_names.tpl @@ -62,7 +62,7 @@ Get the configuration ConfigMap name. {{- if .Values.existingConfigmap -}} {{- printf "%s" (tpl .Values.existingConfigmap $) -}} {{- else -}} - {{- printf "%s" (include "common.fullname" .) -}} + {{- printf "%s-config" (include "common.name" .) -}} {{- end -}} {{- end -}} @@ -78,12 +78,12 @@ Get the configuration Secret name. {{- end -}} {{/* -Get the configuration secret. +Get the PVC name.
*/}} -{{- define "common.authSecretName" -}} -{{- if .Values.auth.existingSecret -}} - {{- printf "%s" (tpl .Values.auth.existingSecret $) -}} +{{- define "common.pvcName" -}} +{{- if .Values.persistence.existingClaim -}} + {{- printf "%s" (tpl .Values.persistence.existingClaim $) -}} {{- else -}} - {{- printf "%s" (include "common.fullname" .) -}} + {{- printf "data-%s" (include "common.fullname" .) -}} {{- end -}} {{- end -}} diff --git a/install/helm/charts/common/templates/_tls.tpl b/install/helm/charts/common/templates/_tls.tpl index d5f941e0..2f5f9bf8 100644 --- a/install/helm/charts/common/templates/_tls.tpl +++ b/install/helm/charts/common/templates/_tls.tpl @@ -99,7 +99,7 @@ Return TLS secret name */}} {{- define "common.tls.existingSecret" -}} {{- $global := (get (.Values.global | default dict) .Chart.Name | default dict) }} -{{- with default ($global.tls).existingSecret (.Values.tls).existingSecret -}} +{{- with default (.Values.tls).existingSecret ($global.tls).existingSecret -}} {{- printf "%s" (tpl . $) -}} {{- end -}} {{- end -}} diff --git a/install/helm/charts/common/templates/cs/_auth.tpl b/install/helm/charts/common/templates/cs/_auth.tpl new file mode 100644 index 00000000..3a489b23 --- /dev/null +++ b/install/helm/charts/common/templates/cs/_auth.tpl @@ -0,0 +1,73 @@ +{{- define "common.cs.auth.enabled" -}} +{{- $globalAuth := hasKey ((.Values.global).auth) "enabled" | ternary ((.Values.global).auth).enabled true -}} +{{- if eq (default $globalAuth (.Values.auth).enabled | toString) "true" -}} + {{- true -}} +{{- end -}} +{{- end -}} + +{{/* +Return auth secret name +*/}} +{{- define "common.cs.auth.secretName" -}} +{{- with default (.Values.administrator).existingSecret ((.Values.global).administrator).existingSecret -}} + {{- printf "%s" (tpl . 
$) -}} +{{- else -}} + {{- printf "cs-account" -}} +{{- end -}} +{{- end -}} + +{{/* +Return PostgreSQL auth existing secret name +*/}} +{{- define "common.cs.auth.postgresql.existingSecret" -}} +{{- include "common.auth._existingSecret" (dict "context" . "name" "postgresql") -}} +{{- end -}} + +{{/* +Return PostgreSQL auth secret name +*/}} +{{- define "common.cs.auth.postgresql.secretName" -}} +{{- default "postgresql" (include "common.cs.auth.postgresql.existingSecret" .) -}} +{{- end -}} + +{{/* +Return ClickHouse auth existing secret name +*/}} +{{- define "common.cs.auth.clickhouse.existingSecret" -}} +{{- include "common.auth._existingSecret" (dict "context" . "name" "clickhouse") -}} +{{- end -}} + +{{/* +Return ClickHouse auth secret name +*/}} +{{- define "common.cs.auth.clickhouse.secretName" -}} +{{- default "clickhouse" (include "common.cs.auth.clickhouse.existingSecret" .) -}} +{{- end -}} + +{{/* +Return Redis auth existing secret name +*/}} +{{- define "common.cs.auth.redis.existingSecret" -}} +{{- include "common.auth._existingSecret" (dict "context" . "name" "redis") -}} +{{- end -}} + +{{/* +Return Redis auth secret name +*/}} +{{- define "common.cs.auth.redis.secretName" -}} +{{- default "redis" (include "common.cs.auth.redis.existingSecret" .) -}} +{{- end -}} + +{{/* +Return RabbitMQ auth existing secret name +*/}} +{{- define "common.cs.auth.rabbitmq.existingSecret" -}} +{{- include "common.auth._existingSecret" (dict "context" . "name" "rabbitmq") -}} +{{- end -}} + +{{/* +Return RabbitMQ auth secret name +*/}} +{{- define "common.cs.auth.rabbitmq.secretName" -}} +{{- default "rabbitmq" (include "common.cs.auth.rabbitmq.existingSecret" .) 
-}} +{{- end -}} diff --git a/install/helm/charts/common/templates/cs/_container_app.tpl b/install/helm/charts/common/templates/cs/_container_app.tpl index 938a8928..d145935e 100644 --- a/install/helm/charts/common/templates/cs/_container_app.tpl +++ b/install/helm/charts/common/templates/cs/_container_app.tpl @@ -9,75 +9,52 @@ env: {{- include "common.cs.container.app.env" . | nindent 4 }} envFrom: - - configMapRef: - name: {{ include "common.cs.configmapName" . }} - {{- if (.Values.postgresql).enabled }} - - secretRef: - name: postgresql - {{- end }} - {{- if (.Values.redis).enabled }} - - secretRef: - name: redis - {{- end }} - {{- if (.Values.rabbitmq).enabled }} - - secretRef: - name: rabbitmq - {{- end }} - {{- if (.Values.clickhouse).enabled }} - - secretRef: - name: clickhouse - {{- end }} - {{- with .Values.envFrom }} - {{- tpl (toYaml .) $ | nindent 4 }} - {{- end }} + {{- include "common.cs.container.app.envFrom" . | nindent 4 }} {{- with .Values.containerPorts }} ports: - {{- range $key, $val := . }} - - name: {{ $key }} - containerPort: {{ $val }} - {{- end }} + {{- range $key, $val := . }} + - name: {{ $key }} + containerPort: {{ $val }} + {{- end }} {{- end }} {{- with .Values.resources }} resources: {{- toYaml . | nindent 4 }} {{- end }} - {{- if (.Values.startupProbe).enabled }} - startupProbe: {{- omit .Values.startupProbe "enabled" | toYaml | nindent 4 }} + {{- with .Values.startupProbe }} + startupProbe: {{- toYaml . | nindent 4 }} {{- end }} - {{- if (.Values.livenessProbe).enabled }} - livenessProbe: {{- omit .Values.livenessProbe "enabled" | toYaml | nindent 4 }} + {{- with .Values.livenessProbe }} + livenessProbe: {{- toYaml . | nindent 4 }} {{- end }} - {{- if (.Values.readinessProbe).enabled }} - readinessProbe: {{- omit .Values.readinessProbe "enabled" | toYaml | nindent 4 }} + {{- with .Values.readinessProbe }} + readinessProbe: {{- toYaml . | nindent 4 }} {{- end }} {{- if ne (include "common.cs.devMode.enabled" .) 
"true" }} - {{- if (.Values.containerSecurityContext).enabled }} + {{- with .Values.containerSecurityContext }} securityContext: - {{- omit .Values.containerSecurityContext "enabled" | toYaml | nindent 4 }} + {{- toYaml . | nindent 4 }} {{- end }} {{- end }} - {{- include "common.cs.volumeMounts" (dict "context" .) | nindent 2 }} + volumeMounts: + {{- include "common.cs.volumeMounts" . | nindent 4 }} {{- end -}} {{- define "common.cs.container.app.env" -}} -- name: LOG_LEVEL - value: {{ include "common.cs.logLevel" . | quote }} {{- if eq (include "common.cs.auth.enabled" .) "true" }} - name: TOKEN_KEY valueFrom: secretKeyRef: - name: {{ include "common.cs.keysSecretName" . }} + name: {{ include "common.cs.keys.secretName" . }} key: token {{- end }} {{- if eq (include "common.cs.encryption.enabled" .) "true" }} - name: ENCRYPTION_KEY valueFrom: secretKeyRef: - name: {{ include "common.cs.keysSecretName" . }} + name: {{ include "common.cs.keys.secretName" . }} key: encryption {{- end }} -- name: GOPS_CONFIG_DIR - value: "/tmp" {{- if (.Values.rabbitmq).enabled }} {{- with (.Values.rabbitmq).queue }} - name: RABBIT_QUEUE @@ -92,7 +69,51 @@ value: {{. | quote }} {{- end }} {{- end }} +{{- if .Values.administrator }} +- name: ADMINISTRATOR_USERNAME + valueFrom: + secretKeyRef: + name: {{ include "common.cs.auth.secretName" . }} + key: username +- name: ADMINISTRATOR_PASSWORD + valueFrom: + secretKeyRef: + name: {{ include "common.cs.auth.secretName" . }} + key: password +{{- end }} {{- with .Values.env }} {{ tpl (toYaml .) $ }} {{- end }} {{- end -}} + +{{- define "common.cs.container.app.envFrom" -}} +- configMapRef: + name: {{ include "common.cs.configmapName" . }} +{{- if (.Values.postgresql).enabled }} +- secretRef: + name: {{ include "common.cs.postgresql.secretName" . }} +- configMapRef: + name: {{ include "common.cs.postgresql.configmapName" . }} +{{- end }} +{{- if (.Values.redis).enabled }} +- secretRef: + name: {{ include "common.cs.redis.secretName" . 
}} +- configMapRef: + name: {{ include "common.cs.redis.configmapName" . }} +{{- end }} +{{- if (.Values.rabbitmq).enabled }} +- secretRef: + name: {{ include "common.cs.rabbitmq.secretName" . }} +- configMapRef: + name: {{ include "common.cs.rabbitmq.configmapName" . }} +{{- end }} +{{- if (.Values.clickhouse).enabled }} +- secretRef: + name: {{ include "common.cs.clickhouse.secretName" . }} +- configMapRef: + name: {{ include "common.cs.clickhouse.configmapName" . }} +{{- end }} +{{- with .Values.envFrom }} +{{ tpl (toYaml .) $ }} +{{- end }} +{{- end -}} diff --git a/install/helm/charts/common/templates/cs/_container_gateway.tpl b/install/helm/charts/common/templates/cs/_container_gateway.tpl new file mode 100644 index 00000000..75229733 --- /dev/null +++ b/install/helm/charts/common/templates/cs/_container_gateway.tpl @@ -0,0 +1,47 @@ +{{- define "common.cs.container.gateway" -}} +- name: {{ include "common.name" . }}-gateway + image: {{ include "common.cs.image" (dict "context" . "image" (.Values.gateway).image) }} + imagePullPolicy: {{ eq (include "common.cs.devMode.enabled" .) "true" | ternary "Always" (default "IfNotPresent" ((.Values.gateway).image).pullPolicy) }} + {{- with (.Values.gateway).command }} + command: + {{- tpl (toYaml .) $ | nindent 4 }} + {{- end }} + env: + {{- with (.Values.gateway).env }} + {{- tpl (toYaml .) . | nindent 4 }} + {{- end }} + envFrom: + - configMapRef: + name: {{ include "common.cs.configmapName" . }} + {{- with (.Values.gateway).envFrom }} + {{- tpl (toYaml .) $ | nindent 4 }} + {{- end }} + {{- with (.Values.gateway).containerPorts }} + ports: + {{- range $key, $val := . }} + - name: {{ $key }} + containerPort: {{ $val }} + {{- end }} + {{- end }} + {{- with (.Values.gateway).resources }} + resources: + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with (.Values.gateway).startupProbe }} + startupProbe: {{- toYaml . | nindent 4 }} + {{- end }} + {{- with (.Values.gateway).livenessProbe }} + livenessProbe: {{- toYaml . 
| nindent 4 }} + {{- end }} + {{- with (.Values.gateway).readinessProbe }} + readinessProbe: {{- toYaml . | nindent 4 }} + {{- end }} + {{- if ne (include "common.cs.devMode.enabled" .) "true" }} + {{- with (.Values.gateway).containerSecurityContext }} + securityContext: + {{- toYaml . | nindent 4 }} + {{- end }} + {{- end }} + volumeMounts: + {{- include "common.cs.volumeMounts" . | nindent 4 }} +{{- end -}} diff --git a/install/helm/charts/common/templates/cs/_container_logger.tpl b/install/helm/charts/common/templates/cs/_container_logger.tpl new file mode 100644 index 00000000..5a61bd52 --- /dev/null +++ b/install/helm/charts/common/templates/cs/_container_logger.tpl @@ -0,0 +1,72 @@ +{{- define "common.cs.container.logger" -}} +- name: logger + image: {{ include "common.image" (dict "context" . "image" ((.Values.global).logger).image) }} + imagePullPolicy: {{ (((.Values.global).logger).image).pullPolicy | quote }} + command: + - /fluent-bit/bin/fluent-bit + - -c + - /fluent-bit/etc/fluent-bit.yaml + env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: COMPONENT + value: {{ include "common.name" . }} + - name: BUFFER_CHUNK_SIZE + value: {{ default "128k" (default ((.Values.global).logger).bufferChunkSize (.Values.logger).bufferChunkSize) }} + - name: BUFFER_MAX_SIZE + value: {{ default "128k" (default ((.Values.global).logger).bufferMaxSize (.Values.logger).bufferMaxSize) }} + {{- with (.Values.logger).env }} + {{- tpl (toYaml .) . | nindent 4 }} + {{- end }} + envFrom: + - configMapRef: + name: {{ include "common.cs.configmapName" . }} + {{- with (.Values.logger).resources }} + resources: + {{- toYaml . | nindent 4 }} + {{- else }} + resources: + requests: + cpu: 100m + memory: 128Mi + ephemeral-storage: 50Mi + limits: + cpu: 150m + memory: 192Mi + ephemeral-storage: 1024Mi + {{- end }} + {{- with (.Values.logger).startupProbe }} + startupProbe: {{- toYaml . 
| nindent 4 }} + {{- end }} + {{- with (.Values.logger).livenessProbe }} + livenessProbe: {{- toYaml . | nindent 4 }} + {{- end }} + {{- with (.Values.logger).readinessProbe }} + readinessProbe: {{- toYaml . | nindent 4 }} + {{- end }} + {{- if ne (include "common.cs.devMode.enabled" .) "true" }} + {{- with (.Values.logger).containerSecurityContext }} + securityContext: + {{- toYaml . | nindent 4 }} + {{- end }} + {{- end }} + volumeMounts: + - name: empty-dir + mountPath: /tmp + subPath: tmp-dir + - name: config-logger + mountPath: /fluent-bit/etc/fluent-bit.yaml + readOnly: true + subPath: fluent-bit.yaml + {{- if eq (include "common.cs.tls.loki.enabled" .) "true" }} + - name: certificates-loki + mountPath: /etc/ssl/certs/loki.pem + readOnly: true + subPath: ca.crt + {{- end }} + - name: empty-dir + mountPath: /var/log/cs + subPath: log-dir +{{- end -}} diff --git a/install/helm/charts/common/templates/cs/_deployment.yaml b/install/helm/charts/common/templates/cs/_deployment.yaml index b2e8ca63..071e1ef3 100644 --- a/install/helm/charts/common/templates/cs/_deployment.yaml +++ b/install/helm/charts/common/templates/cs/_deployment.yaml @@ -16,18 +16,33 @@ spec: {{- include "common.selectorLabels" . | nindent 6 }} template: metadata: + {{- $podAnnotations := include "common.podAnnotations" . }} + {{- $authEnabled := eq (include "common.cs.auth.enabled" .) "true" }} + {{- if or $podAnnotations $authEnabled }} annotations: - {{- include "common.podAnnotations" . | indent 8 }} - {{- if eq (include "common.cs.auth.enabled" .) "true" }} + {{- with $podAnnotations }} + {{- . | nindent 8 }} + {{- end }} + {{- if $authEnabled }} checksum/keys: {{ default (randAlphaNum 32) ((.Values.global).keys).token | sha256sum | quote }} {{- end }} + {{- end }} labels: {{- include "common.podLabels" . 
| nindent 8 }} spec: + enableServiceLinks: {{ eq (kindOf .Values.enableServiceLinks) "bool" | ternary .Values.enableServiceLinks false }} serviceAccountName: {{ include "common.cs.serviceAccountName" . }} {{- include "common.imagePullSecrets" . | nindent 6 }} + {{- if .Values.initContainers }} + initContainers: + {{- tpl (toYaml .Values.initContainers) . | nindent 8 -}} + {{- end }} containers: {{- include "common.cs.container.app" . | nindent 8 -}} + + {{- if (.Values.gateway).enabled -}} + {{- include "common.cs.container.gateway" . | nindent 8 -}} + {{- end -}} {{- with .Values.nodeSelector }} nodeSelector: {{- toYaml . | nindent 8 }} @@ -41,10 +56,11 @@ spec: {{- toYaml . | nindent 8 }} {{- end }} {{- if ne (include "common.cs.devMode.enabled" .) "true" }} - {{- if (.Values.podSecurityContext).enabled }} + {{- with .Values.podSecurityContext }} securityContext: - {{- omit .Values.podSecurityContext "enabled" | toYaml | nindent 8 }} + {{- toYaml . | nindent 8 }} {{- end }} {{- end }} - {{- include "common.cs.volumes" . | nindent 6 }} + volumes: + {{- include "common.cs.volumes" . | nindent 8 }} {{- end -}} diff --git a/install/helm/charts/common/templates/cs/_images.tpl b/install/helm/charts/common/templates/cs/_images.tpl index 52f3c02f..72a892b0 100644 --- a/install/helm/charts/common/templates/cs/_images.tpl +++ b/install/helm/charts/common/templates/cs/_images.tpl @@ -6,6 +6,6 @@ Return the proper image name {{- with (.context.Values.global).imageRegistry -}} {{- $_ := set $.image "registry" . -}} {{- end -}} -{{- $tag := coalesce (.context.Values.global).imageTag (.context.Values.global).csVersion .context.Chart.Version -}} +{{- $tag := default (include "common.cs.csVersion" .context) (.context.Values.global).imageTag -}} {{- include "common.image" (dict "defaultTag" $tag | mergeOverwrite .) 
-}} {{- end -}} diff --git a/install/helm/charts/common/templates/cs/_names.tpl b/install/helm/charts/common/templates/cs/_names.tpl index a1e14a19..71513a54 100644 --- a/install/helm/charts/common/templates/cs/_names.tpl +++ b/install/helm/charts/common/templates/cs/_names.tpl @@ -6,12 +6,16 @@ {{- printf "%s-config" (include "common.cs.basename" .) -}} {{- end -}} +{{- define "common.cs.logger.configmapName" -}} +{{- printf "logger-config" -}} +{{- end -}} + {{/* Create the name of the service account to use */}} {{- define "common.cs.serviceAccountName" -}} {{- if .Values.serviceAccount.create -}} - {{ default (include "common.basename" .) .Values.serviceAccount.name }} + {{ default (include "common.name" .) .Values.serviceAccount.name }} {{- else -}} {{ default "default" .Values.serviceAccount.name }} {{- end -}} @@ -41,16 +45,92 @@ Return central cs url Return cs version */}} {{- define "common.cs.csVersion" -}} -{{- default (.Values.global).csVersion .Values.csVersion | default "v0.0.0" -}} +{{- default (.Values.global).csVersion .Values.csVersion | default .Chart.Version -}} {{- end -}} {{/* Return keys secret name */}} -{{- define "common.cs.keysSecretName" -}} +{{- define "common.cs.keys.secretName" -}} {{- default "cs-keys" ((.Values.global).keys).existingSecret -}} {{- end -}} {{- define "common.cs.grafana.address" -}} -{{- default (.Values.grafana).externalHost ((.Values.global).grafana).externalHost | default (printf "%s://grafana.%s.svc.cluster.local:3000" (include "common.cs.http-scheme" .) .Release.Namespace) | trimSuffix "/" }} +{{- default (.Values.grafana).externalHost ((.Values.global).grafana).externalHost | default (printf "%s://grafana.%s.svc.cluster.local:3000" (include "common.cs.grafana.http-scheme" .) 
.Release.Namespace) | trimSuffix "/" }} +{{- end -}} + +{{- define "common.cs.prometheus.address" -}} +{{- default (.Values.prometheus).externalHost ((.Values.global).prometheus).externalHost | default (printf "%s://prometheus.%s.svc.cluster.local:9090" (include "common.cs.prometheus.http-scheme" .) .Release.Namespace) | trimSuffix "/" }} +{{- end -}} + +{{- define "common.cs.loki.address" -}} +{{- default (.Values.loki).externalHost ((.Values.global).loki).externalHost | default (printf "%s://loki.%s.svc.cluster.local:3000" (include "common.cs.loki.http-scheme" .) .Release.Namespace) | trimSuffix "/" }} +{{- end -}} + +{{/* +Return postgresql secret name. +Precedence: global.postgresql.auth.existingSecret > local postgresql.auth.existingSecret > "postgresql". +*/}} +{{- define "common.cs.postgresql.secretName" -}} +{{- $local := ((.Values.postgresql).auth).existingSecret -}} +{{- $global := (((.Values.global).postgresql).auth).existingSecret -}} +{{- default "postgresql" (default $local $global) -}} +{{- end -}} + +{{/* +Return redis secret name. +Precedence: global.redis.auth.existingSecret > local redis.auth.existingSecret > "redis". +*/}} +{{- define "common.cs.redis.secretName" -}} +{{- $local := ((.Values.redis).auth).existingSecret -}} +{{- $global := (((.Values.global).redis).auth).existingSecret -}} +{{- default "redis" (default $local $global) -}} +{{- end -}} + +{{/* +Return rabbitmq secret name. +Precedence: global.rabbitmq.auth.existingSecret > local rabbitmq.auth.existingSecret > "rabbitmq". +*/}} +{{- define "common.cs.rabbitmq.secretName" -}} +{{- $local := ((.Values.rabbitmq).auth).existingSecret -}} +{{- $global := (((.Values.global).rabbitmq).auth).existingSecret -}} +{{- default "rabbitmq" (default $local $global) -}} +{{- end -}} + +{{/* +Return clickhouse secret name. +Precedence: global.clickhouse.auth.existingSecret > local clickhouse.auth.existingSecret > "clickhouse". 
+*/}} +{{- define "common.cs.clickhouse.secretName" -}} +{{- $local := ((.Values.clickhouse).auth).existingSecret -}} +{{- $global := (((.Values.global).clickhouse).auth).existingSecret -}} +{{- default "clickhouse" (default $local $global) -}} +{{- end -}} + +{{/* +Return postgresql config map name. +*/}} +{{- define "common.cs.postgresql.configmapName" -}} +{{- printf "postgresql" -}} +{{- end -}} + +{{/* +Return redis config map name. +*/}} +{{- define "common.cs.redis.configmapName" -}} +{{- printf "redis" -}} +{{- end -}} + +{{/* +Return rabbitmq config map name. +*/}} +{{- define "common.cs.rabbitmq.configmapName" -}} +{{- printf "rabbitmq" -}} +{{- end -}} + +{{/* +Return clickhouse config map name. +*/}} +{{- define "common.cs.clickhouse.configmapName" -}} +{{- printf "clickhouse" -}} {{- end -}} diff --git a/install/helm/charts/common/templates/cs/_tls.tpl b/install/helm/charts/common/templates/cs/_tls.tpl index ee2ad256..259e7cd7 100644 --- a/install/helm/charts/common/templates/cs/_tls.tpl +++ b/install/helm/charts/common/templates/cs/_tls.tpl @@ -11,46 +11,63 @@ Return true if TLS is enabled for CS {{- define "common.cs.tls.component.enabled" -}} {{- $values := get .context.Values .component | default dict -}} {{- $global := get .context.Values.global .component | default dict -}} -{{- $enabled := $values.enabled | empty | not -}} -{{- $tls := default ($global.tls).enabled ($values.tls).enabled | empty | not -}} -{{- if and $enabled $tls -}} - {{- true -}} -{{- end -}} +{{- default ($global.tls).enabled ($values.tls).enabled | empty | not -}} {{- end -}} {{- define "common.cs.tls.component.verify" -}} {{- $values := get .context.Values .component | default dict -}} {{- $global := get (.context.Values.global | default dict) .component | default dict -}} -{{- $enabled := $values.enabled | empty | not -}} {{- $tls := default ($global.tls).enabled ($values.tls).enabled | empty | not -}} {{- $verify := default ($global.tls).verify ($values.tls).verify | 
empty | not -}} -{{- if and $enabled $tls $verify -}} - {{- true -}} -{{- end -}} +{{- and $tls $verify -}} {{- end -}} {{- define "common.cs.tls.postgresql.enabled" -}} -{{- include "common.cs.tls.component.enabled" (dict "context" . "component" "postgresql") -}} +{{- include "common.cs.tls.component.enabled" (dict "context" . "component" "postgresql") | eq "true" | and (.Values.postgresql).enabled -}} {{- end -}} {{- define "common.cs.tls.postgresql.verify" -}} -{{- include "common.cs.tls.component.verify" (dict "context" . "component" "postgresql") -}} +{{- include "common.cs.tls.component.verify" (dict "context" . "component" "postgresql") | eq "true" | and (.Values.postgresql).enabled -}} {{- end -}} {{- define "common.cs.tls.clickhouse.enabled" -}} -{{- include "common.cs.tls.component.enabled" (dict "context" . "component" "clickhouse") -}} +{{- include "common.cs.tls.component.enabled" (dict "context" . "component" "clickhouse") | eq "true" | and (.Values.clickhouse).enabled -}} {{- end -}} {{- define "common.cs.tls.clickhouse.verify" -}} -{{- include "common.cs.tls.component.verify" (dict "context" . "component" "clickhouse") -}} +{{- include "common.cs.tls.component.verify" (dict "context" . "component" "clickhouse") | eq "true" | and (.Values.clickhouse).enabled -}} {{- end -}} {{- define "common.cs.tls.redis.enabled" -}} -{{- include "common.cs.tls.component.enabled" (dict "context" . "component" "redis") -}} +{{- include "common.cs.tls.component.enabled" (dict "context" . "component" "redis") | eq "true" | and (.Values.redis).enabled -}} {{- end -}} {{- define "common.cs.tls.redis.verify" -}} -{{- include "common.cs.tls.component.verify" (dict "context" . "component" "redis") -}} +{{- include "common.cs.tls.component.verify" (dict "context" . "component" "redis") | eq "true" | and (.Values.redis).enabled -}} +{{- end -}} + +{{- define "common.cs.tls.grafana.enabled" -}} +{{- include "common.cs.tls.component.enabled" (dict "context" . 
"component" "grafana") -}} +{{- end -}} + +{{- define "common.cs.tls.grafana.verify" -}} +{{- include "common.cs.tls.component.verify" (dict "context" . "component" "grafana") -}} +{{- end -}} + +{{- define "common.cs.tls.prometheus.enabled" -}} +{{- include "common.cs.tls.component.enabled" (dict "context" . "component" "prometheus") -}} +{{- end -}} + +{{- define "common.cs.tls.prometheus.verify" -}} +{{- include "common.cs.tls.component.verify" (dict "context" . "component" "prometheus") -}} +{{- end -}} + +{{- define "common.cs.tls.loki.enabled" -}} +{{- include "common.cs.tls.component.enabled" (dict "context" . "component" "loki") -}} +{{- end -}} + +{{- define "common.cs.tls.loki.verify" -}} +{{- include "common.cs.tls.component.verify" (dict "context" . "component" "loki") -}} {{- end -}} {{/* @@ -80,3 +97,17 @@ Return Redis certificates secret name {{- define "common.cs.tls.redis.secretName" -}} {{- default "redis-crt" (((.Values.global).redis).tls).existingSecret -}} {{- end -}} + +{{/* +Return Grafana certificates secret name +*/}} +{{- define "common.cs.tls.grafana.secretName" -}} +{{- default "grafana-crt" (((.Values.global).grafana).tls).existingSecret -}} +{{- end -}} + +{{/* +Return Loki certificates secret name +*/}} +{{- define "common.cs.tls.loki.secretName" -}} +{{- default "loki-crt" (((.Values.global).loki).tls).existingSecret -}} +{{- end -}} diff --git a/install/helm/charts/common/templates/cs/_util.tpl b/install/helm/charts/common/templates/cs/_util.tpl index 031570a1..3755149a 100644 --- a/install/helm/charts/common/templates/cs/_util.tpl +++ b/install/helm/charts/common/templates/cs/_util.tpl @@ -1,12 +1,12 @@ {{- define "common.cs.devMode.enabled" -}} -{{- if any (.Values.global).devMode .Values.devMode }} +{{- if any (.Values.global).devMode .Values.devMode (.Values.global).dev .Values.dev }} {{- true -}} {{- end -}} {{- end -}} -{{- define "common.cs.auth.enabled" -}} -{{- $globalAuth := hasKey ((.Values.global).auth) "enabled" | 
ternary ((.Values.global).auth).enabled true -}} -{{- if eq (default $globalAuth (.Values.auth).enabled | toString) "true" -}} +{{- define "common.cs.logger.enabled" -}} +{{- $globalLogger := hasKey ((.Values.global).logger) "enabled" | ternary ((.Values.global).logger).enabled true -}} +{{- if eq (default $globalLogger (.Values.logger).enabled | toString) "true" -}} {{- true -}} {{- end -}} {{- end -}} @@ -31,6 +31,18 @@ {{- eq (include "common.cs.tls.enabled" .) "true" | ternary "https" "http" -}} {{- end -}} +{{- define "common.cs.grafana.http-scheme" -}} +{{- eq (include "common.cs.tls.grafana.enabled" .) "true" | ternary "https" "http" -}} +{{- end -}} + +{{- define "common.cs.prometheus.http-scheme" -}} +{{- eq (include "common.cs.tls.prometheus.enabled" .) "true" | ternary "https" "http" -}} +{{- end -}} + +{{- define "common.cs.loki.http-scheme" -}} +{{- eq (include "common.cs.tls.loki.enabled" .) "true" | ternary "https" "http" -}} +{{- end -}} + {{- define "common.cs.logLevel" -}} {{- if .Values.logLevel -}} {{- .Values.logLevel -}} @@ -44,80 +56,97 @@ {{- end -}} {{- define "common.cs.volumeMounts" -}} -{{- $val := hasKey . 
"values" | ternary (default (dict) .values) .context.Values -}} -volumeMounts: - - name: empty-dir - mountPath: /tmp - subPath: tmp-dir - - name: empty-dir - mountPath: /.config - subPath: gops-dir - {{- if eq (include "common.cs.tls.enabled" .context) "true" }} - - name: certificates - mountPath: ca.pem - subPath: ca.crt - readOnly: true - - name: certificates - mountPath: /etc/ssl/certs/ca.pem - readOnly: true - subPath: ca.crt - - name: certificates - mountPath: cert.pem - subPath: tls.crt - readOnly: true - - name: certificates - mountPath: key.pem - subPath: tls.key - readOnly: true - {{- end }} - {{- if eq (include "common.cs.tls.postgresql.verify" .context) "true" }} - - name: certificates-postgresql - mountPath: db_ca.pem - subPath: ca.crt - readOnly: true - {{- end }} - {{- if eq (include "common.cs.tls.redis.verify" .context) "true" }} - - name: certificates-redis - mountPath: redis_ca.pem - subPath: ca.crt - readOnly: true - {{- end }} - {{- if eq (include "common.cs.tls.clickhouse.verify" .context) "true" }} - - name: certificates-clickhouse - mountPath: /etc/ssl/certs/clickhouse.pem - readOnly: true - subPath: ca.crt - {{- end }} - {{- with $val.volumeMounts }} - {{- toYaml . | nindent 2 }} - {{- end }} +- name: empty-dir + mountPath: /tmp + subPath: tmp-dir +- name: empty-dir + mountPath: /.config + subPath: gops-dir +{{- if eq (include "common.cs.tls.enabled" .) "true" }} +- name: certificates + mountPath: ca.pem + subPath: ca.crt + readOnly: true +- name: certificates + mountPath: /etc/ssl/certs/ca.pem + readOnly: true + subPath: ca.crt +- name: certificates + mountPath: cert.pem + subPath: tls.crt + readOnly: true +- name: certificates + mountPath: key.pem + subPath: tls.key + readOnly: true +{{- end }} +{{- if eq (include "common.cs.tls.postgresql.verify" .) "true" }} +- name: certificates-postgresql + mountPath: db_ca.pem + subPath: ca.crt + readOnly: true +{{- end }} +{{- if eq (include "common.cs.tls.redis.verify" .) 
"true" }} +- name: certificates-redis + mountPath: redis_ca.pem + subPath: ca.crt + readOnly: true +{{- end }} +{{- if eq (include "common.cs.tls.clickhouse.verify" .) "true" }} +- name: certificates-clickhouse + mountPath: /etc/ssl/certs/clickhouse.pem + readOnly: true + subPath: ca.crt +{{- end }} +{{- if eq (include "common.cs.logger.enabled" .) "true" }} +- name: empty-dir + mountPath: /var/log/cs + subPath: log-dir +{{- end }} +{{- with .Values.volumeMounts }} +{{ toYaml . }} +{{- end }} {{- end -}} {{- define "common.cs.volumes" -}} -volumes: - - name: empty-dir - emptyDir: {} - {{- if eq (include "common.cs.tls.enabled" .) "true" }} - - name: certificates - secret: - secretName: {{ include "common.cs.tls.secretName" . }} - {{- end }} - {{- if eq (include "common.cs.tls.postgresql.verify" .) "true" }} - - name: certificates-postgresql - secret: - secretName: {{ include "common.cs.tls.postgresql.secretName" . }} - {{- end }} - {{- if eq (include "common.cs.tls.redis.verify" .) "true" }} - - name: certificates-redis - secret: - secretName: {{ include "common.cs.tls.redis.secretName" . }} - {{- end }} - {{- if eq (include "common.cs.tls.clickhouse.verify" .) "true" }} - - name: certificates-clickhouse - secret: - secretName: {{ include "common.cs.tls.clickhouse.secretName" . }} - {{- end }} - {{- with .Values.volumes }} - {{- toYaml . | nindent 2 }} - {{- end }} +- name: empty-dir + emptyDir: {} +{{- if eq (include "common.cs.tls.enabled" .) "true" }} +- name: certificates + secret: + secretName: {{ include "common.cs.tls.secretName" . }} +{{- end }} +{{- if eq (include "common.cs.tls.postgresql.verify" .) "true" }} +- name: certificates-postgresql + secret: + secretName: {{ include "common.cs.tls.postgresql.secretName" . }} +{{- end }} +{{- if eq (include "common.cs.tls.redis.verify" .) "true" }} +- name: certificates-redis + secret: + secretName: {{ include "common.cs.tls.redis.secretName" . 
}} +{{- end }} +{{- if eq (include "common.cs.tls.clickhouse.verify" .) "true" }} +- name: certificates-clickhouse + secret: + secretName: {{ include "common.cs.tls.clickhouse.secretName" . }} +{{- end }} +{{- if eq (include "common.cs.tls.grafana.verify" .) "true" }} +- name: certificates-grafana + secret: + secretName: {{ include "common.cs.tls.grafana.secretName" . }} +{{- end }} +{{- if eq (include "common.cs.tls.loki.verify" .) "true" }} +- name: certificates-loki + secret: + secretName: {{ include "common.cs.tls.loki.secretName" . }} +{{- end }} +{{- if eq (include "common.cs.logger.enabled" .) "true" }} +- name: config-logger + configMap: + name: {{ include "common.cs.logger.configmapName" . }} +{{- end }} +{{- with .Values.volumes }} +{{ toYaml . }} +{{- end }} {{- end -}} diff --git a/install/helm/charts/grafana/Chart.yaml b/install/helm/charts/grafana/Chart.yaml new file mode 100644 index 00000000..0459f263 --- /dev/null +++ b/install/helm/charts/grafana/Chart.yaml @@ -0,0 +1,10 @@ +apiVersion: v2 +appVersion: 12.0.2 +dependencies: +- name: common + repository: file://../common + version: 0.x.x +description: Grafana is an open source metric analytics and visualization suite for + visualizing time series data that supports various types of data sources. 
+name: grafana +version: 0.0.1 diff --git a/install/helm/charts/grafana/README.md b/install/helm/charts/grafana/README.md new file mode 100644 index 00000000..10837b9b --- /dev/null +++ b/install/helm/charts/grafana/README.md @@ -0,0 +1,146 @@ +# Chart + +## Parameters + +### Global parameters + +| Name | Description | Value | +| ----------------------------------- | ------------------------------------------------------------------------------------------- | ----- | +| `global.imageRegistry` | Global Docker image registry | `""` | +| `global.imagePullSecrets` | Global Docker registry secret names as an array | `[]` | +| `global.storageClass` | Global StorageClass for Persistent Volume(s) | `""` | +| `global.grafana.tls.enabled` | Enable TLS traffic support (overrides `tls.enabled`) | `nil` | +| `global.grafana.tls.autoGenerated` | Generate automatically self-signed TLS certificates (overrides `tls.autoGenerated`) | `nil` | +| `global.grafana.tls.existingSecret` | Name of an existing secret that contains the certificates (overrides `tls.existingSecret`) | `""` | +| `global.grafana.tls.cert` | Certificate value. Requires `tls.autoGenerated` to be `false` (overrides `tls.cert`) | `""` | +| `global.grafana.tls.certKey` | Certificate key value. Requires `tls.autoGenerated` to be `false` (overrides `tls.certKey`) | `""` | +| `global.grafana.tls.certCA` | CA Certificate value. 
Requires `tls.autoGenerated` to be `false` (overrides `tls.certCA`) | `""` | + +### Common parameters + +| Name | Description | Value | +| ------------------- | -------------------------------------------------------------------------------------- | --------------- | +| `nameOverride` | String to partially override common.fullname template (will maintain the release name) | `""` | +| `fullnameOverride` | String to fully override common.fullname template | `""` | +| `namespaceOverride` | String to fully override common.namespace | `""` | +| `clusterDomain` | Kubernetes Cluster Domain | `cluster.local` | +| `labels` | Add labels to all the deployed resources | `{}` | +| `annotations` | Add annotations to all the deployed resources | `{}` | +| `imagePullSecrets` | Global Docker registry secret names as an array | `[]` | + +### Grafana parameters + +| Name | Description | Value | +| -------------------------------- | ------------------------------------------------------------------------------------------------------- | ----------------------- | +| `image.registry` | Grafana image registry | `REGISTRY_NAME` | +| `image.repository` | Grafana image repository | `REPOSITORY_NAME/redis` | +| `image.digest` | Grafana image digest in the way sha256:aa.... 
Please note this parameter, if set, will override the tag | `""` | +| `image.pullPolicy` | Grafana image pull policy | `IfNotPresent` | +| `auth.enabled` | Enable password authentication | `true` | +| `auth.username` | Grafana username | `admin` | +| `auth.password` | Grafana password | `""` | +| `auth.existingSecret` | The name of an existing secret with Grafana credentials | `""` | +| `auth.existingSecretPasswordKey` | Password key to be retrieved from existing secret | `""` | +| `tls.enabled` | Enable TLS traffic | `false` | +| `tls.autoGenerated` | Enable autogenerated certificates | `false` | +| `tls.existingSecret` | The name of the existing secret that contains the TLS certificates | `""` | +| `tls.certFilename` | Certificate filename | `tls.crt` | +| `tls.certKeyFilename` | Certificate Key filename | `tls.key` | +| `tls.certCAFilename` | CA Certificate filename | `ca.crt` | +| `tls.cert` | Certificate value. Requires `tls.autoGenerated` to be `false` | `""` | +| `tls.certKey` | Certificate key value. Requires `tls.autoGenerated` to be `false` | `""` | +| `tls.certCA` | CA Certificate value. 
Requires `tls.autoGenerated` to be `false` | `""` | +| `resources` | Set container requests and limits for different resources like CPU or memory | `{}` | + +### Grafana configuration parameters + +| Name | Description | Value | +| --------------------------------------------------- | -------------------------------------------------------------------------------------------- | ---------------- | +| `plugins` | Grafana plugins to be installed in deployment time separated by commas | `""` | +| `configuration` | Grafana's grafana.ini configuration to be injected as Secret | `""` | +| `existingSecret` | Name of an existing Secret with grafana.ini configuration (must contain grafana.ini key) | `""` | +| `dashboardsProvider.enabled` | Enable the use of a Grafana dashboard provider | `false` | +| `dashboardsProvider.existingConfigMap` | Name of a ConfigMap containing a custom dashboard provider | `""` | +| `dashboardsConfigMaps` | Array with the names of a series of ConfigMaps containing dashboards files | `[]` | +| `datasourcesSecretName` | The name of an externally-managed secret containing custom datasource files. 
| `""` | +| `subPath` | Use sub path for grafana for exposing it via reverse proxy | `""` | +| `logLevel` | Logging level | `console` | +| `replicaCount` | Number of Grafana instances to deploy | `1` | +| `podLabels` | Extra labels for Grafana master pods | `{}` | +| `podAnnotations` | Annotations for Grafana master pods | `{}` | +| `affinity` | Affinity for Grafana master pods assignment | `{}` | +| `nodeSelector` | Node labels for Grafana master pods assignment | `{}` | +| `tolerations` | Tolerations for Grafana master pods assignment | `[]` | +| `terminationGracePeriodSeconds` | Seconds Grafana pod needs to terminate gracefully | `""` | +| `extraEnvVars` | Array with extra environment variables to add to Grafana master nodes | `[]` | +| `volumes` | Optionally specify extra list of additional volumes for the Grafana master pod(s) | `[]` | +| `volumeMounts` | Optionally specify extra list of additional volumeMounts for the Grafana master container(s) | `[]` | +| `containerPorts.grafana` | Container port to open on Grafana master nodes | `3000` | +| `podSecurityContext.fsGroupChangePolicy` | Set filesystem group change policy | `Always` | +| `podSecurityContext.sysctls` | Set kernel settings using the sysctl interface | `[]` | +| `podSecurityContext.supplementalGroups` | Set filesystem extra groups | `[]` | +| `podSecurityContext.fsGroup` | Set Grafana master pod's Security Context fsGroup | `1001` | +| `containerSecurityContext.seLinuxOptions` | Set SELinux options in container | `{}` | +| `containerSecurityContext.runAsUser` | Set Grafana master containers' Security Context runAsUser | `1001` | +| `containerSecurityContext.runAsGroup` | Set Grafana master containers' Security Context runAsGroup | `1001` | +| `containerSecurityContext.runAsNonRoot` | Set Grafana master containers' Security Context runAsNonRoot | `true` | +| `containerSecurityContext.allowPrivilegeEscalation` | Is it possible to escalate Grafana pod(s) privileges | `false` | +| 
`containerSecurityContext.readOnlyRootFilesystem` | Set container's Security Context read-only root filesystem | `true` | +| `containerSecurityContext.seccompProfile.type` | Set Grafana master containers' Security Context seccompProfile | `RuntimeDefault` | +| `containerSecurityContext.capabilities.drop` | Set Grafana master containers' Security Context capabilities to drop | `["ALL"]` | +| `startupProbe.initialDelaySeconds` | Initial delay seconds for startupProbe | `20` | +| `startupProbe.periodSeconds` | Period seconds for startupProbe | `5` | +| `startupProbe.timeoutSeconds` | Timeout seconds for startupProbe | `5` | +| `startupProbe.successThreshold` | Success threshold for startupProbe | `1` | +| `startupProbe.failureThreshold` | Failure threshold for startupProbe | `5` | +| `livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `20` | +| `livenessProbe.periodSeconds` | Period seconds for livenessProbe | `5` | +| `livenessProbe.timeoutSeconds` | Timeout seconds for livenessProbe | `5` | +| `livenessProbe.successThreshold` | Success threshold for livenessProbe | `1` | +| `livenessProbe.failureThreshold` | Failure threshold for livenessProbe | `5` | +| `readinessProbe.initialDelaySeconds` | Initial delay seconds for readinessProbe | `20` | +| `readinessProbe.periodSeconds` | Period seconds for readinessProbe | `5` | +| `readinessProbe.timeoutSeconds` | Timeout seconds for readinessProbe | `1` | +| `readinessProbe.successThreshold` | Success threshold for readinessProbe | `1` | +| `readinessProbe.failureThreshold` | Failure threshold for readinessProbe | `5` | + +### Service Parameters + +| Name | Description | Value | +| ------------------------------ | -------------------------------------------------------- | ----------- | +| `service.type` | Grafana master service type | `ClusterIP` | +| `service.ports.grafana` | Grafana master service port | `3000` | +| `service.annotations` | Additional custom annotations for Grafana master service 
| `{}` | +| `service.headless.annotations` | Annotations for the headless service. | `{}` | + +### Persistence Parameters + +| Name | Description | Value | +| -------------------------------------------------- | ----------------------------------------------------------------------- | ------------------- | +| `persistence.enabled` | Enable persistence using Persistent Volume Claims | `false` | +| `persistence.existingClaim` | Name of an existing PVC to use | `""` | +| `persistence.storageClass` | Storage class of backing PVC | `""` | +| `persistence.labels` | Persistent Volume Claim labels | `{}` | +| `persistence.annotations` | Persistent Volume Claim annotations | `{}` | +| `persistence.accessModes` | Persistent Volume Access Modes | `["ReadWriteOnce"]` | +| `persistence.size` | Size of data volume | `8Gi` | +| `persistence.selector` | Selector to match an existing Persistent Volume for Grafana data PVC | `{}` | +| `persistence.dataSource` | Custom PVC data source | `{}` | +| `persistence.persistentVolumeClaimRetentionPolicy` | Grafana Persistent Volume Claim Retention Policy | `{}` | + +### Other Parameters + +| Name | Description | Value | +| ----------------------------------- | --------------------------------------------------------------- | ------- | +| `serviceAccount.create` | Specifies whether a ServiceAccount should be created | `true` | +| `serviceAccount.name` | The name of the ServiceAccount to use. 
| `""` | +| `serviceAccount.automount` | Whether to auto mount the service account token | `false` | +| `serviceAccount.annotations` | Additional custom annotations for the ServiceAccount | `{}` | +| `metrics.enabled` | Enable the export of Prometheus metrics | `false` | +| `metrics.externalHost` | Grafana metrics external host | `""` | +| `metrics.podAnnotations` | Annotations for metrics scraping | `{}` | +| `networkPolicy.enabled` | Enable creation of NetworkPolicy resources | `true` | +| `networkPolicy.allowExternal` | Don't require client label for connections | `true` | +| `networkPolicy.allowExternalEgress` | Allow the pod to access any range of port and all destinations. | `true` | +| `networkPolicy.extraIngress` | Add extra ingress rules to the NetworkPolicy | `[]` | +| `networkPolicy.extraEgress` | Add extra egress rules to the NetworkPolicy | `[]` | diff --git a/install/helm/charts/grafana/templates/_helpers.tpl b/install/helm/charts/grafana/templates/_helpers.tpl new file mode 100644 index 00000000..e41d63ff --- /dev/null +++ b/install/helm/charts/grafana/templates/_helpers.tpl @@ -0,0 +1,47 @@ +{{/* +Return the proper Grafana image name +*/}} +{{- define "grafana.image" -}} +{{ include "common.image" (dict "context" . "image" .Values.image) }} +{{- end -}} + +{{/* +Return the path to the cert file. +*/}} +{{- define "grafana.tlsCert" -}} +{{- printf "/etc/grafana/certs/%s" (default "tls.crt" .Values.tls.certFilename) -}} +{{- end -}} + +{{/* +Return the path to the cert key file. +*/}} +{{- define "grafana.tlsCertKey" -}} +{{- printf "/etc/grafana/certs/%s" (default "tls.key" .Values.tls.certKeyFilename) -}} +{{- end -}} + +{{/* +Return the path to the CA cert file. 
+*/}} +{{- define "grafana.tlsCACert" -}} +{{- printf "/etc/grafana/certs/%s" (default "ca.crt" .Values.tls.certCAFilename) -}} +{{- end -}} + +{{/* +Get the Grafana password key inside the secret +*/}} +{{- define "grafana.secretPasswordKey" -}} +{{- if .Values.auth.existingSecret -}} + {{- .Values.auth.existingSecretPasswordKey -}} +{{- else }} + {{- print "GF_SECURITY_ADMIN_PASSWORD" -}} +{{- end -}} +{{- end -}} + +{{- define "grafana.rootUrl" -}} +{{- $root := "%(protocol)s://%(domain)s:%(http_port)s" }} +{{- with .Values.subPath }} + {{- printf "%s/%s/" $root (trimAll "/" .) }} +{{- else }} + {{- printf "%s/" $root }} +{{- end -}} +{{- end -}} diff --git a/install/helm/charts/grafana/templates/dashboard-provider.yaml b/install/helm/charts/grafana/templates/dashboard-provider.yaml new file mode 100644 index 00000000..68e87059 --- /dev/null +++ b/install/helm/charts/grafana/templates/dashboard-provider.yaml @@ -0,0 +1,38 @@ +{{- if and .Values.dashboardsProvider.enabled (not .Values.dashboardsProvider.existingConfigMap) }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "common.fullname" . }}-provider + namespace: {{ include "common.namespace" . | quote }} + labels: + {{- include "common.labels" . | nindent 4 }} + {{- with (include "common.annotations" .) }} + annotations: + {{- . | nindent 4 }} + {{- end }} +data: + default.yaml: |- + apiVersion: 1 + providers: + # an unique provider name + - name: 'default' + # org id. will default to orgId 1 if not specified + orgId: 1 + # name of the dashboard folder. Required + folder: dashboards + # folder UID. will be automatically generated if not specified + folderUid: '' + # provider type. Required + type: file + # disable dashboard deletion + disableDeletion: false + # enable dashboard editing + editable: true + # how often Grafana will scan for changed dashboards + updateIntervalSeconds: 10 + options: + # path to dashboard files on disk. 
Required + path: /var/lib/grafana/dashboards + # enable folders creation for dashboards + foldersFromFilesStructure: true +{{- end }} diff --git a/install/helm/charts/grafana/templates/deployment.yaml b/install/helm/charts/grafana/templates/deployment.yaml new file mode 100644 index 00000000..1d685f21 --- /dev/null +++ b/install/helm/charts/grafana/templates/deployment.yaml @@ -0,0 +1,209 @@ +{{- $tlsEnabled := eq (include "common.tls.enabled" .) "true" }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "common.fullname" . }} + labels: + {{- include "common.labels" . | nindent 4 }} + {{- with (include "common.annotations" .) }} + annotations: + {{- . | nindent 4 }} + {{- end }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "common.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with (include "common.podAnnotations" .) }} + annotations: + {{- . | nindent 8 }} + {{- end }} + labels: + {{- include "common.podLabels" . | nindent 8 }} + spec: + serviceAccountName: {{ include "common.serviceAccountName" . }} + {{- include "common.imagePullSecrets" . | nindent 6 }} + {{- with .Values.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.terminationGracePeriodSeconds }} + terminationGracePeriodSeconds: {{ .Values.terminationGracePeriodSeconds }} + {{- end }} + containers: + - name: grafana + image: {{ include "grafana.image" . }} + imagePullPolicy: {{ .Values.image.pullPolicy | quote }} + {{- with .Values.containerSecurityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + env: + - name: GF_SECURITY_ADMIN_USER + value: {{ .Values.auth.username | quote }} + - name: GF_SECURITY_ADMIN_PASSWORD + valueFrom: + secretKeyRef: + name: {{ include "common.auth.secretName" . }} + key: {{ include "grafana.secretPasswordKey" . 
}} + {{- if $tlsEnabled }} + - name: GF_SERVER_PROTOCOL + value: https + - name: GF_SERVER_CERT_FILE + value: {{ include "grafana.tlsCert" . }} + - name: GF_SERVER_CERT_KEY + value: {{ include "grafana.tlsCertKey" . }} + {{- end }} + {{- if .Values.subPath }} + - name: GF_SERVER_ROOT_URL + value: {{ include "grafana.rootUrl" . | quote }} + - name: GF_SERVER_SERVE_FROM_SUB_PATH + value: "true" + {{- end }} + - name: GF_INSTALL_PLUGINS + value: {{ default "" .Values.plugins | quote }} + - name: GF_PATHS_PLUGINS + value: "/var/lib/grafana/plugins" + - name: GF_PATHS_PROVISIONING + value: "/etc/grafana/provisioning" + - name: GF_PATHS_CONFIG + value: "/etc/grafana/grafana.ini" + - name: GF_PATHS_DATA + value: "/var/lib/grafana/data" + - name: GF_PATHS_LOGS + value: "/var/log/grafana" + - name: GF_LOG_MODE + value: {{ .Values.logLevel | quote }} + {{- with .Values.extraEnvVars }} + {{- tpl (toYaml .) $ | nindent 12 }} + {{- end }} + ports: + - name: grafana + containerPort: {{ .Values.containerPorts.grafana }} + protocol: TCP + {{- with .Values.startupProbe }} + startupProbe: {{- toYaml . | nindent 12 }} + httpGet: + path: /api/health + port: grafana + scheme: {{ $tlsEnabled | ternary "HTTPS" "HTTP" }} + {{- end }} + {{- with .Values.livenessProbe }} + livenessProbe: {{- toYaml . | nindent 12 }} + httpGet: + path: /api/health + port: grafana + scheme: {{ $tlsEnabled | ternary "HTTPS" "HTTP" }} + {{- end }} + {{- with .Values.readinessProbe }} + readinessProbe: {{- toYaml . | nindent 12 }} + httpGet: + path: /api/health + port: grafana + scheme: {{ $tlsEnabled | ternary "HTTPS" "HTTP" }} + {{- end }} + {{- with .Values.resources }} + resources: + {{- toYaml . 
| nindent 12 }} + {{- end }} + volumeMounts: + - name: empty-dir + mountPath: /tmp + subPath: tmp-dir + - name: empty-dir + mountPath: /var/lib/grafana/plugins + subPath: plugin-dir + {{- if $tlsEnabled }} + - name: grafana-certificates + mountPath: /etc/grafana/certs + {{- end }} + {{- if or .Values.configuration .Values.existingConfigmap }} + - name: config + mountPath: /etc/grafana/grafana.ini + subPath: grafana.ini + {{- end }} + - name: data + mountPath: /var/lib/grafana/data + {{- if .Values.dashboardsProvider.enabled }} + - name: dashboards-provider + mountPath: /etc/grafana/provisioning/dashboards + {{- end }} + {{- range .Values.dashboardsConfigMaps }} + - name: {{ .configMapName }} + {{- $path := "/var/lib/grafana/dashboards" }} + {{- if .folderName }} + {{- $path = printf "%s/%s" $path .folderName | trimSuffix "/" }} + {{- end }} + {{- if .fileName }} + {{- $path = printf "%s/%s" $path .fileName }} + {{- end }} + mountPath: {{ $path }} + {{- if .fileName }} + subPath: {{ .fileName }} + {{- end }} + {{- end }} + {{- if .Values.datasourcesSecretName }} + - name: datasources + mountPath: /etc/grafana/provisioning/datasources + {{- end }} + {{- with .Values.volumeMounts }} + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + volumes: + - name: empty-dir + emptyDir: {} + {{- if or .Values.configuration .Values.existingConfigmap }} + - name: config + secret: + secretName: {{ include "common.configSecretName" . }} + {{- end }} + {{- if $tlsEnabled }} + - name: grafana-certificates + secret: + secretName: {{ include "common.tls.secretName" . 
}} + defaultMode: 256 + {{- end }} + {{- if .Values.dashboardsProvider.enabled }} + - name: dashboards-provider + configMap: + {{- if .Values.dashboardsProvider.existingConfigMap }} + name: {{ .Values.dashboardsProvider.existingConfigMap }} + {{- else }} + name: {{ include "common.fullname" . }}-provider + {{- end }} + {{- end }} + {{- range .Values.dashboardsConfigMaps }} + - name: {{ .configMapName }} + configMap: + name: {{ .configMapName }} + {{- end }} + {{- if .Values.datasourcesSecretName }} + - name: datasources + secret: + secretName: {{ .Values.datasourcesSecretName }} + {{- end }} + {{- with .Values.volumes }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if not .Values.persistence.enabled }} + - name: data + emptyDir: {} + {{- else }} + - name: data + persistentVolumeClaim: + claimName: {{ include "common.pvcName" . }} + {{- end }} diff --git a/install/helm/charts/grafana/templates/networkpolicy.yaml b/install/helm/charts/grafana/templates/networkpolicy.yaml new file mode 100644 index 00000000..e181cb61 --- /dev/null +++ b/install/helm/charts/grafana/templates/networkpolicy.yaml @@ -0,0 +1,41 @@ +{{- if .Values.networkPolicy.enabled }} +{{- $tlsEnabled := eq (include "common.tls.enabled" .) "true" }} +kind: NetworkPolicy +apiVersion: networking.k8s.io/v1 +metadata: + name: {{ include "common.fullname" . }} + namespace: {{ include "common.namespace" . | quote }} + labels: + {{- include "common.labels" . | nindent 4 }} + {{- with (include "common.annotations" .) }} + annotations: + {{- . | nindent 4 }} + {{- end }} +spec: + podSelector: + matchLabels: + {{- include "common.selectorLabels" . | nindent 6 }} + policyTypes: + - Ingress + - Egress + egress: + - {{ printf "{}" }} + {{- with .Values.networkPolicy.extraEgress }} + {{- toYaml . 
| nindent 4 }} + {{- end }} + ingress: + - ports: + - port: {{ .Values.containerPorts.grafana }} + {{- if not .Values.networkPolicy.allowExternal }} + from: + - podSelector: + matchLabels: + {{- include "common.selectorLabels" . | nindent 14 }} + - podSelector: + matchLabels: + {{ include "common.fullname" . }}-client: "true" + {{- end }} + {{- with .Values.networkPolicy.extraIngress }} + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} diff --git a/install/helm/charts/grafana/templates/pvc.yaml b/install/helm/charts/grafana/templates/pvc.yaml new file mode 100644 index 00000000..0f3510a1 --- /dev/null +++ b/install/helm/charts/grafana/templates/pvc.yaml @@ -0,0 +1,29 @@ +{{- if and .Values.persistence.enabled (not .Values.persistence.existingClaim) }} +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: {{ include "common.pvcName" . }} + namespace: {{ .Release.Namespace | quote }} + {{- with (include "common.mergeAnnotations" (dict "context" . "value" .Values.persistence.annotations)) }} + annotations: + {{- . | nindent 4 }} + {{- end }} + {{- with (include "common.mergeLabels" (dict "context" . "value" .Values.persistence.labels)) }} + labels: + {{- . | nindent 4 }} + {{- end }} +spec: + accessModes: {{- toYaml .Values.persistence.accessModes | nindent 10 }} + {{- with .Values.persistence.dataSource }} + dataSource: {{- toYaml . | nindent 10 }} + {{- end }} + resources: + requests: + storage: {{ .Values.persistence.size | quote }} + {{- with .Values.persistence.selector }} + selector: {{- toYaml . | nindent 10 }} + {{- end }} + {{- with (default .Values.persistence.storageClass (.Values.global).storageClass) }} + storageClassName: {{ eq . "-" | ternary "" . 
| quote }} + {{- end }} +{{- end }} diff --git a/install/helm/charts/grafana/templates/secret.yaml b/install/helm/charts/grafana/templates/secret.yaml new file mode 100644 index 00000000..f0eec884 --- /dev/null +++ b/install/helm/charts/grafana/templates/secret.yaml @@ -0,0 +1,19 @@ +{{- if and (not .Values.auth.existingSecret) (not ((.Values.global.grafana).auth).existingSecret) }} +{{- $secretName := include "common.auth.secretName" . }} +{{- $key := include "grafana.secretPasswordKey" . }} +{{- $password := include "common.secrets.password" (dict "context" . "secret" $secretName "key" $key "defaultValue" .Values.auth.password) }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ $secretName }} + namespace: {{ include "common.namespace" . | quote }} + labels: + {{- include "common.labels" . | nindent 4 }} + {{- with (include "common.annotations" .) }} + annotations: + {{- . | nindent 4 }} + {{- end }} +type: Opaque +data: + {{ $key }}: {{ $password | b64enc | quote }} +{{- end }} diff --git a/install/helm/charts/grafana/templates/service.yaml b/install/helm/charts/grafana/templates/service.yaml new file mode 100644 index 00000000..a5bca9e0 --- /dev/null +++ b/install/helm/charts/grafana/templates/service.yaml @@ -0,0 +1,20 @@ +{{- $tlsEnabled := eq (include "common.tls.enabled" .) "true" }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "common.fullname" . }} + labels: + {{- include "common.labels" . | nindent 4 }} + {{- with (include "common.mergeAnnotations" (dict "context" . "value" .Values.service.annotations)) }} + annotations: + {{- . | nindent 4 }} + {{- end }} +spec: + type: {{ .Values.service.type }} + ports: + - name: grafana + targetPort: grafana + port: {{ .Values.service.ports.grafana }} + protocol: TCP + selector: + {{- include "common.selectorLabels" . 
| nindent 4 }} diff --git a/install/helm/charts/grafana/templates/serviceaccount.yaml b/install/helm/charts/grafana/templates/serviceaccount.yaml new file mode 100644 index 00000000..090e1493 --- /dev/null +++ b/install/helm/charts/grafana/templates/serviceaccount.yaml @@ -0,0 +1,13 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "common.serviceAccountName" . }} + labels: + {{- include "common.labels" . | nindent 4 }} + {{- with (include "common.mergeAnnotations" (dict "context" . "value" .Values.serviceAccount.annotations)) }} + annotations: + {{- . | nindent 4 }} + {{- end }} +automountServiceAccountToken: {{ .Values.serviceAccount.automount }} +{{- end }} diff --git a/install/helm/charts/grafana/templates/tls-secret.yaml b/install/helm/charts/grafana/templates/tls-secret.yaml new file mode 100644 index 00000000..5f93344c --- /dev/null +++ b/install/helm/charts/grafana/templates/tls-secret.yaml @@ -0,0 +1,17 @@ +{{- if (eq (include "common.tls.createSecret" .) "true") }} +--- +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "common.tls.secretName" . }} + namespace: {{ include "common.namespace" . | quote }} + labels: + {{- include "common.labels" . | nindent 4 }} + {{- with (include "common.annotations" .) }} + annotations: + {{- . | nindent 4 }} + {{- end }} +type: kubernetes.io/tls +data: + {{- include "common.tls.generateSecret" (dict "context" .) 
| indent 2 }} +{{- end }} diff --git a/install/helm/charts/grafana/values.schema.json b/install/helm/charts/grafana/values.schema.json new file mode 100644 index 00000000..bd8226f1 --- /dev/null +++ b/install/helm/charts/grafana/values.schema.json @@ -0,0 +1,670 @@ +{ + "title": "Chart Values", + "type": "object", + "properties": { + "global": { + "type": "object", + "properties": { + "imageRegistry": { + "type": "string", + "description": "Global Docker image registry", + "default": "" + }, + "imagePullSecrets": { + "type": "array", + "description": "Global Docker registry secret names as an array", + "default": [], + "items": {} + }, + "storageClass": { + "type": "string", + "description": "Global StorageClass for Persistent Volume(s)", + "default": "" + }, + "grafana": { + "type": "object", + "properties": { + "tls": { + "type": "object", + "properties": { + "enabled": { + "type": [ + "boolean", + "null" + ], + "description": "Enable TLS traffic support (overrides `tls.enabled`)", + "default": null, + "nullable": true + }, + "autoGenerated": { + "type": [ + "boolean", + "null" + ], + "description": "Generate automatically self-signed TLS certificates (overrides `tls.autoGenerated`)", + "default": null, + "nullable": true + }, + "existingSecret": { + "type": "string", + "description": "Name of an existing secret that contains the certificates (overrides `tls.existingSecret`)", + "default": "" + }, + "cert": { + "type": "string", + "description": "Certificate value. Requires `tls.autoGenerated` to be `false` (overrides `tls.cert`)", + "default": "" + }, + "certKey": { + "type": "string", + "description": "Certificate key value. Requires `tls.autoGenerated` to be `false` (overrides `tls.certKey`)", + "default": "" + }, + "certCA": { + "type": "string", + "description": "CA Certificate value. 
Requires `tls.autoGenerated` to be `false` (overrides `tls.certCA`)", + "default": "" + } + } + } + } + } + } + }, + "nameOverride": { + "type": "string", + "description": "String to partially override common.fullname template (will maintain the release name)", + "default": "" + }, + "fullnameOverride": { + "type": "string", + "description": "String to fully override common.fullname template", + "default": "" + }, + "namespaceOverride": { + "type": "string", + "description": "String to fully override common.namespace", + "default": "" + }, + "clusterDomain": { + "type": "string", + "description": "Kubernetes Cluster Domain", + "default": "cluster.local" + }, + "labels": { + "type": "object", + "description": "Add labels to all the deployed resources", + "default": {} + }, + "annotations": { + "type": "object", + "description": "Add annotations to all the deployed resources", + "default": {} + }, + "imagePullSecrets": { + "type": "array", + "description": "Global Docker registry secret names as an array", + "default": [], + "items": {} + }, + "image": { + "type": "object", + "properties": { + "registry": { + "type": "string", + "description": "Grafana image registry", + "default": "REGISTRY_NAME" + }, + "repository": { + "type": "string", + "description": "Grafana image repository", + "default": "REPOSITORY_NAME/redis" + }, + "digest": { + "type": "string", + "description": "Grafana image digest in the way sha256:aa.... 
Please note this parameter, if set, will override the tag", + "default": "" + }, + "pullPolicy": { + "type": "string", + "description": "Grafana image pull policy", + "default": "IfNotPresent" + } + } + }, + "auth": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean", + "description": "Enable password authentication", + "default": true + }, + "username": { + "type": "string", + "description": "Grafana username", + "default": "admin" + }, + "password": { + "type": "string", + "description": "Grafana password", + "default": "" + }, + "existingSecret": { + "type": "string", + "description": "The name of an existing secret with Grafana credentials", + "default": "" + }, + "existingSecretPasswordKey": { + "type": "string", + "description": "Password key to be retrieved from existing secret", + "default": "" + } + } + }, + "tls": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean", + "description": "Enable TLS traffic", + "default": false + }, + "autoGenerated": { + "type": "boolean", + "description": "Enable autogenerated certificates", + "default": false + }, + "existingSecret": { + "type": "string", + "description": "The name of the existing secret that contains the TLS certificates", + "default": "" + }, + "certFilename": { + "type": "string", + "description": "Certificate filename", + "default": "tls.crt" + }, + "certKeyFilename": { + "type": "string", + "description": "Certificate Key filename", + "default": "tls.key" + }, + "certCAFilename": { + "type": "string", + "description": "CA Certificate filename", + "default": "ca.crt" + }, + "cert": { + "type": "string", + "description": "Certificate value. Requires `tls.autoGenerated` to be `false`", + "default": "" + }, + "certKey": { + "type": "string", + "description": "Certificate key value. Requires `tls.autoGenerated` to be `false`", + "default": "" + }, + "certCA": { + "type": "string", + "description": "CA Certificate value. 
Requires `tls.autoGenerated` to be `false`", + "default": "" + } + } + }, + "plugins": { + "type": "string", + "description": "Grafana plugins to be installed in deployment time separated by commas", + "default": "" + }, + "configuration": { + "type": "string", + "description": "Grafana's grafana.ini configuration to be injected as Secret", + "default": "" + }, + "existingSecret": { + "type": "string", + "description": "Name of an existing Secret with grafana.ini configuration (must contain grafana.ini key)", + "default": "" + }, + "dashboardsProvider": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean", + "description": "Enable the use of a Grafana dashboard provider", + "default": false + }, + "existingConfigMap": { + "type": "string", + "description": "Name of a ConfigMap containing a custom dashboard provider", + "default": "" + } + } + }, + "dashboardsConfigMaps": { + "type": "array", + "description": "Array with the names of a series of ConfigMaps containing dashboards files", + "default": [], + "items": {} + }, + "datasourcesSecretName": { + "type": "string", + "description": "The name of an externally-managed secret containing custom datasource files.", + "default": "" + }, + "subPath": { + "type": "string", + "description": "Use sub path for grafana for exposing it via reverse proxy", + "default": "" + }, + "logLevel": { + "type": "string", + "description": "Logging level", + "default": "console" + }, + "replicaCount": { + "type": "number", + "description": "Number of Grafana instances to deploy", + "default": 1 + }, + "podLabels": { + "type": "object", + "description": "Extra labels for Grafana master pods", + "default": {} + }, + "podAnnotations": { + "type": "object", + "description": "Annotations for Grafana master pods", + "default": {} + }, + "affinity": { + "type": "object", + "description": "Affinity for Grafana master pods assignment", + "default": {} + }, + "nodeSelector": { + "type": "object", + "description": "Node 
labels for Grafana master pods assignment", + "default": {} + }, + "tolerations": { + "type": "array", + "description": "Tolerations for Grafana master pods assignment", + "default": [], + "items": {} + }, + "terminationGracePeriodSeconds": { + "type": "string", + "description": "Seconds Grafana pod needs to terminate gracefully", + "default": "" + }, + "extraEnvVars": { + "type": "array", + "description": "Array with extra environment variables to add to Grafana master nodes", + "default": [], + "items": {} + }, + "volumes": { + "type": "array", + "description": "Optionally specify extra list of additional volumes for the Grafana master pod(s)", + "default": [], + "items": {} + }, + "volumeMounts": { + "type": "array", + "description": "Optionally specify extra list of additional volumeMounts for the Grafana master container(s)", + "default": [], + "items": {} + }, + "containerPorts": { + "type": "object", + "properties": { + "grafana": { + "type": "number", + "description": "Container port to open on Grafana master nodes", + "default": 3000 + } + } + }, + "podSecurityContext": { + "type": "object", + "properties": { + "fsGroupChangePolicy": { + "type": "string", + "description": "Set filesystem group change policy", + "default": "Always" + }, + "sysctls": { + "type": "array", + "description": "Set kernel settings using the sysctl interface", + "default": [], + "items": {} + }, + "supplementalGroups": { + "type": "array", + "description": "Set filesystem extra groups", + "default": [], + "items": {} + }, + "fsGroup": { + "type": "number", + "description": "Set Grafana master pod's Security Context fsGroup", + "default": 1001 + } + } + }, + "containerSecurityContext": { + "type": "object", + "properties": { + "runAsUser": { + "type": "number", + "description": "Set Grafana master containers' Security Context runAsUser", + "default": 1001 + }, + "runAsGroup": { + "type": "number", + "description": "Set Grafana master containers' Security Context runAsGroup", + 
"default": 1001 + }, + "runAsNonRoot": { + "type": "boolean", + "description": "Set Grafana master containers' Security Context runAsNonRoot", + "default": true + }, + "allowPrivilegeEscalation": { + "type": "boolean", + "description": "Is it possible to escalate Grafana pod(s) privileges", + "default": false + }, + "readOnlyRootFilesystem": { + "type": "boolean", + "description": "Set container's Security Context read-only root filesystem", + "default": true + }, + "seccompProfile": { + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "Set Grafana master containers' Security Context seccompProfile", + "default": "RuntimeDefault" + } + } + }, + "capabilities": { + "type": "object", + "properties": { + "drop": { + "type": "array", + "description": "Set Grafana master containers' Security Context capabilities to drop", + "default": [ + "ALL" + ], + "items": { + "type": "string" + } + } + } + } + } + }, + "startupProbe": { + "type": "object", + "properties": { + "initialDelaySeconds": { + "type": "number", + "description": "Initial delay seconds for startupProbe", + "default": 20 + }, + "periodSeconds": { + "type": "number", + "description": "Period seconds for startupProbe", + "default": 5 + }, + "timeoutSeconds": { + "type": "number", + "description": "Timeout seconds for startupProbe", + "default": 5 + }, + "successThreshold": { + "type": "number", + "description": "Success threshold for startupProbe", + "default": 1 + }, + "failureThreshold": { + "type": "number", + "description": "Failure threshold for startupProbe", + "default": 5 + } + } + }, + "livenessProbe": { + "type": "object", + "properties": { + "initialDelaySeconds": { + "type": "number", + "description": "Initial delay seconds for livenessProbe", + "default": 20 + }, + "periodSeconds": { + "type": "number", + "description": "Period seconds for livenessProbe", + "default": 5 + }, + "timeoutSeconds": { + "type": "number", + "description": "Timeout seconds for 
livenessProbe", + "default": 5 + }, + "successThreshold": { + "type": "number", + "description": "Failure threshold for livenessProbe", + "default": 1 + }, + "failureThreshold": { + "type": "number", + "description": "Success threshold for livenessProbe", + "default": 5 + } + } + }, + "readinessProbe": { + "type": "object", + "properties": { + "initialDelaySeconds": { + "type": "number", + "description": "Initial delay seconds for readinessProbe", + "default": 20 + }, + "periodSeconds": { + "type": "number", + "description": "Period seconds for readinessProbe", + "default": 5 + }, + "timeoutSeconds": { + "type": "number", + "description": "Timeout seconds for readinessProbe", + "default": 1 + }, + "successThreshold": { + "type": "number", + "description": "Failure threshold for readinessProbe", + "default": 1 + }, + "failureThreshold": { + "type": "number", + "description": "Success threshold for readinessProbe", + "default": 5 + } + } + }, + "service": { + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "Grafana master service type", + "default": "ClusterIP" + }, + "ports": { + "type": "object", + "properties": { + "grafana": { + "type": "number", + "description": "Grafana master service port", + "default": 3000 + } + } + }, + "annotations": { + "type": "object", + "description": "Additional custom annotations for Grafana master service", + "default": {} + }, + "headless": { + "type": "object", + "properties": { + "annotations": { + "type": "object", + "description": "Annotations for the headless service.", + "default": {} + } + } + } + } + }, + "persistence": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean", + "description": "Enable persistence using Persistent Volume Claims", + "default": false + }, + "existingClaim": { + "type": "string", + "description": "Name of an existing PVC to use", + "default": "" + }, + "storageClass": { + "type": "string", + "description": "Storage class of backing PVC", + 
"default": "" + }, + "labels": { + "type": "object", + "description": "Persistent Volume Claim labels", + "default": {} + }, + "annotations": { + "type": "object", + "description": "Persistent Volume Claim annotations", + "default": {} + }, + "accessModes": { + "type": "array", + "description": "Persistent Volume Access Modes", + "default": [ + "ReadWriteOnce" + ], + "items": { + "type": "string" + } + }, + "size": { + "type": "string", + "description": "Size of data volume", + "default": "8Gi" + }, + "selector": { + "type": "object", + "description": "Selector to match an existing Persistent Volume for ClickHouse data PVC", + "default": {} + }, + "dataSource": { + "type": "object", + "description": "Custom PVC data source", + "default": {} + }, + "persistentVolumeClaimRetentionPolicy": { + "type": "object", + "description": "PostgreSQL Persistent Volume Claim Retention Policy", + "default": {} + } + } + }, + "serviceAccount": { + "type": "object", + "properties": { + "create": { + "type": "boolean", + "description": "Specifies whether a ServiceAccount should be created", + "default": true + }, + "name": { + "type": "string", + "description": "The name of the ServiceAccount to use.", + "default": "" + }, + "automount": { + "type": "boolean", + "description": "Whether to auto mount the service account token", + "default": false + }, + "annotations": { + "type": "object", + "description": "Additional custom annotations for the ServiceAccount", + "default": {} + } + } + }, + "metrics": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean", + "description": "Enable the export of Prometheus metrics", + "default": false + }, + "externalHost": { + "type": "string", + "description": "Grafana metrics external host", + "default": "" + } + } + }, + "networkPolicy": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean", + "description": "Enable creation of NetworkPolicy resources", + "default": true + }, + "allowExternal": { + "type": 
"boolean", + "description": "Don't require client label for connections", + "default": true + }, + "allowExternalEgress": { + "type": "boolean", + "description": "Allow the pod to access any range of port and all destinations.", + "default": true + }, + "extraIngress": { + "type": "array", + "description": "Add extra ingress rules to the NetworkPolicy", + "default": [], + "items": {} + }, + "extraEgress": { + "type": "array", + "description": "Add extra egress rules to the NetworkPolicy", + "default": [], + "items": {} + } + } + } + } +} \ No newline at end of file diff --git a/install/helm/charts/grafana/values.yaml b/install/helm/charts/grafana/values.yaml new file mode 100644 index 00000000..cc322f60 --- /dev/null +++ b/install/helm/charts/grafana/values.yaml @@ -0,0 +1,372 @@ +# @section Global parameters +global: + # -- Global Docker image registry + imageRegistry: "" + # -- Global Docker registry secret names as an array + # e.g. + # imagePullSecrets: + # - myRegistryKeySecretName + imagePullSecrets: [] + # -- Global StorageClass for Persistent Volume(s) + storageClass: "" + + grafana: + tls: + # -- [boolean, nullable] Enable TLS traffic support (overrides `tls.enabled`) + enabled: null + # -- [boolean, nullable] Generate automatically self-signed TLS certificates (overrides `tls.autoGenerated`) + autoGenerated: null + # -- Name of an existing secret that contains the certificates (overrides `tls.existingSecret`) + existingSecret: "" + # -- Certificate value. Requires `tls.autoGenerated` to be `false` (overrides `tls.cert`) + cert: "" + # -- Certificate key value. Requires `tls.autoGenerated` to be `false` (overrides `tls.certKey`) + certKey: "" + # -- CA Certificate value. 
Requires `tls.autoGenerated` to be `false` (overrides `tls.certCA`) + certCA: "" + +# @section Common parameters +# -- String to partially override common.fullname template (will maintain the release name) +nameOverride: "" +# -- String to fully override common.fullname template +fullnameOverride: "" +# -- String to fully override common.namespace +namespaceOverride: "" +# -- Kubernetes Cluster Domain +clusterDomain: cluster.local +# -- Add labels to all the deployed resources +labels: {} +# -- Add annotations to all the deployed resources +annotations: {} +# -- Global Docker registry secret names as an array +# e.g. +# imagePullSecrets: +# - myRegistryKeySecretName +imagePullSecrets: [] + +# @section Grafana parameters +# Grafana image +image: + # -- [default: REGISTRY_NAME] Grafana image registry + registry: "" + # -- [default: REPOSITORY_NAME/grafana] Grafana image repository + repository: grafana/grafana + # @skip image.tag Grafana image tag (immutable tags are recommended) + tag: 12.0.2 + # -- Grafana image digest in the way sha256:aa....
Please note this parameter, if set, will override the tag + digest: "" + # -- Grafana image pull policy + # Specify a imagePullPolicy + # Defaults to 'Always' if image tag is 'latest', else set to 'IfNotPresent' + # ref: https://kubernetes.io/docs/concepts/containers/images/#pre-pulled-images + pullPolicy: IfNotPresent +# Authentication +auth: + # -- Enable password authentication + enabled: true + # -- Grafana username + username: "admin" + # -- Grafana password + password: "" + # -- The name of an existing secret with Grafana credentials + # Defaults to a random 10-character alphanumeric string if not set + existingSecret: "" + # -- Password key to be retrieved from existing secret + existingSecretPasswordKey: "" +# TLS configuration +tls: + # -- Enable TLS traffic + enabled: false + # -- Enable autogenerated certificates + autoGenerated: false + # -- The name of the existing secret that contains the TLS certificates + existingSecret: "" + # -- Certificate filename + certFilename: "tls.crt" + # -- Certificate Key filename + certKeyFilename: "tls.key" + # -- CA Certificate filename + certCAFilename: "ca.crt" + # -- Certificate value. Requires `tls.autoGenerated` to be `false` + cert: "" + # -- Certificate key value. Requires `tls.autoGenerated` to be `false` + certKey: "" + # -- CA Certificate value. 
Requires `tls.autoGenerated` to be `false` + certCA: "" +# Grafana resource requests and limits +# ref: http://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/ +# -- [object] Set container requests and limits for different resources like CPU or memory +resources: + requests: + cpu: 500m + memory: 1024Mi + ephemeral-storage: 50Mi + limits: + cpu: "1.0" + memory: 1536Mi + ephemeral-storage: 1024Mi +# @section Grafana configuration parameters +# -- Grafana plugins to be installed in deployment time separated by commas +# Specify plugins as a list separated by commas ( you will need to escape them when specifying from command line ) +# Example: +# plugins: grafana-kubernetes-app,grafana-example-app +plugins: "" +# Parameters to override the default grafana.ini file. +# It is needed to create a configmap containing the grafana.ini file. +# -- Grafana's grafana.ini configuration to be injected as Secret +configuration: "" +# -- Name of an existing Secret with grafana.ini configuration (must contain grafana.ini key) +existingSecret: "" +# Create dashboard provider to load dashboards, a default one is created to load dashboards +# from "/var/lib/grafana/dashboards" +dashboardsProvider: + # -- Enable the use of a Grafana dashboard provider + enabled: false + # -- Name of a ConfigMap containing a custom dashboard provider + existingConfigMap: "" +# -- Array with the names of a series of ConfigMaps containing dashboards files +# They will be mounted by the default dashboard provider if it is enabled +# Use an array with the configMap names. +# In order to use subfolders, uncomment "#foldersFromFilesStructure: true" line in default provider config, or create your own dashboard provider.
+# Example: +# dashboardsConfigMaps: +# - configMapName: mydashboard +# folderName: foo +# fileName: mydashboard.json +# - configMapName: myotherdashboard +# folderName: bar +# fileName: myotherdashboard.json +dashboardsConfigMaps: [] +# Import datasources from an externally-managed secret, or a secret definition set via Helm values. +# -- The name of an externally-managed secret containing custom datasource files. +datasourcesSecretName: "" +# -- Use sub path for grafana for exposing it via reverse proxy +subPath: "" +# -- Logging level +logLevel: console +# -- Number of Grafana instances to deploy +replicaCount: 1 +# -- Extra labels for Grafana master pods +# ref: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ +podLabels: {} +# -- Annotations for Grafana master pods +# ref: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/ +podAnnotations: {} +# -- Affinity for Grafana master pods assignment +# ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity +# NOTE: `podAffinityPreset`, `podAntiAffinityPreset`, and `nodeAffinityPreset` will be ignored when it's set +affinity: {} +# -- Node labels for Grafana master pods assignment +# ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/ +nodeSelector: {} +# -- Tolerations for Grafana master pods assignment +# ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ +tolerations: [] +# -- Seconds Grafana pod needs to terminate gracefully +# ref: https://kubernetes.io/docs/concepts/workloads/pods/pod/#termination-of-pods +terminationGracePeriodSeconds: "" +# -- Array with extra environment variables to add to Grafana master nodes +# e.g: +# extraEnvVars: +# - name: FOO +# value: "bar" +extraEnvVars: [] +# -- Optionally specify extra list of additional volumes for the Grafana master pod(s) +volumes: [] +# -- Optionally specify extra list of additional volumeMounts for the Grafana 
master container(s) +volumeMounts: [] +containerPorts: + # -- Container port to open on Grafana master nodes + grafana: 3000 +# Configure Pods Security Context +# ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod +podSecurityContext: + # -- Set filesystem group change policy + fsGroupChangePolicy: Always + # -- Set kernel settings using the sysctl interface + sysctls: [] + # -- Set filesystem extra groups + supplementalGroups: [] + # -- Set Grafana master pod's Security Context fsGroup + fsGroup: 1001 +# Configure Container Security Context +# ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod +containerSecurityContext: + # -- [object,nullable] Set SELinux options in container + seLinuxOptions: {} + # -- Set Grafana master containers' Security Context runAsUser + runAsUser: 1001 + # -- Set Grafana master containers' Security Context runAsGroup + runAsGroup: 1001 + # -- Set Grafana master containers' Security Context runAsNonRoot + runAsNonRoot: true + # -- Is it possible to escalate Grafana pod(s) privileges + allowPrivilegeEscalation: false + # -- Set container's Security Context read-only root filesystem + readOnlyRootFilesystem: true + seccompProfile: + # -- Set Grafana master containers' Security Context seccompProfile + type: RuntimeDefault + capabilities: + # -- Set Grafana master containers' Security Context capabilities to drop + drop: ["ALL"] +# Configure extra options for Grafana containers' liveness and readiness probes +# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes +startupProbe: + # -- Initial delay seconds for startupProbe + initialDelaySeconds: 20 + # -- Period seconds for startupProbe + periodSeconds: 5 + # -- Timeout seconds for startupProbe + timeoutSeconds: 5 + # -- Success threshold for startupProbe + successThreshold: 1 + # -- Failure threshold
for startupProbe + failureThreshold: 5 +livenessProbe: + # -- Initial delay seconds for livenessProbe + initialDelaySeconds: 20 + # -- Period seconds for livenessProbe + periodSeconds: 5 + # -- Timeout seconds for livenessProbe + timeoutSeconds: 5 + # -- Success threshold for livenessProbe + successThreshold: 1 + # -- Failure threshold for livenessProbe + failureThreshold: 5 +readinessProbe: + # -- Initial delay seconds for readinessProbe + initialDelaySeconds: 20 + # -- Period seconds for readinessProbe + periodSeconds: 5 + # -- Timeout seconds for readinessProbe + timeoutSeconds: 1 + # -- Success threshold for readinessProbe + successThreshold: 1 + # -- Failure threshold for readinessProbe + failureThreshold: 5 +# @section Service Parameters +# Grafana master service parameters +service: + # -- Grafana master service type + type: ClusterIP + ports: + # -- Grafana master service port + grafana: 3000 + # -- Additional custom annotations for Grafana master service + annotations: {} + # Headless service properties + headless: + # -- Annotations for the headless service. + annotations: {} +# @section Persistence Parameters +# Enable persistence using Persistent Volume Claims +# ref: https://kubernetes.io/docs/concepts/storage/persistent-volumes/ +persistence: + # -- Enable persistence using Persistent Volume Claims + enabled: false + # -- Name of an existing PVC to use + existingClaim: "" + # -- Storage class of backing PVC + # If defined, storageClassName: + # If set to "-", storageClassName: "", which disables dynamic provisioning + # If undefined (the default) or set to null, no storageClassName spec is + # set, choosing the default provisioner.
(gp2 on AWS, standard on + # GKE, AWS & OpenStack) + storageClass: "" + # -- Persistent Volume Claim labels + labels: {} + # -- Persistent Volume Claim annotations + annotations: {} + # -- Persistent Volume Access Modes + accessModes: + - ReadWriteOnce + # -- Size of data volume + size: 8Gi + # -- Selector to match an existing Persistent Volume for Grafana data PVC + # If set, the PVC can't have a PV dynamically provisioned for it + # E.g. + # selector: + # matchLabels: + # app: my-app + selector: {} + # -- Custom PVC data source + dataSource: {} + # -- Grafana Persistent Volume Claim Retention Policy + # ref: https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#persistentvolumeclaim-retention + # Example: + # whenScaled: Retain + # whenDeleted: Retain + persistentVolumeClaimRetentionPolicy: {} +# @section Other Parameters +# Service account +# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ +serviceAccount: + # -- Specifies whether a ServiceAccount should be created + create: true + # -- The name of the ServiceAccount to use.
+ # If not set and create is true, a name is generated using the common.fullname template + name: "" + # -- Whether to auto mount the service account token + # ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/#use-the-default-service-account-to-access-the-api-server + automount: false + # -- Additional custom annotations for the ServiceAccount + annotations: {} +# Prometheus metrics +metrics: + # -- Enable the export of Prometheus metrics + enabled: false + # -- Grafana metrics external host + externalHost: "" + # -- [object] Annotations for metrics scraping + podAnnotations: + prometheus.io/scrape: "true" + prometheus.io/port: "{{ .Values.containerPorts.grafana }}" + prometheus.io/path: "/metrics" +# Network Policy configuration +# ref: https://kubernetes.io/docs/concepts/services-networking/network-policies/ +networkPolicy: + # -- Enable creation of NetworkPolicy resources + enabled: true + # -- Don't require client label for connections + # When set to false, only pods with the correct client label will have network access to the ports + # Grafana is listening on. When true, Grafana will accept connections from any source + # (with the correct destination port). + allowExternal: true + # -- Allow the pod to access any range of port and all destinations. 
+ allowExternalEgress: true + # -- Add extra ingress rules to the NetworkPolicy + # e.g: + # extraIngress: + # - ports: + # - port: 1234 + # from: + # - podSelector: + # - matchLabels: + # - role: frontend + # - podSelector: + # - matchExpressions: + # - key: role + # operator: In + # values: + # - frontend + extraIngress: [] + # -- Add extra egress rules to the NetworkPolicy + # e.g: + # extraEgress: + # - ports: + # - port: 1234 + # to: + # - podSelector: + # - matchLabels: + # - role: frontend + # - podSelector: + # - matchExpressions: + # - key: role + # operator: In + # values: + # - frontend + extraEgress: [] + +# @skip +deployed: true diff --git a/install/helm/charts/postgresql/README.md b/install/helm/charts/postgresql/README.md index 346ba672..73a506b3 100644 --- a/install/helm/charts/postgresql/README.md +++ b/install/helm/charts/postgresql/README.md @@ -4,17 +4,18 @@ ### Global parameters -| Name | Description | Value | -| -------------------------------------- | ------------------------------------------------------------------------------------------- | ----- | -| `global.imageRegistry` | Global Docker image registry | `""` | -| `global.imagePullSecrets` | Global Docker registry secret names as an array | `[]` | -| `global.storageClass` | Global StorageClass for Persistent Volume(s) | `""` | -| `global.postgresql.tls.enabled` | Enable TLS traffic support (overrides `tls.enabled`) | `nil` | -| `global.postgresql.tls.autoGenerated` | Generate automatically self-signed TLS certificates (overrides `tls.autoGenerated`) | `nil` | -| `global.postgresql.tls.existingSecret` | Name of an existing secret that contains the certificates (overrides `tls.existingSecret`) | `""` | -| `global.postgresql.tls.cert` | Certificate value. Requires `tls.autoGenerated` to be `false` (overrides `tls.cert`) | `""` | -| `global.postgresql.tls.certKey` | Certificate key value. 
Requires `tls.autoGenerated` to be `false` (overrides `tls.certKey`) | `""` | -| `global.postgresql.tls.certCA` | CA Certificate value. Requires `tls.autoGenerated` to be `false` (overrides `tls.certCA`) | `""` | +| Name | Description | Value | +| --------------------------------------- | ------------------------------------------------------------------------------------------- | ----- | +| `global.imageRegistry` | Global Docker image registry | `""` | +| `global.imagePullSecrets` | Global Docker registry secret names as an array | `[]` | +| `global.storageClass` | Global StorageClass for Persistent Volume(s) | `""` | +| `global.postgresql.auth.existingSecret` | Name of an existing secret that contains auth credentials (overrides `auth.existingSecret`) | `""` | +| `global.postgresql.tls.enabled` | Enable TLS traffic support (overrides `tls.enabled`) | `nil` | +| `global.postgresql.tls.autoGenerated` | Generate automatically self-signed TLS certificates (overrides `tls.autoGenerated`) | `nil` | +| `global.postgresql.tls.existingSecret` | Name of an existing secret that contains the certificates (overrides `tls.existingSecret`) | `""` | +| `global.postgresql.tls.cert` | Certificate value. Requires `tls.autoGenerated` to be `false` (overrides `tls.cert`) | `""` | +| `global.postgresql.tls.certKey` | Certificate key value. Requires `tls.autoGenerated` to be `false` (overrides `tls.certKey`) | `""` | +| `global.postgresql.tls.certCA` | CA Certificate value.
Requires `tls.autoGenerated` to be `false` (overrides `tls.certCA`) | `""` | ### Common parameters @@ -76,12 +77,10 @@ | `volumeMounts` | Optionally specify extra list of additional volumeMounts for the PostgreSQL container(s) | `[]` | | `volumes` | Optionally specify extra list of additional volumes for the PostgreSQL pod(s) | `[]` | | `containerPorts.postgresql` | PostgreSQL container port | `5432` | -| `podSecurityContext.enabled` | Enabled pods' Security Context | `true` | | `podSecurityContext.fsGroupChangePolicy` | Set filesystem group change policy | `Always` | | `podSecurityContext.sysctls` | Set kernel settings using the sysctl interface | `[]` | | `podSecurityContext.supplementalGroups` | Set filesystem extra groups | `[]` | | `podSecurityContext.fsGroup` | Group ID for the pod | `1001` | -| `containerSecurityContext.enabled` | Enable containers' Security Context | `true` | | `containerSecurityContext.seLinuxOptions` | Set SELinux options in container | `{}` | | `containerSecurityContext.runAsUser` | Set containers' Security Context runAsUser | `1001` | | `containerSecurityContext.runAsGroup` | Set containers' Security Context runAsGroup | `1001` | @@ -91,19 +90,16 @@ | `containerSecurityContext.allowPrivilegeEscalation` | Set container's Security Context allowPrivilegeEscalation | `false` | | `containerSecurityContext.capabilities.drop` | List of capabilities to be dropped | `["ALL"]` | | `containerSecurityContext.seccompProfile.type` | Set container's Security Context seccomp profile | `RuntimeDefault` | -| `livenessProbe.enabled` | Enable livenessProbe on PostgreSQL containers | `true` | | `livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `30` | | `livenessProbe.periodSeconds` | Period seconds for livenessProbe | `10` | | `livenessProbe.timeoutSeconds` | Timeout seconds for livenessProbe | `5` | | `livenessProbe.failureThreshold` | Failure threshold for livenessProbe | `6` | | `livenessProbe.successThreshold` | Success 
threshold for livenessProbe | `1` | -| `readinessProbe.enabled` | Enable readinessProbe on PostgreSQL containers | `true` | | `readinessProbe.initialDelaySeconds` | Initial delay seconds for readinessProbe | `5` | | `readinessProbe.periodSeconds` | Period seconds for readinessProbe | `10` | | `readinessProbe.timeoutSeconds` | Timeout seconds for readinessProbe | `5` | | `readinessProbe.failureThreshold` | Failure threshold for readinessProbe | `6` | | `readinessProbe.successThreshold` | Success threshold for readinessProbe | `1` | -| `startupProbe.enabled` | Enable startupProbe on PostgreSQL containers | `false` | | `startupProbe.initialDelaySeconds` | Initial delay seconds for startupProbe | `30` | | `startupProbe.periodSeconds` | Period seconds for startupProbe | `10` | | `startupProbe.timeoutSeconds` | Timeout seconds for startupProbe | `1` | @@ -121,18 +117,18 @@ ### Persistence Parameters -| Name | Description | Value | -| -------------------------------------------------- | --------------------------------------------------------------------------------------- | ------------------- | -| `persistence.enabled` | Enable PostgreSQL data persistence using PVC | `true` | -| `persistence.existingClaim` | Name of an existing PVC to use | `""` | -| `persistence.storageClass` | PVC Storage Class for PostgreSQL data volume | `""` | -| `persistence.accessModes` | PVC Access Mode for PostgreSQL volume | `["ReadWriteOnce"]` | -| `persistence.size` | PVC Storage Request for PostgreSQL volume | `8Gi` | -| `persistence.annotations` | Annotations for the PVC | `{}` | -| `persistence.labels` | Labels for the PVC | `{}` | -| `persistence.selector` | Selector to match an existing Persistent Volume (this value is evaluated as a template) | `{}` | -| `persistence.dataSource` | Custom PVC data source | `{}` | -| `persistence.persistentVolumeClaimRetentionPolicy` | PostgreSQL Persistent Volume Claim Retention Policy | `{}` | +| Name | Description | Value | +| 
-------------------------------------------------- | ----------------------------------------------------------------------- | ------------------- | +| `persistence.enabled` | Enable persistence using Persistent Volume Claims | `true` | +| `persistence.existingClaim` | Name of an existing PVC to use | `""` | +| `persistence.storageClass` | Storage class of backing PVC | `""` | +| `persistence.labels` | Persistent Volume Claim labels | `{}` | +| `persistence.annotations` | Persistent Volume Claim annotations | `{}` | +| `persistence.accessModes` | Persistent Volume Access Modes | `["ReadWriteOnce"]` | +| `persistence.size` | Size of data volume | `8Gi` | +| `persistence.selector` | Selector to match an existing Persistent Volume for PostgreSQL data PVC | `{}` | +| `persistence.dataSource` | Custom PVC data source | `{}` | +| `persistence.persistentVolumeClaimRetentionPolicy` | PostgreSQL Persistent Volume Claim Retention Policy | `{}` | ### Other Parameters @@ -156,7 +152,6 @@ | `metrics.collectors` | Control enabled collectors | `{}` | | `metrics.customMetrics` | Define additional custom metrics | `{}` | | `metrics.extraEnvVars` | Extra environment variables to add to PostgreSQL Prometheus exporter | `[]` | -| `metrics.containerSecurityContext.enabled` | Enable containers' Security Context | `true` | | `metrics.containerSecurityContext.seLinuxOptions` | Set SELinux options in container | `{}` | | `metrics.containerSecurityContext.runAsUser` | Set containers' Security Context runAsUser | `1001` | | `metrics.containerSecurityContext.runAsGroup` | Set containers' Security Context runAsGroup | `1001` | @@ -166,19 +161,16 @@ | `metrics.containerSecurityContext.allowPrivilegeEscalation` | Set container's Security Context allowPrivilegeEscalation | `false` | | `metrics.containerSecurityContext.capabilities.drop` | List of capabilities to be dropped | `["ALL"]` | | `metrics.containerSecurityContext.seccompProfile.type` | Set container's Security Context seccomp profile |
`RuntimeDefault` | -| `metrics.livenessProbe.enabled` | Enable livenessProbe on PostgreSQL Prometheus exporter containers | `true` | | `metrics.livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `5` | | `metrics.livenessProbe.periodSeconds` | Period seconds for livenessProbe | `10` | | `metrics.livenessProbe.timeoutSeconds` | Timeout seconds for livenessProbe | `5` | | `metrics.livenessProbe.failureThreshold` | Failure threshold for livenessProbe | `6` | | `metrics.livenessProbe.successThreshold` | Success threshold for livenessProbe | `1` | -| `metrics.readinessProbe.enabled` | Enable readinessProbe on PostgreSQL Prometheus exporter containers | `true` | | `metrics.readinessProbe.initialDelaySeconds` | Initial delay seconds for readinessProbe | `5` | | `metrics.readinessProbe.periodSeconds` | Period seconds for readinessProbe | `10` | | `metrics.readinessProbe.timeoutSeconds` | Timeout seconds for readinessProbe | `5` | | `metrics.readinessProbe.failureThreshold` | Failure threshold for readinessProbe | `6` | | `metrics.readinessProbe.successThreshold` | Success threshold for readinessProbe | `1` | -| `metrics.startupProbe.enabled` | Enable startupProbe on PostgreSQL Prometheus exporter containers | `false` | | `metrics.startupProbe.initialDelaySeconds` | Initial delay seconds for startupProbe | `10` | | `metrics.startupProbe.periodSeconds` | Period seconds for startupProbe | `10` | | `metrics.startupProbe.timeoutSeconds` | Timeout seconds for startupProbe | `1` | @@ -213,12 +205,10 @@ | `backup.cronjob.startingDeadlineSeconds` | Set the cronjob parameter startingDeadlineSeconds | `""` | | `backup.cronjob.ttlSecondsAfterFinished` | Set the cronjob parameter ttlSecondsAfterFinished | `""` | | `backup.cronjob.restartPolicy` | Set the cronjob parameter restartPolicy | `OnFailure` | -| `backup.cronjob.podSecurityContext.enabled` | Enable PodSecurityContext for CronJob/Backup | `true` | | 
`backup.cronjob.podSecurityContext.fsGroupChangePolicy` | Set filesystem group change policy | `Always` | | `backup.cronjob.podSecurityContext.sysctls` | Set kernel settings using the sysctl interface | `[]` | | `backup.cronjob.podSecurityContext.supplementalGroups` | Set filesystem extra groups | `[]` | | `backup.cronjob.podSecurityContext.fsGroup` | Group ID for the CronJob | `1001` | -| `backup.cronjob.containerSecurityContext.enabled` | Enable containers' Security Context | `true` | | `backup.cronjob.containerSecurityContext.seLinuxOptions` | Set SELinux options in container | `{}` | | `backup.cronjob.containerSecurityContext.runAsUser` | Set containers' Security Context runAsUser | `1001` | | `backup.cronjob.containerSecurityContext.runAsGroup` | Set containers' Security Context runAsGroup | `1001` | diff --git a/install/helm/charts/postgresql/templates/backup/cronjob.yaml b/install/helm/charts/postgresql/templates/backup/cronjob.yaml index bdff4350..f533463f 100644 --- a/install/helm/charts/postgresql/templates/backup/cronjob.yaml +++ b/install/helm/charts/postgresql/templates/backup/cronjob.yaml @@ -58,7 +58,7 @@ spec: - name: PGPASSWORD valueFrom: secretKeyRef: - name: {{ include "common.authSecretName" . }} + name: {{ include "common.auth.secretName" . }} key: {{ include "postgresql.adminPasswordKey" . }} - name: PGHOST value: {{ include "common.fullname" . }} @@ -87,18 +87,18 @@ spec: {{- with .Values.backup.cronjob.volumeMounts }} {{- toYaml . | nindent 14 }} {{- end }} - {{- if .Values.backup.cronjob.containerSecurityContext.enabled }} + {{- with .Values.backup.cronjob.containerSecurityContext }} securityContext: - {{- omit .Values.backup.cronjob.containerSecurityContext "enabled" | toYaml | nindent 14 }} + {{- toYaml . | nindent 14 }} {{- end }} {{- with .Values.backup.cronjob.resources }} resources: {{- toYaml . 
| nindent 14 }} {{- end }} restartPolicy: {{ .Values.backup.cronjob.restartPolicy }} - {{- if .Values.backup.cronjob.podSecurityContext.enabled }} + {{- with .Values.backup.cronjob.podSecurityContext }} securityContext: - fsGroup: {{ .Values.backup.cronjob.podSecurityContext.fsGroup }} + fsGroup: {{ .fsGroup }} {{- end }} volumes: {{- if $tlsEnabled }} diff --git a/install/helm/charts/postgresql/templates/secret.yaml b/install/helm/charts/postgresql/templates/secret.yaml index 5b383c9c..dd7122c7 100644 --- a/install/helm/charts/postgresql/templates/secret.yaml +++ b/install/helm/charts/postgresql/templates/secret.yaml @@ -1,5 +1,5 @@ -{{- if not .Values.auth.existingSecret }} -{{- $secretName := include "common.authSecretName" . }} +{{- if and (not .Values.auth.existingSecret) (not .Values.global.postgresql.auth.existingSecret) }} +{{- $secretName := include "common.auth.secretName" . }} {{- $adminPasswordKey := include "postgresql.adminPasswordKey" . }} {{- $adminPassword := "" }} {{- if .Values.auth.enablePostgresUser }} diff --git a/install/helm/charts/postgresql/templates/statefulset.yaml b/install/helm/charts/postgresql/templates/statefulset.yaml index 2a851978..09866cc7 100644 --- a/install/helm/charts/postgresql/templates/statefulset.yaml +++ b/install/helm/charts/postgresql/templates/statefulset.yaml @@ -27,9 +27,9 @@ spec: spec: serviceAccountName: {{ include "common.serviceAccountName" . }} {{- include "common.imagePullSecrets" . | nindent 6 }} - {{- if .Values.podSecurityContext.enabled }} + {{- with .Values.podSecurityContext }} securityContext: - {{- omit .Values.podSecurityContext "enabled" | toYaml | nindent 8 }} + {{- toYaml . | nindent 8 }} {{- end }} {{- if .Values.terminationGracePeriodSeconds }} terminationGracePeriodSeconds: {{ .Values.terminationGracePeriodSeconds }} @@ -43,9 +43,9 @@ spec: resources: {{- toYaml . 
| nindent 12 }} {{- end }} - {{- if .Values.containerSecurityContext.enabled }} + {{- with .Values.containerSecurityContext }} securityContext: - {{- omit .Values.containerSecurityContext "enabled" | toYaml | nindent 12 }} + {{- toYaml . | nindent 12 }} {{- end }} command: - /bin/sh @@ -63,9 +63,9 @@ spec: - name: postgresql image: {{ include "postgresql.image" . }} imagePullPolicy: {{ .Values.image.pullPolicy | quote }} - {{- if .Values.containerSecurityContext.enabled }} + {{- with .Values.containerSecurityContext }} securityContext: - {{- omit .Values.containerSecurityContext "enabled" | toYaml | nindent 12 }} + {{- toYaml . | nindent 12 }} {{- end }} command: - /usr/local/bin/docker-entrypoint.sh @@ -82,7 +82,7 @@ spec: - name: POSTGRES_PASSWORD valueFrom: secretKeyRef: - name: {{ include "common.authSecretName" . }} + name: {{ include "common.auth.secretName" . }} key: {{ include "postgresql.userPasswordKey" . }} {{- with .Values.auth.database }} - name: POSTGRES_DB @@ -95,30 +95,30 @@ spec: ports: - name: tcp-postgresql containerPort: {{ .Values.containerPorts.postgresql }} - {{- if .Values.startupProbe.enabled }} - startupProbe: {{- omit .Values.startupProbe "enabled" | toYaml | nindent 12 }} + {{- with .Values.startupProbe }} + startupProbe: {{- toYaml . | nindent 12 }} exec: command: - /bin/sh - -c - {{- include "postgresql.probeCommand" . | nindent 16 }} + {{- include "postgresql.probeCommand" $ | nindent 16 }} {{- end }} - {{- if .Values.livenessProbe.enabled }} - livenessProbe: {{- omit .Values.livenessProbe "enabled" | toYaml | nindent 12 }} + {{- with .Values.livenessProbe }} + livenessProbe: {{- toYaml . | nindent 12 }} exec: command: - /bin/sh - -c - {{- include "postgresql.probeCommand" . 
| nindent 16 }} + {{- include "postgresql.probeCommand" $ | nindent 16 }} {{- end }} - {{- if .Values.readinessProbe.enabled }} - readinessProbe: {{- omit .Values.readinessProbe "enabled" | toYaml | nindent 12 }} + {{- with .Values.readinessProbe }} + readinessProbe: {{- toYaml . | nindent 12 }} exec: command: - /bin/sh - -c - -e - {{- include "postgresql.probeCommand" . | nindent 16 }} + {{- include "postgresql.probeCommand" $ | nindent 16 }} {{- end }} {{- with .Values.resources }} resources: @@ -151,9 +151,9 @@ spec: - name: metrics image: {{ include "common.metrics.image" . }} imagePullPolicy: {{ .Values.metrics.image.pullPolicy | quote }} - {{- if .Values.metrics.containerSecurityContext.enabled }} + {{- with .Values.metrics.containerSecurityContext }} securityContext: - {{- omit .Values.metrics.containerSecurityContext "enabled" | toYaml | nindent 12 }} + {{- toYaml . | nindent 12 }} {{- end }} {{- if or .Values.metrics.customMetrics .Values.metrics.collectors }} args: @@ -173,7 +173,7 @@ spec: - name: DATA_SOURCE_PASS valueFrom: secretKeyRef: - name: {{ include "common.authSecretName" . }} + name: {{ include "common.auth.secretName" . }} key: {{ $pwdKey }} - name: DATA_SOURCE_USER value: {{ default "postgres" $customUser | quote }} @@ -183,19 +183,19 @@ spec: ports: - name: http-metrics containerPort: {{ .Values.metrics.containerPorts.metrics }} - {{- if .Values.metrics.startupProbe.enabled }} - startupProbe: {{- omit .Values.metrics.startupProbe "enabled" | toYaml | nindent 12 }} + {{- with .Values.metrics.startupProbe }} + startupProbe: {{- toYaml . | nindent 12 }} tcpSocket: port: http-metrics {{- end }} - {{- if .Values.metrics.livenessProbe.enabled }} - livenessProbe: {{- omit .Values.metrics.livenessProbe "enabled" | toYaml | nindent 12 }} + {{- with .Values.metrics.livenessProbe }} + livenessProbe: {{- toYaml . 
| nindent 12 }} httpGet: path: / port: http-metrics {{- end }} - {{- if .Values.metrics.readinessProbe.enabled }} - readinessProbe: {{- omit .Values.metrics.readinessProbe "enabled" | toYaml | nindent 12 }} + {{- with .Values.metrics.readinessProbe }} + readinessProbe: {{- toYaml . | nindent 12 }} httpGet: path: / port: http-metrics diff --git a/install/helm/charts/postgresql/values.schema.json b/install/helm/charts/postgresql/values.schema.json index df718a2f..2ec2eb98 100644 --- a/install/helm/charts/postgresql/values.schema.json +++ b/install/helm/charts/postgresql/values.schema.json @@ -24,6 +24,16 @@ "postgresql": { "type": "object", "properties": { + "auth": { + "type": "object", + "properties": { + "existingSecret": { + "type": "string", + "description": "Name of an existing secret that contains the certificates (overrides `auth.existingSecret`)", + "default": "" + } + } + }, "tls": { "type": "object", "properties": { @@ -329,11 +339,6 @@ "podSecurityContext": { "type": "object", "properties": { - "enabled": { - "type": "boolean", - "description": "Enabled pods' Security Context", - "default": true - }, "fsGroupChangePolicy": { "type": "string", "description": "Set filesystem group change policy", @@ -361,11 +366,6 @@ "containerSecurityContext": { "type": "object", "properties": { - "enabled": { - "type": "boolean", - "description": "Enable containers' Security Context", - "default": true - }, "runAsUser": { "type": "number", "description": "Set containers' Security Context runAsUser", @@ -426,11 +426,6 @@ "livenessProbe": { "type": "object", "properties": { - "enabled": { - "type": "boolean", - "description": "Enable livenessProbe on PostgreSQL containers", - "default": true - }, "initialDelaySeconds": { "type": "number", "description": "Initial delay seconds for livenessProbe", @@ -461,11 +456,6 @@ "readinessProbe": { "type": "object", "properties": { - "enabled": { - "type": "boolean", - "description": "Enable readinessProbe on PostgreSQL containers", - 
"default": true - }, "initialDelaySeconds": { "type": "number", "description": "Initial delay seconds for readinessProbe", @@ -496,11 +486,6 @@ "startupProbe": { "type": "object", "properties": { - "enabled": { - "type": "boolean", - "description": "Enable startupProbe on PostgreSQL containers", - "default": false - }, "initialDelaySeconds": { "type": "number", "description": "Initial delay seconds for startupProbe", @@ -568,7 +553,7 @@ "properties": { "enabled": { "type": "boolean", - "description": "Enable PostgreSQL data persistence using PVC", + "description": "Enable persistence using Persistent Volume Claims", "default": true }, "existingClaim": { @@ -578,12 +563,22 @@ }, "storageClass": { "type": "string", - "description": "PVC Storage Class for PostgreSQL data volume", + "description": "Storage class of backing PVC", "default": "" }, + "labels": { + "type": "object", + "description": "Persistent Volume Claim labels", + "default": {} + }, + "annotations": { + "type": "object", + "description": "Persistent Volume Claim annotations", + "default": {} + }, "accessModes": { "type": "array", - "description": "PVC Access Mode for PostgreSQL volume", + "description": "Persistent Volume Access Modes", "default": [ "ReadWriteOnce" ], @@ -593,22 +588,12 @@ }, "size": { "type": "string", - "description": "PVC Storage Request for PostgreSQL volume", + "description": "Size of data volume", "default": "8Gi" }, - "annotations": { - "type": "object", - "description": "Annotations for the PVC", - "default": {} - }, - "labels": { - "type": "object", - "description": "Labels for the PVC", - "default": {} - }, "selector": { "type": "object", - "description": "Selector to match an existing Persistent Volume (this value is evaluated as a template)", + "description": "Selector to match an existing Persistent Volume for ClickHouse data PVC", "default": {} }, "dataSource": { @@ -705,11 +690,6 @@ "containerSecurityContext": { "type": "object", "properties": { - "enabled": { - "type": 
"boolean", - "description": "Enable containers' Security Context", - "default": true - }, "runAsUser": { "type": "number", "description": "Set containers' Security Context runAsUser", @@ -770,11 +750,6 @@ "livenessProbe": { "type": "object", "properties": { - "enabled": { - "type": "boolean", - "description": "Enable livenessProbe on PostgreSQL Prometheus exporter containers", - "default": true - }, "initialDelaySeconds": { "type": "number", "description": "Initial delay seconds for livenessProbe", @@ -805,11 +780,6 @@ "readinessProbe": { "type": "object", "properties": { - "enabled": { - "type": "boolean", - "description": "Enable readinessProbe on PostgreSQL Prometheus exporter containers", - "default": true - }, "initialDelaySeconds": { "type": "number", "description": "Initial delay seconds for readinessProbe", @@ -840,11 +810,6 @@ "startupProbe": { "type": "object", "properties": { - "enabled": { - "type": "boolean", - "description": "Enable startupProbe on PostgreSQL Prometheus exporter containers", - "default": false - }, "initialDelaySeconds": { "type": "number", "description": "Initial delay seconds for startupProbe", @@ -994,11 +959,6 @@ "podSecurityContext": { "type": "object", "properties": { - "enabled": { - "type": "boolean", - "description": "Enable PodSecurityContext for CronJob/Backup", - "default": true - }, "fsGroupChangePolicy": { "type": "string", "description": "Set filesystem group change policy", @@ -1026,11 +986,6 @@ "containerSecurityContext": { "type": "object", "properties": { - "enabled": { - "type": "boolean", - "description": "Enable containers' Security Context", - "default": true - }, "runAsUser": { "type": "number", "description": "Set containers' Security Context runAsUser", diff --git a/install/helm/charts/postgresql/values.yaml b/install/helm/charts/postgresql/values.yaml index dae72fba..03fc794e 100644 --- a/install/helm/charts/postgresql/values.yaml +++ b/install/helm/charts/postgresql/values.yaml @@ -10,6 +10,9 @@ global: # 
-- Global StorageClass for Persistent Volume(s) storageClass: "" postgresql: + auth: + # -- Name of an existing secret that contains the certificates (overrides `auth.existingSecret`) + existingSecret: "" tls: # -- [boolean, nullable] Enable TLS traffic support (overrides `tls.enabled`) enabled: null @@ -165,8 +168,6 @@ containerPorts: # Pod Security Context # ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ podSecurityContext: - # -- Enabled pods' Security Context - enabled: true # -- Set filesystem group change policy fsGroupChangePolicy: Always # -- Set kernel settings using the sysctl interface @@ -178,8 +179,6 @@ podSecurityContext: # Container Security Context # ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ containerSecurityContext: - # -- Enable containers' Security Context - enabled: true # -- [object,nullable] Set SELinux options in container seLinuxOptions: {} # -- Set containers' Security Context runAsUser @@ -203,8 +202,6 @@ containerSecurityContext: # Configure extra options for PostgreSQL containers' liveness, readiness and startup probes # ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#configure-probes livenessProbe: - # -- Enable livenessProbe on PostgreSQL containers - enabled: true # -- Initial delay seconds for livenessProbe initialDelaySeconds: 30 # -- Period seconds for livenessProbe @@ -216,8 +213,6 @@ livenessProbe: # -- Success threshold for livenessProbe successThreshold: 1 readinessProbe: - # -- Enable readinessProbe on PostgreSQL containers - enabled: true # -- Initial delay seconds for readinessProbe initialDelaySeconds: 5 # -- Period seconds for readinessProbe @@ -229,8 +224,6 @@ readinessProbe: # -- Success threshold for readinessProbe successThreshold: 1 startupProbe: - # -- Enable startupProbe on PostgreSQL containers - enabled: false # -- Initial delay seconds for startupProbe initialDelaySeconds: 30 # -- 
Period seconds for startupProbe @@ -256,29 +249,32 @@ service: # -- Annotations for the headless service. annotations: {} # @section Persistence Parameters -# PostgreSQL persistence configuration +# Enable persistence using Persistent Volume Claims +# ref: https://kubernetes.io/docs/concepts/storage/persistent-volumes/ persistence: - # -- Enable PostgreSQL data persistence using PVC + # -- Enable persistence using Persistent Volume Claims enabled: true # -- Name of an existing PVC to use existingClaim: "" - # -- PVC Storage Class for PostgreSQL data volume + # -- Storage class of backing PVC # If defined, storageClassName: # If set to "-", storageClassName: "", which disables dynamic provisioning # If undefined (the default) or set to null, no storageClassName spec is # set, choosing the default provisioner. (gp2 on AWS, standard on # GKE, AWS & OpenStack) storageClass: "" - # -- PVC Access Mode for PostgreSQL volume + # -- Persistent Volume Claim labels + labels: {} + # -- Persistent Volume Claim annotations + annotations: {} + # -- Persistent Volume Access Modes accessModes: - ReadWriteOnce - # -- PVC Storage Request for PostgreSQL volume + # -- Size of data volume size: 8Gi - # -- Annotations for the PVC - annotations: {} - # -- Labels for the PVC - labels: {} - # -- Selector to match an existing Persistent Volume (this value is evaluated as a template) + # -- Selector to match an existing Persistent Volume for ClickHouse data PVC + # If set, the PVC can't have a PV dynamically provisioned for it + # E.g. 
# selector: # matchLabels: # app: my-app @@ -351,8 +347,6 @@ metrics: # PostgreSQL Prometheus exporter containers' Security Context # ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container containerSecurityContext: - # -- Enable containers' Security Context - enabled: true # -- [object,nullable] Set SELinux options in container seLinuxOptions: {} # -- Set containers' Security Context runAsUser @@ -376,8 +370,6 @@ metrics: # Configure extra options for PostgreSQL Prometheus exporter containers' liveness, readiness and startup probes # ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#configure-probes livenessProbe: - # -- Enable livenessProbe on PostgreSQL Prometheus exporter containers - enabled: true # -- Initial delay seconds for livenessProbe initialDelaySeconds: 5 # -- Period seconds for livenessProbe @@ -389,8 +381,6 @@ metrics: # -- Success threshold for livenessProbe successThreshold: 1 readinessProbe: - # -- Enable readinessProbe on PostgreSQL Prometheus exporter containers - enabled: true # -- Initial delay seconds for readinessProbe initialDelaySeconds: 5 # -- Period seconds for readinessProbe @@ -402,8 +392,6 @@ metrics: # -- Success threshold for readinessProbe successThreshold: 1 startupProbe: - # -- Enable startupProbe on PostgreSQL Prometheus exporter containers - enabled: false # -- Initial delay seconds for startupProbe initialDelaySeconds: 10 # -- Period seconds for startupProbe @@ -514,8 +502,6 @@ backup: # -- Set the cronjob parameter restartPolicy restartPolicy: OnFailure podSecurityContext: - # -- Enable PodSecurityContext for CronJob/Backup - enabled: true # -- Set filesystem group change policy fsGroupChangePolicy: Always # -- Set kernel settings using the sysctl interface @@ -527,8 +513,6 @@ backup: # backup container's Security Context # ref: 
https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container containerSecurityContext: - # -- Enable containers' Security Context - enabled: true # -- [object,nullable] Set SELinux options in container seLinuxOptions: {} # -- Set containers' Security Context runAsUser diff --git a/install/helm/charts/prometheus/Chart.yaml b/install/helm/charts/prometheus/Chart.yaml new file mode 100644 index 00000000..414cc42c --- /dev/null +++ b/install/helm/charts/prometheus/Chart.yaml @@ -0,0 +1,11 @@ +apiVersion: v2 +appVersion: 3.4.1 +dependencies: +- name: common + repository: file://../common + version: 0.x.x +description: Prometheus is an open source monitoring and alerting system. It enables + sysadmins to monitor their infrastructures by collecting metrics from configured + targets at given intervals. +name: prometheus +version: 0.0.1 diff --git a/install/helm/charts/prometheus/README.md b/install/helm/charts/prometheus/README.md new file mode 100644 index 00000000..2b5e6e54 --- /dev/null +++ b/install/helm/charts/prometheus/README.md @@ -0,0 +1,152 @@ +# Chart + +## Parameters + +### Global parameters + +| Name | Description | Value | +| -------------------------------------- | ------------------------------------------------------------------------------------------- | ----- | +| `global.imageRegistry` | Global Docker image registry | `""` | +| `global.imagePullSecrets` | Global Docker registry secret names as an array | `[]` | +| `global.storageClass` | Global StorageClass for Persistent Volume(s) | `""` | +| `global.prometheus.tls.enabled` | Enable TLS traffic support (overrides `tls.enabled`) | `nil` | +| `global.prometheus.tls.autoGenerated` | Generate automatically self-signed TLS certificates (overrides `tls.autoGenerated`) | `nil` | +| `global.prometheus.tls.existingSecret` | Name of an existing secret that contains the certificates (overrides `tls.existingSecret`) | `""` | +| `global.prometheus.tls.cert` | 
Certificate value. Requires `tls.autoGenerated` to be `false` (overrides `tls.cert`) | `""` | +| `global.prometheus.tls.certKey` | Certificate key value. Requires `tls.autoGenerated` to be `false` (overrides `tls.certKey`) | `""` | +| `global.prometheus.tls.certCA` | CA Certificate value. Requires `tls.autoGenerated` to be `false` (overrides `tls.certCA`) | `""` | + +### Common parameters + +| Name | Description | Value | +| ------------------- | -------------------------------------------------------------------------------------- | --------------- | +| `nameOverride` | String to partially override common.fullname template (will maintain the release name) | `""` | +| `fullnameOverride` | String to fully override common.fullname template | `""` | +| `namespaceOverride` | String to fully override common.namespace | `""` | +| `clusterDomain` | Kubernetes Cluster Domain | `cluster.local` | +| `labels` | Add labels to all the deployed resources | `{}` | +| `annotations` | Add annotations to all the deployed resources | `{}` | +| `imagePullSecrets` | Global Docker registry secret names as an array | `[]` | + +### Prometheus parameters + +| Name | Description | Value | +| -------------------------------- | ---------------------------------------------------------------------------------------------------------- | ----------------------- | +| `image.registry` | Prometheus image registry | `REGISTRY_NAME` | +| `image.repository` | Prometheus image repository | `REPOSITORY_NAME/redis` | +| `image.digest` | Prometheus image digest in the way sha256:aa.... 
Please note this parameter, if set, will override the tag | `""` | +| `image.pullPolicy` | Prometheus image pull policy | `IfNotPresent` | +| `auth.enabled` | Enable password authentication (currently is not supported by this chart) | `false` | +| `auth.username` | Prometheus username | `default` | +| `auth.password` | Prometheus password | `""` | +| `auth.existingSecret` | The name of an existing secret with Prometheus credentials | `""` | +| `auth.existingSecretPasswordKey` | Password key to be retrieved from existing secret | `""` | +| `tls.enabled` | Enable TLS traffic | `false` | +| `tls.authClients` | Require clients to authenticate | `false` | +| `tls.autoGenerated` | Enable autogenerated certificates | `false` | +| `tls.existingSecret` | The name of the existing secret that contains the TLS certificates | `""` | +| `tls.certFilename` | Certificate filename | `tls.crt` | +| `tls.certKeyFilename` | Certificate Key filename | `tls.key` | +| `tls.certCAFilename` | CA Certificate filename | `ca.crt` | +| `tls.cert` | Certificate value. Requires `tls.autoGenerated` to be `false` | `""` | +| `tls.certKey` | Certificate key value. Requires `tls.autoGenerated` to be `false` | `""` | +| `tls.certCA` | CA Certificate value. 
Requires `tls.autoGenerated` to be `false` | `""` | +| `resources` | Set container requests and limits for different resources like CPU or memory | `{}` | + +### Prometheus configuration parameters + +| Name | Description | Value | +| --------------------------------------------------- | ----------------------------------------------------------------------------------------------- | ---------------- | +| `configuration` | Configuration to be added into the ConfigMap | `""` | +| `existingConfigmap` | The name of an existing ConfigMap with your custom configuration for Prometheus | `""` | +| `routePrefix` | Prefix for the internal routes of web endpoints | `/` | +| `remoteWrite` | The remote_write spec configuration for Prometheus | `[]` | +| `scrapeInterval` | Interval between consecutive scrapes. Example: "1m" | `""` | +| `scrapeTimeout` | Timeout for each scrape request. Example: "10s" | `""` | +| `evaluationInterval` | Interval between consecutive evaluations. Example: "1m" | `""` | +| `enableAdminAPI` | Enable Prometheus administrative API | `false` | +| `enableRemoteWriteReceiver` | Enable Prometheus to be used as a receiver for the Prometheus remote write protocol. | `false` | +| `enableFeatures` | Enable access to Prometheus disabled features.
| `[]` | +| `logLevel` | Log level for Prometheus | `info` | +| `logFormat` | Log format for Prometheus | `logfmt` | +| `retention` | Metrics retention days | `10d` | +| `retentionSize` | Maximum size of metrics | `0` | +| `scrapeConfigmap` | ConfigMap which contains scrape config files | `""` | +| `replicaCount` | Number of Prometheus instances to deploy | `1` | +| `podLabels` | Extra labels for Prometheus master pods | `{}` | +| `podAnnotations` | Annotations for Prometheus master pods | `{}` | +| `affinity` | Affinity for Prometheus master pods assignment | `{}` | +| `nodeSelector` | Node labels for Prometheus master pods assignment | `{}` | +| `tolerations` | Tolerations for Prometheus master pods assignment | `[]` | +| `terminationGracePeriodSeconds` | Seconds Prometheus pod needs to terminate gracefully | `""` | +| `extraEnvVars` | Array with extra environment variables to add to Prometheus master nodes | `[]` | +| `volumes` | Optionally specify extra list of additional volumes for the Prometheus master pod(s) | `[]` | +| `volumeMounts` | Optionally specify extra list of additional volumeMounts for the Prometheus master container(s) | `[]` | +| `containerPorts.http` | Prometheus components web container port | `9090` | +| `podSecurityContext.fsGroupChangePolicy` | Set filesystem group change policy | `Always` | +| `podSecurityContext.sysctls` | Set kernel settings using the sysctl interface | `[]` | +| `podSecurityContext.supplementalGroups` | Set filesystem extra groups | `[]` | +| `podSecurityContext.fsGroup` | Set Prometheus master pod's Security Context fsGroup | `1001` | +| `containerSecurityContext.seLinuxOptions` | Set SELinux options in container | `{}` | +| `containerSecurityContext.runAsUser` | Set Prometheus master containers' Security Context runAsUser | `1001` | +| `containerSecurityContext.runAsGroup` | Set Prometheus master containers' Security Context runAsGroup | `1001` | +| `containerSecurityContext.runAsNonRoot` | Set Prometheus master
containers' Security Context runAsNonRoot | `true` | +| `containerSecurityContext.allowPrivilegeEscalation` | Is it possible to escalate Prometheus pod(s) privileges | `false` | +| `containerSecurityContext.readOnlyRootFilesystem` | Set container's Security Context read-only root filesystem | `true` | +| `containerSecurityContext.seccompProfile.type` | Set Prometheus master containers' Security Context seccompProfile | `RuntimeDefault` | +| `containerSecurityContext.capabilities.drop` | Set Prometheus master containers' Security Context capabilities to drop | `["ALL"]` | +| `startupProbe.initialDelaySeconds` | Initial delay seconds for startupProbe | `2` | +| `startupProbe.periodSeconds` | Period seconds for startupProbe | `5` | +| `startupProbe.timeoutSeconds` | Timeout seconds for startupProbe | `2` | +| `startupProbe.successThreshold` | Success threshold for startupProbe | `1` | +| `startupProbe.failureThreshold` | Failure threshold for startupProbe | `10` | +| `livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `5` | +| `livenessProbe.periodSeconds` | Period seconds for livenessProbe | `20` | +| `livenessProbe.timeoutSeconds` | Timeout seconds for livenessProbe | `3` | +| `livenessProbe.successThreshold` | Success threshold for livenessProbe | `1` | +| `livenessProbe.failureThreshold` | Failure threshold for livenessProbe | `3` | +| `readinessProbe.initialDelaySeconds` | Initial delay seconds for readinessProbe | `5` | +| `readinessProbe.periodSeconds` | Period seconds for readinessProbe | `10` | +| `readinessProbe.timeoutSeconds` | Timeout seconds for readinessProbe | `2` | +| `readinessProbe.successThreshold` | Success threshold for readinessProbe | `1` | +| `readinessProbe.failureThreshold` | Failure threshold for readinessProbe | `5` | + +### Service Parameters + +| Name | Description | Value | +| ------------------------------ | ----------------------------------------------------------- | ----------- | +| `service.type` |
Prometheus master service type | `ClusterIP` | +| `service.ports.http` | Prometheus HTTP service port | `9090` | +| `service.annotations` | Additional custom annotations for Prometheus master service | `{}` | +| `service.headless.annotations` | Annotations for the headless service. | `{}` | + +### Persistence Parameters + +| Name | Description | Value | +| -------------------------------------------------- | ----------------------------------------------------------------------- | ------------------- | +| `persistence.enabled` | Enable persistence using Persistent Volume Claims | `true` | +| `persistence.existingClaim` | Name of an existing PVC to use | `""` | +| `persistence.storageClass` | Storage class of backing PVC | `""` | +| `persistence.labels` | Persistent Volume Claim labels | `{}` | +| `persistence.annotations` | Persistent Volume Claim annotations | `{}` | +| `persistence.accessModes` | Persistent Volume Access Modes | `["ReadWriteOnce"]` | +| `persistence.size` | Size of data volume | `8Gi` | +| `persistence.selector` | Selector to match an existing Persistent Volume for Prometheus data PVC | `{}` | +| `persistence.dataSource` | Custom PVC data source | `{}` | +| `persistence.persistentVolumeClaimRetentionPolicy` | Prometheus Persistent Volume Claim Retention Policy | `{}` | + +### Other Parameters + +| Name | Description | Value | +| ----------------------------------- | --------------------------------------------------------------- | ------ | +| `serviceAccount.create` | Specifies whether a ServiceAccount should be created | `true` | +| `serviceAccount.name` | The name of the ServiceAccount to use.
| `""` | +| `serviceAccount.automount` | Whether to auto mount the service account token | `true` | +| `serviceAccount.annotations` | Additional custom annotations for the ServiceAccount | `{}` | +| `networkPolicy.enabled` | Enable creation of NetworkPolicy resources | `true` | +| `networkPolicy.allowExternal` | Don't require client label for connections | `true` | +| `networkPolicy.allowExternalEgress` | Allow the pod to access any range of port and all destinations. | `true` | +| `networkPolicy.extraIngress` | Add extra ingress rules to the NetworkPolicy | `[]` | +| `networkPolicy.extraEgress` | Add extra egress rules to the NetworkPolicy | `[]` | +| `rbac.create` | Specifies whether RBAC resources should be created | `true` | +| `rbac.rules` | Custom RBAC rules to set | `[]` | diff --git a/install/helm/charts/prometheus/templates/_helpers.tpl b/install/helm/charts/prometheus/templates/_helpers.tpl new file mode 100644 index 00000000..a2e7d5c0 --- /dev/null +++ b/install/helm/charts/prometheus/templates/_helpers.tpl @@ -0,0 +1,6 @@ +{{/* +Return the proper prometheus image name +*/}} +{{- define "prometheus.image" -}} +{{ include "common.image" (dict "context" . "image" .Values.image) }} +{{- end -}} diff --git a/install/helm/charts/prometheus/templates/clusterrole.yaml b/install/helm/charts/prometheus/templates/clusterrole.yaml new file mode 100644 index 00000000..dc4c0d0e --- /dev/null +++ b/install/helm/charts/prometheus/templates/clusterrole.yaml @@ -0,0 +1,48 @@ +{{- if .Values.rbac.create }} +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ include "common.fullname.namespace" . }} + labels: + {{- include "common.labels" . | nindent 4 }} + app.kubernetes.io/part-of: prometheus + app.kubernetes.io/component: server + {{- with (include "common.annotations" .) }} + annotations: + {{- . 
| nindent 4 }} + {{- end }} +rules: + # These rules come from + - apiGroups: + - "" + resources: + - nodes + - nodes/proxy + - nodes/metrics + - services + - endpoints + - pods + - ingresses + - configmaps + verbs: + - get + - list + - watch + - apiGroups: + - "extensions" + - "networking.k8s.io" + resources: + - ingresses/status + - ingresses + verbs: + - get + - list + - watch + - nonResourceURLs: + - "/metrics" + verbs: + - get + {{- if .Values.rbac.rules }} + {{- tpl (toYaml .Values.rbac.rules) . | nindent 2 }} + {{- end }} +{{- end }} diff --git a/install/helm/charts/prometheus/templates/clusterrolebinding.yaml b/install/helm/charts/prometheus/templates/clusterrolebinding.yaml new file mode 100644 index 00000000..5557b37d --- /dev/null +++ b/install/helm/charts/prometheus/templates/clusterrolebinding.yaml @@ -0,0 +1,22 @@ +{{- if .Values.rbac.create }} +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ include "common.fullname.namespace" . }} + labels: + {{- include "common.labels" . | nindent 4 }} + app.kubernetes.io/part-of: prometheus + app.kubernetes.io/component: server + {{- with (include "common.annotations" .) }} + annotations: + {{- . | nindent 4 }} + {{- end }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ include "common.fullname.namespace" . }} +subjects: + - kind: ServiceAccount + name: {{ include "common.serviceAccountName" . }} + namespace: {{ include "common.namespace" . | quote }} +{{- end }} diff --git a/install/helm/charts/prometheus/templates/configmap.yaml b/install/helm/charts/prometheus/templates/configmap.yaml new file mode 100644 index 00000000..db60ed53 --- /dev/null +++ b/install/helm/charts/prometheus/templates/configmap.yaml @@ -0,0 +1,118 @@ +{{- if not .Values.existingConfigmap }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "common.configmapName" . }} + namespace: {{ include "common.namespace" . 
| quote }} + labels: + {{- include "common.labels" . | nindent 4 }} + {{- with (include "common.annotations" .) }} + annotations: + {{- . | nindent 4 }} + {{- end }} +data: + prometheus.yml: |- + {{- if .Values.configuration }} + {{- tpl .Values.configuration $ | nindent 6 }} + {{- else }} + global: + {{- if .Values.scrapeInterval }} + scrape_interval: {{ .Values.scrapeInterval }} + {{- end }} + {{- if .Values.scrapeTimeout }} + scrape_timeout: {{ .Values.scrapeTimeout }} + {{- end }} + {{- if .Values.evaluationInterval }} + evaluation_interval: {{ .Values.evaluationInterval }} + {{- end }} + external_labels: + monitor: {{ include "common.fullname" . }} + {{- if .Values.externalLabels }} + {{- tpl (toYaml .Values.externalLabels) . | nindent 10 }} + {{- end }} + {{- if .Values.remoteWrite }} + remote_write: {{- tpl (toYaml .Values.remoteWrite) . | nindent 8 }} + {{- end }} + scrape_configs: + - job_name: prometheus + kubernetes_sd_configs: + - role: endpoints + namespaces: + names: + - {{ include "common.namespace" . 
}} + metrics_path: /metrics + relabel_configs: + - source_labels: + - job + target_label: __tmp_prometheus_job_name + - action: keep + source_labels: + - __meta_kubernetes_service_label_app_kubernetes_io_component + - __meta_kubernetes_service_labelpresent_app_kubernetes_io_component + regex: (server);true + - action: keep + source_labels: + - __meta_kubernetes_service_label_app_kubernetes_io_instance + - __meta_kubernetes_service_labelpresent_app_kubernetes_io_instance + regex: ({{ .Release.Name }});true + - action: keep + source_labels: + - __meta_kubernetes_service_label_app_kubernetes_io_name + - __meta_kubernetes_service_labelpresent_app_kubernetes_io_name + regex: (prometheus);true + - action: keep + source_labels: + - __meta_kubernetes_endpoint_port_name + regex: http + - source_labels: + - __meta_kubernetes_endpoint_address_target_kind + - __meta_kubernetes_endpoint_address_target_name + separator: ; + regex: Node;(.*) + replacement: {{"${1}"}} + target_label: node + - source_labels: + - __meta_kubernetes_endpoint_address_target_kind + - __meta_kubernetes_endpoint_address_target_name + separator: ; + regex: Pod;(.*) + replacement: {{"${1}"}} + target_label: pod + - source_labels: + - __meta_kubernetes_namespace + target_label: namespace + - source_labels: + - __meta_kubernetes_service_name + target_label: service + - source_labels: + - __meta_kubernetes_pod_name + target_label: pod + - source_labels: + - __meta_kubernetes_pod_container_name + target_label: container + - action: drop + source_labels: + - __meta_kubernetes_pod_phase + regex: (Failed|Succeeded) + - source_labels: + - __meta_kubernetes_service_name + target_label: job + replacement: {{"${1}"}} + - target_label: endpoint + replacement: http + - source_labels: + - __address__ + target_label: __tmp_hash + modulus: 1 + action: hashmod + - source_labels: + - __tmp_hash + regex: 0 + action: keep + {{- if .Values.scrapeConfigmap}} + scrape_config_files: + - /etc/prometheus/scrape-config-files/*.yaml + 
- /etc/prometheus/scrape-config-files/*.yml + {{- end }} + {{- end }} +{{- end }} diff --git a/install/helm/charts/prometheus/templates/deployment.yaml b/install/helm/charts/prometheus/templates/deployment.yaml new file mode 100644 index 00000000..b02c2029 --- /dev/null +++ b/install/helm/charts/prometheus/templates/deployment.yaml @@ -0,0 +1,146 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "common.fullname" . }} + labels: + {{- include "common.labels" . | nindent 4 }} + {{- with (include "common.annotations" .) }} + annotations: + {{- . | nindent 4 }} + {{- end }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "common.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with (include "common.podAnnotations" .) }} + annotations: + {{- . | nindent 8 }} + {{- end }} + labels: + {{- include "common.podLabels" . | nindent 8 }} + spec: + serviceAccountName: {{ include "common.serviceAccountName" . }} + {{- include "common.imagePullSecrets" . | nindent 6 }} + {{- with .Values.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.terminationGracePeriodSeconds }} + terminationGracePeriodSeconds: {{ .Values.terminationGracePeriodSeconds }} + {{- end }} + containers: + - name: prometheus + image: {{ include "prometheus.image" . }} + imagePullPolicy: {{ .Values.image.pullPolicy | quote }} + {{- with .Values.containerSecurityContext }} + securityContext: + {{- toYaml . 
| nindent 12 }} + {{- end }} + args: + - "--config.file=/etc/prometheus/prometheus.yml" + - "--storage.tsdb.path=/var/lib/prometheus/data" + - "--storage.tsdb.retention.time={{ .Values.retention }}" + - "--storage.tsdb.retention.size={{ .Values.retentionSize }}" + - "--log.level={{ .Values.logLevel }}" + - "--log.format={{ .Values.logFormat }}" + - "--web.listen-address=:{{ .Values.containerPorts.http }}" + - "--web.console.libraries=/etc/prometheus/console_libraries" + - "--web.console.templates=/etc/prometheus/consoles" + {{- if .Values.enableAdminAPI}} + - "--web.enable-admin-api" + {{- end }} + {{- if .Values.enableRemoteWriteReceiver }} + - "--web.enable-remote-write-receiver" + {{- end }} + {{- if .Values.routePrefix }} + - "--web.route-prefix={{ .Values.routePrefix }}" + {{- end }} + {{- if .Values.enableFeatures }} + - "--enable-feature={{ join "," .Values.enableFeatures }}" + {{- end }} + {{- if .Values.extraEnvVars }} + env: + {{- with .Values.extraEnvVars }} + {{- tpl (toYaml .) $ | nindent 12 }} + {{- end }} + {{- end }} + ports: + - name: http + containerPort: {{ .Values.containerPorts.http }} + protocol: TCP + {{- with .Values.startupProbe }} + startupProbe: {{- toYaml . | nindent 12 }} + tcpSocket: + port: http + {{- end }} + {{- with .Values.livenessProbe }} + livenessProbe: {{- toYaml . | nindent 12 }} + httpGet: + path: /-/healthy + port: http + {{- end }} + {{- with .Values.readinessProbe }} + readinessProbe: {{- toYaml . | nindent 12 }} + httpGet: + path: /-/ready + port: http + {{- end }} + {{- with .Values.resources }} + resources: + {{- toYaml . 
| nindent 12 }} + {{- end }} + volumeMounts: + - name: empty-dir + mountPath: /tmp + subPath: tmp-dir + - name: config + mountPath: /etc/prometheus/prometheus.yml + readOnly: true + subPath: prometheus.yml + {{- if .Values.scrapeConfigmap }} + - name: scrape-config-files + mountPath: /etc/prometheus/scrape-config-files + readOnly: true + {{- end }} + - name: data + mountPath: /var/lib/prometheus/data + {{- with .Values.volumeMounts }} + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + volumes: + - name: empty-dir + emptyDir: {} + - name: config + configMap: + name: {{ include "common.configmapName" . }} + {{- if .Values.scrapeConfigmap }} + - name: scrape-config-files + configMap: + name: {{ .Values.scrapeConfigmap }} + {{- end }} + {{- with .Values.volumes }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if not .Values.persistence.enabled }} + - name: data + emptyDir: {} + {{- else }} + - name: data + persistentVolumeClaim: + claimName: {{ include "common.pvcName" . }} + {{- end }} diff --git a/install/helm/charts/prometheus/templates/networkpolicy.yaml b/install/helm/charts/prometheus/templates/networkpolicy.yaml new file mode 100644 index 00000000..a041b30f --- /dev/null +++ b/install/helm/charts/prometheus/templates/networkpolicy.yaml @@ -0,0 +1,40 @@ +{{- if .Values.networkPolicy.enabled }} +kind: NetworkPolicy +apiVersion: networking.k8s.io/v1 +metadata: + name: {{ include "common.fullname" . }} + namespace: {{ include "common.namespace" . | quote }} + labels: + {{- include "common.labels" . | nindent 4 }} + {{- with (include "common.annotations" .) }} + annotations: + {{- . | nindent 4 }} + {{- end }} +spec: + podSelector: + matchLabels: + {{- include "common.selectorLabels" . 
| nindent 6 }} + policyTypes: + - Ingress + - Egress + egress: + - {{ printf "{}" }} + {{- with .Values.networkPolicy.extraEgress }} + {{- toYaml . | nindent 4 }} + {{- end }} + ingress: + - ports: + - port: {{ .Values.containerPorts.http }} + {{- if not .Values.networkPolicy.allowExternal }} + from: + - podSelector: + matchLabels: + {{- include "common.selectorLabels" . | nindent 14 }} + - podSelector: + matchLabels: + {{ include "common.fullname" . }}-client: "true" + {{- end }} + {{- with .Values.networkPolicy.extraIngress }} + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} diff --git a/install/helm/charts/prometheus/templates/pvc.yaml b/install/helm/charts/prometheus/templates/pvc.yaml new file mode 100644 index 00000000..0f3510a1 --- /dev/null +++ b/install/helm/charts/prometheus/templates/pvc.yaml @@ -0,0 +1,29 @@ +{{- if and .Values.persistence.enabled (not .Values.persistence.existingClaim) }} +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: {{ include "common.pvcName" . }} + namespace: {{ .Release.Namespace | quote }} + {{- with (include "common.mergeAnnotations" (dict "context" . "value" .Values.persistence.annotations)) }} + annotations: + {{- . | nindent 4 }} + {{- end }} + {{- with (include "common.mergeLabels" (dict "context" . "value" .Values.persistence.labels)) }} + labels: + {{- . | nindent 4 }} + {{- end }} +spec: + accessModes: {{- toYaml .Values.persistence.accessModes | nindent 10 }} + {{- with .Values.persistence.dataSource }} + dataSource: {{- toYaml . | nindent 10 }} + {{- end }} + resources: + requests: + storage: {{ .Values.persistence.size | quote }} + {{- with .Values.persistence.selector }} + selector: {{- toYaml . | nindent 10 }} + {{- end }} + {{- with (default .Values.persistence.storageClass (.Values.global).storageClass) }} + storageClassName: {{ eq . "-" | ternary "" . 
| quote }} + {{- end }} +{{- end }} diff --git a/install/helm/charts/prometheus/templates/service-headless.yaml b/install/helm/charts/prometheus/templates/service-headless.yaml new file mode 100644 index 00000000..b815954b --- /dev/null +++ b/install/helm/charts/prometheus/templates/service-headless.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "common.fullname" . }}-headless + labels: + {{- include "common.labels" . | nindent 4 }} + {{- with (include "common.mergeAnnotations" (dict "context" . "value" .Values.service.headless.annotations)) }} + annotations: + {{- . | nindent 4 }} + {{- end }} +spec: + type: ClusterIP + clusterIP: None + publishNotReadyAddresses: true + ports: + - name: http + targetPort: http + port: {{ .Values.service.ports.http }} + protocol: TCP + selector: + {{- include "common.selectorLabels" . | nindent 4 }} diff --git a/install/helm/charts/prometheus/templates/service.yaml b/install/helm/charts/prometheus/templates/service.yaml new file mode 100644 index 00000000..49618beb --- /dev/null +++ b/install/helm/charts/prometheus/templates/service.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "common.fullname" . }} + labels: + {{- include "common.labels" . | nindent 4 }} + {{- with (include "common.mergeAnnotations" (dict "context" . "value" .Values.service.annotations)) }} + annotations: + {{- . | nindent 4 }} + {{- end }} +spec: + type: {{ .Values.service.type }} + ports: + - name: http + targetPort: http + port: {{ .Values.service.ports.http }} + protocol: TCP + selector: + {{- include "common.selectorLabels" . 
| nindent 4 }} diff --git a/install/helm/charts/prometheus/templates/serviceaccount.yaml b/install/helm/charts/prometheus/templates/serviceaccount.yaml new file mode 100644 index 00000000..090e1493 --- /dev/null +++ b/install/helm/charts/prometheus/templates/serviceaccount.yaml @@ -0,0 +1,13 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "common.serviceAccountName" . }} + labels: + {{- include "common.labels" . | nindent 4 }} + {{- with (include "common.mergeAnnotations" (dict "context" . "value" .Values.serviceAccount.annotations)) }} + annotations: + {{- . | nindent 4 }} + {{- end }} +automountServiceAccountToken: {{ .Values.serviceAccount.automount }} +{{- end }} diff --git a/install/helm/charts/prometheus/values.schema.json b/install/helm/charts/prometheus/values.schema.json new file mode 100644 index 00000000..ff031b4d --- /dev/null +++ b/install/helm/charts/prometheus/values.schema.json @@ -0,0 +1,702 @@ +{ + "title": "Chart Values", + "type": "object", + "properties": { + "global": { + "type": "object", + "properties": { + "imageRegistry": { + "type": "string", + "description": "Global Docker image registry", + "default": "" + }, + "imagePullSecrets": { + "type": "array", + "description": "Global Docker registry secret names as an array", + "default": [], + "items": {} + }, + "storageClass": { + "type": "string", + "description": "Global StorageClass for Persistent Volume(s)", + "default": "" + }, + "prometheus": { + "type": "object", + "properties": { + "tls": { + "type": "object", + "properties": { + "enabled": { + "type": [ + "boolean", + "null" + ], + "description": "Enable TLS traffic support (overrides `tls.enabled`)", + "default": null, + "nullable": true + }, + "autoGenerated": { + "type": [ + "boolean", + "null" + ], + "description": "Generate automatically self-signed TLS certificates (overrides `tls.autoGenerated`)", + "default": null, + "nullable": true + }, + "existingSecret": { + 
"type": "string", + "description": "Name of an existing secret that contains the certificates (overrides `tls.existingSecret`)", + "default": "" + }, + "cert": { + "type": "string", + "description": "Certificate value. Requires `tls.autoGenerated` to be `false` (overrides `tls.cert`)", + "default": "" + }, + "certKey": { + "type": "string", + "description": "Certificate key value. Requires `tls.autoGenerated` to be `false` (overrides `tls.certKey`)", + "default": "" + }, + "certCA": { + "type": "string", + "description": "CA Certificate value. Requires `tls.autoGenerated` to be `false` (overrides `tls.certCA`)", + "default": "" + } + } + } + } + } + } + }, + "nameOverride": { + "type": "string", + "description": "String to partially override common.fullname template (will maintain the release name)", + "default": "" + }, + "fullnameOverride": { + "type": "string", + "description": "String to fully override common.fullname template", + "default": "" + }, + "namespaceOverride": { + "type": "string", + "description": "String to fully override common.namespace", + "default": "" + }, + "clusterDomain": { + "type": "string", + "description": "Kubernetes Cluster Domain", + "default": "cluster.local" + }, + "labels": { + "type": "object", + "description": "Add labels to all the deployed resources", + "default": {} + }, + "annotations": { + "type": "object", + "description": "Add annotations to all the deployed resources", + "default": {} + }, + "imagePullSecrets": { + "type": "array", + "description": "Global Docker registry secret names as an array", + "default": [], + "items": {} + }, + "image": { + "type": "object", + "properties": { + "registry": { + "type": "string", + "description": "Prometheus image registry", + "default": "REGISTRY_NAME" + }, + "repository": { + "type": "string", + "description": "Prometheus image repository", + "default": "REPOSITORY_NAME/redis" + }, + "digest": { + "type": "string", + "description": "Prometheus image digest in the way 
sha256:aa.... Please note this parameter, if set, will override the tag", + "default": "" + }, + "pullPolicy": { + "type": "string", + "description": "Prometheus image pull policy", + "default": "IfNotPresent" + } + } + }, + "auth": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean", + "description": "Enable password authentication (currently is not supported by this chart)", + "default": false + }, + "username": { + "type": "string", + "description": "Prometheus username", + "default": "default" + }, + "password": { + "type": "string", + "description": "Prometheus password", + "default": "" + }, + "existingSecret": { + "type": "string", + "description": "The name of an existing secret with Prometheus credentials", + "default": "" + }, + "existingSecretPasswordKey": { + "type": "string", + "description": "Password key to be retrieved from existing secret", + "default": "" + } + } + }, + "tls": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean", + "description": "Enable TLS traffic", + "default": false + }, + "authClients": { + "type": "boolean", + "description": "Require clients to authenticate", + "default": false + }, + "autoGenerated": { + "type": "boolean", + "description": "Enable autogenerated certificates", + "default": false + }, + "existingSecret": { + "type": "string", + "description": "The name of the existing secret that contains the TLS certificates", + "default": "" + }, + "certFilename": { + "type": "string", + "description": "Certificate filename", + "default": "tls.crt" + }, + "certKeyFilename": { + "type": "string", + "description": "Certificate Key filename", + "default": "tls.key" + }, + "certCAFilename": { + "type": "string", + "description": "CA Certificate filename", + "default": "ca.crt" + }, + "cert": { + "type": "string", + "description": "Certificate value. 
Requires `tls.autoGenerated` to be `false`", + "default": "" + }, + "certKey": { + "type": "string", + "description": "Certificate key value. Requires `tls.autoGenerated` to be `false`", + "default": "" + }, + "certCA": { + "type": "string", + "description": "CA Certificate value. Requires `tls.autoGenerated` to be `false`", + "default": "" + } + } + }, + "configuration": { + "type": "string", + "description": "Configuration to be added into the ConfigMap", + "default": "\"\"" + }, + "existingConfigmap": { + "type": "string", + "description": "The name of an existing ConfigMap with your custom configuration for Prometheus", + "default": "" + }, + "routePrefix": { + "type": "string", + "description": "Prefix for the internal routes of web endpoints", + "default": "/" + }, + "remoteWrite": { + "type": "array", + "description": "The remote_write spec configuration for Prometheus", + "default": [], + "items": {} + }, + "scrapeInterval": { + "type": "string", + "description": "Interval between consecutive scrapes. Example: \"1m\"", + "default": "" + }, + "scrapeTimeout": { + "type": "string", + "description": "Timeout for each scrape request. Example: \"10s\"", + "default": "" + }, + "evaluationInterval": { + "type": "string", + "description": "Interval between consecutive evaluations. 
Example: \"1m\"", + "default": "" + }, + "enableAdminAPI": { + "type": "boolean", + "description": "Enable Prometheus administrative API", + "default": false + }, + "enableRemoteWriteReceiver": { + "type": "boolean", + "description": "Enable Prometheus to be used as a receiver for the Prometheus remote write protocol.", + "default": false + }, + "enableFeatures": { + "type": "array", + "description": "Enable access to Prometheus disabled features.", + "default": [], + "items": {} + }, + "logLevel": { + "type": "string", + "description": "Log level for Prometheus", + "default": "info" + }, + "logFormat": { + "type": "string", + "description": "Log format for Prometheus", + "default": "logfmt" + }, + "retention": { + "type": "string", + "description": "Metrics retention days", + "default": "10d" + }, + "retentionSize": { + "type": "string", + "description": "Maximum size of metrics", + "default": "0" + }, + "scrapeConfigmap": { + "type": "string", + "description": "ConfigMap which contains scrape config files", + "default": "" + }, + "replicaCount": { + "type": "number", + "description": "Number of Prometheus instances to deploy", + "default": 1 + }, + "podLabels": { + "type": "object", + "description": "Extra labels for Prometheus master pods", + "default": {} + }, + "podAnnotations": { + "type": "object", + "description": "Annotations for Prometheus master pods", + "default": {} + }, + "affinity": { + "type": "object", + "description": "Affinity for Prometheus master pods assignment", + "default": {} + }, + "nodeSelector": { + "type": "object", + "description": "Node labels for Prometheus master pods assignment", + "default": {} + }, + "tolerations": { + "type": "array", + "description": "Tolerations for Prometheus master pods assignment", + "default": [], + "items": {} + }, + "terminationGracePeriodSeconds": { + "type": "string", + "description": "Seconds Prometheus pod needs to terminate gracefully", + "default": "" + }, + "extraEnvVars": { + "type": "array", + 
"description": "Array with extra environment variables to add to Prometheus master nodes", + "default": [], + "items": {} + }, + "volumes": { + "type": "array", + "description": "Optionally specify extra list of additional volumes for the Prometheus master pod(s)", + "default": [], + "items": {} + }, + "volumeMounts": { + "type": "array", + "description": "Optionally specify extra list of additional volumeMounts for the Prometheus master container(s)", + "default": [], + "items": {} + }, + "containerPorts": { + "type": "object", + "properties": { + "http": { + "type": "number", + "description": "Prometheus components web container port", + "default": 9090 + } + } + }, + "podSecurityContext": { + "type": "object", + "properties": { + "fsGroupChangePolicy": { + "type": "string", + "description": "Set filesystem group change policy", + "default": "Always" + }, + "sysctls": { + "type": "array", + "description": "Set kernel settings using the sysctl interface", + "default": [], + "items": {} + }, + "supplementalGroups": { + "type": "array", + "description": "Set filesystem extra groups", + "default": [], + "items": {} + }, + "fsGroup": { + "type": "number", + "description": "Set Prometheus master pod's Security Context fsGroup", + "default": 1001 + } + } + }, + "containerSecurityContext": { + "type": "object", + "properties": { + "runAsUser": { + "type": "number", + "description": "Set Prometheus master containers' Security Context runAsUser", + "default": 1001 + }, + "runAsGroup": { + "type": "number", + "description": "Set Prometheus master containers' Security Context runAsGroup", + "default": 1001 + }, + "runAsNonRoot": { + "type": "boolean", + "description": "Set Prometheus master containers' Security Context runAsNonRoot", + "default": true + }, + "allowPrivilegeEscalation": { + "type": "boolean", + "description": "Is it possible to escalate Prometheus pod(s) privileges", + "default": false + }, + "readOnlyRootFilesystem": { + "type": "boolean", + "description": 
"Set container's Security Context read-only root filesystem", + "default": true + }, + "seccompProfile": { + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "Set Prometheus master containers' Security Context seccompProfile", + "default": "RuntimeDefault" + } + } + }, + "capabilities": { + "type": "object", + "properties": { + "drop": { + "type": "array", + "description": "Set Prometheus master containers' Security Context capabilities to drop", + "default": [ + "ALL" + ], + "items": { + "type": "string" + } + } + } + } + } + }, + "startupProbe": { + "type": "object", + "properties": { + "initialDelaySeconds": { + "type": "number", + "description": "Initial delay seconds for startupProbe", + "default": 2 + }, + "periodSeconds": { + "type": "number", + "description": "Period seconds for startupProbe", + "default": 5 + }, + "timeoutSeconds": { + "type": "number", + "description": "Timeout seconds for startupProbe", + "default": 2 + }, + "successThreshold": { + "type": "number", + "description": "Success threshold for startupProbe", + "default": 1 + }, + "failureThreshold": { + "type": "number", + "description": "Failure threshold for startupProbe", + "default": 10 + } + } + }, + "livenessProbe": { + "type": "object", + "properties": { + "initialDelaySeconds": { + "type": "number", + "description": "Initial delay seconds for livenessProbe", + "default": 5 + }, + "periodSeconds": { + "type": "number", + "description": "Period seconds for livenessProbe", + "default": 20 + }, + "timeoutSeconds": { + "type": "number", + "description": "Timeout seconds for livenessProbe", + "default": 3 + }, + "successThreshold": { + "type": "number", + "description": "Success threshold for livenessProbe", + "default": 1 + }, + "failureThreshold": { + "type": "number", + "description": "Failure threshold for livenessProbe", + "default": 3 + } + } + }, + "readinessProbe": { + "type": "object", + "properties": { + "initialDelaySeconds": { + "type": 
"number", + "description": "Initial delay seconds for readinessProbe", + "default": 5 + }, + "periodSeconds": { + "type": "number", + "description": "Period seconds for readinessProbe", + "default": 10 + }, + "timeoutSeconds": { + "type": "number", + "description": "Timeout seconds for readinessProbe", + "default": 2 + }, + "successThreshold": { + "type": "number", + "description": "Success threshold for readinessProbe", + "default": 1 + }, + "failureThreshold": { + "type": "number", + "description": "Failure threshold for readinessProbe", + "default": 5 + } + } + }, + "service": { + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "Prometheus master service type", + "default": "ClusterIP" + }, + "ports": { + "type": "object", + "properties": { + "http": { + "type": "number", + "description": "Prometheus HTTP service port", + "default": 9090 + } + } + }, + "annotations": { + "type": "object", + "description": "Additional custom annotations for Prometheus master service", + "default": {} + }, + "headless": { + "type": "object", + "properties": { + "annotations": { + "type": "object", + "description": "Annotations for the headless service.", + "default": {} + } + } + } + } + }, + "persistence": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean", + "description": "Enable persistence using Persistent Volume Claims", + "default": true + }, + "existingClaim": { + "type": "string", + "description": "Name of an existing PVC to use", + "default": "" + }, + "storageClass": { + "type": "string", + "description": "Storage class of backing PVC", + "default": "" + }, + "labels": { + "type": "object", + "description": "Persistent Volume Claim labels", + "default": {} + }, + "annotations": { + "type": "object", + "description": "Persistent Volume Claim annotations", + "default": {} + }, + "accessModes": { + "type": "array", + "description": "Persistent Volume Access Modes", + "default": [ + "ReadWriteOnce" + ], + "items": 
{ + "type": "string" + } + }, + "size": { + "type": "string", + "description": "Size of data volume", + "default": "8Gi" + }, + "selector": { + "type": "object", + "description": "Selector to match an existing Persistent Volume for Prometheus data PVC", + "default": {} + }, + "dataSource": { + "type": "object", + "description": "Custom PVC data source", + "default": {} + }, + "persistentVolumeClaimRetentionPolicy": { + "type": "object", + "description": "Prometheus Persistent Volume Claim Retention Policy", + "default": {} + } + } + }, + "serviceAccount": { + "type": "object", + "properties": { + "create": { + "type": "boolean", + "description": "Specifies whether a ServiceAccount should be created", + "default": true + }, + "name": { + "type": "string", + "description": "The name of the ServiceAccount to use.", + "default": "" + }, + "automount": { + "type": "boolean", + "description": "Whether to auto mount the service account token", + "default": true + }, + "annotations": { + "type": "object", + "description": "Additional custom annotations for the ServiceAccount", + "default": {} + } + } + }, + "networkPolicy": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean", + "description": "Enable creation of NetworkPolicy resources", + "default": true + }, + "allowExternal": { + "type": "boolean", + "description": "Don't require client label for connections", + "default": true + }, + "allowExternalEgress": { + "type": "boolean", + "description": "Allow the pod to access any range of port and all destinations.", + "default": true + }, + "extraIngress": { + "type": "array", + "description": "Add extra ingress rules to the NetworkPolicy", + "default": [], + "items": {} + }, + "extraEgress": { + "type": "array", + "description": "Add extra egress rules to the NetworkPolicy", + "default": [], + "items": {} + } + } + }, + "rbac": { + "type": "object", + "properties": { + "create": { + "type": "boolean", + "description": "Specifies whether RBAC 
resources should be created", + "default": true + }, + "rules": { + "type": "array", + "description": "Custom RBAC rules to set", + "default": [], + "items": {} + } + } + } + } +} \ No newline at end of file diff --git a/install/helm/charts/prometheus/values.yaml b/install/helm/charts/prometheus/values.yaml new file mode 100644 index 00000000..631975fb --- /dev/null +++ b/install/helm/charts/prometheus/values.yaml @@ -0,0 +1,373 @@ +# @section Global parameters +global: + # -- Global Docker image registry + imageRegistry: "" + # -- Global Docker registry secret names as an array + # e.g. + # imagePullSecrets: + # - myRegistryKeySecretName + imagePullSecrets: [] + # -- Global StorageClass for Persistent Volume(s) + storageClass: "" + + prometheus: + tls: + # -- [boolean, nullable] Enable TLS traffic support (overrides `tls.enabled`) + enabled: null + # -- [boolean, nullable] Generate automatically self-signed TLS certificates (overrides `tls.autoGenerated`) + autoGenerated: null + # -- Name of an existing secret that contains the certificates (overrides `tls.existingSecret`) + existingSecret: "" + # -- Certificate value. Requires `tls.autoGenerated` to be `false` (overrides `tls.cert`) + cert: "" + # -- Certificate key value. Requires `tls.autoGenerated` to be `false` (overrides `tls.certKey`) + certKey: "" + # -- CA Certificate value. Requires `tls.autoGenerated` to be `false` (overrides `tls.certCA`) + certCA: "" + +# @section Common parameters +# -- String to partially override common.fullname template (will maintain the release name) +nameOverride: "" +# -- String to fully override common.fullname template +fullnameOverride: "" +# -- String to fully override common.namespace +namespaceOverride: "" +# -- Kubernetes Cluster Domain +clusterDomain: cluster.local +# -- Add labels to all the deployed resources +labels: {} +# -- Add annotations to all the deployed resources +annotations: {} +# -- Global Docker registry secret names as an array +# e.g. 
+# imagePullSecrets: +# - myRegistryKeySecretName +imagePullSecrets: [] + +# @section Prometheus parameters +# Prometheus image +image: + # -- [default: REGISTRY_NAME] Prometheus image registry + registry: "" + # -- [default: REPOSITORY_NAME/redis] Prometheus image repository + repository: prom/prometheus + # @skip image.tag Prometheus image tag (immutable tags are recommended) + tag: v3.4.1 + # -- Prometheus image digest in the way sha256:aa.... Please note this parameter, if set, will override the tag + digest: "" + # -- Prometheus image pull policy + # Specify a imagePullPolicy + # Defaults to 'Always' if image tag is 'latest', else set to 'IfNotPresent' + # ref: https://kubernetes.io/docs/concepts/containers/images/#pre-pulled-images + pullPolicy: IfNotPresent +# Authentication +auth: + # -- Enable password authentication (currently is not supported by this chart) + enabled: false + # -- Prometheus username + username: "default" + # -- Prometheus password + password: "" + # -- The name of an existing secret with Prometheus credentials + # Defaults to a random 10-character alphanumeric string if not set + existingSecret: "" + # -- Password key to be retrieved from existing secret + existingSecretPasswordKey: "" +# TLS configuration +tls: + # -- Enable TLS traffic + enabled: false + # -- Require clients to authenticate + authClients: false + # -- Enable autogenerated certificates + autoGenerated: false + # -- The name of the existing secret that contains the TLS certificates + existingSecret: "" + # -- Certificate filename + certFilename: "tls.crt" + # -- Certificate Key filename + certKeyFilename: "tls.key" + # -- CA Certificate filename + certCAFilename: "ca.crt" + # -- Certificate value. Requires `tls.autoGenerated` to be `false` + cert: "" + # -- Certificate key value. Requires `tls.autoGenerated` to be `false` + certKey: "" + # -- CA Certificate value. 
Requires `tls.autoGenerated` to be `false` + certCA: "" +# Prometheus resource requests and limits +# ref: http://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/ +# -- [object] Set container requests and limits for different resources like CPU or memory +resources: + requests: + cpu: 500m + memory: 1024Mi + ephemeral-storage: 50Mi + limits: + cpu: "1.0" + memory: 1536Mi + ephemeral-storage: 1024Mi +# @section Prometheus configuration parameters +# -- [string] Configuration to be added into the ConfigMap +configuration: "" +# -- The name of an existing ConfigMap with your custom configuration for Prometheus +existingConfigmap: "" +# -- Prefix for the internal routes of web endpoints +routePrefix: / +# -- The remote_write spec configuration for Prometheus +remoteWrite: [] +# -- Interval between consecutive scrapes. Example: "1m" +scrapeInterval: "" +# -- Timeout for each scrape request. Example: "10s" +scrapeTimeout: "" +# -- Interval between consecutive evaluations. Example: "1m" +evaluationInterval: "" +# -- Enable Prometheus administrative API +# ref: https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-admin-apis +enableAdminAPI: false +# -- Enable Prometheus to be used as a receiver for the Prometheus remote write protocol. +enableRemoteWriteReceiver: false +# -- Enable access to Prometheus disabled features. 
+# ref: https://prometheus.io/docs/prometheus/latest/disabled_features/ +enableFeatures: [] +# -- Log level for Prometheus +logLevel: info +# -- Log format for Prometheus +logFormat: logfmt +# -- Metrics retention days +retention: 10d +# -- Maximum size of metrics +retentionSize: "0" +# -- ConfigMap which contains scrape config files +# ref: +scrapeConfigmap: "" +# -- Number of Prometheus instances to deploy +replicaCount: 1 +# -- Extra labels for Prometheus master pods +# ref: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ +podLabels: {} +# -- Annotations for Prometheus master pods +# ref: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/ +podAnnotations: {} +# -- Affinity for Prometheus master pods assignment +# ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity +# NOTE: `podAffinityPreset`, `podAntiAffinityPreset`, and `nodeAffinityPreset` will be ignored when it's set +affinity: {} +# -- Node labels for Prometheus master pods assignment +# ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/ +nodeSelector: {} +# -- Tolerations for Prometheus master pods assignment +# ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ +tolerations: [] +# -- Seconds Prometheus pod needs to terminate gracefully +# ref: https://kubernetes.io/docs/concepts/workloads/pods/pod/#termination-of-pods +terminationGracePeriodSeconds: "" +# -- Array with extra environment variables to add to Prometheus master nodes +# e.g: +# extraEnvVars: +# - name: FOO +# value: "bar" +extraEnvVars: [] +# -- Optionally specify extra list of additional volumes for the Prometheus master pod(s) +volumes: [] +# -- Optionally specify extra list of additional volumeMounts for the Prometheus master container(s) +volumeMounts: [] +containerPorts: + # -- Prometheus components web container port + http: 9090 +# Configure Pods Security Context +# ref: 
https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod +podSecurityContext: + # -- Set filesystem group change policy + fsGroupChangePolicy: Always + # -- Set kernel settings using the sysctl interface + sysctls: [] + # -- Set filesystem extra groups + supplementalGroups: [] + # -- Set Prometheus master pod's Security Context fsGroup + fsGroup: 1001 +# Configure Container Security Context +# ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod +containerSecurityContext: + # -- [object,nullable] Set SELinux options in container + seLinuxOptions: {} + # -- Set Prometheus master containers' Security Context runAsUser + runAsUser: 1001 + # -- Set Prometheus master containers' Security Context runAsGroup + runAsGroup: 1001 + # -- Set Prometheus master containers' Security Context runAsNonRoot + runAsNonRoot: true + # -- Is it possible to escalate Prometheus pod(s) privileges + allowPrivilegeEscalation: false + # -- Set container's Security Context read-only root filesystem + readOnlyRootFilesystem: true + seccompProfile: + # -- Set Prometheus master containers' Security Context seccompProfile + type: RuntimeDefault + capabilities: + # -- Set Prometheus master containers' Security Context capabilities to drop + drop: ["ALL"] +# Configure extra options for Prometheus containers' liveness and readiness probes +# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes +startupProbe: + # -- Initial delay seconds for startupProbe + initialDelaySeconds: 2 + # -- Period seconds for startupProbe + periodSeconds: 5 + # -- Timeout seconds for startupProbe + timeoutSeconds: 2 + # -- Success threshold for startupProbe + successThreshold: 1 + # -- Failure threshold for startupProbe + failureThreshold: 10 +livenessProbe: + # -- Initial delay seconds for livenessProbe + initialDelaySeconds: 5 + # -- Period 
seconds for livenessProbe + periodSeconds: 20 + # -- Timeout seconds for livenessProbe + timeoutSeconds: 3 + # -- Success threshold for livenessProbe + successThreshold: 1 + # -- Failure threshold for livenessProbe + failureThreshold: 3 +readinessProbe: + # -- Initial delay seconds for readinessProbe + initialDelaySeconds: 5 + # -- Period seconds for readinessProbe + periodSeconds: 10 + # -- Timeout seconds for readinessProbe + timeoutSeconds: 2 + # -- Success threshold for readinessProbe + successThreshold: 1 + # -- Failure threshold for readinessProbe + failureThreshold: 5 +# @section Service Parameters +# Prometheus master service parameters +service: + # -- Prometheus master service type + type: ClusterIP + ports: + # -- Prometheus HTTP service port + http: 9090 + # -- Additional custom annotations for Prometheus master service + annotations: {} + # Headless service properties + headless: + # -- Annotations for the headless service. + annotations: {} +# @section Persistence Parameters +# Enable persistence using Persistent Volume Claims +# ref: https://kubernetes.io/docs/concepts/storage/persistent-volumes/ +persistence: + # -- Enable persistence using Persistent Volume Claims + enabled: true + # -- Name of an existing PVC to use + existingClaim: "" + # -- Storage class of backing PVC + # If defined, storageClassName: <storageClass> + # If set to "-", storageClassName: "", which disables dynamic provisioning + # If undefined (the default) or set to null, no storageClassName spec is + # set, choosing the default provisioner. (gp2 on AWS, standard on + # GKE, AWS & OpenStack) + storageClass: "" + # -- Persistent Volume Claim labels + labels: {} + # -- Persistent Volume Claim annotations + annotations: {} + # -- Persistent Volume Access Modes + accessModes: + - ReadWriteOnce + # -- Size of data volume + size: 8Gi + # -- Selector to match an existing Persistent Volume for Prometheus data PVC + # If set, the PVC can't have a PV dynamically provisioned for it + # E.g. 
+ # selector: + # matchLabels: + # app: my-app + selector: {} + # -- Custom PVC data source + dataSource: {} + # -- Prometheus Persistent Volume Claim Retention Policy + # ref: https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#persistentvolumeclaim-retention + # Example: + # whenScaled: Retain + # whenDeleted: Retain + persistentVolumeClaimRetentionPolicy: {} +# @section Other Parameters +# Service account +# ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ +serviceAccount: + # -- Specifies whether a ServiceAccount should be created + create: true + # -- The name of the ServiceAccount to use. + # If not set and create is true, a name is generated using the common.fullname template + name: "" + # -- Whether to auto mount the service account token + # ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/#use-the-default-service-account-to-access-the-api-server + automount: true + # -- Additional custom annotations for the ServiceAccount + annotations: {} +# Network Policy configuration +# ref: https://kubernetes.io/docs/concepts/services-networking/network-policies/ +networkPolicy: + # -- Enable creation of NetworkPolicy resources + enabled: true + # -- Don't require client label for connections + # When set to false, only pods with the correct client label will have network access to the ports + # Prometheus is listening on. When true, Prometheus will accept connections from any source + # (with the correct destination port). + allowExternal: true + # -- Allow the pod to access any range of port and all destinations. 
+ allowExternalEgress: true + # -- Add extra ingress rules to the NetworkPolicy + # e.g: + # extraIngress: + # - ports: + # - port: 1234 + # from: + # - podSelector: + # - matchLabels: + # - role: frontend + # - podSelector: + # - matchExpressions: + # - key: role + # operator: In + # values: + # - frontend + extraIngress: [] + # -- Add extra egress rules to the NetworkPolicy + # e.g: + # extraEgress: + # - ports: + # - port: 1234 + # to: + # - podSelector: + # - matchLabels: + # - role: frontend + # - podSelector: + # - matchExpressions: + # - key: role + # operator: In + # values: + # - frontend + extraEgress: [] +# RBAC configuration +rbac: + # -- Specifies whether RBAC resources should be created + create: true + # -- Custom RBAC rules to set + # e.g: + # rules: + # - apiGroups: + # - "" + # resources: + # - pods + # verbs: + # - get + # - list + rules: [] + +# @skip +deployed: true diff --git a/install/helm/charts/rabbitmq/README.md b/install/helm/charts/rabbitmq/README.md index d8d7c2c1..bf6a81f1 100644 --- a/install/helm/charts/rabbitmq/README.md +++ b/install/helm/charts/rabbitmq/README.md @@ -4,11 +4,12 @@ ### Global parameters -| Name | Description | Value | -| ------------------------- | ----------------------------------------------- | ----- | -| `global.imageRegistry` | Global Docker image registry | `""` | -| `global.imagePullSecrets` | Global Docker registry secret names as an array | `[]` | -| `global.storageClass` | Global StorageClass for Persistent Volume(s) | `""` | +| Name | Description | Value | +| ------------------------------------- | ------------------------------------------------------------------------------------------- | ----- | +| `global.imageRegistry` | Global Docker image registry | `""` | +| `global.imagePullSecrets` | Global Docker registry secret names as an array | `[]` | +| `global.storageClass` | Global StorageClass for Persistent Volume(s) | `""` | +| `global.rabbitmq.auth.existingSecret` | Name of an existing secret that 
contains the certificates (overrides `auth.existingSecret`) | `""` | ### Common parameters @@ -61,12 +62,10 @@ | `containerPorts.manager` | manager | `15672` | | `containerPorts.epmd` | EPMD | `4369` | | `containerPorts.metrics` | metrics | `9419` | -| `podSecurityContext.enabled` | Enable RabbitMQ pods' Security Context | `true` | | `podSecurityContext.fsGroupChangePolicy` | Set filesystem group change policy | `Always` | | `podSecurityContext.sysctls` | Set kernel settings using the sysctl interface | `[]` | | `podSecurityContext.supplementalGroups` | Set filesystem extra groups | `[]` | | `podSecurityContext.fsGroup` | Set RabbitMQ pod's Security Context fsGroup | `1001` | -| `containerSecurityContext.enabled` | Enabled RabbitMQ containers' Security Context | `true` | | `containerSecurityContext.seLinuxOptions` | Set SELinux options in container | `nil` | | `containerSecurityContext.runAsUser` | Set RabbitMQ containers' Security Context runAsUser | `1001` | | `containerSecurityContext.runAsGroup` | Set RabbitMQ containers' Security Context runAsGroup | `1001` | @@ -75,19 +74,16 @@ | `containerSecurityContext.readOnlyRootFilesystem` | Set container's Security Context readOnlyRootFilesystem | `true` | | `containerSecurityContext.capabilities.drop` | Set container's Security Context runAsNonRoot | `["ALL"]` | | `containerSecurityContext.seccompProfile.type` | Set container's Security Context seccomp profile | `RuntimeDefault` | -| `livenessProbe.enabled` | Enable livenessProbe | `true` | | `livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `120` | | `livenessProbe.timeoutSeconds` | Timeout seconds for livenessProbe | `20` | | `livenessProbe.periodSeconds` | Period seconds for livenessProbe | `30` | | `livenessProbe.failureThreshold` | Failure threshold for livenessProbe | `6` | | `livenessProbe.successThreshold` | Success threshold for livenessProbe | `1` | -| `readinessProbe.enabled` | Enable readinessProbe | `true` | | 
`readinessProbe.initialDelaySeconds` | Initial delay seconds for readinessProbe | `10` | | `readinessProbe.timeoutSeconds` | Timeout seconds for readinessProbe | `20` | | `readinessProbe.periodSeconds` | Period seconds for readinessProbe | `30` | | `readinessProbe.failureThreshold` | Failure threshold for readinessProbe | `3` | | `readinessProbe.successThreshold` | Success threshold for readinessProbe | `1` | -| `startupProbe.enabled` | Enable startupProbe | `false` | | `startupProbe.initialDelaySeconds` | Initial delay seconds for startupProbe | `10` | | `startupProbe.timeoutSeconds` | Timeout seconds for startupProbe | `20` | | `startupProbe.periodSeconds` | Period seconds for startupProbe | `30` | diff --git a/install/helm/charts/rabbitmq/templates/networkpolicy.yaml b/install/helm/charts/rabbitmq/templates/networkpolicy.yaml index 179410ea..caa17cb3 100644 --- a/install/helm/charts/rabbitmq/templates/networkpolicy.yaml +++ b/install/helm/charts/rabbitmq/templates/networkpolicy.yaml @@ -27,7 +27,6 @@ spec: - ports: - port: {{ .Values.service.ports.epmd }} - port: {{ .Values.service.ports.amqp }} - - port: {{ .Values.service.ports.amqpTls }} - port: {{ .Values.service.ports.dist }} - port: {{ .Values.service.ports.manager }} {{- if (eq (include "common.metrics.enabled" .) "true") }} @@ -44,7 +43,6 @@ spec: - ports: - port: {{ .Values.containerPorts.epmd }} - port: {{ .Values.containerPorts.amqp }} - - port: {{ .Values.containerPorts.amqpTls }} - port: {{ .Values.containerPorts.dist }} - port: {{ .Values.containerPorts.manager }} {{- if (eq (include "common.metrics.enabled" $) "true") }} diff --git a/install/helm/charts/rabbitmq/templates/secret.yaml b/install/helm/charts/rabbitmq/templates/secret.yaml index 7cd7649a..ae4b4415 100644 --- a/install/helm/charts/rabbitmq/templates/secret.yaml +++ b/install/helm/charts/rabbitmq/templates/secret.yaml @@ -1,5 +1,5 @@ -{{- if not .Values.auth.existingSecret }} -{{- $secretName := include "common.authSecretName" . 
}} +{{- if and (not .Values.auth.existingSecret) (not ((.Values.global.rabbitmq).auth).existingSecret) }} +{{- $secretName := include "common.auth.secretName" . }} {{- $key := include "rabbitmq.secretPasswordKey" . }} {{- $password := include "common.secrets.password" (dict "context" . "secret" $secretName "key" $key "defaultValue" .Values.auth.password) }} apiVersion: v1 diff --git a/install/helm/charts/rabbitmq/templates/statefulset.yaml b/install/helm/charts/rabbitmq/templates/statefulset.yaml index 46bebb88..b6cb2e12 100644 --- a/install/helm/charts/rabbitmq/templates/statefulset.yaml +++ b/install/helm/charts/rabbitmq/templates/statefulset.yaml @@ -26,9 +26,9 @@ spec: spec: serviceAccountName: {{ include "common.serviceAccountName" . }} {{- include "common.imagePullSecrets" . | nindent 6 }} - {{- if .Values.podSecurityContext.enabled }} + {{- with .Values.podSecurityContext }} securityContext: - {{- omit .Values.podSecurityContext "enabled" | toYaml | nindent 8 }} + {{- toYaml . | nindent 8 }} {{- end }} {{- if .Values.terminationGracePeriodSeconds }} terminationGracePeriodSeconds: {{ .Values.terminationGracePeriodSeconds }} @@ -37,9 +37,9 @@ spec: - name: rabbitmq image: {{ include "rabbitmq.image" . }} imagePullPolicy: {{ .Values.image.pullPolicy | quote }} - {{- if .Values.containerSecurityContext.enabled }} + {{- with .Values.containerSecurityContext }} securityContext: - {{- omit .Values.containerSecurityContext "enabled" | toYaml | nindent 12 }} + {{- toYaml . | nindent 12 }} {{- end }} command: - /bin/bash @@ -98,7 +98,7 @@ spec: - name: RABBITMQ_DEFAULT_PASS valueFrom: secretKeyRef: - name: {{ include "common.authSecretName" . }} + name: {{ include "common.auth.secretName" . }} key: {{ include "rabbitmq.secretPasswordKey" . }} - name: RABBITMQ_PLUGINS value: {{ include "rabbitmq.plugins" . 
| quote }} @@ -118,21 +118,21 @@ spec: - name: metrics containerPort: {{ .Values.containerPorts.metrics }} {{- end }} - {{- if .Values.startupProbe.enabled }} - startupProbe: {{- omit .Values.startupProbe "enabled" | toYaml | nindent 12 }} + {{- with .Values.startupProbe }} + startupProbe: {{- toYaml . | nindent 12 }} tcpSocket: port: amqp {{- end }} - {{- if .Values.livenessProbe.enabled }} - livenessProbe: {{- omit .Values.livenessProbe "enabled" | toYaml | nindent 12 }} + {{- with .Values.livenessProbe }} + livenessProbe: {{- toYaml . | nindent 12 }} exec: command: - sh - -ec - rabbitmq-diagnostics -q ping {{- end }} - {{- if .Values.readinessProbe.enabled }} - readinessProbe: {{- omit .Values.readinessProbe "enabled" | toYaml | nindent 12 }} + {{- with .Values.readinessProbe }} + readinessProbe: {{- toYaml . | nindent 12 }} exec: command: - sh diff --git a/install/helm/charts/rabbitmq/values.schema.json b/install/helm/charts/rabbitmq/values.schema.json index 8ac7226c..24fbe5f1 100644 --- a/install/helm/charts/rabbitmq/values.schema.json +++ b/install/helm/charts/rabbitmq/values.schema.json @@ -20,6 +20,21 @@ "type": "string", "description": "Global StorageClass for Persistent Volume(s)", "default": "" + }, + "rabbitmq": { + "type": "object", + "properties": { + "auth": { + "type": "object", + "properties": { + "existingSecret": { + "type": "string", + "description": "Name of an existing secret that contains the certificates (overrides `auth.existingSecret`)", + "default": "" + } + } + } + } } } }, @@ -226,11 +241,6 @@ "podSecurityContext": { "type": "object", "properties": { - "enabled": { - "type": "boolean", - "description": "Enable RabbitMQ pods' Security Context", - "default": true - }, "fsGroupChangePolicy": { "type": "string", "description": "Set filesystem group change policy", @@ -258,11 +268,6 @@ "containerSecurityContext": { "type": "object", "properties": { - "enabled": { - "type": "boolean", - "description": "Enabled RabbitMQ containers' Security 
Context", - "default": true - }, "runAsUser": { "type": "number", "description": "Set RabbitMQ containers' Security Context runAsUser", @@ -318,11 +323,6 @@ "livenessProbe": { "type": "object", "properties": { - "enabled": { - "type": "boolean", - "description": "Enable livenessProbe", - "default": true - }, "initialDelaySeconds": { "type": "number", "description": "Initial delay seconds for livenessProbe", @@ -353,11 +353,6 @@ "readinessProbe": { "type": "object", "properties": { - "enabled": { - "type": "boolean", - "description": "Enable readinessProbe", - "default": true - }, "initialDelaySeconds": { "type": "number", "description": "Initial delay seconds for readinessProbe", @@ -388,11 +383,6 @@ "startupProbe": { "type": "object", "properties": { - "enabled": { - "type": "boolean", - "description": "Enable startupProbe", - "default": false - }, "initialDelaySeconds": { "type": "number", "description": "Initial delay seconds for startupProbe", diff --git a/install/helm/charts/rabbitmq/values.yaml b/install/helm/charts/rabbitmq/values.yaml index cb5fac34..f7c40c22 100644 --- a/install/helm/charts/rabbitmq/values.yaml +++ b/install/helm/charts/rabbitmq/values.yaml @@ -9,6 +9,10 @@ global: imagePullSecrets: [] # -- Global StorageClass for Persistent Volume(s) storageClass: "" + rabbitmq: + auth: + # -- Name of an existing secret that contains the certificates (overrides `auth.existingSecret`) + existingSecret: "" # @section Common parameters # -- String to partially override common.fullname template (will maintain the release name) @@ -77,7 +81,6 @@ ulimitNofiles: "65536" plugins: "rabbitmq_management rabbitmq_peer_discovery_k8s" # -- [string] RabbitMQ Configuration file content: required cluster configuration # Do not override unless you know what you are doing. 
-# To add more configuration, use `extraConfiguration` of `advancedConfiguration` instead configuration: "" # -- The name of an existing ConfigMap with your custom configuration existingConfigmap: "" @@ -137,8 +140,6 @@ containerPorts: # RabbitMQ pods' Security Context # ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod podSecurityContext: - # -- Enable RabbitMQ pods' Security Context - enabled: true # -- Set filesystem group change policy fsGroupChangePolicy: Always # -- Set kernel settings using the sysctl interface @@ -154,8 +155,6 @@ podSecurityContext: # drop: ["NET_RAW"] # readOnlyRootFilesystem: true containerSecurityContext: - # -- Enabled RabbitMQ containers' Security Context - enabled: true # -- [object,nullable] Set SELinux options in container seLinuxOptions: null # -- Set RabbitMQ containers' Security Context runAsUser @@ -177,8 +176,6 @@ containerSecurityContext: # Configure RabbitMQ containers' extra options for liveness probe # ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes livenessProbe: - # -- Enable livenessProbe - enabled: true # -- Initial delay seconds for livenessProbe initialDelaySeconds: 120 # -- Timeout seconds for livenessProbe @@ -192,8 +189,6 @@ livenessProbe: # Configure RabbitMQ containers' extra options for readiness probe # ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes readinessProbe: - # -- Enable readinessProbe - enabled: true # -- Initial delay seconds for readinessProbe initialDelaySeconds: 10 # -- Timeout seconds for readinessProbe @@ -207,8 +202,6 @@ readinessProbe: # Configure RabbitMQ containers' extra options for startup probe # ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes startupProbe: - # -- Enable startupProbe - enabled: false # -- Initial delay 
seconds for startupProbe initialDelaySeconds: 10 # -- Timeout seconds for startupProbe diff --git a/install/helm/charts/redis/README.md b/install/helm/charts/redis/README.md index 19ee85aa..a5d54653 100644 --- a/install/helm/charts/redis/README.md +++ b/install/helm/charts/redis/README.md @@ -4,17 +4,18 @@ ### Global parameters -| Name | Description | Value | -| --------------------------------- | ------------------------------------------------------------------------------------------- | ----- | -| `global.imageRegistry` | Global Docker image registry | `""` | -| `global.imagePullSecrets` | Global Docker registry secret names as an array | `[]` | -| `global.storageClass` | Global StorageClass for Persistent Volume(s) | `""` | -| `global.redis.tls.enabled` | Enable TLS traffic support (overrides `tls.enabled`) | `nil` | -| `global.redis.tls.autoGenerated` | Generate automatically self-signed TLS certificates (overrides `tls.autoGenerated`) | `nil` | -| `global.redis.tls.existingSecret` | Name of an existing secret that contains the certificates (overrides `tls.existingSecret`) | `""` | -| `global.redis.tls.cert` | Certificate value. Requires `tls.autoGenerated` to be `false` (overrides `tls.cert`) | `""` | -| `global.redis.tls.certKey` | Certificate key value. Requires `tls.autoGenerated` to be `false` (overrides `tls.certKey`) | `""` | -| `global.redis.tls.certCA` | CA Certificate value. 
Requires `tls.autoGenerated` to be `false` (overrides `tls.certCA`) | `""` | +| Name | Description | Value | +| ---------------------------------- | ------------------------------------------------------------------------------------------- | ----- | +| `global.imageRegistry` | Global Docker image registry | `""` | +| `global.imagePullSecrets` | Global Docker registry secret names as an array | `[]` | +| `global.storageClass` | Global StorageClass for Persistent Volume(s) | `""` | +| `global.redis.auth.existingSecret` | Name of an existing secret that contains the certificates (overrides `auth.existingSecret`) | `""` | +| `global.redis.tls.enabled` | Enable TLS traffic support (overrides `tls.enabled`) | `nil` | +| `global.redis.tls.autoGenerated` | Generate automatically self-signed TLS certificates (overrides `tls.autoGenerated`) | `nil` | +| `global.redis.tls.existingSecret` | Name of an existing secret that contains the certificates (overrides `tls.existingSecret`) | `""` | +| `global.redis.tls.cert` | Certificate value. Requires `tls.autoGenerated` to be `false` (overrides `tls.cert`) | `""` | +| `global.redis.tls.certKey` | Certificate key value. Requires `tls.autoGenerated` to be `false` (overrides `tls.certKey`) | `""` | +| `global.redis.tls.certCA` | CA Certificate value. 
Requires `tls.autoGenerated` to be `false` (overrides `tls.certCA`) | `""` | ### Common parameters @@ -71,12 +72,10 @@ | `volumes` | Optionally specify extra list of additional volumes for the Redis® master pod(s) | `[]` | | `volumeMounts` | Optionally specify extra list of additional volumeMounts for the Redis® master container(s) | `[]` | | `containerPorts.redis` | Container port to open on Redis® master nodes | `6379` | -| `podSecurityContext.enabled` | Enabled Redis® master pods' Security Context | `true` | | `podSecurityContext.fsGroupChangePolicy` | Set filesystem group change policy | `Always` | | `podSecurityContext.sysctls` | Set kernel settings using the sysctl interface | `[]` | | `podSecurityContext.supplementalGroups` | Set filesystem extra groups | `[]` | | `podSecurityContext.fsGroup` | Set Redis® master pod's Security Context fsGroup | `1001` | -| `containerSecurityContext.enabled` | Enabled Redis® master containers' Security Context | `true` | | `containerSecurityContext.seLinuxOptions` | Set SELinux options in container | `{}` | | `containerSecurityContext.runAsUser` | Set Redis® master containers' Security Context runAsUser | `1001` | | `containerSecurityContext.runAsGroup` | Set Redis® master containers' Security Context runAsGroup | `1001` | @@ -85,19 +84,16 @@ | `containerSecurityContext.readOnlyRootFilesystem` | Set container's Security Context read-only root filesystem | `true` | | `containerSecurityContext.seccompProfile.type` | Set Redis® master containers' Security Context seccompProfile | `RuntimeDefault` | | `containerSecurityContext.capabilities.drop` | Set Redis® master containers' Security Context capabilities to drop | `["ALL"]` | -| `startupProbe.enabled` | Enable startupProbe on Redis® master nodes | `false` | | `startupProbe.initialDelaySeconds` | Initial delay seconds for startupProbe | `20` | | `startupProbe.periodSeconds` | Period seconds for startupProbe | `5` | | `startupProbe.timeoutSeconds` | Timeout seconds for 
startupProbe | `5` | | `startupProbe.successThreshold` | Failure threshold for startupProbe | `1` | | `startupProbe.failureThreshold` | Success threshold for startupProbe | `5` | -| `livenessProbe.enabled` | Enable livenessProbe on Redis® master nodes | `true` | | `livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `20` | | `livenessProbe.periodSeconds` | Period seconds for livenessProbe | `5` | | `livenessProbe.timeoutSeconds` | Timeout seconds for livenessProbe | `5` | | `livenessProbe.successThreshold` | Failure threshold for livenessProbe | `1` | | `livenessProbe.failureThreshold` | Success threshold for livenessProbe | `5` | -| `readinessProbe.enabled` | Enable readinessProbe on Redis® master nodes | `true` | | `readinessProbe.initialDelaySeconds` | Initial delay seconds for readinessProbe | `20` | | `readinessProbe.periodSeconds` | Period seconds for readinessProbe | `5` | | `readinessProbe.timeoutSeconds` | Timeout seconds for readinessProbe | `1` | @@ -109,25 +105,24 @@ | Name | Description | Value | | ------------------------------ | ----------------------------------------------------------- | ----------- | | `service.type` | Redis® master service type | `ClusterIP` | -| `service.portNames.redis` | Redis® master service port name | `tcp-redis` | | `service.ports.redis` | Redis® master service port | `6379` | | `service.annotations` | Additional custom annotations for Redis® master service | `{}` | | `service.headless.annotations` | Annotations for the headless service. 
| `{}` | ### Persistence Parameters -| Name | Description | Value | -| -------------------------------------------------- | --------------------------------------------------------------------------------------- | ------------------- | -| `persistence.enabled` | Enable PostgreSQL data persistence using PVC | `true` | -| `persistence.existingClaim` | Name of an existing PVC to use | `""` | -| `persistence.storageClass` | PVC Storage Class for PostgreSQL data volume | `""` | -| `persistence.accessModes` | PVC Access Mode for PostgreSQL volume | `["ReadWriteOnce"]` | -| `persistence.size` | PVC Storage Request for PostgreSQL volume | `8Gi` | -| `persistence.annotations` | Annotations for the PVC | `{}` | -| `persistence.labels` | Labels for the PVC | `{}` | -| `persistence.selector` | Selector to match an existing Persistent Volume (this value is evaluated as a template) | `{}` | -| `persistence.dataSource` | Custom PVC data source | `{}` | -| `persistence.persistentVolumeClaimRetentionPolicy` | PostgreSQL Persistent Volume Claim Retention Policy | `{}` | +| Name | Description | Value | +| -------------------------------------------------- | ----------------------------------------------------------------------- | ------------------- | +| `persistence.enabled` | Enable persistence using Persistent Volume Claims | `true` | +| `persistence.existingClaim` | Name of an existing PVC to use | `""` | +| `persistence.storageClass` | Storage class of backing PVC | `""` | +| `persistence.labels` | Persistent Volume Claim labels | `{}` | +| `persistence.annotations` | Persistent Volume Claim annotations | `{}` | +| `persistence.accessModes` | Persistent Volume Access Modes | `["ReadWriteOnce"]` | +| `persistence.size` | Size of data volume | `8Gi` | +| `persistence.selector` | Selector to match an existing Persistent Volume for ClickHouse data PVC | `{}` | +| `persistence.dataSource` | Custom PVC data source | `{}` | +| `persistence.persistentVolumeClaimRetentionPolicy` | 
PostgreSQL Persistent Volume Claim Retention Policy | `{}` | ### Other Parameters diff --git a/install/helm/charts/redis/templates/statefulset.yaml b/install/helm/charts/redis/templates/deployment.yaml similarity index 62% rename from install/helm/charts/redis/templates/statefulset.yaml rename to install/helm/charts/redis/templates/deployment.yaml index 7880293f..255c1065 100644 --- a/install/helm/charts/redis/templates/statefulset.yaml +++ b/install/helm/charts/redis/templates/deployment.yaml @@ -1,6 +1,6 @@ {{- $tlsEnabled := eq (include "common.tls.enabled" .) "true" }} apiVersion: apps/v1 -kind: StatefulSet +kind: Deployment metadata: name: {{ include "common.fullname" . }} labels: @@ -14,7 +14,6 @@ spec: selector: matchLabels: {{- include "common.selectorLabels" . | nindent 6 }} - serviceName: {{ include "common.fullname" . }}-headless template: metadata: {{- with (include "common.podAnnotations" .) }} @@ -26,9 +25,9 @@ spec: spec: serviceAccountName: {{ include "common.serviceAccountName" . }} {{- include "common.imagePullSecrets" . | nindent 6 }} - {{- if .Values.podSecurityContext.enabled }} + {{- with .Values.podSecurityContext }} securityContext: - {{- omit .Values.podSecurityContext "enabled" | toYaml | nindent 8 }} + {{- toYaml . | nindent 8 }} {{- end }} {{- if .Values.terminationGracePeriodSeconds }} terminationGracePeriodSeconds: {{ .Values.terminationGracePeriodSeconds }} @@ -37,9 +36,9 @@ spec: - name: redis image: {{ include "redis.image" . }} imagePullPolicy: {{ .Values.image.pullPolicy | quote }} - {{- if .Values.containerSecurityContext.enabled }} + {{- with .Values.containerSecurityContext }} securityContext: - {{- omit .Values.containerSecurityContext "enabled" | toYaml | nindent 12 }} + {{- toYaml . | nindent 12 }} {{- end }} command: - /bin/bash @@ -62,7 +61,7 @@ spec: - name: REDIS_PASSWORD valueFrom: secretKeyRef: - name: {{ include "common.authSecretName" . }} + name: {{ include "common.auth.secretName" . 
}} key: {{ include "redis.secretPasswordKey" . }} {{- end }} {{- with .Values.extraEnvVars }} @@ -71,13 +70,15 @@ spec: ports: - name: redis containerPort: {{ .Values.containerPorts.redis }} - {{- if .Values.startupProbe.enabled }} - startupProbe: {{- omit .Values.startupProbe "enabled" | toYaml | nindent 12 }} + {{- with .Values.startupProbe }} + startupProbe: + {{- toYaml . | nindent 12 }} tcpSocket: port: redis {{- end }} - {{- if .Values.livenessProbe.enabled }} - livenessProbe: {{- omit .Values.livenessProbe "enabled" | toYaml | nindent 12 }} + {{- with .Values.livenessProbe }} + livenessProbe: + {{- toYaml . | nindent 12 }} exec: command: - sh @@ -85,23 +86,24 @@ spec: - >- redis-cli -h localhost - -p {{ .Values.containerPorts.redis }} - {{- if .Values.auth.enabled }} + -p {{ $.Values.containerPorts.redis }} + {{- if $.Values.auth.enabled }} --user "${REDIS_USER}" --pass "${REDIS_PASSWORD}" {{- end }} {{- if $tlsEnabled }} --tls - --cacert {{ include "redis.tlsCACert" . }} - {{- if .Values.tls.authClients }} - --cert {{ include "redis.tlsCert" . }} - --key {{ include "redis.tlsCertKey" . }} + --cacert {{ include "redis.tlsCACert" $ }} + {{- if $.Values.tls.authClients }} + --cert {{ include "redis.tlsCert" $ }} + --key {{ include "redis.tlsCertKey" $ }} {{- end }} {{- end }} ping | head -n1 | ( ! grep -vP '^\s*(PONG|LOADING|MASTERDOWN)' ) {{- end }} - {{- if .Values.readinessProbe.enabled }} - readinessProbe: {{- omit .Values.readinessProbe "enabled" | toYaml | nindent 12 }} + {{- with .Values.readinessProbe }} + readinessProbe: + {{- toYaml . | nindent 12 }} exec: command: - sh @@ -109,17 +111,17 @@ spec: - >- redis-cli -h localhost - -p {{ .Values.containerPorts.redis }} - {{- if .Values.auth.enabled }} + -p {{ $.Values.containerPorts.redis }} + {{- if $.Values.auth.enabled }} --user "${REDIS_USER}" --pass "${REDIS_PASSWORD}" {{- end }} {{- if $tlsEnabled }} --tls - --cacert {{ include "redis.tlsCACert" . 
}} - {{- if .Values.tls.authClients }} - --cert {{ include "redis.tlsCert" . }} - --key {{ include "redis.tlsCertKey" . }} + --cacert {{ include "redis.tlsCACert" $ }} + {{- if $.Values.tls.authClients }} + --cert {{ include "redis.tlsCert" $ }} + --key {{ include "redis.tlsCertKey" $ }} {{- end }} {{- end }} ping | head -n1 | ( ! grep -vP '^\s*(PONG)' ) @@ -172,43 +174,11 @@ spec: {{- with .Values.volumes }} {{- toYaml . | nindent 8 }} {{- end }} - {{- if not .Values.persistence.enabled }} + {{- if not .Values.persistence.enabled }} - name: data emptyDir: {} - {{- else if .Values.persistence.existingClaim }} + {{- else }} - name: data persistentVolumeClaim: - claimName: {{ tpl .Values.persistence.existingClaim . }} - {{- else }} - {{- with .Values.persistence.persistentVolumeClaimRetentionPolicy }} - persistentVolumeClaimRetentionPolicy: - {{ toYaml . | nindent 4 }} - {{- end }} - volumeClaimTemplates: - - apiVersion: v1 - kind: PersistentVolumeClaim - metadata: - name: data - {{- with (include "common.mergeAnnotations" (dict "context" . "value" .Values.persistence.annotations)) }} - annotations: - {{- . | nindent 10 }} + claimName: {{ include "common.pvcName" . }} {{- end }} - {{- with (include "common.mergeLabels" (dict "context" . "value" .Values.persistence.labels)) }} - labels: - {{- . | nindent 10 }} - {{- end }} - spec: - accessModes: {{- toYaml .Values.persistence.accessModes | nindent 10 }} - {{- with .Values.persistence.dataSource }} - dataSource: {{- toYaml . | nindent 10 }} - {{- end }} - resources: - requests: - storage: {{ .Values.persistence.size | quote }} - {{- with .Values.persistence.selector }} - selector: {{- toYaml . | nindent 10 }} - {{- end }} - {{- with (default .Values.persistence.storageClass (.Values.global).storageClass) }} - storageClassName: {{ eq . "-" | ternary "" . 
| quote }} - {{- end }} - {{- end }} diff --git a/install/helm/charts/redis/templates/pvc.yaml b/install/helm/charts/redis/templates/pvc.yaml new file mode 100644 index 00000000..0f3510a1 --- /dev/null +++ b/install/helm/charts/redis/templates/pvc.yaml @@ -0,0 +1,29 @@ +{{- if and .Values.persistence.enabled (not .Values.persistence.existingClaim) }} +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: {{ include "common.pvcName" . }} + namespace: {{ .Release.Namespace | quote }} + {{- with (include "common.mergeAnnotations" (dict "context" . "value" .Values.persistence.annotations)) }} + annotations: + {{- . | nindent 4 }} + {{- end }} + {{- with (include "common.mergeLabels" (dict "context" . "value" .Values.persistence.labels)) }} + labels: + {{- . | nindent 4 }} + {{- end }} +spec: + accessModes: {{- toYaml .Values.persistence.accessModes | nindent 10 }} + {{- with .Values.persistence.dataSource }} + dataSource: {{- toYaml . | nindent 10 }} + {{- end }} + resources: + requests: + storage: {{ .Values.persistence.size | quote }} + {{- with .Values.persistence.selector }} + selector: {{- toYaml . | nindent 10 }} + {{- end }} + {{- with (default .Values.persistence.storageClass (.Values.global).storageClass) }} + storageClassName: {{ eq . "-" | ternary "" . | quote }} + {{- end }} +{{- end }} diff --git a/install/helm/charts/redis/templates/secret.yaml b/install/helm/charts/redis/templates/secret.yaml index f651b60b..2f1fd5e5 100644 --- a/install/helm/charts/redis/templates/secret.yaml +++ b/install/helm/charts/redis/templates/secret.yaml @@ -1,11 +1,11 @@ -{{- if not .Values.auth.existingSecret }} -{{- $secretName := include "common.authSecretName" . }} +{{- if and (not .Values.auth.existingSecret) (not ((.Values.global.redis).auth).existingSecret) }} +{{- $secretName := include "common.auth.secretName" . }} {{- $key := include "redis.secretPasswordKey" . }} {{- $password := include "common.secrets.password" (dict "context" . 
"secret" $secretName "key" $key "defaultValue" .Values.auth.password) }} apiVersion: v1 kind: Secret metadata: - name: {{ include "common.authSecretName" . }} + name: {{ $secretName }} namespace: {{ include "common.namespace" . | quote }} labels: {{- include "common.labels" . | nindent 4 }} diff --git a/install/helm/charts/redis/values.schema.json b/install/helm/charts/redis/values.schema.json index de742629..8e036638 100644 --- a/install/helm/charts/redis/values.schema.json +++ b/install/helm/charts/redis/values.schema.json @@ -24,6 +24,16 @@ "redis": { "type": "object", "properties": { + "auth": { + "type": "object", + "properties": { + "existingSecret": { + "type": "string", + "description": "Name of an existing secret that contains the Redis® password (overrides `auth.existingSecret`)", + "default": "" + } + } + }, "tls": { "type": "object", "properties": { @@ -305,11 +315,6 @@ "podSecurityContext": { "type": "object", "properties": { - "enabled": { - "type": "boolean", - "description": "Enabled Redis® master pods' Security Context", - "default": true - }, "fsGroupChangePolicy": { "type": "string", "description": "Set filesystem group change policy", @@ -337,11 +342,6 @@ "containerSecurityContext": { "type": "object", "properties": { - "enabled": { - "type": "boolean", - "description": "Enabled Redis® master containers' Security Context", - "default": true - }, "runAsUser": { "type": "number", "description": "Set Redis® master containers' Security Context runAsUser", @@ -397,11 +397,6 @@ "startupProbe": { "type": "object", "properties": { - "enabled": { - "type": "boolean", - "description": "Enable startupProbe on Redis® master nodes", - "default": false - }, "initialDelaySeconds": { "type": "number", "description": "Initial delay seconds for startupProbe", @@ -432,11 +427,6 @@ "livenessProbe": { "type": "object", "properties": { - "enabled": { - "type": "boolean", - "description": "Enable livenessProbe on Redis® master nodes", - "default": true - },
"initialDelaySeconds": { "type": "number", "description": "Initial delay seconds for livenessProbe", @@ -467,11 +457,6 @@ "readinessProbe": { "type": "object", "properties": { - "enabled": { - "type": "boolean", - "description": "Enable readinessProbe on Redis® master nodes", - "default": true - }, "initialDelaySeconds": { "type": "number", "description": "Initial delay seconds for readinessProbe", @@ -507,16 +492,6 @@ "description": "Redis® master service type", "default": "ClusterIP" }, - "portNames": { - "type": "object", - "properties": { - "redis": { - "type": "string", - "description": "Redis® master service port name", - "default": "tcp-redis" - } - } - }, "ports": { "type": "object", "properties": { @@ -549,7 +524,7 @@ "properties": { "enabled": { "type": "boolean", - "description": "Enable PostgreSQL data persistence using PVC", + "description": "Enable persistence using Persistent Volume Claims", "default": true }, "existingClaim": { @@ -559,12 +534,22 @@ }, "storageClass": { "type": "string", - "description": "PVC Storage Class for PostgreSQL data volume", + "description": "Storage class of backing PVC", "default": "" }, + "labels": { + "type": "object", + "description": "Persistent Volume Claim labels", + "default": {} + }, + "annotations": { + "type": "object", + "description": "Persistent Volume Claim annotations", + "default": {} + }, "accessModes": { "type": "array", - "description": "PVC Access Mode for PostgreSQL volume", + "description": "Persistent Volume Access Modes", "default": [ "ReadWriteOnce" ], @@ -574,22 +559,12 @@ }, "size": { "type": "string", - "description": "PVC Storage Request for PostgreSQL volume", + "description": "Size of data volume", "default": "8Gi" }, - "annotations": { - "type": "object", - "description": "Annotations for the PVC", - "default": {} - }, - "labels": { - "type": "object", - "description": "Labels for the PVC", - "default": {} - }, "selector": { "type": "object", - "description": "Selector to match an existing 
Persistent Volume (this value is evaluated as a template)", + "description": "Selector to match an existing Persistent Volume for Redis® data PVC", "default": {} }, "dataSource": { diff --git a/install/helm/charts/redis/values.yaml b/install/helm/charts/redis/values.yaml index 5c19d3ae..27f884ef 100644 --- a/install/helm/charts/redis/values.yaml +++ b/install/helm/charts/redis/values.yaml @@ -9,7 +9,11 @@ global: imagePullSecrets: [] # -- Global StorageClass for Persistent Volume(s) storageClass: "" + redis: + auth: + # -- Name of an existing secret that contains the Redis® password (overrides `auth.existingSecret`) + existingSecret: "" tls: # -- [boolean, nullable] Enable TLS traffic support (overrides `tls.enabled`) enabled: null @@ -45,7 +49,6 @@ imagePullSecrets: [] # @section Redis® parameters # Redis® image -# ref: https://hub.docker.com/r/bitnami/redis/tags/ image: # -- [default: REGISTRY_NAME] Redis® image registry registry: "" @@ -156,8 +159,6 @@ containerPorts: # Configure Pods Security Context # ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod podSecurityContext: - # -- Enabled Redis® master pods' Security Context - enabled: true # -- Set filesystem group change policy fsGroupChangePolicy: Always # -- Set kernel settings using the sysctl interface @@ -169,8 +170,6 @@ podSecurityContext: # Configure Container Security Context # ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod containerSecurityContext: - # -- Enabled Redis® master containers' Security Context - enabled: true # -- [object,nullable] Set SELinux options in container seLinuxOptions: {} # -- Set Redis® master containers' Security Context runAsUser @@ -192,8 +191,6 @@ containerSecurityContext: # Configure extra options for Redis® containers' liveness and readiness probes # ref:
https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/#configure-probes startupProbe: - # -- Enable startupProbe on Redis® master nodes - enabled: false # -- Initial delay seconds for startupProbe initialDelaySeconds: 20 # -- Period seconds for startupProbe @@ -205,8 +202,6 @@ startupProbe: # -- Success threshold for startupProbe failureThreshold: 5 livenessProbe: - # -- Enable livenessProbe on Redis® master nodes - enabled: true # -- Initial delay seconds for livenessProbe initialDelaySeconds: 20 # -- Period seconds for livenessProbe @@ -218,8 +213,6 @@ livenessProbe: # -- Success threshold for livenessProbe failureThreshold: 5 readinessProbe: - # -- Enable readinessProbe on Redis® master nodes - enabled: true # -- Initial delay seconds for readinessProbe initialDelaySeconds: 20 # -- Period seconds for readinessProbe @@ -235,9 +228,6 @@ readinessProbe: service: # -- Redis® master service type type: ClusterIP - portNames: - # -- Redis® master service port name - redis: "tcp-redis" ports: # -- Redis® master service port redis: 6379 @@ -251,27 +241,29 @@ service: # Enable persistence using Persistent Volume Claims # ref: https://kubernetes.io/docs/concepts/storage/persistent-volumes/ persistence: - # -- Enable PostgreSQL data persistence using PVC + # -- Enable persistence using Persistent Volume Claims enabled: true # -- Name of an existing PVC to use existingClaim: "" - # -- PVC Storage Class for PostgreSQL data volume + # -- Storage class of backing PVC # If defined, storageClassName: # If set to "-", storageClassName: "", which disables dynamic provisioning # If undefined (the default) or set to null, no storageClassName spec is # set, choosing the default provisioner. 
(gp2 on AWS, standard on # GKE, AWS & OpenStack) storageClass: "" - # -- PVC Access Mode for PostgreSQL volume + # -- Persistent Volume Claim labels + labels: {} + # -- Persistent Volume Claim annotations + annotations: {} + # -- Persistent Volume Access Modes accessModes: - ReadWriteOnce - # -- PVC Storage Request for PostgreSQL volume + # -- Size of data volume size: 8Gi - # -- Annotations for the PVC - annotations: {} - # -- Labels for the PVC - labels: {} - # -- Selector to match an existing Persistent Volume (this value is evaluated as a template) + # -- Selector to match an existing Persistent Volume for Redis® data PVC + # If set, the PVC can't have a PV dynamically provisioned for it + # E.g. # selector: # matchLabels: # app: my-app diff --git a/install/helm/dashboards/cluster-status.json b/install/helm/dashboards/cluster-status.json new file mode 100644 index 00000000..a3d8ef91 --- /dev/null +++ b/install/helm/dashboards/cluster-status.json @@ -0,0 +1,1041 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Provides information about the health of ClickHouse, RabbitMQ, and PostgreSQL, the current cluster state (central cluster, unregistered child cluster, and registered child cluster), and the license and vulnerability database update status.
To get information about the health of external services, you must configure the internal Prometheus system for this purpose or install the external Prometheus system.", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 5, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "green", + "mode": "thresholds" + }, + "custom": { + "align": "left", + "cellOptions": { + "applyToRow": true, + "mode": "basic", + "type": "color-background" + }, + "filterable": true, + "inspect": false + }, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 0, + "text": "down" + }, + "1": { + "color": "green", + "index": 1, + "text": "up" + } + }, + "type": "value" + }, + { + "options": { + "match": "null+nan", + "result": { + "index": 2, + "text": "down" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "transparent" + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Cluster" + }, + "properties": [ + { + "id": "custom.width", + "value": 297 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Service" + }, + "properties": [ + { + "id": "custom.width", + "value": 114 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Status" + }, + "properties": [ + { + "id": "custom.width", + "value": 87 + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 0 + }, + "id": 21, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": ["sum"], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "up{cluster=~\"$cluster\",service=\"reverse-proxy\"}", + "format": "table", + "hide": false, + 
"instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "Reverse-proxy availability", + "transformations": [ + { + "id": "labelsToFields", + "options": {} + }, + { + "id": "groupBy", + "options": { + "fields": { + "Value": { + "aggregations": ["last"], + "operation": "aggregate" + }, + "cluster": { + "aggregations": [], + "operation": "groupby" + }, + "instance": { + "aggregations": [] + }, + "service": { + "aggregations": [] + } + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "includeByName": {}, + "indexByName": {}, + "renameByName": { + "Value (last)": "Status", + "cluster": "Cluster", + "service": "Service" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "green", + "mode": "thresholds" + }, + "custom": { + "align": "left", + "cellOptions": { + "applyToRow": true, + "mode": "basic", + "type": "color-background" + }, + "filterable": true, + "inspect": false + }, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 0, + "text": "down" + }, + "1": { + "color": "green", + "index": 1, + "text": "up" + } + }, + "type": "value" + }, + { + "options": { + "match": "null+nan", + "result": { + "color": "red", + "index": 2, + "text": "down" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "transparent" + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Status" + }, + "properties": [ + { + "id": "custom.width", + "value": 76 + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 0 + }, + "id": 25, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": ["sum"], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": 
"prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "up{cluster=~\"$cluster\",service=\"clickhouse\"}", + "format": "table", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "Clickhouse availability", + "transformations": [ + { + "id": "labelsToFields", + "options": {} + }, + { + "id": "groupBy", + "options": { + "fields": { + "Value": { + "aggregations": ["last"], + "operation": "aggregate" + }, + "cluster": { + "aggregations": [], + "operation": "groupby" + }, + "instance": { + "aggregations": [] + }, + "service": { + "aggregations": [] + } + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "includeByName": {}, + "indexByName": {}, + "renameByName": { + "Value (last)": "Status", + "cluster": "Cluster", + "service": "Service" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "green", + "mode": "thresholds" + }, + "custom": { + "align": "left", + "cellOptions": { + "applyToRow": true, + "mode": "basic", + "type": "color-background" + }, + "filterable": true, + "inspect": false + }, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 0, + "text": "down" + }, + "1": { + "color": "green", + "index": 1, + "text": "up" + } + }, + "type": "value" + }, + { + "options": { + "match": "null+nan", + "result": { + "index": 2, + "text": "down" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "transparent" + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Status" + }, + "properties": [ + { + "id": "custom.width", + "value": 76 + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 0 + }, + "id": 26, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": 
false, + "fields": "", + "reducer": ["sum"], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "up{cluster=~\"$cluster\",service=\"rabbitmq\"}", + "format": "table", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "RabbitMQ availability", + "transformations": [ + { + "id": "labelsToFields", + "options": {} + }, + { + "id": "groupBy", + "options": { + "fields": { + "Value": { + "aggregations": ["last"], + "operation": "aggregate" + }, + "cluster": { + "aggregations": [], + "operation": "groupby" + }, + "instance": { + "aggregations": [] + }, + "service": { + "aggregations": [] + } + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "includeByName": {}, + "indexByName": {}, + "renameByName": { + "Value (last)": "Status", + "cluster": "Cluster", + "service": "Service" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "green", + "mode": "thresholds" + }, + "custom": { + "align": "left", + "cellOptions": { + "applyToRow": true, + "mode": "basic", + "type": "color-background" + }, + "filterable": true, + "inspect": false + }, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 0, + "text": "down" + }, + "1": { + "color": "green", + "index": 1, + "text": "up" + } + }, + "type": "value" + }, + { + "options": { + "match": "null+nan", + "result": { + "index": 2, + "text": "down" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "transparent" + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Status" + }, + "properties": [ + { + "id": "custom.width", + "value": 76 
+ } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 0 + }, + "id": 27, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": ["sum"], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "pg_up{cluster=~\"$cluster\"}", + "format": "table", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "PostgreSQL availability", + "transformations": [ + { + "id": "labelsToFields", + "options": {} + }, + { + "id": "groupBy", + "options": { + "fields": { + "Value": { + "aggregations": ["last"], + "operation": "aggregate" + }, + "cluster": { + "aggregations": [], + "operation": "groupby" + }, + "instance": { + "aggregations": [] + }, + "service": { + "aggregations": [] + } + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "includeByName": {}, + "indexByName": {}, + "renameByName": { + "Value (last)": "Status", + "cluster": "Cluster", + "service": "Service" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "left", + "cellOptions": { + "type": "color-text" + }, + "filterable": true, + "inspect": false + }, + "mappings": [ + { + "options": { + "0": { + "color": "green", + "index": 0, + "text": "Central cluster" + }, + "1": { + "color": "red", + "index": 1, + "text": "Child unregistered " + }, + "2": { + "color": "green", + "index": 2, + "text": "Child registered" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "color": "yellow", + "index": 3, + "text": "Unknown cluster status" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": 
"absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 14, + "x": 0, + "y": 7 + }, + "id": 7, + "options": { + "cellHeight": "sm", + "enablePagination": false, + "footer": { + "countRows": false, + "fields": "", + "reducer": ["sum"], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(cluster_state_gauge{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=\"cs-manager\"})by(cluster,instance)", + "format": "table", + "legendFormat": "{{label_name}}", + "range": true, + "refId": "A" + } + ], + "title": "", + "transformations": [ + { + "id": "labelsToFields", + "options": { + "keepLabels": ["cluster"], + "mode": "columns", + "valueLabel": "instance" + } + }, + { + "id": "groupBy", + "options": { + "fields": { + "Cluster": { + "aggregations": ["last"], + "operation": "groupby" + }, + "Status": { + "aggregations": [], + "operation": "groupby" + }, + "Value": { + "aggregations": [], + "operation": "groupby" + }, + "cluster": { + "aggregations": [], + "operation": "groupby" + }, + "{cluster=\"$cluster\", instance=\"$instance\"}": { + "aggregations": [], + "operation": "groupby" + } + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "includeByName": {}, + "indexByName": { + "Time": 0, + "cluster": 1, + "{cluster=\"$cluster\", instance=\"$instance\"}": 2 + }, + "renameByName": { + "Time": "", + "Value": "Status", + "cluster": "Cluster", + "{cluster=\"$cluster\", instance=\"$instance\"}": "Status" + } + } + }, + { + "id": "groupBy", + "options": { + "fields": { + "Cluster": { + "aggregations": [], + "operation": "groupby" + }, + "Status": { + "aggregations": ["last"], + "operation": "aggregate" + } + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "", 
+ "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "green", + "mode": "fixed" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red" + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 14, + "y": 7 + }, + "id": 14, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(grpc_server_started_total{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=\"cs-manager\"}[$interval]))", + "hide": false, + "instant": false, + "legendFormat": "{{cluster}} - {{instance}}", + "range": true, + "refId": "B" + } + ], + "title": "CS manager RPS", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "green", + "mode": "fixed" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red" + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 19, + "y": 7 + }, + "id": 18, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": 
"sum(rate(grpc_server_started_total{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=\"cluster-manager\"}[$interval]))", + "hide": false, + "instant": false, + "legendFormat": "{{cluster}} - {{instance}}", + "range": true, + "refId": "B" + } + ], + "title": "Cluster manager RPS", + "type": "stat" + } + ], + "preload": false, + "schemaVersion": 41, + "tags": ["cluster", "cs"], + "templating": { + "list": [ + { + "current": { + "text": "All", + "value": "$__all" + }, + "definition": "label_values(go_goroutines,cluster)", + "includeAll": true, + "label": "Cluster", + "name": "cluster", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(go_goroutines,cluster)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "type": "query" + }, + { + "auto": true, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "$__auto", + "value": "$__auto" + }, + "label": "Interval", + "name": "interval", + "options": [ + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "15m", + "value": "15m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + } + ], + "query": "1m,5m,10m,15m,30m,1h", + "refresh": 2, + "type": "interval" + } + ] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Cluster status", + "uid": "987694f4-187f-4c4e-b335-13ee42eee9bc", + "version": 1, + "weekStart": "monday" +} diff --git a/install/helm/dashboards/go-app.json b/install/helm/dashboards/go-app.json new file mode 100644 index 00000000..97c9f481 --- /dev/null +++ b/install/helm/dashboards/go-app.json @@ -0,0 +1,692 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + 
}, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Provides the main metrics for Golang services. Shows the key performance and stability indicators for these components.", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 4, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 32, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "avg(go_goroutines{cluster=~\"$cluster\",job=~\"kubernetes-pods|caddy\",service=~\"$service\",instance=~\"$instance\"}) by (service,instance)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + 
"legendFormat": "{{service}}-{{instance}}", + "metric": "go_goroutines", + "range": true, + "refId": "A", + "step": 4 + } + ], + "title": "Goroutines", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 30, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "avg(go_gc_duration_seconds{cluster=~\"$cluster\",job=~\"kubernetes-pods|caddy\",service=~\"$service\",instance=~\"$instance\"}) by (quantile)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{quantile}}", + "metric": "go_gc_duration_seconds", + "range": true, + "refId": "A", + "step": 4 + } + ], + "title": "GC duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + 
"uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 79, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(increase(process_cpu_seconds_total{job=~\"kubernetes-pods|caddy\",cluster=~\"$cluster\",service=~\"$service\",instance=~\"$instance\"}[$interval])) by (service, instance)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{service}}-{{instance}}", + "metric": "go_goroutines", + "range": true, + "refId": "A", + "step": 4 + } + ], + "title": "CPU utilization", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, +
"axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "alloc rate" + }, + "properties": [ + { + "id": "unit", + "value": "Bps" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 34, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "avg(go_memstats_alloc_bytes{cluster=~\"$cluster\",job=~\"kubernetes-pods|caddy\",service=~\"$service\",instance=~\"$instance\"})", + "interval": "", + "intervalFactor": 2, + "legendFormat": "bytes allocated", + "metric": "go_memstats_alloc_bytes", + "range": true, + "refId": "A", + "step": 4 + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "avg(rate(go_memstats_alloc_bytes_total{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=~\"$service\",instance=~\"$instance\"}[$interval]))", + "interval": "", + "intervalFactor": 2, + "legendFormat": "alloc rate", + "metric": 
"go_memstats_alloc_bytes_total", + "range": true, + "refId": "B", + "step": 4 + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "avg(go_memstats_stack_inuse_bytes{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=~\"$service\",instance=~\"$instance\"})", + "interval": "", + "intervalFactor": 2, + "legendFormat": "stack inuse", + "metric": "go_memstats_stack_inuse_bytes", + "range": true, + "refId": "C", + "step": 4 + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "avg(go_memstats_heap_inuse_bytes{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=~\"$service\",instance=~\"$instance\"})", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "heap inuse", + "metric": "go_memstats_heap_inuse_bytes", + "range": true, + "refId": "D", + "step": 4 + } + ], + "title": "Memory", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 80, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(process_open_fds{job=~\"kubernetes-pods|caddy\",cluster=~\"$cluster\",service=~\"$service\",instance=~\"$instance\"}) by (service,instance)", + "format": "time_series", + 
"interval": "", + "intervalFactor": 2, + "legendFormat": "{{service}} - {{instance}}", + "metric": "go_goroutines", + "range": true, + "refId": "A", + "step": 4 + } + ], + "title": "Opened FD", + "type": "gauge" + } + ], + "preload": false, + "refresh": "30s", + "schemaVersion": 41, + "tags": [ + "go-app" + ], + "templating": { + "list": [ + { + "current": { + "text": "All", + "value": "$__all" + }, + "definition": "label_values(go_goroutines,cluster)", + "includeAll": true, + "label": "Cluster", + "name": "cluster", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(go_goroutines,cluster)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "current": { + "text": [ + "policy-enforcer", + "history-api", + "event-processor" + ], + "value": [ + "policy-enforcer", + "history-api", + "event-processor" + ] + }, + "definition": "label_values(go_goroutines{cluster=~\"$cluster\"},service)", + "includeAll": false, + "label": "Service", + "multi": true, + "name": "service", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(go_goroutines{cluster=~\"$cluster\"},service)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "type": "query" + }, + { + "current": { + "text": "All", + "value": "$__all" + }, + "definition": "label_values(go_goroutines{cluster=~\"$cluster\", service=~\"$service\"},instance)", + "includeAll": true, + "label": "Instance", + "name": "instance", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(go_goroutines{cluster=~\"$cluster\", service=~\"$service\"},instance)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "type": "query" + }, + { + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "2m", + "value": "2m" + }, + "name": "interval", + "options": [ + { + "selected": true, + "text": "2m", + "value": 
"2m" + }, + { + "selected": false, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "15m", + "value": "15m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + } + ], + "query": "2m,5m,10m,15m,30m,1h", + "refresh": 2, + "type": "interval" + } + ] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "10s", + "15s", + "30s", + "1m", + "5m", + "10m", + "15m" + ] + }, + "timezone": "", + "title": "GO applications", + "uid": "0a004197-a21e-46ee-be04-5dea5dbff5ee", + "version": 1 +} \ No newline at end of file diff --git a/install/helm/dashboards/grpc.json b/install/helm/dashboards/grpc.json new file mode 100644 index 00000000..c3afd8f7 --- /dev/null +++ b/install/helm/dashboards/grpc.json @@ -0,0 +1,1198 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Gives a summary of gRPC communications between system services (request types and their statuses and duration). The majority of internal communications between services is done via gRPC. 
To make analysis easier, all related metrics are gathered in one place.", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 3, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 0, + "y": 0 + }, + "id": 12, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "(sum(rate(grpc_server_handled_total{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=~\"$service\",instance=~\"$instance\",grpc_type=~\"unary|UNARY\",grpc_code=\"OK\"}[$interval]))\n) / sum(rate(grpc_server_started_total{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=~\"$service\",instance=~\"$instance\",grpc_type=~\"unary|UNARY\"}[$interval]))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "Success", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + 
"unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 8, + "y": 0 + }, + "id": 14, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(grpc_server_started_total{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=~\"$service\",instance=~\"$instance\"}[$interval]))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "RPS", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "index": 0, + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 16, + "y": 0 + }, + "id": 75, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": 
"histogram_quantile(0.95, \n sum(rate(grpc_server_handling_seconds_bucket{cluster=~\"$cluster\",job=\"kubernetes-pods\",grpc_type=~\"unary|UNARY\",service=~\"$service\",instance=~\"$instance\"}[$interval])) by (le)\n)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "95%", + "range": true, + "refId": "A" + } + ], + "title": "Latency", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 5 + }, + "id": 76, + "options": { + "calculate": false, + "cellGap": 1, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Oranges", + "steps": 64 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "mode": "single", + "showColorScale": false, + "yHistogram": false + }, + "yAxis": { + "axisPlacement": "left", + "reverse": false + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(increase(grpc_server_handling_seconds_bucket{cluster=~\"$cluster\", service=~\"$service\"}[5m])) by (le)", + "format": "heatmap", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "range": true, + "refId": "A" + } + ], + "title": "gRPC requests", + "type": "heatmap" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + 
"axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "points", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 14 + }, + "id": 79, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(grpc_server_handled_total{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=~\"$service\",instance=~\"$instance\",grpc_type=~\"unary|UNARY\",grpc_code=\"OK\"}[$interval]))\n/ sum(rate(grpc_server_handled_total{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=~\"$service\",instance=~\"$instance\",grpc_type=~\"unary|UNARY\"}[$interval]))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "OK", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": 
"(sum(rate(grpc_server_handled_total{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=~\"$service\",instance=~\"$instance\",grpc_type=~\"unary|UNARY\",grpc_code=~\"InvalidArgument|INVALID_ARGUMENT\"}[$interval])) +\nsum(rate(grpc_server_handled_total{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=~\"$service\",instance=~\"$instance\",grpc_type=~\"unary|UNARY\",grpc_code=~\"NotFound|NOT_FOUND\"}[$interval])) + \nsum(rate(grpc_server_handled_total{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=~\"$service\",instance=~\"$instance\",grpc_type=~\"unary|UNARY\",grpc_code=~\"AlreadyExists|ALREADY_EXISTS\"}[$interval])) + \nsum(rate(grpc_server_handled_total{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=~\"$service\",instance=~\"$instance\",grpc_type=~\"unary|UNARY\",grpc_code=~\"Unauthenticated|UNAUTHENTICATED\"}[$interval])) +\nsum(rate(grpc_server_handled_total{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=~\"$service\",instance=~\"$instance\",grpc_type=~\"unary|UNARY\",grpc_code=~\"PermissionDenied|PERMISSION_DENIED\"}[$interval])) + \nsum(rate(grpc_server_handled_total{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=~\"$service\",instance=~\"$instance\",grpc_type=~\"unary|UNARY\",grpc_code=~\"FailedPrecondition|FAILED_PRECONDITION\"}[$interval]))\n)/ sum(rate(grpc_server_handled_total{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=~\"$service\",instance=~\"$instance\",grpc_type=~\"unary|UNARY\"}[$interval]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "ClientError", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "(sum(rate(grpc_server_handled_total{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=~\"$service\",instance=~\"$instance\",grpc_type=~\"unary|UNARY\",grpc_code=~\"Internal|INTERNAL\"}[$interval])) 
+\nsum(rate(grpc_server_handled_total{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=~\"$service\",instance=~\"$instance\",grpc_type=~\"unary|UNARY\",grpc_code=~\"Unknown|UNKNOWN\"}[$interval])) + \nsum(rate(grpc_server_handled_total{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=~\"$service\",instance=~\"$instance\",grpc_type=~\"unary|UNARY\",grpc_code=~\"Unavailable|UNAVAILABLE\"}[$interval])) +\nsum(rate(grpc_server_handled_total{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=~\"$service\",instance=~\"$instance\",grpc_type=~\"unary|UNARY\",grpc_code=~\"Unimplemented|UNIMPLEMENTED\"}[$interval]))\n)/ sum(rate(grpc_server_handled_total{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=~\"$service\",instance=~\"$instance\",grpc_type=~\"unary|UNARY\"}[$interval]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "ServerError", + "range": true, + "refId": "C" + } + ], + "title": "Status", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "points", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + 
"w": 12, + "x": 12, + "y": 14 + }, + "id": 26, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(grpc_server_handled_total{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=~\"$service\",instance=~\"$instance\",grpc_type=~\"unary|UNARY\"}[$interval])) by (grpc_code)\n/ ignoring(grpc_code) group_left sum(rate(grpc_server_handled_total{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=~\"$service\",instance=~\"$instance\",grpc_type=~\"unary|UNARY\"}[$interval]))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{grpc_code}}", + "range": true, + "refId": "A" + } + ], + "title": "Status distribution", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 
22 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(grpc_server_started_total{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=~\"$service\",instance=~\"$instance\"}[$interval])) by (grpc_service)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{grpc_service}}", + "range": true, + "refId": "A" + } + ], + "title": "RPS", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "points", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 22 + }, + "id": 78, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + 
"pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(grpc_server_started_total{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=~\"$service\",instance=~\"$instance\"}[$interval])) by (grpc_service) \n/ ignoring(grpc_service) group_left sum(rate(grpc_server_started_total{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=~\"$service\",instance=~\"$instance\"}[$interval]))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{grpc_service}}", + "range": true, + "refId": "A" + } + ], + "title": "Request distribution", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 30 + }, + "id": 77, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": 
"prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "histogram_quantile(0.95, \n sum(rate(grpc_server_handling_seconds_bucket{cluster=~\"$cluster\",job=\"kubernetes-pods\",grpc_type=~\"unary|UNARY\",service=~\"$service\",instance=~\"$instance\"}[$interval])) by (grpc_service,le)\n)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{grpc_service}}", + "range": true, + "refId": "A" + } + ], + "title": "Latency", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 30 + }, + "id": 16, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "histogram_quantile(0.99, \n 
sum(rate(grpc_server_handling_seconds_bucket{cluster=~\"$cluster\",job=\"kubernetes-pods\",grpc_type=~\"unary|UNARY\",service=~\"$service\",instance=~\"$instance\"}[$interval])) by (le)\n)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "histogram_quantile(0.95, \n sum(rate(grpc_server_handling_seconds_bucket{cluster=~\"$cluster\",job=\"kubernetes-pods\",grpc_type=~\"unary|UNARY\",service=~\"$service\",instance=~\"$instance\"}[$interval])) by (le)\n)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "95%", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "histogram_quantile(0.90, \n sum(rate(grpc_server_handling_seconds_bucket{cluster=~\"$cluster\",job=\"kubernetes-pods\",grpc_type=~\"unary|UNARY\",service=~\"$service\",instance=~\"$instance\"}[$interval])) by (le)\n)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "90%", + "range": true, + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "histogram_quantile(0.75, \n sum(rate(grpc_server_handling_seconds_bucket{cluster=~\"$cluster\",job=\"kubernetes-pods\",grpc_type=~\"unary|UNARY\",service=~\"$service\",instance=~\"$instance\"}[$interval])) by (le)\n)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "75%", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "histogram_quantile(0.50, \n 
sum(rate(grpc_server_handling_seconds_bucket{cluster=~\"$cluster\",job=\"kubernetes-pods\",grpc_type=~\"unary|UNARY\",service=~\"$service\",instance=~\"$instance\"}[$interval])) by (le)\n)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "50%", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "histogram_quantile(0.25, \n sum(rate(grpc_server_handling_seconds_bucket{cluster=~\"$cluster\",job=\"kubernetes-pods\",grpc_type=~\"unary|UNARY\",service=~\"$service\",instance=~\"$instance\"}[$interval])) by (le)\n)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "25%", + "range": true, + "refId": "F" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "histogram_quantile(0.10, \n sum(rate(grpc_server_handling_seconds_bucket{cluster=~\"$cluster\",job=\"kubernetes-pods\",grpc_type=~\"unary|UNARY\",service=~\"$service\",instance=~\"$instance\"}[$interval])) by (le)\n)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "10%", + "range": true, + "refId": "G" + } + ], + "title": "Latency distribution", + "type": "timeseries" + } + ], + "preload": false, + "refresh": "30s", + "schemaVersion": 41, + "tags": [ + "grpc" + ], + "templating": { + "list": [ + { + "current": { + "text": "All", + "value": "$__all" + }, + "definition": "label_values(grpc_server_handled_total,cluster)", + "includeAll": true, + "label": "Cluster", + "name": "cluster", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(grpc_server_handled_total,cluster)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "current": { + "text": [ + "history-api", + 
"policy-enforcer", + "event-processor" + ], + "value": [ + "history-api", + "policy-enforcer", + "event-processor" + ] + }, + "definition": "label_values(grpc_server_handled_total{cluster=~\"$cluster\"},service)", + "includeAll": false, + "label": "Service", + "multi": true, + "name": "service", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(grpc_server_handled_total{cluster=~\"$cluster\"},service)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "type": "query" + }, + { + "current": { + "text": "All", + "value": "$__all" + }, + "definition": "label_values(grpc_server_handled_total{cluster=~\"$cluster\", service=~\"$service\"},instance)", + "includeAll": true, + "label": "Instance", + "name": "instance", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(grpc_server_handled_total{cluster=~\"$cluster\", service=~\"$service\"},instance)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "type": "query" + }, + { + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "2m", + "value": "2m" + }, + "name": "interval", + "options": [ + { + "selected": true, + "text": "2m", + "value": "2m" + }, + { + "selected": false, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "15m", + "value": "15m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + } + ], + "query": "2m,5m,10m,15m,30m,1h", + "refresh": 2, + "type": "interval" + } + ] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "10s", + "15s", + "30s", + "1m", + "5m", + "10m", + "15m" + ] + }, + "timezone": "", + "title": "GRPC requests", + "uid": "982b1b61-c9de-4126-9698-79dff4d382f6", + "version": 1 +} \ No newline at end of file diff --git 
a/install/helm/dashboards/public-api.json b/install/helm/dashboards/public-api.json new file mode 100644 index 00000000..d6e66a9e --- /dev/null +++ b/install/helm/dashboards/public-api.json @@ -0,0 +1,829 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "For monitoring the public API. Shows the external interface health and performance to help control its availability and quality of provided services.", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 7, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "Timeline of requests to the public API that finished with a specified status.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "points", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 81, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true + 
}, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(http_requests_total{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=\"public-api\",code=\"2xx\"}[$interval]))\n/ sum(rate(http_requests_total{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=\"public-api\"}[$interval]))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "2xx", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(http_requests_total{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=\"public-api\",code=\"4xx\"}[$interval]))\n/ sum(rate(http_requests_total{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=\"public-api\"}[$interval]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "4xx", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(http_requests_total{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=\"public-api\",code=\"5xx\"}[$interval]))\n/ sum(rate(http_requests_total{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=\"public-api\"}[$interval]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "5xx", + "range": true, + "refId": "C" + } + ], + "title": "Status on the Public API service", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "Distribution of requests with a specified status as a percentage of the total number of requests at a given time.", + "fieldConfig": { + "defaults": { + "color": { + 
"mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "points", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 26, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(http_requests_total{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=\"public-api\"}[$interval])) by (code)\n/ ignoring(code) group_left sum(rate(http_requests_total{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=\"public-api\"}[$interval]))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{code}}", + "range": true, + "refId": "A" + } + ], + "title": "Public API status distribution", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "Public API RPS per endpoint.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + 
"axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 85, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(http_requests_total{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=\"public-api\"}[$interval])) by (method,path)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{method}} - {{path}}", + "range": true, + "refId": "A" + } + ], + "title": "Public API RPS", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "Distribution of requests of a specific type and to a specific endpoint as a percentage of the total number of requests at a given point in time.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + 
"axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "points", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 17 + }, + "id": 78, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(http_requests_total{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=\"public-api\"}[$interval])) by (method,path) \n/ ignoring(method,path) group_left sum(rate(http_requests_total{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=\"public-api\"}[$interval]))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{method}} - {{path}}", + "range": true, + "refId": "A" + } + ], + "title": "Public API request distribution", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "Request latencies at a given point in time.", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { 
+ "h": 8, + "w": 12, + "x": 0, + "y": 25 + }, + "id": 86, + "options": { + "calculate": false, + "cellGap": 1, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Oranges", + "steps": 64 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "mode": "single", + "showColorScale": false, + "yHistogram": false + }, + "yAxis": { + "axisPlacement": "left", + "reverse": false + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(increase(http_request_duration_seconds_bucket{cluster=~\"$cluster\",handler!=\"metrics\"}[$interval])) by (le)", + "format": "heatmap", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "range": true, + "refId": "A" + } + ], + "title": "Public API latency", + "type": "heatmap" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "Percentile of request latencies at a given point in time.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + 
"thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 25 + }, + "id": 16, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "histogram_quantile(0.99, \n sum(rate(http_request_duration_seconds_bucket{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=\"public-api\"}[$interval])) by (le)\n)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "99%", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "histogram_quantile(0.95, \n sum(rate(http_request_duration_seconds_bucket{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=\"public-api\"}[$interval])) by (le)\n)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "95%", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "histogram_quantile(0.90, \n sum(rate(http_request_duration_seconds_bucket{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=\"public-api\"}[$interval])) by (le)\n)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "90%", + "range": true, + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "histogram_quantile(0.75, \n 
sum(rate(http_request_duration_seconds_bucket{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=\"public-api\"}[$interval])) by (le)\n)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "75%", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "histogram_quantile(0.50, \n sum(rate(http_request_duration_seconds_bucket{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=\"public-api\"}[$interval])) by (le)\n)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "50%", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "histogram_quantile(0.25, \n sum(rate(http_request_duration_seconds_bucket{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=\"public-api\"}[$interval])) by (le)\n)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "25%", + "range": true, + "refId": "F" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "histogram_quantile(0.10, \n sum(rate(http_request_duration_seconds_bucket{cluster=~\"$cluster\",job=\"kubernetes-pods\",service=\"public-api\"}[$interval])) by (le)\n)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "10%", + "range": true, + "refId": "G" + } + ], + "title": "Public API latency distribution", + "type": "timeseries" + } + ], + "preload": false, + "refresh": "30s", + "schemaVersion": 41, + "tags": [ + "public-api" + ], + "templating": { + "list": [ + { + "current": { + "text": "All", + "value": "$__all" + }, + "definition": "label_values(go_goroutines,cluster)", + "includeAll": true, + "label": "Cluster", + 
"name": "cluster", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(go_goroutines,cluster)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "2m", + "value": "2m" + }, + "name": "interval", + "options": [ + { + "selected": true, + "text": "2m", + "value": "2m" + }, + { + "selected": false, + "text": "3m", + "value": "3m" + }, + { + "selected": false, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + } + ], + "query": "2m,3m,5m,10m,30m,1h", + "refresh": 2, + "type": "interval" + } + ] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "10s", + "15s", + "30s", + "1m", + "5m", + "10m", + "15m" + ] + }, + "timezone": "", + "title": "Public-API", + "uid": "e53a5483-ecf2-4d61-ace7-cd4c2110ec63", + "version": 1 +} \ No newline at end of file diff --git a/install/helm/dashboards/reverse-proxy.json b/install/helm/dashboards/reverse-proxy.json new file mode 100644 index 00000000..a1581f00 --- /dev/null +++ b/install/helm/dashboards/reverse-proxy.json @@ -0,0 +1,1055 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Provides information about the availability and operation of the reverse proxy component. All internal and external requests to PT CS are routed through the reverse proxy component. 
The dashboard allows you to detect anomalies in the service and promptly react to possible issues.", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 7, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 0, + "y": 0 + }, + "id": 12, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "(sum(rate(caddy_http_request_duration_seconds_count{cluster=~\"$cluster\",job=\"caddy\",handler!=\"metrics\", code=\"200\"}[$interval]))\n) / sum(rate(caddy_http_request_duration_seconds_count{cluster=~\"$cluster\",job=\"caddy\",handler!=\"metrics\"}[$interval]))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "Reverse proxy success", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + 
"gridPos": { + "h": 5, + "w": 8, + "x": 8, + "y": 0 + }, + "id": 14, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(caddy_http_request_duration_seconds_count{cluster=~\"$cluster\",job=\"caddy\",handler=\"static_response\"}[$interval]))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "Reverse proxy RPS", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "index": 0, + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 16, + "y": 0 + }, + "id": 75, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "histogram_quantile(0.95, \n 
sum(rate(caddy_http_request_duration_seconds_bucket{cluster=~\"$cluster\",job=\"caddy\",handler!=\"metrics\"}[$interval])) by (le)\n)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "95%", + "range": true, + "refId": "A" + } + ], + "title": "Reverse proxy latency", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "points", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + 
"id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 5 + }, + "id": 76, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "expr": "sum(rate(caddy_http_request_duration_seconds_count{cluster=~\"$cluster\",handler!=\"metrics\",job=\"caddy\",code=~\"(100|101)\"}[$interval]))\n/ sum(rate(caddy_http_request_duration_seconds_count{cluster=~\"$cluster\",handler!=\"metrics\",job=\"caddy\"}[$interval]))", + "hide": false, + "instant": false, + "legendFormat": "1xx", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(caddy_http_request_duration_seconds_count{cluster=~\"$cluster\",handler!=\"metrics\",job=\"caddy\",code=~\"(200|201)\"}[$interval]))\n/ sum(rate(caddy_http_request_duration_seconds_count{cluster=~\"$cluster\",handler!=\"metrics\",job=\"caddy\"}[$interval]))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "2xx", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "expr": "sum(rate(caddy_http_request_duration_seconds_count{cluster=~\"$cluster\",handler!=\"metrics\",job=\"caddy\",code=~\"(300|301|302|303|304|307|308)\"}[$interval]))\n/ sum(rate(caddy_http_request_duration_seconds_count{cluster=~\"$cluster\",handler!=\"metrics\",job=\"caddy\"}[$interval]))", + "hide": false, + "instant": false, + "legendFormat": "3xx", + "range": true, + "refId": "E" + }, + { + "datasource": { + 
"type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(caddy_http_request_duration_seconds_count{cluster=~\"$cluster\",handler=\"static_response\",job=\"caddy\",code=~\"(400|401|403|404|409|422|429)\"}[$interval]))\n/ sum(rate(caddy_http_request_duration_seconds_count{cluster=~\"$cluster\",handler=\"static_response\",job=\"caddy\"}[$interval]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "4xx", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(caddy_http_request_duration_seconds_count{cluster=~\"$cluster\",handler=\"static_response\",job=\"caddy\",code=~\"(500|501|502|503|504)\"}[$interval]))\n/ sum(rate(caddy_http_request_duration_seconds_count{cluster=~\"$cluster\",handler=\"static_response\",job=\"caddy\"}[$interval]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "5xx", + "range": true, + "refId": "C" + } + ], + "title": "Status on the Reverse proxy", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + 
"thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 5 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": false + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(caddy_http_requests_total{cluster=~\"$cluster\",job=\"caddy\",handler!=\"metrics\"}[$interval])) ", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "static_response", + "range": true, + "refId": "A" + } + ], + "title": "Reverse proxy RPS", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 13 + }, + "id": 77, + "options": { + "calculate": false, + "cellGap": 1, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Oranges", + "steps": 64 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "mode": "single", + "showColorScale": false, + "yHistogram": false + }, + "yAxis": { + "axisPlacement": "left", + "reverse": false + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": 
"sum(increase(caddy_http_request_duration_seconds_bucket{cluster=~\"$cluster\",job=\"caddy\",handler!=\"metrics\"}[$interval])) by (le)", + "format": "heatmap", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{le}}", + "range": true, + "refId": "A" + } + ], + "title": "Reverse proxy latency", + "type": "heatmap" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "points", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 13 + }, + "id": 87, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(caddy_http_request_duration_seconds_count{cluster=~\"$cluster\",job=\"caddy\",handler!=\"metrics\"}[$interval])) by (code)\n/ ignoring(code) group_left 
sum(rate(caddy_http_request_duration_seconds_count{cluster=~\"$cluster\",job=\"caddy\",handler!=\"metrics\"}[$interval]))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{code}}", + "range": true, + "refId": "A" + } + ], + "title": "Reverse proxy status distribution", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 6, + "y": 21 + }, + "id": 88, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "histogram_quantile(0.99, \n sum(rate(caddy_http_request_duration_seconds_bucket{cluster=~\"$cluster\",job=\"caddy\",handler!=\"metrics\"}[$interval])) by (le)\n)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + 
"legendFormat": "99%", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "histogram_quantile(0.95, \n sum(rate(caddy_http_request_duration_seconds_bucket{cluster=~\"$cluster\",job=\"caddy\",handler!=\"metrics\"}[$interval])) by (le)\n)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "95%", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "histogram_quantile(0.90, \n sum(rate(caddy_http_request_duration_seconds_bucket{cluster=~\"$cluster\",job=\"caddy\",handler!=\"metrics\"}[$interval])) by (le)\n)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "90%", + "range": true, + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "histogram_quantile(0.75, \n sum(rate(caddy_http_request_duration_seconds_bucket{cluster=~\"$cluster\",job=\"caddy\",handler!=\"metrics\"}[$interval])) by (le)\n)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "75%", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "histogram_quantile(0.50, \n sum(rate(caddy_http_request_duration_seconds_bucket{cluster=~\"$cluster\",job=\"caddy\",handler!=\"metrics\"}[$interval])) by (le)\n)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "50%", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "histogram_quantile(0.25, \n 
sum(rate(caddy_http_request_duration_seconds_bucket{cluster=~\"$cluster\",job=\"caddy\",handler!=\"metrics\"}[$interval])) by (le)\n)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "25%", + "range": true, + "refId": "F" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "histogram_quantile(0.10, \n sum(rate(caddy_http_request_duration_seconds_bucket{cluster=~\"$cluster\",job=\"caddy\",handler!=\"metrics\"}[$interval])) by (le)\n)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "10%", + "range": true, + "refId": "G" + } + ], + "title": "Reverse proxy latency distribution", + "type": "timeseries" + } + ], + "preload": false, + "refresh": "30s", + "schemaVersion": 41, + "tags": [], + "templating": { + "list": [ + { + "current": { + "text": "All", + "value": "$__all" + }, + "definition": "label_values(go_goroutines,cluster)", + "includeAll": true, + "label": "Cluster", + "name": "cluster", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(go_goroutines,cluster)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "2m", + "value": "2m" + }, + "name": "interval", + "options": [ + { + "selected": true, + "text": "2m", + "value": "2m" + }, + { + "selected": false, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "15m", + "value": "15m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + } + ], + "query": "2m,3m,5m,10m,30m,1h", + "refresh": 2, + "type": "interval" + } + ] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": { + 
"refresh_intervals": [ + "10s", + "15s", + "30s", + "1m", + "5m", + "10m", + "15m" + ] + }, + "timezone": "", + "title": "Reverse proxy", + "uid": "e53a5483-ecf2-4d61-ace7-cd4c63c2110e", + "version": 1 +} \ No newline at end of file diff --git a/install/helm/dashboards/runtime.json b/install/helm/dashboards/runtime.json new file mode 100644 index 00000000..6d4f674e --- /dev/null +++ b/install/helm/dashboards/runtime.json @@ -0,0 +1,2072 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "For managing metrics of services dedicated to in-cluster deployment. If you use external components, such as RabbitMQ or ClickHouse, for correct filtering by cluster and if Grafana is configured for several clusters, add the \"cluster\" label to the metrics of these components (similar to other system metrics). 
You can also view information about the runtime monitor service connection to RabbitMQ.", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 4, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "Shows whether the Runtime-Monitor service producer has a connection with RabbitMQ.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "fixed" + }, + "custom": { + "axisPlacement": "auto", + "fillOpacity": 70, + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineWidth": 0, + "spanNulls": false + }, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 1, + "text": "disconnected" + }, + "1": { + "color": "green", + "index": 0, + "text": "connected" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "color": "red", + "index": 2, + "text": "disconnected" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "transparent" + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 28, + "options": { + "alignValue": "center", + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "mergeValues": true, + "perPage": 10, + "rowHeight": 0.8, + "showValue": "auto", + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "editorMode": "code", + "exemplar": false, + "expr": "rabbit_broker_connection_state_gauge{cluster=~\"$cluster\",service=\"runtime-monitor\",queue=\"runtime_events\",is_consumer=\"false\",instance=~\"$node\"}", + "format": "time_series", + "legendFormat": "{{cluster}} - {{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Runtime Monitor producer", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": 
"(.*):\\d+$", + "renamePattern": "$1" + } + } + ], + "type": "state-timeline" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "Shows whether the Event-Processor service producer has a connection to RabbitMQ.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "fixed" + }, + "custom": { + "axisPlacement": "auto", + "fillOpacity": 70, + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineWidth": 0, + "spanNulls": false + }, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 1, + "text": "disconnected" + }, + "1": { + "color": "green", + "index": 0, + "text": "connected" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "color": "red", + "index": 2, + "text": "disconnected" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Status" + }, + "properties": [] + }, + { + "matcher": { + "id": "byName", + "options": "Instance" + }, + "properties": [] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 26, + "options": { + "alignValue": "center", + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "mergeValues": true, + "perPage": 10, + "rowHeight": 0.8, + "showValue": "auto", + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "editorMode": "code", + "expr": "rabbit_broker_connection_state_gauge{cluster=~\"$cluster\",service=\"event-processor\",queue=\"history_events\", is_consumer=\"false\"}", + "legendFormat": "{{cluster}}-{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Event Processor producer", + "transformations": [ + { + "id": "renameByRegex", + 
"options": { + "regex": "(.*):\\d+$", + "renamePattern": "$1" + } + } + ], + "type": "state-timeline" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "Shows whether the Event-Processor service consumer has a connection to RabbitMQ.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "fixed" + }, + "custom": { + "axisPlacement": "auto", + "fillOpacity": 70, + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineWidth": 0, + "spanNulls": false + }, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 1, + "text": "disconnected" + }, + "1": { + "color": "green", + "index": 0, + "text": "connected" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "color": "red", + "index": 2, + "text": "disconnected" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "transparent" + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 7 + }, + "id": 27, + "options": { + "alignValue": "center", + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "mergeValues": true, + "perPage": 10, + "rowHeight": 0.8, + "showValue": "auto", + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "editorMode": "code", + "exemplar": false, + "expr": "rabbit_broker_connection_state_gauge{cluster=~\"$cluster\",service=\"event-processor\",queue=\"runtime_events\", is_consumer=\"true\"}", + "format": "time_series", + "instant": false, + "legendFormat": "{{cluster}} - {{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Event Processor consumer", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "(.*):\\d+$", + "renamePattern": "$1" + } + } + ], + "type": "state-timeline" + }, + { + "datasource": { + 
"type": "prometheus", + "uid": "Prometheus" + }, + "description": "Shows whether the History-API service consumer has a connection to RabbitMQ.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "fixed" + }, + "custom": { + "axisPlacement": "auto", + "fillOpacity": 70, + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineWidth": 0, + "spanNulls": false + }, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 1, + "text": "disconnected" + }, + "1": { + "color": "green", + "index": 0, + "text": "connected" + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "color": "red", + "index": 2, + "text": "disconnected" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "transparent" + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Status" + }, + "properties": [] + }, + { + "matcher": { + "id": "byName", + "options": "Instance" + }, + "properties": [] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 7 + }, + "id": 25, + "options": { + "alignValue": "center", + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "mergeValues": true, + "perPage": 10, + "rowHeight": 0.8, + "showValue": "auto", + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "editorMode": "code", + "exemplar": false, + "expr": "rabbit_broker_connection_state_gauge{cluster=~\"$cluster\",service=\"history-api\",queue=\"history_events\", is_consumer=\"true\"}", + "format": "time_series", + "legendFormat": "{{cluster}} - {{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "History API consumer", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "(.*):\\d+$", + "renamePattern": "$1" + } + } + ], + "type": "state-timeline" + }, + { + 
"collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 14 + }, + "id": 31, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "Event processing time per detector.\n\nIf the duration is significantly higher than the average number, this may indicate an issue.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 15 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "expr": "sum(rate(operation_time_one_detector_histogram_sum{cluster=~\"$cluster\",service=\"event-processor\"}[5m])/ rate(operation_time_one_detector_histogram_count{cluster=~\"$cluster\",service=\"event-processor\"}[$interval])) by (detector)", + "format": "time_series", + "legendFormat": "{{detector}}", + "range": 
true, + "refId": "A" + } + ], + "title": "Detector operating time", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "Duration of event processing by the entire detector chain.\n\nNormally, the distribution should be around the middle of the heatmap. If the processing time is closer to the upper bound or regularly falls into +Inf, this may be a sign of an issue.", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 12, + "y": 15 + }, + "id": 6, + "options": { + "calculate": false, + "cellGap": 1, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Oranges", + "steps": 64 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "mode": "single", + "showColorScale": false, + "yHistogram": false + }, + "yAxis": { + "axisPlacement": "left", + "reverse": false, + "unit": "s" + } + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(rate(operation_time_all_detectors_histogram_bucket{cluster=~\"$cluster\",service=\"event-processor\"}[$interval])) by (le)", + "format": "heatmap", + "instant": false, + "legendFormat": "{{le}}", + "range": true, + "refId": "A" + } + ], + "title": "Operating time of the entire detector chain", + "type": "heatmap" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "The Runtime-Monitor service has an internal buffer for Tetragon events.
It is designed to smooth out peaks in the load on the RabbitMQ queue provider.\n\nIf everything runs properly, in most cases, the size of this buffer will be around zero. If the size reaches the maximum value (1,000 by default), there may be an issue.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "#EAB839", + "value": 750 + }, + { + "color": "red", + "value": 900 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 77 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "editorMode": "code", + "expr": "avg(tetragon_events_buffer_size_gauge{cluster=~\"$cluster\",instance=~\"$node\"}) by (cluster,instance)", + "legendFormat": "{{cluster}} - {{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Tetragon events buffer size", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "(.*):\\d+$", + "renamePattern": "$1" + } + } + ], + "type": "timeseries" + }, + { + 
"datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "The Runtime-Monitor service has an internal buffer for Tetragon events. It is designed to smooth out peaks in the load on the RabbitMQ queue provider.\n\nWhen the buffer is full, the following Tetragon messages will be lost. This metric shows the number of lost events.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "#EAB839", + "value": 750 + }, + { + "color": "red", + "value": 900 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 12, + "y": 77 + }, + "id": 29, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "editorMode": "code", + "expr": "sum(rate(tetragon_events_dropped_count{cluster=~\"$cluster\",instance=~\"$node\"}[$interval])) by (cluster,instance)", + "legendFormat": "{{cluster}} - {{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Tetragon events dropped count", + "transformations": [ + { + "id": 
"renameByRegex", + "options": { + "regex": "(.*):\\d+$", + "renamePattern": "$1" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "Threats found during event analysis by detectors in time.", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "red", + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 88 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(increase(detector_threats_counter{cluster=~\"$cluster\",service=\"event-processor\"}[$interval])) by (detector)", + "legendFormat": "{{detector}}", + "range": true, + "refId": "A" + } + ], + "title": "Threats found by the detector scan", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + 
"uid": "Prometheus" + }, + "description": "Shows detector errors.", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "red", + "mode": "fixed" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 88 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "expr": "sum(increase(detector_errors_counter{cluster=~\"$cluster\",service=\"event-processor\"}[$interval])) by (detector)", + "legendFormat": "{{detector}}", + "range": true, + "refId": "A" + } + ], + "title": "Detector calls that ended in errors", + "type": "timeseries" + } + ], + "title": "Detectors", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 15 + }, + "id": 32, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "The size of the event queue to be 
processed.\n\nEvents are processed by the Event-Processor service.\nIf the queue size grows, it means that Event-Processor cannot handle the load.\n\nCheck for errors in the logs and scale the Event-Processor service accordingly.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 1000 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 16 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "avg(rabbitmq_queue_messages{queue=\"runtime_events\",cluster=~\"$cluster\"}) by (instance)", + "format": "time_series", + "instant": true, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Queue size of events prepared for processing (Rabbit)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { +
"color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 16 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "expr": "sum(rate(rabbit_add_events_success_count{cluster=~\"$cluster\", instance=~\"$node\"}[$interval])) by (instance)", + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Rate of adding new events to rabbit", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "(.*):\\d+$", + "renamePattern": "$1" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "red", + "mode": "fixed" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + 
"barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 16 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "expr": "sum(rate(rabbit_add_events_failure_count{cluster=~\"$cluster\",instance=~\"$node\"}[$interval])) by (instance)", + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Failure rate when adding new events in Rabbit", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "(.*):\\d+$", + "renamePattern": "$1" + } + } + ], + "type": "timeseries" + } + ], + "title": "Runtime events", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 33, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "The size of the queue of processed events to be sent.\n\nEvents are sent by the History-API service.\nIf the queue size grows, it means that the History-API cannot handle the load.\n\nCheck for errors in the logs and scale the History-API service accordingly.", + "fieldConfig": 
{ + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 1000 + } + ] + } + }, + "overrides": [ + { + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "Value" + ], + "prefix": "All except:", + "readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": true, + "viz": true + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 25 + }, + "id": 11, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "avg(rabbitmq_queue_messages{queue=\"history_events\",cluster=~\"$cluster\"}) by (instance)", + "format": "time_series", + "instant": true, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Queue size of history events after processing (Rabbit)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + 
"uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 25 + }, + "id": 9, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "expr": "sum(rate(rabbit_add_history_success_count{cluster=~\"$cluster\"}[$interval])) by (instance)", + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Rate of adding history events to rabbit", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "(.*):\\d+$", + "renamePattern": "$1" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "red", + "mode": "fixed" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + 
"axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 25 + }, + "id": 10, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "expr": "sum(rate(rabbit_add_history_failure_count{cluster=~\"$cluster\"}[$interval])) by (instance)", + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Failure rate when adding history events in Rabbit", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "(.*):\\d+$", + "renamePattern": "$1" + } + } + ], + "type": "timeseries" + } + ], + "title": "History events", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "Distribution of the gRPC request processing time in a certain range of values.", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 14, + "x": 0, + "y": 17 + }, + "id": 
12, + "options": { + "calculate": false, + "cellGap": 1, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Oranges", + "steps": 64 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "mode": "single", + "showColorScale": false, + "yHistogram": false + }, + "yAxis": { + "axisPlacement": "left", + "reverse": false, + "unit": "s" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(increase(grpc_server_handling_seconds_bucket{cluster=~\"$cluster\", service=~\"(runtime-monitor|policy-enforcer|event-processor|history-api)\"}[$interval])) by (le)", + "format": "heatmap", + "legendFormat": "{{le}}", + "range": true, + "refId": "A" + } + ], + "title": "gRPC requests histogram", + "type": "heatmap" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "description": "Processed events are sent to ClickHouse.\n\nThis graph shows the overall ClickHouse activity as well as the read and write speed.\n\nIf there is no activity, events are being processed and queued in RabbitMQ, or the speed is too low, this may indicate issues with the ClickHouse database.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": 
"none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 10, + "x": 14, + "y": 17 + }, + "id": 23, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "editorMode": "code", + "expr": "sum(rate(ClickHouseProfileEvents_Query{cluster=~\"$cluster\"}[$interval])) by (cluster)", + "legendFormat": "{{cluster}} ", + "range": true, + "refId": "A" + } + ], + "title": "ClickHouse activity", + "type": "timeseries" + } + ], + "preload": false, + "schemaVersion": 41, + "tags": [ + "runtime" + ], + "templating": { + "list": [ + { + "current": { + "text": "All", + "value": "$__all" + }, + "definition": "label_values(go_goroutines,cluster)", + "includeAll": true, + "label": "Cluster", + "name": "cluster", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(go_goroutines,cluster)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "type": "query" + }, + { + "current": { + "text": "All", + "value": "$__all" + }, + "definition": "label_values(go_gc_duration_seconds{service=\"runtime-monitor\", cluster=~\"$cluster\"},instance)", + "description": "The Runtime-Monitor service is deployed on each node as a DaemonSet.", + "includeAll": true, + "label": "Node", + "name": "node", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(go_gc_duration_seconds{service=\"runtime-monitor\", cluster=~\"$cluster\"},instance)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "type": "query" + }, + { 
+ "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "2m", + "value": "2m" + }, + "label": "Interval", + "name": "interval", + "options": [ + { + "selected": true, + "text": "2m", + "value": "2m" + }, + { + "selected": false, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "15m", + "value": "15m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + } + ], + "query": "2m,5m,10m,15m,30m,1h", + "refresh": 2, + "type": "interval" + } + ] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Runtime dashboard", + "uid": "besadoncp9fk0c", + "version": 1, + "weekStart": "monday" +} \ No newline at end of file diff --git a/install/helm/dashboards/tetragon-runtime.json b/install/helm/dashboards/tetragon-runtime.json new file mode 100644 index 00000000..4d633402 --- /dev/null +++ b/install/helm/dashboards/tetragon-runtime.json @@ -0,0 +1,600 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Tetragon runtime metrics: resource usage, policy events, handling latency, dropped/overflowed buffers, and tracing policy state.", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + 
"id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(rate(process_cpu_seconds_total{cluster=~\"$cluster\",instance=~\"$node\"}[$interval])) by (instance)", + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "description": "Overall Tetragon resource consumption", + "title": "Tetragon CPU", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "editorMode": "code", + "expr": "max(process_resident_memory_bytes{cluster=~\"$cluster\",instance=~\"$node\"}) by (instance)", + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "description": "Overall Tetragon resource consumption", + "title": "Tetragon Memory", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + 
"tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(rate(tetragon_events_total{cluster=~\"$cluster\",instance=~\"$node\"}[$interval])) by (type)", + "legendFormat": "{{type}}", + "range": true, + "refId": "A" + } + ], + "description": "Overall Tetragon resource consumption", + "title": "Event Throughput", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(rate(tetragon_policy_events_total{cluster=~\"$cluster\",instance=~\"$node\"}[$interval])) by (policy)", + "legendFormat": "{{policy}}", + "range": true, + "refId": "A" + } + ], + "description": "Policy event counters", + "title": "Policy Events", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 16, + "x": 0, + "y": 16 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "editorMode": "code", + "expr": 
"histogram_quantile(0.50, sum(rate(tetragon_handling_latency_bucket{cluster=~\"$cluster\",instance=~\"$node\"}[$interval])) by (le))", + "legendFormat": "p50", + "range": true, + "refId": "A" + }, + { + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(tetragon_handling_latency_bucket{cluster=~\"$cluster\",instance=~\"$node\"}[$interval])) by (le))", + "legendFormat": "p95", + "range": true, + "refId": "B" + }, + { + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum(rate(tetragon_handling_latency_bucket{cluster=~\"$cluster\",instance=~\"$node\"}[$interval])) by (le))", + "legendFormat": "p99", + "range": true, + "refId": "C" + } + ], + "description": "Event handling latency metrics", + "title": "Handling Latency", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 16 + }, + "id": 6, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(tetragon_tracingpolicy_loaded{cluster=~\"$cluster\",instance=~\"$node\"})", + "refId": "A" + } + ], + "description": "Loaded policy state", + "title": "Loaded Policies", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 
8, + "w": 12, + "x": 0, + "y": 24 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(rate(tetragon_bpf_missed_events_total{cluster=~\"$cluster\",instance=~\"$node\"}[$interval])) by (instance)", + "legendFormat": "missed {{instance}}", + "range": true, + "refId": "A" + }, + { + "editorMode": "code", + "expr": "sum(rate(tetragon_notify_overflowed_events_total{cluster=~\"$cluster\",instance=~\"$node\"}[$interval])) by (instance)", + "legendFormat": "overflow {{instance}}", + "range": true, + "refId": "B" + } + ], + "description": "Buffer overflow and missed-event information", + "title": "Buffer Overflows", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "Prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 24 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(rate(tetragon_overhead_program_seconds_total{cluster=~\"$cluster\",instance=~\"$node\"}[$interval])) / clamp_min(sum(rate(tetragon_overhead_program_runs_total{cluster=~\"$cluster\",instance=~\"$node\"}[$interval])), 1)", + "legendFormat": "avg overhead", + "range": true, + "refId": "A" + } + ], + "description": "Overall Tetragon resource consumption", + "title": "Program Overhead", + "type": "timeseries" + } + ], + "preload": false, + "schemaVersion": 41, + "tags": [ + 
"runtime", + "tetragon" + ], + "templating": { + "list": [ + { + "current": { + "text": "All", + "value": "$__all" + }, + "definition": "label_values(tetragon_events_total,cluster)", + "includeAll": true, + "label": "Cluster", + "name": "cluster", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(tetragon_events_total,cluster)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "type": "query" + }, + { + "current": { + "text": "All", + "value": "$__all" + }, + "definition": "label_values(tetragon_events_total{cluster=~\"$cluster\"},instance)", + "description": "Tetragon instance/node.", + "includeAll": true, + "label": "Node", + "name": "node", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(tetragon_events_total{cluster=~\"$cluster\"},instance)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "type": "query" + }, + { + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "2m", + "value": "2m" + }, + "label": "Interval", + "name": "interval", + "options": [ + { + "selected": true, + "text": "2m", + "value": "2m" + }, + { + "selected": false, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "15m", + "value": "15m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + } + ], + "query": "2m,5m,10m,15m,30m,1h", + "refresh": 2, + "type": "interval" + } + ] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Tetragon Runtime", + "uid": "tetragon-runtime", + "version": 1, + "weekStart": "monday" +} diff --git a/install/helm/templates/_validations.tpl b/install/helm/templates/_validations.tpl index 69357342..9cda513c 100644 --- a/install/helm/templates/_validations.tpl +++ 
b/install/helm/templates/_validations.tpl @@ -91,27 +91,27 @@ cs: tls {{- $_ := set $annotations $key $value }} {{- end }} -{{- if ne ($annotation := get $annotations "runtime-monitor.tetragon") ($image := include "common.images.image" (dict "imageRoot" (get .Values "runtime-monitor").tetragon.image)) }} +{{- if ne ($annotation := get $annotations "runtime-monitor.tetragon") ($image := include "common.image" (dict "context" . "image" (get .Values "runtime-monitor").tetragon.image)) }} cs: runtime-monitor.tetragon.image is incorrect Image in annotation is not the same as in values of chart or subchart {{ printf "('%s' != '%s')" $annotation $image }} {{- end }} -{{- if ne ($annotation := get $annotations "postgresql") ($image := include "common.images.image" (dict "imageRoot" .Values.postgresql.image)) }} +{{- if ne ($annotation := get $annotations "postgresql") ($image := include "common.image" (dict "context" . "image" .Values.postgresql.image)) }} cs: postgresql.image is incorrect Image in annotation is not the same as in values of chart or subchart {{ printf "('%s' != '%s')" $annotation $image }} {{- end }} -{{- if ne ($annotation := get $annotations "postgresql.metrics") ($image := include "common.images.image" (dict "imageRoot" .Values.postgresql.metrics.image)) }} +{{- if ne ($annotation := get $annotations "postgresql.metrics") ($image := include "common.image" (dict "context" . "image" .Values.postgresql.metrics.image)) }} cs: postgresql.metrics.image is incorrect Image in annotation is not the same as in values of chart or subchart {{ printf "('%s' != '%s')" $annotation $image }} {{- end }} -{{- if ne ($annotation := get $annotations "redis") ($image := include "common.images.image" (dict "imageRoot" .Values.redis.image)) }} +{{- if ne ($annotation := get $annotations "redis") ($image := include "common.image" (dict "context" . 
"image" .Values.redis.image)) }} cs: redis.image is incorrect Image in annotation is not the same as in values of chart or subchart {{ printf "('%s' != '%s')" $annotation $image }} {{- end }} -{{- if ne ($annotation := get $annotations "rabbitmq") ($image := include "common.images.image" (dict "imageRoot" .Values.rabbitmq.image)) }} +{{- if ne ($annotation := get $annotations "rabbitmq") ($image := include "common.image" (dict "context" . "image" .Values.rabbitmq.image)) }} cs: rabbitmq.image is incorrect Image in annotation is not the same as in values of chart or subchart {{ printf "('%s' != '%s')" $annotation $image }} {{- end }} -{{- if ne ($annotation := get $annotations "clickhouse") ($image := include "common.images.image" (dict "imageRoot" .Values.clickhouse.image)) }} +{{- if ne ($annotation := get $annotations "clickhouse") ($image := include "common.image" (dict "context" . "image" .Values.clickhouse.image)) }} cs: clickhouse.image is incorrect Image in annotation is not the same as in values of chart or subchart {{ printf "('%s' != '%s')" $annotation $image }} {{- end }} diff --git a/install/helm/templates/configmap.yaml b/install/helm/templates/configmap.yaml index 2cc9ccaf..174dc162 100644 --- a/install/helm/templates/configmap.yaml +++ b/install/helm/templates/configmap.yaml @@ -15,7 +15,68 @@ data: AUTH: {{ include "common.cs.auth.enabled" . | quote }} IS_CHILD_CLUSTER: {{ include "common.cs.isChildCluster" . | quote }} OWN_CS_URL: {{ include "common.cs.ownCsUrl" . | quote }} + LOG_LEVEL: {{ include "common.cs.logLevel" . | quote }} {{- $centralCsUrl := include "common.cs.centralCsUrl" . }} CENTRAL_CS_URL: {{ $centralCsUrl | quote }} CENTRAL_CS_TLS_CHECK_CERT: {{ .Values.tls.verify | quote }} CENTRAL_CS_HOSTNAME: {{ $centralCsUrl | urlParse | pluck "hostname" | first | quote }} + GOPS_CONFIG_DIR: "/tmp" + {{- if ((.Values.global).logger).enabled }} + LOGGER_ENABLED: "true" + LOG_FILE: {{ printf "/var/log/cs/%s.log" (include "common.name" .) 
| quote }} + {{- end }} + {{- if or ((.Values.global).metrics).enabled .Values.metrics.enabled }} + METRICS_ENABLED: "true" + {{- end }} + {{- if or ((.Values.global).logger).enabled ((.Values.global).metrics).enabled .Values.metrics.enabled }} + GRAFANA: {{ include "common.cs.grafana.address" . | quote }} + GRAFANA_URL: {{ default (include "common.cs.ownCsUrl" . | trimSuffix "/" | printf "%s/grafana") .Values.grafana.externalHost | quote }} + GRAFANA_REDIRECT: {{ empty .Values.grafana.externalHost | not | quote }} + {{- end }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "common.cs.postgresql.configmapName" . }} + namespace: {{ include "common.namespace" . | quote }} + labels: + {{- include "common.labels" . | nindent 4 }} +data: + POSTGRES_ADDR: {{ .Values.postgresql.externalHost | default "postgresql" | quote }} + POSTGRES_SSL_MODE: {{ .Values.global.postgresql.tls.enabled | toString | quote }} + POSTGRES_SSL_CHECK_CERT: {{ .Values.global.postgresql.tls.verify | toString | quote }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "common.cs.redis.configmapName" . }} + namespace: {{ include "common.namespace" . | quote }} + labels: + {{- include "common.labels" . | nindent 4 }} +data: + REDIS_ADDR: {{ .Values.redis.externalHost | default "redis" | quote }} + REDIS_TLS_MODE: {{ .Values.global.redis.tls.enabled | toString | quote }} + REDIS_TLS_CHECK_CERT: {{ .Values.global.redis.tls.verify | toString | quote }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "common.cs.rabbitmq.configmapName" . }} + namespace: {{ include "common.namespace" . | quote }} + labels: + {{- include "common.labels" . | nindent 4 }} +data: + RABBIT_ADDR: {{ .Values.rabbitmq.externalHost | default "rabbitmq" | quote }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "common.cs.clickhouse.configmapName" . }} + namespace: {{ include "common.namespace" . | quote }} + labels: + {{- include "common.labels" . 
| nindent 4 }} +data: + {{- $clickhousePort := .Values.global.clickhouse.tls.enabled | ternary "9440" "9000" }} + CLICKHOUSE_ADDR: {{ .Values.clickhouse.externalHost | default (printf "clickhouse:%s" $clickhousePort) | quote }} + CLICKHOUSE_SSL_MODE: {{ .Values.global.clickhouse.tls.enabled | toString | quote }} + CLICKHOUSE_SSL_CHECK_CERT: {{ .Values.global.clickhouse.tls.verify | toString | quote }} diff --git a/install/helm/templates/grafana-datasources.yaml b/install/helm/templates/grafana-datasources.yaml new file mode 100644 index 00000000..6974beab --- /dev/null +++ b/install/helm/templates/grafana-datasources.yaml @@ -0,0 +1,26 @@ +{{- define "cs.grafana.datasources" -}} +apiVersion: 1 +datasources: + {{- if or .Values.prometheus.deployed .Values.prometheus.externalHost }} + - name: Prometheus + type: prometheus + url: {{ include "common.cs.prometheus.address" . }} + {{- end }} +{{- end -}} + +{{- if .Values.grafana.deployed }} +apiVersion: v1 +kind: Secret +metadata: + name: grafana-datasources + namespace: {{ include "common.namespace" . | quote }} + labels: + {{- include "common.labels" . | nindent 4 }} + {{- with (include "common.annotations" .) }} + annotations: + {{- . | nindent 4 }} + {{- end }} +type: Opaque +data: + datasources.yaml: {{ include "cs.grafana.datasources" . | b64enc | nindent 4 }} +{{- end }} diff --git a/install/helm/templates/metrics-configmap.yaml b/install/helm/templates/metrics-configmap.yaml new file mode 100644 index 00000000..c4744a6e --- /dev/null +++ b/install/helm/templates/metrics-configmap.yaml @@ -0,0 +1,196 @@ +{{- $metricsEnabled := or (.Values.global.metrics).enabled .Values.metrics.enabled .Values.postgresql.metrics.enabled .Values.rabbitmq.metrics.enabled .Values.clickhouse.metrics.enabled -}} +{{- if and .Values.prometheus.deployed $metricsEnabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: cs-metrics + namespace: {{ include "common.namespace" . | quote }} + labels: + {{- include "common.labels" . 
| nindent 4 }} + app.kubernetes.io/part-of: prometheus + {{- with (include "common.annotations" .) }} + annotations: + {{- . | nindent 4 }} + {{- end }} +data: + kubernetes.yaml: |- + scrape_configs: + - job_name: 'kubernetes-pods' + metrics_path: '/metrics' + kubernetes_sd_configs: + - role: pod + relabel_configs: + - source_labels: [__meta_kubernetes_pod_name] + regex: 'reverse-proxy-.*' + action: drop + - source_labels: [__meta_kubernetes_namespace] + regex: {{ include "common.namespace" . | squote }} + action: keep + - target_label: cluster + replacement: {{ include "common.cs.ownCsUrl" . | squote }} + action: replace + - source_labels: [__meta_kubernetes_pod_container_port_number] + regex: '^(9090|9187)$' + action: keep + - job_name: 'tetragon' + metrics_path: '/metrics' + kubernetes_sd_configs: + - role: pod + relabel_configs: + - source_labels: [__meta_kubernetes_namespace] + regex: {{ include "common.namespace" . | squote }} + action: keep + - source_labels: [__meta_kubernetes_pod_name] + regex: 'runtime-monitor-.*' + action: keep + - source_labels: [__meta_kubernetes_pod_container_port_number] + regex: '2112' + action: keep + + - target_label: cluster + replacement: {{ include "common.cs.ownCsUrl" . | squote }} + action: replace + - target_label: service + replacement: 'tetragon' + action: replace + - job_name: 'caddy' + metrics_path: '/metrics' + kubernetes_sd_configs: + - role: pod + relabel_configs: + - source_labels: [__meta_kubernetes_pod_name] + regex: 'reverse-proxy-.*' + action: keep + - source_labels: [__meta_kubernetes_pod_container_port_number] + regex: '9090' + action: keep + - source_labels: [__meta_kubernetes_namespace] + regex: {{ include "common.namespace" . | squote }} + action: keep + - target_label: cluster + replacement: {{ include "common.cs.ownCsUrl" . 
| squote }} + action: replace + - target_label: service + replacement: 'reverse-proxy' + action: replace + + {{- if and .Values.postgresql.metrics.enabled .Values.postgresql.metrics.externalHost }} + postgresql.yaml: |- + scrape_configs: + - job_name: 'postgresql' + metrics_path: '/metrics' + scrape_interval: 15s + static_configs: + - targets: ['{{ .Values.postgresql.metrics.externalHost }}'] + relabel_configs: + - target_label: cluster + replacement: {{ include "common.cs.ownCsUrl" . | squote }} + action: replace + - target_label: service + replacement: 'postgres' + action: replace {{- end }} + + {{- if and .Values.rabbitmq.metrics.enabled .Values.rabbitmq.metrics.externalHost }} + rabbitmq.yaml: |- + scrape_configs: + - job_name: 'rabbitmq' + metrics_path: '/metrics' + scrape_interval: 15s + static_configs: + - targets: ['{{ .Values.rabbitmq.metrics.externalHost }}'] + relabel_configs: + - target_label: cluster + replacement: {{ include "common.cs.ownCsUrl" . | squote }} + action: replace + - target_label: service + replacement: 'rabbitmq' + action: replace + {{- end }} + + {{- if and .Values.rabbitmq.metrics.enabled (not .Values.rabbitmq.metrics.externalHost) }} + rabbitmq.yaml: |- + scrape_configs: + - job_name: 'rabbitmq' + metrics_path: '/metrics' + scrape_interval: 15s + kubernetes_sd_configs: + - role: pod + relabel_configs: + - source_labels: [__meta_kubernetes_pod_name] + regex: 'rabbitmq-.*' + action: keep + - source_labels: [__meta_kubernetes_pod_container_port_number] + regex: '9419' + action: keep + - source_labels: [__meta_kubernetes_namespace] + regex: {{ include "common.namespace" . | squote }} + action: keep + - target_label: cluster + replacement: {{ include "common.cs.ownCsUrl" . 
| squote }} + action: replace + - target_label: service + replacement: 'rabbitmq' + action: replace + {{- end }} + + {{- if and .Values.clickhouse.metrics.enabled .Values.clickhouse.metrics.externalHost }} + clickhouse.yaml: |- + scrape_configs: + - job_name: 'clickhouse' + metrics_path: '/metrics' + scrape_interval: 15s + static_configs: + - targets: ['{{ .Values.clickhouse.metrics.externalHost }}'] + relabel_configs: + - target_label: cluster + replacement: {{ include "common.cs.ownCsUrl" . | squote }} + action: replace + - target_label: service + replacement: 'clickhouse' + action: replace + {{- end }} + + {{- if and .Values.clickhouse.metrics.enabled (not .Values.clickhouse.metrics.externalHost) }} + clickhouse.yaml: |- + scrape_configs: + - job_name: 'clickhouse' + metrics_path: '/metrics' + scrape_interval: 15s + kubernetes_sd_configs: + - role: pod + relabel_configs: + - source_labels: [__meta_kubernetes_pod_name] + regex: 'clickhouse-.*' + action: keep + - source_labels: [__meta_kubernetes_pod_container_port_number] + regex: '8001' + action: keep + - source_labels: [__meta_kubernetes_namespace] + regex: {{ include "common.namespace" . | squote }} + action: keep + - target_label: cluster + replacement: {{ include "common.cs.ownCsUrl" . | squote }} + action: replace + - target_label: service + replacement: 'clickhouse' + action: replace + {{- end }} +{{- end }} + +{{- if and .Values.grafana.deployed }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: cs-dashboards + namespace: {{ include "common.namespace" . | quote }} + labels: + {{- include "common.labels" . | nindent 4 }} + app.kubernetes.io/part-of: grafana + {{- with (include "common.annotations" .) }} + annotations: + {{- . 
| nindent 4 }} + {{- end }} +data: +{{ (.Files.Glob "dashboards/*").AsConfig | indent 2 }} +{{- end }} diff --git a/install/helm/templates/secret.yaml b/install/helm/templates/secret.yaml index cabe9eb8..7a4c3145 100644 --- a/install/helm/templates/secret.yaml +++ b/install/helm/templates/secret.yaml @@ -1,9 +1,22 @@ +{{- if and (empty .Values.global.administrator.existingSecret) (ne (include "common.cs.isChildCluster" .) "true") }} +apiVersion: v1 +kind: Secret +type: Opaque +metadata: + name: {{ include "common.cs.auth.secretName" . }} + labels: + {{- include "common.labels" . | nindent 4 }} +data: + username: {{ required "global.administrator.username is required when global.administrator.existingSecret is empty" .Values.global.administrator.username | b64enc | quote }} + password: {{ required "global.administrator.password is required when global.administrator.existingSecret is empty" .Values.global.administrator.password | b64enc | quote }} +{{- end }} {{- if empty .Values.global.keys.existingSecret }} +--- apiVersion: v1 kind: Secret type: Opaque metadata: - name: {{ include "common.cs.keysSecretName" . }} + name: {{ include "common.cs.keys.secretName" . }} labels: {{- include "common.labels" . | nindent 4 }} data: @@ -15,78 +28,73 @@ data: token: {{ default (randBytes 32 | b64dec | printf "%x") .Values.global.keys.token | b64enc }} {{- end }} {{- end }} - ---- +{{- if empty (include "common.cs.auth.postgresql.existingSecret" .) }} {{- if and .Values.postgresql.externalHost .Values.postgresql.deploy }} {{- fail "postgresql.externalHost is not supported with postgresql.deploy" }} {{- end }} +--- apiVersion: v1 kind: Secret type: Opaque metadata: - name: postgresql + name: {{ include "common.cs.postgresql.secretName" . }} labels: {{- include "common.labels" . 
| nindent 4 }} data: - POSTGRES_ADDR: {{ .Values.postgresql.externalHost | default "postgresql" | b64enc }} - POSTGRES_DB: {{ .Values.postgresql.auth.database | b64enc }} POSTGRES_USER: {{ .Values.postgresql.auth.username | b64enc }} POSTGRES_PASSWORD: {{ include "common.secrets.password" (dict "context" . "secret" "postgresql" "key" "POSTGRES_PASSWORD" "defaultValue" .Values.postgresql.auth.password) | b64enc }} - POSTGRES_SSL_MODE: {{ .Values.global.postgresql.tls.enabled | toString | b64enc }} - POSTGRES_SSL_CHECK_CERT: {{ .Values.global.postgresql.tls.verify | toString | b64enc }} ---- + POSTGRES_DB: {{ .Values.postgresql.auth.database | b64enc }} +{{- end }} +{{- if empty (include "common.cs.auth.redis.existingSecret" .) }} {{- if and .Values.redis.externalHost .Values.redis.deploy }} {{- fail "redis.externalHost is not supported with redis.deploy" }} {{- end }} +--- apiVersion: v1 kind: Secret type: Opaque metadata: - name: redis + name: {{ include "common.cs.redis.secretName" . }} labels: {{- include "common.labels" . | nindent 4 }} data: - REDIS_ADDR: {{ .Values.redis.externalHost | default "redis" | b64enc }} REDIS_USER: {{ .Values.redis.auth.username | b64enc }} REDIS_PASSWORD: {{ include "common.secrets.password" (dict "context" . "secret" "redis" "key" "REDIS_PASSWORD" "defaultValue" .Values.redis.auth.password) | b64enc }} - REDIS_TLS_MODE: {{ .Values.global.redis.tls.enabled | toString | b64enc }} - REDIS_TLS_CHECK_CERT: {{ .Values.global.redis.tls.verify | toString | b64enc }} ---- +{{- end }} +{{- if empty (include "common.cs.auth.rabbitmq.existingSecret" .) }} {{- if and .Values.rabbitmq.externalHost .Values.rabbitmq.deploy }} {{- fail "rabbitmq.externalHost is not supported with rabbitmq.deploy" }} {{- end }} +--- apiVersion: v1 kind: Secret type: Opaque metadata: - name: rabbitmq + name: {{ include "common.cs.rabbitmq.secretName" . }} labels: {{- include "common.labels" . 
| nindent 4 }} data: - RABBIT_ADDR: {{ .Values.rabbitmq.externalHost | default "rabbitmq" | b64enc }} RABBIT_USER: {{ .Values.rabbitmq.auth.username | b64enc }} RABBIT_PASSWORD: {{ include "common.secrets.password" (dict "context" . "secret" "rabbitmq" "key" "RABBIT_PASSWORD" "defaultValue" .Values.rabbitmq.auth.password) | b64enc }} ---- +{{- end }} +{{- if empty (include "common.cs.auth.clickhouse.existingSecret" .) }} {{- if and .Values.clickhouse.externalHost .Values.clickhouse.deploy }} {{- fail "clickhouse.externalHost is not supported with clickhouse.deploy" }} {{- end }} +--- # Clickhouse credentials apiVersion: v1 kind: Secret type: Opaque metadata: - name: clickhouse + name: {{ include "common.cs.clickhouse.secretName" . }} labels: {{- include "common.labels" . | nindent 4 }} data: - {{- $clickhousePort := .Values.global.clickhouse.tls.enabled | ternary "9440" "9000" }} - {{- $clickhouseAddr := printf "clickhouse:%s" ($clickhousePort) }} - CLICKHOUSE_ADDR: {{ .Values.clickhouse.externalHost | default $clickhouseAddr | b64enc }} - CLICKHOUSE_DB: {{ .Values.clickhouse.auth.database | b64enc }} CLICKHOUSE_USER: {{ .Values.clickhouse.auth.username | b64enc }} CLICKHOUSE_PASSWORD: {{ include "common.secrets.password" (dict "context" . "secret" "clickhouse" "key" "CLICKHOUSE_PASSWORD" "defaultValue" .Values.clickhouse.auth.password) | b64enc }} - CLICKHOUSE_SSL_MODE: {{ .Values.global.clickhouse.tls.enabled | toString | b64enc }} - CLICKHOUSE_SSL_CHECK_CERT: {{ .Values.global.clickhouse.tls.verify | toString | b64enc }} + CLICKHOUSE_DB: {{ .Values.clickhouse.auth.database | b64enc }} +{{- end }} {{- if .Values.imagePullSecret.password }} --- diff --git a/install/helm/templates/tls-secret.yaml b/install/helm/templates/tls-secret.yaml index 6239b675..d4e8cf26 100644 --- a/install/helm/templates/tls-secret.yaml +++ b/install/helm/templates/tls-secret.yaml @@ -7,6 +7,10 @@ metadata: name: {{ $secretName }} labels: {{- include "common.labels" . 
| nindent 4 }} + {{- with (include "common.annotations" .) }} + annotations: + {{- . | nindent 4 }} + {{- end }} type: kubernetes.io/tls data: {{- $clusterDomain := "cluster.local" }} @@ -17,7 +21,10 @@ data: {{- continue }} {{- end }} {{- $altNames = append $altNames $dep.Name }} - {{- if eq $dep.Name "reverse-proxy" }} + {{- if eq $dep.Name "admission-controller" }} + {{- $altNames = append $altNames (printf "%s.%s.svc" $dep.Name $releaseNamespace) }} + {{- $altNames = append $altNames (printf "%s.%s.svc.%s" $dep.Name $releaseNamespace $clusterDomain) }} + {{- else if eq $dep.Name "reverse-proxy" }} {{- $altNames = append $altNames (printf "%s.%s.svc.%s" $dep.Name $releaseNamespace $clusterDomain) }} {{- else if eq $dep.Name "grafana" }} {{- $altNames = append $altNames (printf "%s.%s.svc.%s" $dep.Name $releaseNamespace $clusterDomain) }} @@ -36,6 +43,10 @@ metadata: name: {{ $secretName }} labels: {{- include "common.labels" . | nindent 4 }} + {{- with (include "common.annotations" .) }} + annotations: + {{- . | nindent 4 }} + {{- end }} data: ca.crt: {{ .Values.postgresql.tls.certCA | b64enc | quote }} {{- end }} @@ -50,6 +61,10 @@ metadata: name: {{ $secretName }} labels: {{- include "common.labels" . | nindent 4 }} + {{- with (include "common.annotations" .) }} + annotations: + {{- . | nindent 4 }} + {{- end }} data: ca.crt: {{ required "A valid .Values.redis.tls.certCA entry required!" .Values.redis.tls.certCA | b64enc | quote }} {{- end }} @@ -64,6 +79,10 @@ metadata: name: {{ $secretName }} labels: {{- include "common.labels" . | nindent 4 }} + {{- with (include "common.annotations" .) }} + annotations: + {{- . | nindent 4 }} + {{- end }} data: ca.crt: {{ required "A valid .Values.clickhouse.tls.certCA entry required!" 
.Values.clickhouse.tls.certCA | b64enc | quote }} {{- end }} diff --git a/install/helm/values.schema.json b/install/helm/values.schema.json index 0c8208f6..130d660a 100644 --- a/install/helm/values.schema.json +++ b/install/helm/values.schema.json @@ -50,7 +50,7 @@ "properties": { "existingSecret": { "type": "string", - "description": "Existing secret name with keys `encryption` and `token`", + "description": "Existing secret name with keys `encryption`, `token`, and `publicAccessTokenSalt`", "default": "" }, "encryption": { @@ -70,9 +70,39 @@ } } }, + "administrator": { + "type": "object", + "properties": { + "existingSecret": { + "type": "string", + "description": "Name of an existing secret with administrator credentials (must contain keys `username` and `password`). When empty, the chart creates a Secret named `cs-account` from `username` and `password` below.", + "default": "" + }, + "username": { + "type": "string", + "description": "Administrator username. Ignored when `existingSecret` is set.", + "default": "" + }, + "password": { + "type": "string", + "description": "Administrator password. Ignored when `existingSecret` is set.", + "default": "" + } + } + }, "postgresql": { "type": "object", "properties": { + "auth": { + "type": "object", + "properties": { + "existingSecret": { + "type": "string", + "description": "Name of an existing secret with PostgreSQL auth credentials (must contain `POSTGRES_USER`, `POSTGRES_PASSWORD`, `POSTGRES_DB`). When empty, the chart creates a Secret named `postgresql`. NOTE: when `postgresql.deploy=true` (default), you must also set `postgresql.auth.existingSecret` to the same value so the sub-chart reads from it. 
For external PostgreSQL, prefer `postgresql.deploy=false`.", + "default": "" + } + } + }, "tls": { "type": "object", "properties": { @@ -100,6 +130,16 @@ "redis": { "type": "object", "properties": { + "auth": { + "type": "object", + "properties": { + "existingSecret": { + "type": "string", + "description": "Name of an existing secret with Redis auth credentials (must contain `REDIS_USER`, `REDIS_PASSWORD`). When empty, the chart creates a Secret named `redis`. NOTE: when `redis.deploy=true` (default), you must also set `redis.auth.existingSecret` to the same value so the sub-chart reads from it.", + "default": "" + } + } + }, "tls": { "type": "object", "properties": { @@ -124,9 +164,34 @@ } } }, + "rabbitmq": { + "type": "object", + "properties": { + "auth": { + "type": "object", + "properties": { + "existingSecret": { + "type": "string", + "description": "Name of an existing secret with RabbitMQ auth credentials (must contain `RABBIT_USER`, `RABBIT_PASSWORD`). When empty, the chart creates a Secret named `rabbitmq`. NOTE: when `rabbitmq.deploy=true` (default), you must also set `rabbitmq.auth.existingSecret` to the same value so the sub-chart reads from it.", + "default": "" + } + } + } + } + }, "clickhouse": { "type": "object", "properties": { + "auth": { + "type": "object", + "properties": { + "existingSecret": { + "type": "string", + "description": "Name of an existing secret with ClickHouse auth credentials (must contain `CLICKHOUSE_USER`, `CLICKHOUSE_PASSWORD`, `CLICKHOUSE_DB`). When empty, the chart creates a Secret named `clickhouse`. NOTE: when `clickhouse.deploy=true` (default), you must also set `clickhouse.auth.existingSecret` to the same value so the sub-chart reads from it. 
For external ClickHouse, prefer `clickhouse.deploy=false`.", + "default": "" + } + } + }, "tls": { "type": "object", "properties": { @@ -276,6 +341,16 @@ } } }, + "metrics": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean", + "description": "Enable CS metrics", + "default": false + } + } + }, "auth-center": { "type": "object", "properties": { @@ -288,26 +363,6 @@ "type": "number", "description": "Number of replicas for the auth-center component", "default": 2 - }, - "administrator": { - "type": "object", - "properties": { - "existingSecret": { - "type": "string", - "description": "Name of the existing secret with administrator credentials", - "default": "" - }, - "username": { - "type": "string", - "description": "Administrator name", - "default": "" - }, - "password": { - "type": "string", - "description": "Administrator password", - "default": "" - } - } } } }, @@ -620,6 +675,11 @@ "postgresql": { "type": "object", "properties": { + "deploy": { + "type": "boolean", + "description": "Deploy component", + "default": true + }, "externalHost": { "type": "string", "description": "External host with PostgreSQL. Requires setting `postgresql.deploy` to `false`.", @@ -661,7 +721,7 @@ "properties": { "existingSecret": { "type": "string", - "description": "Name of the existing secret with PostgreSQL credentials. The `auth.postgresPassword`, `auth.password`, and `auth.replicationPassword` values will be ignored and taken from this secret. The secret might also contain the `ldap-password` key if LDAP is enabled. If so, the `ldap.bind_password` value will be ignored and taken from this secret.", + "description": "Name of an existing secret with PostgreSQL credentials, read by the sub-chart when `postgresql.deploy=true`. The `auth.postgresPassword`, `auth.password`, and `auth.replicationPassword` values will be ignored and taken from this secret. 
The secret must carry AUTH credentials only — `POSTGRES_USER`, `POSTGRES_PASSWORD`, `POSTGRES_DB` (connection metadata `POSTGRES_ADDR`/`POSTGRES_SSL_*` lives in the always-created `cs-postgresql-config` ConfigMap and must not be placed in this Secret). NOTE: To make the same secret visible to all consumer services (auth-api, history-api, etc.), set `global.postgresql.auth.existingSecret` to the same value. The secret might also contain the `ldap-password` key if LDAP is enabled.", "default": "postgresql" }, "username": { @@ -741,6 +801,11 @@ "redis": { "type": "object", "properties": { + "deploy": { + "type": "boolean", + "description": "Deploy component", + "default": true + }, "externalHost": { "type": "string", "description": "External host with Redis. Requires setting `redis.deploy` to `false`.", @@ -782,7 +847,7 @@ "properties": { "existingSecret": { "type": "string", - "description": "Name of the existing secret with Redis credentials", + "description": "Name of an existing secret with Redis credentials, read by the sub-chart when `redis.deploy=true`. When set, the `auth.password` parameter is ignored. The secret must carry AUTH credentials only — `REDIS_USER`, `REDIS_PASSWORD` (connection metadata `REDIS_ADDR`/`REDIS_TLS_*` lives in the always-created `cs-redis-config` ConfigMap and must not be placed in this Secret). 
NOTE: To make the same secret visible to all consumer services (auth-api, history-api, etc.), set `global.redis.auth.existingSecret` to the same value.", "default": "redis" }, "username": { @@ -847,6 +912,11 @@ "rabbitmq": { "type": "object", "properties": { + "deploy": { + "type": "boolean", + "description": "Deploy component", + "default": true + }, "externalHost": { "type": "string", "description": "External host with RabbitMQ", @@ -872,7 +942,7 @@ }, "existingSecret": { "type": "string", - "description": "Existing secret with RabbitMQ credentials (must contain value for the `rabbitmq-password` parameter)", + "description": "Name of an existing secret with RabbitMQ credentials, read by the sub-chart when `rabbitmq.deploy=true`. When set, the `auth.password` parameter is ignored. The secret must carry AUTH credentials only — `RABBIT_USER`, `RABBIT_PASSWORD` (connection metadata `RABBIT_ADDR` lives in the always-created `cs-rabbitmq-config` ConfigMap and must not be placed in this Secret). NOTE: To make the same secret visible to all consumer services (event-processor, history-api, etc.), set `global.rabbitmq.auth.existingSecret` to the same value.", "default": "rabbitmq" }, "existingSecretPasswordKey": { @@ -937,6 +1007,11 @@ "clickhouse": { "type": "object", "properties": { + "deploy": { + "type": "boolean", + "description": "Deploy component", + "default": true + }, "externalHost": { "type": "string", "description": "External host with ClickHouse. Requires setting `clickhouse.deploy` to `false`.", @@ -1028,7 +1103,7 @@ }, "existingSecret": { "type": "string", - "description": "Name of the secret with the administrator password", + "description": "Name of an existing secret with ClickHouse credentials, read by the sub-chart when `clickhouse.deploy=true`. When set, the `auth.password` parameter is ignored. 
The secret must carry AUTH credentials only — `CLICKHOUSE_USER`, `CLICKHOUSE_PASSWORD`, `CLICKHOUSE_DB` (connection metadata `CLICKHOUSE_ADDR`/`CLICKHOUSE_SSL_*` lives in the always-created `cs-clickhouse-config` ConfigMap and must not be placed in this Secret). NOTE: To make the same secret visible to all consumer services (history-api, event-processor, etc.), set `global.clickhouse.auth.existingSecret` to the same value.", "default": "clickhouse" }, "existingSecretPasswordKey": { @@ -1059,6 +1134,157 @@ } } } + }, + "grafana": { + "type": "object", + "properties": { + "deploy": { + "type": "boolean", + "description": "Deploy component", + "default": false + }, + "externalHost": { + "type": "string", + "description": "External host with Grafana. Requires setting `grafana.deploy` to `false`.", + "default": "" + }, + "fullnameOverride": { + "type": "string", + "description": "String to fully override common.names.fullname", + "default": "grafana" + }, + "nodeSelector": { + "type": "object", + "description": "Node labels for pod assignment", + "default": {} + }, + "replicaCount": { + "type": "number", + "description": "Number of Grafana nodes", + "default": 1 + }, + "tls": { + "type": "object", + "properties": { + "autoGenerated": { + "type": "boolean", + "description": "Generate automatically self-signed TLS certificates if nothing is provided", + "default": true, + "nullable": true + }, + "cert": { + "type": "string", + "description": "Certificate value", + "default": "" + }, + "certKey": { + "type": "string", + "description": "Certificate key value", + "default": "" + }, + "certCA": { + "type": "string", + "description": "CA Certificate value", + "default": "" + } + } + }, + "auth": { + "type": "object", + "properties": { + "username": { + "type": "string", + "description": "Grafana administrator name", + "default": "runtime-radar" + }, + "password": { + "type": "string", + "description": "Grafana administrator password", + "default": "" + } + } + }, + 
"datasourcesSecretName": { + "type": "string", + "description": "The name of an externally-managed secret containing custom datasource files.", + "default": "grafana-datasources" + }, + "dashboardsProvider": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean", + "description": "Enable the use of a Grafana dashboard provider", + "default": true + } + } + }, + "subPath": { + "type": "string", + "description": "Use sub path for grafana for exposing it via reverse proxy", + "default": "grafana" + } + } + }, + "prometheus": { + "type": "object", + "properties": { + "deploy": { + "type": "boolean", + "description": "Deploy component", + "default": false + }, + "externalHost": { + "type": "string", + "description": "External host with Prometheus. Requires setting `prometheus.deploy` to `false`.", + "default": "" + }, + "fullnameOverride": { + "type": "string", + "description": "String to fully override common.names.fullname", + "default": "prometheus" + }, + "replicaCount": { + "type": "number", + "description": "Number of Prometheus replicas to deploy", + "default": 1 + }, + "persistence": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean", + "description": "Enable persistence using PVC", + "default": true + }, + "storageClass": { + "type": "string", + "description": "Persistent volume storage class", + "default": "" + }, + "size": { + "type": "string", + "description": "Data volume size", + "default": "5Gi" + }, + "existingClaim": { + "type": "string", + "description": "Name of an existing PVC", + "default": "" + }, + "selector": { + "type": "object", + "description": "Template to specify an existing persistent volume", + "default": {} + } + } + }, + "scrapeConfigmap": { + "type": "string", + "description": "ConfigMap which contains scrape config files", + "default": "cs-metrics" + } + } } } } \ No newline at end of file diff --git a/install/helm/values.yaml b/install/helm/values.yaml index a21fa8d6..5cba845e 100644 --- 
a/install/helm/values.yaml +++ b/install/helm/values.yaml @@ -1,3 +1,5 @@ +# yaml-language-server: $schema=values.schema.json + # @section Global parameters global: # -- Global Docker image registry @@ -18,7 +20,7 @@ global: enabled: true keys: - # -- Existing secret name with keys `encryption` and `token` + # -- Existing secret name with keys `encryption`, `token`, and `publicAccessTokenSalt` existingSecret: "" # -- Encryption key for secrets stored in database. Must be a 64-character hexadecimal string (32 bytes) # Generate with: openssl rand -hex 32 @@ -36,7 +38,18 @@ global: # 8848503f12a2cc1d1917045d0644680b63aa40616e4d51f1aca8143305a7f64197e28b051d691eb2b8a652f40ac0fae030056f56040fecec1f4b6c1429555068 publicAccessTokenSalt: "" + administrator: + # -- Name of an existing secret with administrator credentials (must contain keys `username` and `password`). When empty, the chart creates a Secret named `cs-account` from `username` and `password` below. + existingSecret: "" + # -- Administrator username. Ignored when `existingSecret` is set. + username: "" + # -- Administrator password. Ignored when `existingSecret` is set. + password: "" + postgresql: + auth: + # -- Name of an existing secret with PostgreSQL auth credentials (must contain `POSTGRES_USER`, `POSTGRES_PASSWORD`, `POSTGRES_DB`). When empty, the chart creates a Secret named `postgresql`. NOTE: when `postgresql.deploy=true` (default), you must also set `postgresql.auth.existingSecret` to the same value so the sub-chart reads from it. For external PostgreSQL, prefer `postgresql.deploy=false`. + existingSecret: "" tls: # -- [boolean, nullable] Enable TLS traffic support (overrides `tls.enabled`) enabled: true @@ -46,6 +59,9 @@ global: existingSecret: "" redis: + auth: + # -- Name of an existing secret with Redis auth credentials (must contain `REDIS_USER`, `REDIS_PASSWORD`). When empty, the chart creates a Secret named `redis`. 
NOTE: when `redis.deploy=true` (default), you must also set `redis.auth.existingSecret` to the same value so the sub-chart reads from it. + existingSecret: "" tls: # -- [boolean, nullable] Enable TLS traffic support (overrides `tls.enabled`) enabled: true @@ -54,7 +70,15 @@ global: # -- Name of an existing secret that contains the certificates (overrides `tls.existingSecret`) existingSecret: "" + rabbitmq: + auth: + # -- Name of an existing secret with RabbitMQ auth credentials (must contain `RABBIT_USER`, `RABBIT_PASSWORD`). When empty, the chart creates a Secret named `rabbitmq`. NOTE: when `rabbitmq.deploy=true` (default), you must also set `rabbitmq.auth.existingSecret` to the same value so the sub-chart reads from it. + existingSecret: "" + clickhouse: + auth: + # -- Name of an existing secret with ClickHouse auth credentials (must contain `CLICKHOUSE_USER`, `CLICKHOUSE_PASSWORD`, `CLICKHOUSE_DB`). When empty, the chart creates a Secret named `clickhouse`. NOTE: when `clickhouse.deploy=true` (default), you must also set `clickhouse.auth.existingSecret` to the same value so the sub-chart reads from it. For external ClickHouse, prefer `clickhouse.deploy=false`. 
+ existingSecret: "" tls: # -- [boolean, nullable] Enable TLS traffic support (overrides `tls.enabled`) enabled: true @@ -83,7 +107,7 @@ global: # -- Is this a child cluster isChildCluster: false # @skip global.csVersion - csVersion: "v0.1.0" + csVersion: "" # @section Common RR parameters # -- String to fully override common.fullname @@ -115,19 +139,16 @@ tls: # -- TLS certificate CA certCA: "" +metrics: + # -- Enable CS metrics + enabled: false + # @section Auth-center component parameters auth-center: # -- Template to specify the labels of nodes for pod assignment nodeSelector: {} # -- Number of replicas for the auth-center component replicas: 2 - administrator: - # -- Name of the existing secret with administrator credentials - existingSecret: "" - # -- Administrator name - username: "" - # -- Administrator password - password: "" # @section Policy-enforcer component parameters policy-enforcer: @@ -203,7 +224,6 @@ reverse-proxy: # http: 32320 nodePorts: {} - # @section CS-manager component parameters cs-manager: # -- Template to specify the labels of nodes for pod assignment @@ -305,6 +325,8 @@ public-api: # @section Postgresql installation configuration postgresql: + # -- Deploy component + deploy: true # -- External host with PostgreSQL. Requires setting `postgresql.deploy` to `false`. externalHost: "" # -- String to fully override common.names.fullname template @@ -321,7 +343,7 @@ postgresql: certCA: "" # PostgreSQL authentication parameters auth: - # -- Name of the existing secret with PostgreSQL credentials. The `auth.postgresPassword`, `auth.password`, and `auth.replicationPassword` values will be ignored and taken from this secret. The secret might also contain the `ldap-password` key if LDAP is enabled. If so, the `ldap.bind_password` value will be ignored and taken from this secret. + # -- Name of an existing secret with PostgreSQL credentials, read by the sub-chart when `postgresql.deploy=true`. 
The `auth.postgresPassword`, `auth.password`, and `auth.replicationPassword` values will be ignored and taken from this secret. The secret must carry AUTH credentials only — `POSTGRES_USER`, `POSTGRES_PASSWORD`, `POSTGRES_DB` (connection metadata `POSTGRES_ADDR`/`POSTGRES_SSL_*` lives in the always-created `cs-postgresql-config` ConfigMap and must not be placed in this Secret). NOTE: To make the same secret visible to all consumer services (auth-api, history-api, etc.), set `global.postgresql.auth.existingSecret` to the same value. The secret might also contain the `ldap-password` key if LDAP is enabled. existingSecret: postgresql # -- Name of custom user to be created username: runtime-radar @@ -372,6 +394,8 @@ postgresql: # @section Redis installation configuration redis: + # -- Deploy component + deploy: true # -- External host with Redis. Requires setting `redis.deploy` to `false`. externalHost: "" # -- String to fully override common.names.fullname @@ -388,8 +412,7 @@ redis: certCA: "" # Redis authentication parameters auth: - # -- Name of the existing secret with Redis credentials - # NOTE: When it's set, the previous `auth.password` parameter is ignored + # -- Name of an existing secret with Redis credentials, read by the sub-chart when `redis.deploy=true`. When set, the `auth.password` parameter is ignored. The secret must carry AUTH credentials only — `REDIS_USER`, `REDIS_PASSWORD` (connection metadata `REDIS_ADDR`/`REDIS_TLS_*` lives in the always-created `cs-redis-config` ConfigMap and must not be placed in this Secret). NOTE: To make the same secret visible to all consumer services (auth-api, history-api, etc.), set `global.redis.auth.existingSecret` to the same value. 
existingSecret: redis # -- Redis username username: runtime-radar @@ -436,6 +459,8 @@ redis: # @section RabbitMQ installation configuration rabbitmq: + # -- Deploy component + deploy: true # -- External host with RabbitMQ externalHost: "" # -- String to fully override rabbitmq.fullname template @@ -446,7 +471,7 @@ rabbitmq: username: runtime-radar # -- RabbitMQ application password password: "" - # -- Existing secret with RabbitMQ credentials (must contain value for the `rabbitmq-password` parameter) + # -- Name of an existing secret with RabbitMQ credentials, read by the sub-chart when `rabbitmq.deploy=true`. When set, the `auth.password` parameter is ignored. The secret must carry AUTH credentials only — `RABBIT_USER`, `RABBIT_PASSWORD` (connection metadata `RABBIT_ADDR` lives in the always-created `cs-rabbitmq-config` ConfigMap and must not be placed in this Secret). NOTE: To make the same secret visible to all consumer services (event-processor, history-api, etc.), set `global.rabbitmq.auth.existingSecret` to the same value. existingSecret: rabbitmq # -- Password key to be retrieved from existing secret existingSecretPasswordKey: RABBIT_PASSWORD @@ -494,6 +519,8 @@ rabbitmq: # @section Clickhouse installation configuration clickhouse: + # -- Deploy component + deploy: true # -- External host with ClickHouse. Requires setting `clickhouse.deploy` to `false`. externalHost: "" # -- String to fully override common.names.fullname @@ -551,7 +578,7 @@ clickhouse: username: runtime-radar # -- ClickHouse administartor password password: "" - # -- Name of the secret with the administrator password + # -- Name of an existing secret with ClickHouse credentials, read by the sub-chart when `clickhouse.deploy=true`. When set, the `auth.password` parameter is ignored. 
The secret must carry AUTH credentials only — `CLICKHOUSE_USER`, `CLICKHOUSE_PASSWORD`, `CLICKHOUSE_DB` (connection metadata `CLICKHOUSE_ADDR`/`CLICKHOUSE_SSL_*` lives in the always-created `cs-clickhouse-config` ConfigMap and must not be placed in this Secret). NOTE: To make the same secret visible to all consumer services (history-api, event-processor, etc.), set `global.clickhouse.auth.existingSecret` to the same value. existingSecret: clickhouse # -- Name of the key stored in the existing secret existingSecretPasswordKey: CLICKHOUSE_PASSWORD @@ -564,9 +591,116 @@ clickhouse: # -- ClickHouse metrics external host externalHost: "" +# @section Grafana installation configuration +grafana: + # -- Deploy component + deploy: false + # -- External host with Grafana. Requires setting `grafana.deploy` to `false`. + externalHost: "" + # -- String to fully override common.names.fullname + fullnameOverride: grafana + # -- Node labels for pod assignment + # Ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/ + nodeSelector: {} + # -- Number of Grafana nodes + replicaCount: 1 + # -- [object] Resource configuration for Grafana container + resources: + requests: + cpu: 500m + memory: 1024Mi + ephemeral-storage: 50Mi + limits: + cpu: "1.0" + memory: 1536Mi + ephemeral-storage: 1024Mi + # Grafana TLS parameters + tls: + # -- [boolean, nullable] Generate automatically self-signed TLS certificates if nothing is provided + autoGenerated: true + # -- Certificate value + cert: "" + # -- Certificate key value + certKey: "" + # -- CA Certificate value + certCA: "" + # Grafana Authentication + auth: + # -- Grafana administrator name + username: runtime-radar + # -- Grafana administrator password + password: "" + # Datasources configuration + # -- The name of an externally-managed secret containing custom datasource files. 
+ datasourcesSecretName: grafana-datasources + # Create dashboard provider to load dashboards, a default one is created to load dashboards + # from "/var/lib/grafana/dashboards" + dashboardsProvider: + # -- Enable the use of a Grafana dashboard provider + enabled: true + # -- [object] Array with the names of a series of ConfigMaps containing dashboards files + dashboardsConfigMaps: + - configMapName: cs-dashboards + folderName: cs + # -- Use sub path for grafana for exposing it via reverse proxy + subPath: "grafana" + # @skip grafana.grafana.extraEnvVars Array containing extra env vars to configure Grafana + extraEnvVars: + - name: GF_LOKI_TLS_CA_CERT + valueFrom: + secretKeyRef: + name: loki-crt + key: ca.crt + optional: true + +# @section Prometheus installation configuration +prometheus: + # -- Deploy component + deploy: false + # -- External host with Prometheus. Requires setting `prometheus.deploy` to `false`. + externalHost: "" + # -- String to fully override common.names.fullname + fullnameOverride: prometheus + # -- Number of Prometheus replicas to deploy + replicaCount: 1 + # -- [object] Resource configuration for Prometheus container + resources: + requests: + cpu: 250m + memory: 256Mi + ephemeral-storage: 50Mi + limits: + cpu: 375m + memory: 384Mi + ephemeral-storage: 1024Mi + # Enable persistence using Persistent Volume Claims + persistence: + # -- Enable persistence using PVC + enabled: true + # -- Persistent volume storage class + # If defined, storageClassName: + # If set to "-", storageClassName: "", which disables dynamic provisioning + # If undefined (the default) or set to null, no storageClassName spec is + # set, choosing the default provisioner. (gp2 on AWS, standard on + # GKE, AWS & OpenStack) + storageClass: "" + # -- Data volume size + size: 5Gi + # -- Name of an existing PVC + existingClaim: "" + # -- Template to specify an existing persistent volume + # If set, the PVC can't have a PV dynamically provisioned for it + # E.g. 
+ # selector: + # matchLabels: + # app: my-app + selector: {} + # -- ConfigMap which contains scrape config files + # ref: + scrapeConfigmap: "cs-metrics" + # @skip List of disabled by default tags in helm chart tags: grafana: false prometheus: false - loki: false monitoring: false diff --git a/notifier/.helm/values.yaml b/notifier/.helm/values.yaml index 2a2edb8b..ba3af4f4 100644 --- a/notifier/.helm/values.yaml +++ b/notifier/.helm/values.yaml @@ -13,7 +13,6 @@ service: grpc: 8000 http: 9000 containerSecurityContext: - enabled: true seLinuxOptions: {} privileged: false allowPrivilegeEscalation: false @@ -21,7 +20,6 @@ containerSecurityContext: seccompProfile: type: "RuntimeDefault" podSecurityContext: - enabled: true fsGroupChangePolicy: Always sysctls: [] supplementalGroups: [] @@ -49,7 +47,6 @@ resources: memory: 128Mi ephemeral-storage: 1Mi livenessProbe: - enabled: true httpGet: path: /live scheme: HTTP @@ -58,7 +55,6 @@ livenessProbe: successThreshold: 1 failureThreshold: 2 readinessProbe: - enabled: true httpGet: path: /ready scheme: HTTP @@ -67,7 +63,6 @@ readinessProbe: successThreshold: 1 failureThreshold: 3 startupProbe: - enabled: true httpGet: path: /ready scheme: HTTP diff --git a/policy-enforcer/.helm/values.yaml b/policy-enforcer/.helm/values.yaml index 572b5d6d..269977a7 100644 --- a/policy-enforcer/.helm/values.yaml +++ b/policy-enforcer/.helm/values.yaml @@ -13,7 +13,6 @@ service: http: 9000 grpc: 8000 containerSecurityContext: - enabled: true seLinuxOptions: {} privileged: false allowPrivilegeEscalation: false @@ -21,7 +20,6 @@ containerSecurityContext: seccompProfile: type: "RuntimeDefault" podSecurityContext: - enabled: true fsGroupChangePolicy: Always sysctls: [] supplementalGroups: [] @@ -47,7 +45,6 @@ resources: memory: 128Mi ephemeral-storage: 1Mi livenessProbe: - enabled: true httpGet: path: /live scheme: HTTP @@ -56,7 +53,6 @@ livenessProbe: successThreshold: 1 failureThreshold: 2 readinessProbe: - enabled: true httpGet: path: /ready 
scheme: HTTP @@ -65,7 +61,6 @@ readinessProbe: successThreshold: 1 failureThreshold: 3 startupProbe: - enabled: true httpGet: path: /ready scheme: HTTP diff --git a/public-api/.helm/values.yaml b/public-api/.helm/values.yaml index c0c89d34..f27c0e56 100644 --- a/public-api/.helm/values.yaml +++ b/public-api/.helm/values.yaml @@ -11,7 +11,6 @@ service: ports: http: 9000 containerSecurityContext: - enabled: true seLinuxOptions: {} privileged: false allowPrivilegeEscalation: false @@ -19,21 +18,22 @@ containerSecurityContext: seccompProfile: type: "RuntimeDefault" podSecurityContext: - enabled: true fsGroupChangePolicy: Always sysctls: [] supplementalGroups: [] env: - name: AUTH_API_URL - value: '{{ printf "%s://auth-center:9000" (include "common.cs.http-scheme" .) }}' + value: '{{ printf "%s://auth-api:9000" (include "common.cs.http-scheme" .) }}' - name: POLICY_ENFORCER_GRPC_ADDR value: "policy-enforcer:8000" - name: HISTORY_API_GRPC_ADDR value: "history-api:8000" + - name: KUBE_MANAGER_GRPC_ADDR + value: "kube-manager:8000" - name: ACCESS_TOKEN_SALT valueFrom: secretKeyRef: - name: cs-keys + name: '{{ include "common.cs.keys.secretName" . 
}}' key: publicAccessTokenSalt postgresql: enabled: true @@ -56,7 +56,6 @@ resources: memory: 128Mi ephemeral-storage: 1Mi livenessProbe: - enabled: true httpGet: path: /live scheme: HTTP @@ -65,7 +64,6 @@ livenessProbe: successThreshold: 1 failureThreshold: 2 readinessProbe: - enabled: true httpGet: path: /ready scheme: HTTP @@ -74,7 +72,6 @@ readinessProbe: successThreshold: 1 failureThreshold: 3 startupProbe: - enabled: true httpGet: path: /ready scheme: HTTP diff --git a/reverse-proxy/.helm/values.yaml b/reverse-proxy/.helm/values.yaml index f9ae8ee8..4e5b32df 100644 --- a/reverse-proxy/.helm/values.yaml +++ b/reverse-proxy/.helm/values.yaml @@ -18,7 +18,6 @@ service: health: 9090 nodePorts: {} containerSecurityContext: - enabled: true seLinuxOptions: {} privileged: false allowPrivilegeEscalation: false @@ -31,7 +30,6 @@ containerSecurityContext: add: - NET_BIND_SERVICE podSecurityContext: - enabled: true fsGroupChangePolicy: Always sysctls: [] supplementalGroups: [] @@ -68,7 +66,6 @@ debugReverseProxy: "" isChildCluster: false centralCsUrl: "" livenessProbe: - enabled: true httpGet: path: /health port: health @@ -77,7 +74,6 @@ livenessProbe: successThreshold: 1 failureThreshold: 2 readinessProbe: - enabled: true httpGet: path: /ready port: health diff --git a/runtime-monitor/.helm/values.yaml b/runtime-monitor/.helm/values.yaml index e6449a45..d40e2438 100644 --- a/runtime-monitor/.helm/values.yaml +++ b/runtime-monitor/.helm/values.yaml @@ -45,7 +45,6 @@ resources: memory: 128Mi ephemeral-storage: 1Mi livenessProbe: - enabled: true httpGet: path: /live scheme: HTTP @@ -54,7 +53,6 @@ livenessProbe: successThreshold: 1 failureThreshold: 2 readinessProbe: - enabled: true httpGet: path: /ready scheme: HTTP @@ -63,7 +61,6 @@ readinessProbe: successThreshold: 1 failureThreshold: 3 startupProbe: - enabled: true httpGet: path: /ready scheme: HTTP From d60ab24a5d7880b99fb6fb433bdab96b23f8e9db Mon Sep 17 00:00:00 2001 From: Alexey Olshanskiy 
<234377865+avlllo@users.noreply.github.com> Date: Tue, 26 May 2026 16:32:32 +0300 Subject: [PATCH 04/17] feat: update quickstart to 0.2 --- .gitignore | 1 + README.md | 4 ++-- docs/quickstart/quickstart.md | 6 +++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 9ca068d8..69c82850 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,4 @@ kubeconfig.yaml .task/ .build .helmpreview +.ignore diff --git a/README.md b/README.md index 5d2921af..2a6c7e80 100644 --- a/README.md +++ b/README.md @@ -52,8 +52,8 @@ Join the Runtime Radar community channels: ### v0.2.0 - [x] 🟢 Optimization of threat detection logic (PR #13). -- [ ] 🟢 Support for an expert mode that enables the addition of custom sources (`TracingPolicy`) and the modification/deletion of existing ones. -- [ ] 🟢 Metrics and dashboards enhancing observability. +- [x] 🟢 Support for an expert mode that enables the addition of custom sources (`TracingPolicy`) and the modification/deletion of existing ones. +- [x] 🟢 Metrics and dashboards enhancing observability. ### v0.3.0 diff --git a/docs/quickstart/quickstart.md b/docs/quickstart/quickstart.md index 12e1b3d8..7f0625fe 100644 --- a/docs/quickstart/quickstart.md +++ b/docs/quickstart/quickstart.md @@ -9,12 +9,12 @@ To install Runtime Radar using Helm, 1. 
Run the following command: ```bash - helm install runtime-radar -n runtime-radar --create-namespace oci://ghcr.io/runtime-radar/runtime-radar:v0.1.0 \ + helm install runtime-radar -n runtime-radar --create-namespace oci://ghcr.io/runtime-radar/runtime-radar:v0.2.0 \ --set-string 'global.ownCsUrl=https://your-domain.com:32000' \ --set-string 'global.keys.publicAccessTokenSalt=INIT-DO-NOT-USE' \ --set-string 'global.keys.encryption=INIT-DO-NOT-USE' \ - --set-string 'auth-center.administrator.username=admin' \ - --set-string 'auth-center.administrator.password=Password' \ + --set-string 'global.administrator.username=admin' \ + --set-string 'global.administrator.password=Password' \ --set-string 'reverse-proxy.service.type=NodePort' \ --set-string 'reverse-proxy.service.nodePorts.http=32000' ``` From 1c0471fb1004e95730f48f479a94c54ef414a139 Mon Sep 17 00:00:00 2001 From: Alexey Olshanskiy <234377865+avlllo@users.noreply.github.com> Date: Wed, 27 May 2026 11:32:39 +0300 Subject: [PATCH 05/17] feat: update ui to 0.2 --- radar-ui/Dockerfile | 2 +- radar-ui/apps/runtime-radar/jest.config.ts | 22 +- radar-ui/apps/runtime-radar/project.json | 19 +- .../runtime-radar/src/app/app.container.html | 21 +- .../runtime-radar/src/app/app.container.ts | 12 + .../components/navbar/navbar.component.html | 4 +- .../components/navbar/navbar.component.scss | 9 +- .../app/components/navbar/navbar.component.ts | 7 +- radar-ui/apps/runtime-radar/src/index.html | 2 +- .../runtime-radar/src/styles/_custom.scss | 8 - radar-ui/apps/runtime-radar/src/test-setup.ts | 2 - .../apps/runtime-radar/tsconfig.spec.json | 1 - radar-ui/jest.config.ts | 2 +- radar-ui/jest.preset.js | 33 +- radar-ui/libs/api/jest.config.ts | 22 +- radar-ui/libs/api/project.json | 4 +- .../contract/api-error-contract.interface.ts | 2 +- .../lib/services/api-utils.service.spec.ts | 42 + radar-ui/libs/api/src/test-setup.ts | 2 - radar-ui/libs/api/tsconfig.spec.json | 1 - radar-ui/libs/core/jest.config.ts | 22 +- 
radar-ui/libs/core/project.json | 4 +- .../src/lib/constants/core-router.constant.ts | 2 + radar-ui/libs/core/src/lib/core.module.ts | 2 + .../src/lib/services/core-init.service.ts | 6 +- .../lib/services/core-utils.service.spec.ts | 217 ++ .../src/lib/services/core-utils.service.ts | 4 +- .../src/lib/services/core-window.service.ts | 12 + .../lib/validators/core.validators.spec.ts | 37 + radar-ui/libs/core/src/test-setup.ts | 2 - radar-ui/libs/core/tsconfig.spec.json | 1 - radar-ui/libs/domains/auth/jest.config.ts | 22 +- radar-ui/libs/domains/auth/project.json | 4 +- radar-ui/libs/domains/auth/src/index.ts | 1 - .../contract/auth-api-contract.interface.ts | 8 - .../interfaces/state/auth-state.interface.ts | 2 - .../src/lib/services/auth-request.service.ts | 17 +- .../src/lib/services/auth-store.service.ts | 6 +- .../auth/src/lib/stores/auth-effect.store.ts | 37 +- .../auth/src/lib/stores/auth-reducer.store.ts | 4 +- .../src/lib/stores/auth-selector.store.ts | 4 - radar-ui/libs/domains/auth/src/test-setup.ts | 2 - radar-ui/libs/domains/auth/tsconfig.spec.json | 1 - radar-ui/libs/domains/cluster/jest.config.ts | 22 +- radar-ui/libs/domains/cluster/project.json | 4 +- .../cluster-api-contract.interface.ts | 1 + .../contract/cluster-contract.interface.ts | 15 + .../lib/services/cluster-request.service.ts | 20 +- .../libs/domains/cluster/src/test-setup.ts | 2 - .../libs/domains/cluster/tsconfig.spec.json | 1 - radar-ui/libs/domains/detector/jest.config.ts | 22 +- radar-ui/libs/domains/detector/project.json | 4 +- .../libs/domains/detector/src/test-setup.ts | 2 - .../libs/domains/detector/tsconfig.spec.json | 1 - .../libs/domains/integration/jest.config.ts | 22 +- .../libs/domains/integration/project.json | 4 +- .../domains/integration/src/test-setup.ts | 2 - .../domains/integration/tsconfig.spec.json | 1 - .../libs/domains/kube-manager/.eslintrc.json | 8 + .../libs/domains/kube-manager/jest.config.ts | 5 + .../libs/domains/kube-manager/project.json | 23 + 
.../libs/domains/kube-manager/src/index.ts | 4 + .../kube-manager-api-contract.interface.ts | 56 + .../kube-manager-contract.interface.ts | 43 + .../kube-manager-core-contract.interface.ts | 91 + .../kube-manager/src/lib/interfaces/index.ts | 4 + .../state/kube-manager-selector.interface.ts | 25 + .../state/kube-manager-state.interface.ts | 13 + .../src/lib/kube-manager.module.ts | 20 + .../src/lib/mocks/kube-manager.mock.ts | 114 + ...manager-group-namespaces-sort.pipe.spec.ts | 32 + ...kube-manager-group-namespaces-sort.pipe.ts | 22 + .../services/kube-manager-request.service.ts | 56 + .../services/kube-manager-store.service.ts | 53 + .../lib/stores/kube-manager-action.store.ts | 22 + .../stores/kube-manager-effect.store.spec.ts | 162 + .../lib/stores/kube-manager-effect.store.ts | 94 + .../lib/stores/kube-manager-reducer.store.ts | 35 + .../kube-manager-selector.store.spec.ts | 234 ++ .../lib/stores/kube-manager-selector.store.ts | 127 + .../libs/domains/kube-manager/tsconfig.json | 13 + .../domains/kube-manager/tsconfig.lib.json | 24 + .../domains/kube-manager/tsconfig.spec.json | 13 + radar-ui/libs/domains/license/.eslintrc.json | 8 + radar-ui/libs/domains/license/jest.config.ts | 5 + radar-ui/libs/domains/license/project.json | 23 + radar-ui/libs/domains/license/src/index.ts | 4 + .../license-api-contract.interface.ts | 7 + .../license/src/lib/interfaces/index.ts | 2 + .../state/license-state.interface.ts | 5 + .../domains/license/src/lib/license.module.ts | 17 + .../lib/services/license-request.service.ts | 26 + .../src/lib/services/license-store.service.ts | 19 + .../src/lib/stores/license-action.store.ts | 8 + .../src/lib/stores/license-effect.store.ts | 89 + .../src/lib/stores/license-reducer.store.ts | 19 + .../src/lib/stores/license-selector.store.ts | 23 + radar-ui/libs/domains/license/tsconfig.json | 13 + .../libs/domains/license/tsconfig.lib.json | 24 + .../libs/domains/license/tsconfig.spec.json | 13 + .../libs/domains/notification/jest.config.ts | 
22 +- .../libs/domains/notification/project.json | 4 +- .../notification-contract.interface.ts | 6 +- .../src/lib/mocks/notification.mock.ts | 38 + .../lib/stores/notification-reducer.store.ts | 2 +- .../notification-selector.store.spec.ts | 34 + .../lib/stores/notification-selector.store.ts | 6 +- .../domains/notification/src/test-setup.ts | 2 - .../domains/notification/tsconfig.spec.json | 1 - radar-ui/libs/domains/role/jest.config.ts | 22 +- radar-ui/libs/domains/role/project.json | 4 +- .../contract/role-contract.interface.ts | 2 - .../src/lib/services/role-request.service.ts | 6 +- radar-ui/libs/domains/role/src/test-setup.ts | 2 - radar-ui/libs/domains/role/tsconfig.spec.json | 1 - radar-ui/libs/domains/rule/jest.config.ts | 22 +- radar-ui/libs/domains/rule/project.json | 4 +- radar-ui/libs/domains/rule/src/index.ts | 1 + .../domains/rule/src/lib/mocks/rule.mock.ts | 37 + .../src/lib/stores/rule-effect.store.spec.ts | 265 ++ .../rule/src/lib/stores/rule-reducer.store.ts | 2 +- .../lib/stores/rule-selector.store.spec.ts | 62 + .../src/lib/stores/rule-selector.store.ts | 4 +- radar-ui/libs/domains/rule/src/test-setup.ts | 2 - radar-ui/libs/domains/rule/tsconfig.spec.json | 1 - radar-ui/libs/domains/runtime/jest.config.ts | 22 +- radar-ui/libs/domains/runtime/project.json | 4 +- .../lib/guards/runtime-activate.guard.spec.ts | 61 + ...ime-config-modify-deactivate.guard.spec.ts | 64 + .../guards/runtime-deactivate.guard.spec.ts | 46 + .../runtime-api-contract.interface.ts | 4 + .../runtime-monitor-contract.interface.ts | 7 + .../state/runtime-state.interface.ts | 12 +- .../runtime/src/lib/mocks/runtime.mock.ts | 16 + .../lib/services/runtime-request.service.ts | 20 +- .../src/lib/services/runtime-store.service.ts | 15 +- .../src/lib/stores/runtime-action.store.ts | 6 + .../lib/stores/runtime-effect.store.spec.ts | 360 +++ .../src/lib/stores/runtime-effect.store.ts | 134 +- .../src/lib/stores/runtime-reducer.store.ts | 3 +- 
.../src/lib/stores/runtime-selector.store.ts | 18 +- .../libs/domains/runtime/src/test-setup.ts | 2 - .../libs/domains/runtime/tsconfig.spec.json | 1 - radar-ui/libs/domains/token/jest.config.ts | 22 +- radar-ui/libs/domains/token/project.json | 4 +- .../contract/token-contract.interface.ts | 1 - radar-ui/libs/domains/token/src/test-setup.ts | 2 - .../libs/domains/token/tsconfig.spec.json | 1 - radar-ui/libs/domains/user/jest.config.ts | 22 +- radar-ui/libs/domains/user/project.json | 4 +- .../src/lib/services/user-request.service.ts | 13 +- .../user/src/lib/stores/user-effect.store.ts | 4 +- radar-ui/libs/domains/user/src/test-setup.ts | 2 - radar-ui/libs/domains/user/tsconfig.spec.json | 1 - radar-ui/libs/features/cluster/jest.config.ts | 22 +- radar-ui/libs/features/cluster/project.json | 4 +- .../cluster/src/lib/cluster.module.ts | 2 + .../cluster-access-form.component.html | 6 +- .../cluster-access-form.component.ts | 2 +- .../cluster-database-form.component.html | 8 +- .../cluster-database-form.component.ts | 2 +- .../cluster-ingress-form.component.html | 8 +- .../cluster-ingress-form.component.ts | 2 +- .../cluster-metric-form.component.html | 126 + .../cluster-metric-form.component.ts | 125 + .../cluster-rabbit-form.component.html | 6 +- .../cluster-rabbit-form.component.ts | 2 +- .../cluster-registry-form.component.html | 2 +- .../cluster-registry-form.component.ts | 2 +- .../stepper/cluster-stepper.component.scss | 8 +- .../lib/constants/cluster-form.constant.ts | 11 + .../lib/constants/cluster-stepper.constant.ts | 5 + .../create/cluster-create.container.html | 7 + .../create/cluster-create.container.scss | 2 +- .../create/cluster-create.container.ts | 16 +- .../details/cluster-details.container.html | 22 +- .../details/cluster-details.container.ts | 10 +- .../cluster-form-state.interface.ts | 2 + .../lib/interfaces/cluster-form.interface.ts | 10 + .../interfaces/cluster-stepper.interface.ts | 1 + .../lib/resolvers/cluster-details.resolver.ts | 9 +- 
.../cluster-request-utils.service.spec.ts | 173 ++ .../services/cluster-request-utils.service.ts | 17 +- .../src/lib/stores/cluster-form.store.ts | 2 + .../libs/features/cluster/src/test-setup.ts | 2 - .../libs/features/cluster/tsconfig.spec.json | 1 - radar-ui/libs/features/error/jest.config.ts | 22 +- radar-ui/libs/features/error/project.json | 4 +- .../libs/features/error/src/test-setup.ts | 2 - .../libs/features/error/tsconfig.spec.json | 1 - .../libs/features/forbidden/jest.config.ts | 22 +- radar-ui/libs/features/forbidden/project.json | 4 +- .../libs/features/forbidden/src/test-setup.ts | 2 - .../features/forbidden/tsconfig.spec.json | 1 - .../libs/features/integration/jest.config.ts | 22 +- .../libs/features/integration/project.json | 4 +- ...on-sidepanel-recipient-form.component.html | 26 +- ...tion-sidepanel-recipient-form.component.ts | 43 +- .../integration-collapse-card.container.scss | 2 +- .../integration-collapse-card.container.ts | 10 +- .../list/integration-list.container.html | 5 +- .../list/integration-list.container.scss | 8 +- .../integration-recipient-form.interace.ts | 4 +- .../src/lib/mocks/integration.mock.ts | 19 + .../integration-helper.service.spec.ts | 32 + .../features/integration/src/test-setup.ts | 2 - .../features/integration/tsconfig.spec.json | 1 - .../libs/features/inventory/.eslintrc.json | 8 + .../libs/features/inventory/jest.config.ts | 5 + radar-ui/libs/features/inventory/project.json | 23 + radar-ui/libs/features/inventory/src/index.ts | 1 + .../inventory-detector-table.component.html | 53 + .../inventory-detector-table.component.scss | 52 + .../inventory-detector-table.component.ts | 26 + .../inventory-filter-popover.component.html | 123 + .../inventory-filter-popover.component.scss | 16 + .../inventory-filter-popover.component.ts | 104 + .../inventory-abstract.component.scss | 39 + .../legend/inventory-legend.component.html | 36 + .../legend/inventory-legend.component.scss | 57 + .../legend/inventory-legend.component.ts | 
26 + .../inventory-namespace.component.html | 23 + .../inventory-namespace.component.scss | 48 + .../inventory-namespace.component.ts | 30 + .../pod/inventory-pod.component.html | 49 + .../pod/inventory-pod.component.scss | 78 + .../components/pod/inventory-pod.component.ts | 33 + ...ventory-sidepanel-container.component.html | 10 + ...ventory-sidepanel-container.component.scss | 16 + ...inventory-sidepanel-container.component.ts | 13 + ...ventory-sidepanel-namespace.component.html | 40 + ...ventory-sidepanel-namespace.component.scss | 57 + ...inventory-sidepanel-namespace.component.ts | 17 + .../inventory-sidepanel-pod.component.html | 89 + .../inventory-sidepanel-pod.component.scss | 59 + .../inventory-sidepanel-pod.component.ts | 53 + .../constants/inventory-scheme.constant.ts | 13 + .../map/inventory-map.container.html | 122 + .../map/inventory-map.container.scss | 100 + .../containers/map/inventory-map.container.ts | 306 ++ .../inventory-sidepanel-node.container.html | 81 + .../inventory-sidepanel-node.container.scss | 65 + .../inventory-sidepanel-node.container.ts | 62 + .../inventory-drag-area.directive.ts | 121 + .../interfaces/inventory-filter.interface.ts | 6 + .../inventory-sidepanel.interface.ts | 62 + .../src/lib/inventory-routing.module.ts | 20 + .../inventory/src/lib/inventory.module.ts | 46 + .../src/lib/mocks/inventory-context.mock.ts | 88 + ...nventory-sidepanel-context.service.spec.ts | 344 +++ .../inventory-sidepanel-context.service.ts | 72 + .../libs/features/inventory/tsconfig.json | 13 + .../libs/features/inventory/tsconfig.lib.json | 24 + .../features/inventory/tsconfig.spec.json | 13 + radar-ui/libs/features/rule/jest.config.ts | 22 +- radar-ui/libs/features/rule/project.json | 4 +- .../containers/list/rule-list.container.html | 4 +- .../containers/list/rule-list.container.scss | 4 - .../containers/list/rule-list.container.ts | 3 +- .../features/rule/src/lib/mocks/rule.mock.ts | 16 + .../src/lib/pipes/rule-filter.pipe.spec.ts | 40 + 
radar-ui/libs/features/rule/src/test-setup.ts | 2 - .../libs/features/rule/tsconfig.spec.json | 1 - radar-ui/libs/features/runtime/jest.config.ts | 22 +- radar-ui/libs/features/runtime/project.json | 4 +- .../runtime-context-popover.component.scss | 2 +- .../runtime-filter-popover.component.html | 5 +- .../runtime-filter-popover.component.scss | 2 +- .../runtime-filter-popover.constant.ts | 4 +- .../runtime-preset-dropdown.component.html | 1 - .../runtime-preset-dropdown.component.ts | 43 +- ...ntime-sidepanel-policy-form.component.scss | 2 +- ...runtime-sidepanel-policy-form.component.ts | 10 +- .../runtime-sidepanel-policy.component.html | 45 +- .../runtime-sidepanel-policy.component.scss | 13 - .../runtime-sidepanel-threats.component.html | 10 +- .../runtime-sidepanel-threats.component.scss | 2 +- .../runtime-sidepanel-threats.component.ts | 2 +- .../details/runtime-details.container.html | 2 - .../details/runtime-details.container.spec.ts | 189 ++ .../runtime-events-grid.container.html | 2 +- .../events/runtime-events.container.html | 4 +- .../events/runtime-events.container.ts | 2 +- .../settings/runtime-settings.container.html | 114 +- .../settings/runtime-settings.container.scss | 26 +- .../settings/runtime-settings.container.ts | 61 +- .../runtime-event-type-icon.directive.spec.ts | 41 + .../runtime/src/lib/mocks/runtime.mock.ts | 130 + .../lib/pipes/runtime-history-label.pipe.ts | 2 +- .../services/runtime-policy-name.service.ts | 5 + .../runtime-request-adapter.service.spec.ts | 115 + .../services/runtime-utils.service.spec.ts | 57 + .../runtime-policy-name.validator.spec.ts | 56 + .../runtime-policy-name.validator.ts | 21 +- .../libs/features/runtime/src/test-setup.ts | 2 - .../libs/features/runtime/tsconfig.spec.json | 1 - radar-ui/libs/features/sign-in/jest.config.ts | 22 +- radar-ui/libs/features/sign-in/project.json | 4 +- .../libs/features/sign-in/src/test-setup.ts | 2 - .../libs/features/sign-in/tsconfig.spec.json | 1 - 
radar-ui/libs/features/switch/jest.config.ts | 22 +- radar-ui/libs/features/switch/project.json | 4 +- .../containers/page/switch-page.container.ts | 10 +- .../features/switch/src/lib/switch.module.ts | 10 +- .../libs/features/switch/src/test-setup.ts | 2 - .../libs/features/switch/tsconfig.spec.json | 1 - radar-ui/libs/features/token/jest.config.ts | 22 +- radar-ui/libs/features/token/project.json | 4 +- .../libs/features/token/src/test-setup.ts | 2 - .../libs/features/token/tsconfig.spec.json | 1 - radar-ui/libs/features/user/jest.config.ts | 22 +- radar-ui/libs/features/user/project.json | 4 +- .../user-sidepanel-user-form.component.html | 3 + .../containers/list/user-list.container.html | 4 +- .../user/src/lib/user-routing.module.ts | 23 +- radar-ui/libs/features/user/src/test-setup.ts | 2 - .../libs/features/user/tsconfig.spec.json | 1 - .../libs/i18n-resources/en-US/cluster.json | 33 +- .../libs/i18n-resources/en-US/common.json | 21 +- .../libs/i18n-resources/en-US/inventory.json | 53 + radar-ui/libs/i18n-resources/en-US/rule.json | 37 +- .../libs/i18n-resources/en-US/runtime.json | 37 +- radar-ui/libs/i18n-resources/en-US/token.json | 3 +- radar-ui/libs/i18n/jest.config.ts | 22 +- radar-ui/libs/i18n/project.json | 4 +- .../src/lib/services/i18n.service.spec.ts | 174 ++ .../i18n/src/lib/services/i18n.service.ts | 6 +- radar-ui/libs/i18n/src/test-setup.ts | 2 - radar-ui/libs/i18n/tsconfig.spec.json | 1 - radar-ui/libs/shared/jest.config.ts | 22 +- radar-ui/libs/shared/project.json | 4 +- radar-ui/libs/shared/src/index.ts | 4 +- .../shared-detector-tree-select.component.ts | 2 +- .../shared-empty-screen.component.html | 11 +- .../shared-empty-screen.component.scss | 18 +- .../shared-empty-screen.component.ts | 6 +- .../shared-event-action.component.html | 2 +- .../shared-rule-sidepanel-form.component.ts | 20 +- .../shared-rule-sidepanel.component.html | 1 - .../severity/shared-severity.component.ts | 2 +- .../tag-input/shared-tag-input.component.html | 31 + 
.../tag-input/shared-tag-input.component.ts | 147 + .../shared-date-formatter.pipe.spec.ts | 58 + .../shared-hours-formatter.pipe.spec.ts | 29 + radar-ui/libs/shared/src/lib/shared.module.ts | 6 +- radar-ui/libs/shared/src/test-setup.ts | 2 - radar-ui/libs/shared/tsconfig.spec.json | 1 - radar-ui/nx.json | 44 +- radar-ui/package.json | 15 +- radar-ui/setup-jest.ts | 18 + radar-ui/tsconfig.base.json | 3 + radar-ui/yarn.lock | 2674 ++++++++++------- 352 files changed, 9836 insertions(+), 2309 deletions(-) delete mode 100644 radar-ui/apps/runtime-radar/src/test-setup.ts create mode 100644 radar-ui/libs/api/src/lib/services/api-utils.service.spec.ts delete mode 100644 radar-ui/libs/api/src/test-setup.ts create mode 100644 radar-ui/libs/core/src/lib/services/core-utils.service.spec.ts create mode 100644 radar-ui/libs/core/src/lib/validators/core.validators.spec.ts delete mode 100644 radar-ui/libs/core/src/test-setup.ts delete mode 100644 radar-ui/libs/domains/auth/src/test-setup.ts delete mode 100644 radar-ui/libs/domains/cluster/src/test-setup.ts delete mode 100644 radar-ui/libs/domains/detector/src/test-setup.ts delete mode 100644 radar-ui/libs/domains/integration/src/test-setup.ts create mode 100644 radar-ui/libs/domains/kube-manager/.eslintrc.json create mode 100644 radar-ui/libs/domains/kube-manager/jest.config.ts create mode 100644 radar-ui/libs/domains/kube-manager/project.json create mode 100644 radar-ui/libs/domains/kube-manager/src/index.ts create mode 100644 radar-ui/libs/domains/kube-manager/src/lib/interfaces/contract/kube-manager-api-contract.interface.ts create mode 100644 radar-ui/libs/domains/kube-manager/src/lib/interfaces/contract/kube-manager-contract.interface.ts create mode 100644 radar-ui/libs/domains/kube-manager/src/lib/interfaces/contract/kube-manager-core-contract.interface.ts create mode 100644 radar-ui/libs/domains/kube-manager/src/lib/interfaces/index.ts create mode 100644 
radar-ui/libs/domains/kube-manager/src/lib/interfaces/state/kube-manager-selector.interface.ts create mode 100644 radar-ui/libs/domains/kube-manager/src/lib/interfaces/state/kube-manager-state.interface.ts create mode 100644 radar-ui/libs/domains/kube-manager/src/lib/kube-manager.module.ts create mode 100644 radar-ui/libs/domains/kube-manager/src/lib/mocks/kube-manager.mock.ts create mode 100644 radar-ui/libs/domains/kube-manager/src/lib/pipes/kube-manager-group-namespaces-sort.pipe.spec.ts create mode 100644 radar-ui/libs/domains/kube-manager/src/lib/pipes/kube-manager-group-namespaces-sort.pipe.ts create mode 100644 radar-ui/libs/domains/kube-manager/src/lib/services/kube-manager-request.service.ts create mode 100644 radar-ui/libs/domains/kube-manager/src/lib/services/kube-manager-store.service.ts create mode 100644 radar-ui/libs/domains/kube-manager/src/lib/stores/kube-manager-action.store.ts create mode 100644 radar-ui/libs/domains/kube-manager/src/lib/stores/kube-manager-effect.store.spec.ts create mode 100644 radar-ui/libs/domains/kube-manager/src/lib/stores/kube-manager-effect.store.ts create mode 100644 radar-ui/libs/domains/kube-manager/src/lib/stores/kube-manager-reducer.store.ts create mode 100644 radar-ui/libs/domains/kube-manager/src/lib/stores/kube-manager-selector.store.spec.ts create mode 100644 radar-ui/libs/domains/kube-manager/src/lib/stores/kube-manager-selector.store.ts create mode 100644 radar-ui/libs/domains/kube-manager/tsconfig.json create mode 100644 radar-ui/libs/domains/kube-manager/tsconfig.lib.json create mode 100644 radar-ui/libs/domains/kube-manager/tsconfig.spec.json create mode 100644 radar-ui/libs/domains/license/.eslintrc.json create mode 100644 radar-ui/libs/domains/license/jest.config.ts create mode 100644 radar-ui/libs/domains/license/project.json create mode 100644 radar-ui/libs/domains/license/src/index.ts create mode 100644 radar-ui/libs/domains/license/src/lib/interfaces/contract/license-api-contract.interface.ts create 
mode 100644 radar-ui/libs/domains/license/src/lib/interfaces/index.ts create mode 100644 radar-ui/libs/domains/license/src/lib/interfaces/state/license-state.interface.ts create mode 100644 radar-ui/libs/domains/license/src/lib/license.module.ts create mode 100644 radar-ui/libs/domains/license/src/lib/services/license-request.service.ts create mode 100644 radar-ui/libs/domains/license/src/lib/services/license-store.service.ts create mode 100644 radar-ui/libs/domains/license/src/lib/stores/license-action.store.ts create mode 100644 radar-ui/libs/domains/license/src/lib/stores/license-effect.store.ts create mode 100644 radar-ui/libs/domains/license/src/lib/stores/license-reducer.store.ts create mode 100644 radar-ui/libs/domains/license/src/lib/stores/license-selector.store.ts create mode 100644 radar-ui/libs/domains/license/tsconfig.json create mode 100644 radar-ui/libs/domains/license/tsconfig.lib.json create mode 100644 radar-ui/libs/domains/license/tsconfig.spec.json create mode 100644 radar-ui/libs/domains/notification/src/lib/mocks/notification.mock.ts create mode 100644 radar-ui/libs/domains/notification/src/lib/stores/notification-selector.store.spec.ts delete mode 100644 radar-ui/libs/domains/notification/src/test-setup.ts delete mode 100644 radar-ui/libs/domains/role/src/test-setup.ts create mode 100644 radar-ui/libs/domains/rule/src/lib/mocks/rule.mock.ts create mode 100644 radar-ui/libs/domains/rule/src/lib/stores/rule-effect.store.spec.ts create mode 100644 radar-ui/libs/domains/rule/src/lib/stores/rule-selector.store.spec.ts delete mode 100644 radar-ui/libs/domains/rule/src/test-setup.ts create mode 100644 radar-ui/libs/domains/runtime/src/lib/guards/runtime-activate.guard.spec.ts create mode 100644 radar-ui/libs/domains/runtime/src/lib/guards/runtime-config-modify-deactivate.guard.spec.ts create mode 100644 radar-ui/libs/domains/runtime/src/lib/guards/runtime-deactivate.guard.spec.ts create mode 100644 
radar-ui/libs/domains/runtime/src/lib/mocks/runtime.mock.ts create mode 100644 radar-ui/libs/domains/runtime/src/lib/stores/runtime-effect.store.spec.ts delete mode 100644 radar-ui/libs/domains/runtime/src/test-setup.ts delete mode 100644 radar-ui/libs/domains/token/src/test-setup.ts delete mode 100644 radar-ui/libs/domains/user/src/test-setup.ts create mode 100644 radar-ui/libs/features/cluster/src/lib/components/metric-form/cluster-metric-form.component.html create mode 100644 radar-ui/libs/features/cluster/src/lib/components/metric-form/cluster-metric-form.component.ts create mode 100644 radar-ui/libs/features/cluster/src/lib/services/cluster-request-utils.service.spec.ts delete mode 100644 radar-ui/libs/features/cluster/src/test-setup.ts delete mode 100644 radar-ui/libs/features/error/src/test-setup.ts delete mode 100644 radar-ui/libs/features/forbidden/src/test-setup.ts create mode 100644 radar-ui/libs/features/integration/src/lib/mocks/integration.mock.ts create mode 100644 radar-ui/libs/features/integration/src/lib/services/integration-helper.service.spec.ts delete mode 100644 radar-ui/libs/features/integration/src/test-setup.ts create mode 100644 radar-ui/libs/features/inventory/.eslintrc.json create mode 100644 radar-ui/libs/features/inventory/jest.config.ts create mode 100644 radar-ui/libs/features/inventory/project.json create mode 100644 radar-ui/libs/features/inventory/src/index.ts create mode 100644 radar-ui/libs/features/inventory/src/lib/components/detector-table/inventory-detector-table.component.html create mode 100644 radar-ui/libs/features/inventory/src/lib/components/detector-table/inventory-detector-table.component.scss create mode 100644 radar-ui/libs/features/inventory/src/lib/components/detector-table/inventory-detector-table.component.ts create mode 100644 radar-ui/libs/features/inventory/src/lib/components/filter-popover/inventory-filter-popover.component.html create mode 100644 
radar-ui/libs/features/inventory/src/lib/components/filter-popover/inventory-filter-popover.component.scss create mode 100644 radar-ui/libs/features/inventory/src/lib/components/filter-popover/inventory-filter-popover.component.ts create mode 100644 radar-ui/libs/features/inventory/src/lib/components/inventory-abstract.component.scss create mode 100644 radar-ui/libs/features/inventory/src/lib/components/legend/inventory-legend.component.html create mode 100644 radar-ui/libs/features/inventory/src/lib/components/legend/inventory-legend.component.scss create mode 100644 radar-ui/libs/features/inventory/src/lib/components/legend/inventory-legend.component.ts create mode 100644 radar-ui/libs/features/inventory/src/lib/components/namespace/inventory-namespace.component.html create mode 100644 radar-ui/libs/features/inventory/src/lib/components/namespace/inventory-namespace.component.scss create mode 100644 radar-ui/libs/features/inventory/src/lib/components/namespace/inventory-namespace.component.ts create mode 100644 radar-ui/libs/features/inventory/src/lib/components/pod/inventory-pod.component.html create mode 100644 radar-ui/libs/features/inventory/src/lib/components/pod/inventory-pod.component.scss create mode 100644 radar-ui/libs/features/inventory/src/lib/components/pod/inventory-pod.component.ts create mode 100644 radar-ui/libs/features/inventory/src/lib/components/sidepanel-container/inventory-sidepanel-container.component.html create mode 100644 radar-ui/libs/features/inventory/src/lib/components/sidepanel-container/inventory-sidepanel-container.component.scss create mode 100644 radar-ui/libs/features/inventory/src/lib/components/sidepanel-container/inventory-sidepanel-container.component.ts create mode 100644 radar-ui/libs/features/inventory/src/lib/components/sidepanel-namespace/inventory-sidepanel-namespace.component.html create mode 100644 radar-ui/libs/features/inventory/src/lib/components/sidepanel-namespace/inventory-sidepanel-namespace.component.scss 
create mode 100644 radar-ui/libs/features/inventory/src/lib/components/sidepanel-namespace/inventory-sidepanel-namespace.component.ts create mode 100644 radar-ui/libs/features/inventory/src/lib/components/sidepanel-pod/inventory-sidepanel-pod.component.html create mode 100644 radar-ui/libs/features/inventory/src/lib/components/sidepanel-pod/inventory-sidepanel-pod.component.scss create mode 100644 radar-ui/libs/features/inventory/src/lib/components/sidepanel-pod/inventory-sidepanel-pod.component.ts create mode 100644 radar-ui/libs/features/inventory/src/lib/constants/inventory-scheme.constant.ts create mode 100644 radar-ui/libs/features/inventory/src/lib/containers/map/inventory-map.container.html create mode 100644 radar-ui/libs/features/inventory/src/lib/containers/map/inventory-map.container.scss create mode 100644 radar-ui/libs/features/inventory/src/lib/containers/map/inventory-map.container.ts create mode 100644 radar-ui/libs/features/inventory/src/lib/containers/sidepanel-node/inventory-sidepanel-node.container.html create mode 100644 radar-ui/libs/features/inventory/src/lib/containers/sidepanel-node/inventory-sidepanel-node.container.scss create mode 100644 radar-ui/libs/features/inventory/src/lib/containers/sidepanel-node/inventory-sidepanel-node.container.ts create mode 100644 radar-ui/libs/features/inventory/src/lib/directives/inventory-drag-area.directive.ts create mode 100644 radar-ui/libs/features/inventory/src/lib/interfaces/inventory-filter.interface.ts create mode 100644 radar-ui/libs/features/inventory/src/lib/interfaces/inventory-sidepanel.interface.ts create mode 100644 radar-ui/libs/features/inventory/src/lib/inventory-routing.module.ts create mode 100644 radar-ui/libs/features/inventory/src/lib/inventory.module.ts create mode 100644 radar-ui/libs/features/inventory/src/lib/mocks/inventory-context.mock.ts create mode 100644 radar-ui/libs/features/inventory/src/lib/services/inventory-sidepanel-context.service.spec.ts create mode 100644 
radar-ui/libs/features/inventory/src/lib/services/inventory-sidepanel-context.service.ts create mode 100644 radar-ui/libs/features/inventory/tsconfig.json create mode 100644 radar-ui/libs/features/inventory/tsconfig.lib.json create mode 100644 radar-ui/libs/features/inventory/tsconfig.spec.json create mode 100644 radar-ui/libs/features/rule/src/lib/mocks/rule.mock.ts create mode 100644 radar-ui/libs/features/rule/src/lib/pipes/rule-filter.pipe.spec.ts delete mode 100644 radar-ui/libs/features/rule/src/test-setup.ts create mode 100644 radar-ui/libs/features/runtime/src/lib/containers/details/runtime-details.container.spec.ts create mode 100644 radar-ui/libs/features/runtime/src/lib/directives/runtime-event-type-icon.directive.spec.ts create mode 100644 radar-ui/libs/features/runtime/src/lib/mocks/runtime.mock.ts create mode 100644 radar-ui/libs/features/runtime/src/lib/services/runtime-request-adapter.service.spec.ts create mode 100644 radar-ui/libs/features/runtime/src/lib/services/runtime-utils.service.spec.ts create mode 100644 radar-ui/libs/features/runtime/src/lib/validators/runtime-policy-name.validator.spec.ts delete mode 100644 radar-ui/libs/features/runtime/src/test-setup.ts delete mode 100644 radar-ui/libs/features/sign-in/src/test-setup.ts delete mode 100644 radar-ui/libs/features/switch/src/test-setup.ts delete mode 100644 radar-ui/libs/features/token/src/test-setup.ts delete mode 100644 radar-ui/libs/features/user/src/test-setup.ts create mode 100644 radar-ui/libs/i18n-resources/en-US/inventory.json create mode 100644 radar-ui/libs/i18n/src/lib/services/i18n.service.spec.ts delete mode 100644 radar-ui/libs/i18n/src/test-setup.ts create mode 100644 radar-ui/libs/shared/src/lib/components/tag-input/shared-tag-input.component.html create mode 100644 radar-ui/libs/shared/src/lib/components/tag-input/shared-tag-input.component.ts create mode 100644 radar-ui/libs/shared/src/lib/pipes/date-formatter/shared-date-formatter.pipe.spec.ts create mode 100644 
radar-ui/libs/shared/src/lib/pipes/hours-formatter/shared-hours-formatter.pipe.spec.ts delete mode 100644 radar-ui/libs/shared/src/test-setup.ts create mode 100644 radar-ui/setup-jest.ts diff --git a/radar-ui/Dockerfile b/radar-ui/Dockerfile index 6015fbc5..e08bdd71 100644 --- a/radar-ui/Dockerfile +++ b/radar-ui/Dockerfile @@ -1,4 +1,4 @@ -FROM node:18.20.4-alpine3.20 AS builder +FROM node:20.18.0-alpine3.20 AS builder RUN --mount=type=cache,target=/var/cache/apk \ apk update && \ diff --git a/radar-ui/apps/runtime-radar/jest.config.ts b/radar-ui/apps/runtime-radar/jest.config.ts index c962d286..c3fe0afa 100644 --- a/radar-ui/apps/runtime-radar/jest.config.ts +++ b/radar-ui/apps/runtime-radar/jest.config.ts @@ -1,25 +1,5 @@ export default { displayName: 'runtime-radar', preset: '../../jest.preset.js', - setupFilesAfterEnv: [ - '/src/test-setup.ts' - ], - globals: { - 'ts-jest': { - stringifyContentPathRegex: '\\.(html|svg)$', - tsconfig: '/tsconfig.spec.json' - }, - }, - coverageDirectory: '../../coverage/apps/runtime-radar', - snapshotSerializers: [ - 'jest-preset-angular/build/serializers/no-ng-attributes', - 'jest-preset-angular/build/serializers/ng-snapshot', - 'jest-preset-angular/build/serializers/html-comment' - ], - transformIgnorePatterns: [ - 'node_modules/(?!.*\\.js$)' - ], - transform: { - '^.+\\.(ts|mjs|js|html)$': 'jest-preset-angular' - } + coverageDirectory: '../../coverage/apps/runtime-radar' }; diff --git a/radar-ui/apps/runtime-radar/project.json b/radar-ui/apps/runtime-radar/project.json index 1ecf5467..3369b4ba 100644 --- a/radar-ui/apps/runtime-radar/project.json +++ b/radar-ui/apps/runtime-radar/project.json @@ -1,7 +1,7 @@ { "name": "runtime-radar", + "$schema": "../../node_modules/nx/schemas/project-schema.json", "projectType": "application", - "root": "apps/runtime-radar", "sourceRoot": "apps/runtime-radar/src", "prefix": "runtime-radar", "targets": { @@ -14,11 +14,7 @@ "polyfills": "apps/runtime-radar/src/polyfills.ts", "tsConfig": 
"apps/runtime-radar/tsconfig.app.json", "inlineStyleLanguage": "scss", - "allowedCommonJsDependencies": [ - "@messageformat/core", - "luxon", - "flat" - ], + "allowedCommonJsDependencies": ["@messageformat/core", "luxon", "flat"], "assets": [ { "glob": "favicon.ico", @@ -56,9 +52,7 @@ "output": "./assets/i18n" } ], - "styles": [ - "apps/runtime-radar/src/styles.scss" - ], + "styles": ["apps/runtime-radar/src/styles.scss"], "scripts": [], "vendorChunk": true, "extractLicenses": false, @@ -126,15 +120,12 @@ "lint": { "executor": "@nx/linter:eslint", "options": { - "lintFilePatterns": [ - "apps/runtime-radar/src/**/*.ts", - "apps/runtime-radar/src/**/*.html" - ] + "lintFilePatterns": ["apps/runtime-radar/src/**/*.ts", "apps/runtime-radar/src/**/*.html"] } }, "test": { "executor": "@nx/jest:jest", - "outputs": ["coverage/apps/runtime-radar"], + "outputs": ["{workspaceRoot}/coverage/apps/runtime-radar"], "options": { "jestConfig": "apps/runtime-radar/jest.config.ts", "passWithNoTests": true diff --git a/radar-ui/apps/runtime-radar/src/app/app.container.html b/radar-ui/apps/runtime-radar/src/app/app.container.html index 84496197..f2d0b2ad 100644 --- a/radar-ui/apps/runtime-radar/src/app/app.container.html +++ b/radar-ui/apps/runtime-radar/src/app/app.container.html @@ -3,7 +3,10 @@ loadStatus: (loadStatus$ | async), routerName: (routerName$ | async), role: (role$ | async), - credentials: (credentials$ | async)! + credentials: (credentials$ | async)!, + appVersion: (appVersion$ | async), + hostAppVersion: (hostAppVersion$ | async), + isVersionDiff: (isVersionDiff$ | async) } as rxlet"> @if (rxlet.loadStatus === loadStatus.LOADED) { + @if (rxlet.loadStatus === loadStatus.LOADED && + rxlet.routerName !== routerName.FORBIDDEN && + rxlet.routerName !== routerName.ERROR && + rxlet.isVersionDiff) { + +
+ {{ tCommon('Common.Pseudo.Alert.Title.DifferentVersions', { appVersion: rxlet.appVersion, hostVersion: rxlet.hostAppVersion }) }} +
+ {{ tCommon('Common.Pseudo.Alert.Description.VersionWarning') }} +
+ } diff --git a/radar-ui/apps/runtime-radar/src/app/app.container.ts b/radar-ui/apps/runtime-radar/src/app/app.container.ts index 1aa12981..4dd28d39 100644 --- a/radar-ui/apps/runtime-radar/src/app/app.container.ts +++ b/radar-ui/apps/runtime-radar/src/app/app.container.ts @@ -4,6 +4,7 @@ import { Observable, bufferWhen, delay, distinctUntilChanged, map, switchMap } f import { ApiPathService } from '@cs/api'; import { I18nService } from '@cs/i18n'; +import { LicenseStoreService } from '@cs/domains/license'; import { AuthCredentials, AuthStoreService } from '@cs/domains/auth'; import { CoreNavigationStoreService, CoreWindowService, LoadStatus, RouterName } from '@cs/core'; import { Role, RoleStoreService } from '@cs/domains/role'; @@ -25,6 +26,16 @@ export class AppContainer implements OnInit { readonly routerName$: Observable = this.coreNavigationStoreService.routerName$; + readonly appVersion$: Observable = this.licenseStoreService.appVersion$; + + readonly hostAppVersion$: Observable = this.licenseStoreService.hostAppVersion$; + + readonly isVersionDiff$: Observable = this.licenseStoreService.appVersion$.pipe( + switchMap((appVersion) => + this.licenseStoreService.hostAppVersion$.pipe(map((hostVersion) => hostVersion !== appVersion)) + ) + ); + readonly role$: Observable = this.credentials$.pipe( map((credentials) => credentials.roleId), distinctUntilChanged(), @@ -40,6 +51,7 @@ export class AppContainer implements OnInit { private readonly authStoreService: AuthStoreService, private readonly coreNavigationStoreService: CoreNavigationStoreService, private readonly i18nService: I18nService, + private readonly licenseStoreService: LicenseStoreService, private readonly roleStoreService: RoleStoreService, private readonly toastService: KbqToastService, private readonly coreWindowService: CoreWindowService diff --git a/radar-ui/apps/runtime-radar/src/app/components/navbar/navbar.component.html 
b/radar-ui/apps/runtime-radar/src/app/components/navbar/navbar.component.html index 2fb16f86..5b317c67 100644 --- a/radar-ui/apps/runtime-radar/src/app/components/navbar/navbar.component.html +++ b/radar-ui/apps/runtime-radar/src/app/components/navbar/navbar.component.html @@ -71,10 +71,10 @@