Skip to content

Commit 2790bd7

Browse files
authored
feat: dashboard in gcp (#11201)
Adds a [new dashboard](https://console.cloud.google.com/monitoring/dashboards/builder/30d2d0d2-8dd2-4535-8074-e551dbc773aa;duration=PT15M?f.mlabel.k8s_namespace_name.namespace=mitch&f.mlabel.aztec_circuit_protocol_circuit_name.protocol_circuit=&project=testnet-440309). It also has [traces](https://console.cloud.google.com/traces/list?project=testnet-440309), and the [logs](https://cloudlogging.app.goo.gl/kV6xa4jZzP8ScDLM8) are much nicer looking now. We have a new env var, USE_GCLOUD_OBSERVABILITY, which takes precedence over the OpenTelemetry (OTEL_*) env vars. The "old" OTEL env vars can still be used to point at a custom metrics stack, e.g. in local testing or in CI.
1 parent b42da6d commit 2790bd7

31 files changed

+386
-88
lines changed

spartan/aztec-network/files/config/setup-service-addresses.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ else
8686
fi
8787

8888
# Configure OTEL_COLLECTOR_ENDPOINT if not set in values file
89-
if [ "${TELEMETRY:-false}" = "true" ] && [ "${OTEL_COLLECTOR_ENDPOINT}" = "" ]; then
89+
if [ "${TELEMETRY:-false}" = "true" ] && [ "${OTEL_COLLECTOR_ENDPOINT}" = "" ] && [ "${USE_GCLOUD_OBSERVABILITY:-false}" = "false" ]; then
9090
OTEL_COLLECTOR_PORT=${OTEL_COLLECTOR_PORT:-4318}
9191
OTEL_COLLECTOR_ENDPOINT="http://metrics-opentelemetry-collector.metrics:$OTEL_COLLECTOR_PORT"
9292
fi

spartan/aztec-network/templates/_helpers.tpl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,8 @@ Service Address Setup Container
155155
value: "{{ .Values.proverNode.service.nodePort }}"
156156
- name: PROVER_BROKER_PORT
157157
value: "{{ .Values.proverBroker.service.nodePort }}"
158+
- name: USE_GCLOUD_OBSERVABILITY
159+
value: "{{ .Values.telemetry.useGcloudObservability }}"
158160
- name: SERVICE_NAME
159161
value: {{ include "aztec-network.fullname" . }}
160162
volumeMounts:

spartan/aztec-network/templates/boot-node.yaml

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -55,13 +55,6 @@ spec:
5555
sleep 5
5656
done
5757
echo "Ethereum node is ready!"
58-
{{- if .Values.telemetry.enabled }}
59-
until curl --head --silent $OTEL_COLLECTOR_ENDPOINT > /dev/null; do
60-
echo "Waiting for OpenTelemetry collector $OTEL_COLLECTOR_ENDPOINT..."
61-
sleep 5
62-
done
63-
echo "OpenTelemetry collector is ready!"
64-
{{- end }}
6558
volumeMounts:
6659
- name: config
6760
mountPath: /shared/config
@@ -123,6 +116,12 @@ spec:
123116
valueFrom:
124117
fieldRef:
125118
fieldPath: metadata.name
119+
- name: K8S_NAMESPACE_NAME
120+
valueFrom:
121+
fieldRef:
122+
fieldPath: metadata.namespace
123+
- name: USE_GCLOUD_OBSERVABILITY
124+
value: "{{ .Values.telemetry.useGcloudObservability }}"
126125
{{- end }}
127126
containers:
128127
- name: boot-node
@@ -181,6 +180,10 @@ spec:
181180
fieldPath: metadata.name
182181
- name: OTEL_SERVICE_NAME
183182
value: boot-node
183+
- name: K8S_NAMESPACE_NAME
184+
valueFrom:
185+
fieldRef:
186+
fieldPath: metadata.namespace
184187
- name: NODE_OPTIONS
185188
value: "--max-old-space-size={{ .Values.bootNode.maxOldSpaceSize}}"
186189
- name: AZTEC_PORT
@@ -235,6 +238,8 @@ spec:
235238
value: "{{ .Values.storage.dataStoreMapSize }}"
236239
- name: WS_DB_MAP_SIZE_KB
237240
value: "{{ .Values.storage.worldStateMapSize }}"
241+
- name: USE_GCLOUD_OBSERVABILITY
242+
value: "{{ .Values.telemetry.useGcloudObservability }}"
238243
ports:
239244
- containerPort: {{ .Values.bootNode.service.nodePort }}
240245
- containerPort: {{ .Values.bootNode.service.p2pTcpPort }}

spartan/aztec-network/templates/deploy-l1-verifier.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,10 @@ spec:
8282
valueFrom:
8383
fieldRef:
8484
fieldPath: metadata.name
85+
- name: K8S_NAMESPACE_NAME
86+
valueFrom:
87+
fieldRef:
88+
fieldPath: metadata.namespace
8589
- name: NODE_NO_WARNINGS
8690
value: "1"
8791
- name: LOG_LEVEL
@@ -108,6 +112,8 @@ spec:
108112
value: "{{ .Values.proverNode.service.nodePort }}"
109113
- name: SERVICE_NAME
110114
value: {{ include "aztec-network.fullname" . }}
115+
- name: USE_GCLOUD_OBSERVABILITY
116+
value: "{{ .Values.telemetry.useGcloudObservability }}"
111117
volumeMounts:
112118
- name: config
113119
mountPath: /shared/config

spartan/aztec-network/templates/faucet.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,8 @@ spec:
9292
fieldPath: metadata.name
9393
- name: OTEL_SERVICE_NAME
9494
value: faucet
95+
- name: USE_GCLOUD_OBSERVABILITY
96+
value: "{{ .Values.telemetry.useGcloudObservability }}"
9597
ports:
9698
- name: http
9799
containerPort: {{ .Values.faucet.service.nodePort }}

spartan/aztec-network/templates/prover-agent.yaml

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -57,13 +57,6 @@ spec:
5757
sleep 5
5858
done
5959
echo "Broker is ready!"
60-
{{- if .Values.telemetry.enabled }}
61-
until curl --head --silent $OTEL_COLLECTOR_ENDPOINT > /dev/null; do
62-
echo "Waiting for OpenTelemetry collector $OTEL_COLLECTOR_ENDPOINT..."
63-
sleep 5
64-
done
65-
echo "OpenTelemetry collector is ready!"
66-
{{- end }}
6760
volumeMounts:
6861
- name: config
6962
mountPath: /shared/config
@@ -90,6 +83,10 @@ spec:
9083
fieldPath: metadata.name
9184
- name: OTEL_SERVICE_NAME
9285
value: prover-agent
86+
- name: K8S_NAMESPACE_NAME
87+
valueFrom:
88+
fieldRef:
89+
fieldPath: metadata.namespace
9390
- name: AZTEC_PORT
9491
value: "{{ .Values.proverAgent.service.nodePort }}"
9592
- name: LOG_LEVEL
@@ -106,6 +103,8 @@ spec:
106103
value: {{ join "," .Values.proverAgent.proofTypes | quote }}
107104
- name: OTEL_RESOURCE_ATTRIBUTES
108105
value: service.name={{ .Release.Name }},service.namespace={{ .Release.Namespace }},service.version={{ .Chart.AppVersion }},environment={{ .Values.environment | default "production" }}
106+
- name: USE_GCLOUD_OBSERVABILITY
107+
value: "{{ .Values.telemetry.useGcloudObservability }}"
109108
resources:
110109
{{- toYaml .Values.proverAgent.resources | nindent 12 }}
111110
{{- end }}

spartan/aztec-network/templates/prover-broker.yaml

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -54,13 +54,6 @@ spec:
5454
- |
5555
source /shared/config/service-addresses
5656
cat /shared/config/service-addresses
57-
{{- if .Values.telemetry.enabled }}
58-
until curl --head --silent $OTEL_COLLECTOR_ENDPOINT > /dev/null; do
59-
echo "Waiting for OpenTelemetry collector $OTEL_COLLECTOR_ENDPOINT..."
60-
sleep 5
61-
done
62-
echo "OpenTelemetry collector is ready!"
63-
{{- end }}
6457
volumeMounts:
6558
- name: config
6659
mountPath: /shared/config
@@ -89,6 +82,10 @@ spec:
8982
fieldPath: metadata.name
9083
- name: OTEL_SERVICE_NAME
9184
value: prover-broker
85+
- name: K8S_NAMESPACE_NAME
86+
valueFrom:
87+
fieldRef:
88+
fieldPath: metadata.namespace
9289
- name: NODE_OPTIONS
9390
value: "--max-old-space-size={{ .Values.proverBroker.maxOldSpaceSize}}"
9491
- name: AZTEC_PORT
@@ -109,6 +106,8 @@ spec:
109106
value: "{{ .Values.storage.dataStoreMapSize }}"
110107
- name: OTEL_RESOURCE_ATTRIBUTES
111108
value: service.name={{ .Release.Name }},service.namespace={{ .Release.Namespace }},service.version={{ .Chart.AppVersion }},environment={{ .Values.environment | default "production" }}
109+
- name: USE_GCLOUD_OBSERVABILITY
110+
value: "{{ .Values.telemetry.useGcloudObservability }}"
112111
resources:
113112
{{- toYaml .Values.proverBroker.resources | nindent 12 }}
114113
volumes:

spartan/aztec-network/templates/prover-node.yaml

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -64,13 +64,6 @@ spec:
6464
echo "Using built-in job broker"
6565
fi
6666
67-
{{- if .Values.telemetry.enabled }}
68-
until curl --head --silent $OTEL_COLLECTOR_ENDPOINT > /dev/null; do
69-
echo "Waiting for OpenTelemetry collector $OTEL_COLLECTOR_ENDPOINT..."
70-
sleep 5
71-
done
72-
echo "OpenTelemetry collector is ready!"
73-
{{- end }}
7467
until curl --head --silent $BOOT_NODE_HOST/status; do
7568
echo "Waiting for boot node..."
7669
sleep 5
@@ -132,6 +125,10 @@ spec:
132125
fieldPath: metadata.name
133126
- name: OTEL_SERVICE_NAME
134127
value: prover-node
128+
- name: K8S_NAMESPACE_NAME
129+
valueFrom:
130+
fieldRef:
131+
fieldPath: metadata.namespace
135132
- name: POD_IP
136133
valueFrom:
137134
fieldRef:
@@ -194,6 +191,8 @@ spec:
194191
value: "{{ .Values.storage.dataStoreMapSize }}"
195192
- name: WS_DB_MAP_SIZE_KB
196193
value: "{{ .Values.storage.worldStateMapSize }}"
194+
- name: USE_GCLOUD_OBSERVABILITY
195+
value: "{{ .Values.telemetry.useGcloudObservability }}"
197196
ports:
198197
- containerPort: {{ .Values.proverNode.service.nodePort }}
199198
- containerPort: {{ .Values.proverNode.service.p2pTcpPort }}

spartan/aztec-network/templates/pxe.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,10 @@ spec:
9191
fieldPath: metadata.name
9292
- name: OTEL_SERVICE_NAME
9393
value: pxe
94+
- name: K8S_NAMESPACE_NAME
95+
valueFrom:
96+
fieldRef:
97+
fieldPath: metadata.namespace
9498
- name: AZTEC_PORT
9599
value: "{{ .Values.pxe.service.nodePort }}"
96100
- name: LOG_JSON
@@ -99,6 +103,8 @@ spec:
99103
value: "{{ .Values.pxe.logLevel }}"
100104
- name: PXE_PROVER_ENABLED
101105
value: "{{ .Values.aztec.realProofs }}"
106+
- name: USE_GCLOUD_OBSERVABILITY
107+
value: "{{ .Values.telemetry.useGcloudObservability }}"
102108
ports:
103109
- name: http
104110
containerPort: {{ .Values.pxe.service.nodePort }}

spartan/aztec-network/templates/setup-l2-contracts.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,10 @@ spec:
7474
valueFrom:
7575
fieldRef:
7676
fieldPath: metadata.name
77+
- name: K8S_NAMESPACE_NAME
78+
valueFrom:
79+
fieldRef:
80+
fieldPath: metadata.namespace
7781
- name: TELEMETRY
7882
value: "{{ .Values.telemetry.enabled }}"
7983
- name: LOG_LEVEL
@@ -96,4 +100,6 @@ spec:
96100
value: "{{ .Values.proverNode.service.nodePort }}"
97101
- name: SERVICE_NAME
98102
value: {{ include "aztec-network.fullname" . }}
103+
- name: USE_GCLOUD_OBSERVABILITY
104+
value: "{{ .Values.telemetry.useGcloudObservability }}"
99105
{{ end }}

0 commit comments

Comments
 (0)