From cb38bb8c37dfc7ef25aa3969562094f22e0d73d4 Mon Sep 17 00:00:00 2001 From: minherz Date: Mon, 19 Jun 2023 02:51:11 +0000 Subject: [PATCH] feat: Support configuration-based provisioning of Cloud Ops artifacts (part 1) (#1036) Add schemas describing configuration for alerts, monitoring dashboards, log-based metrics, notification channels, SLOs and uptime checks. Follow JSON schema standard (draft 2020-12). Add validation checks including linting and schema validation. Set up `/configurations/test` configuration to validate schema. This configuration will be further used to validate Terraform execution plan. Constrain app-id field in all schemas to prevent path injection. --- .github/workflows/README.md | 10 + .github/workflows/cli.yaml | 7 +- .github/workflows/configurations.yaml | 66 +++++ .github/workflows/non-terraform.yaml | 12 +- .github/workflows/terraform.yaml | 12 +- configurations/test/alerts.yaml | 47 ++++ configurations/test/dashboards.yaml | 31 +++ configurations/test/healthchecks.yaml | 32 +++ configurations/test/metrics.yaml | 25 ++ configurations/test/services.yaml | 32 +++ configurations/test/slos.yaml | 33 +++ .../schemas/configuration/alerts.json | 256 ++++++++++++++++++ .../schemas/configuration/dashboards.json | 148 ++++++++++ .../schemas/configuration/healthchecks.json | 176 ++++++++++++ .../schemas/configuration/metrics.json | 115 ++++++++ .../schemas/configuration/services.json | 87 ++++++ provisioning/schemas/configuration/slos.json | 177 ++++++++++++ 17 files changed, 1249 insertions(+), 17 deletions(-) create mode 100644 .github/workflows/configurations.yaml create mode 100644 configurations/test/alerts.yaml create mode 100644 configurations/test/dashboards.yaml create mode 100644 configurations/test/healthchecks.yaml create mode 100644 configurations/test/metrics.yaml create mode 100644 configurations/test/services.yaml create mode 100644 configurations/test/slos.yaml create mode 100644 provisioning/schemas/configuration/alerts.json 
create mode 100644 provisioning/schemas/configuration/dashboards.json create mode 100644 provisioning/schemas/configuration/healthchecks.json create mode 100644 provisioning/schemas/configuration/metrics.json create mode 100644 provisioning/schemas/configuration/services.json create mode 100644 provisioning/schemas/configuration/slos.json diff --git a/.github/workflows/README.md b/.github/workflows/README.md index ac4b4ea5f..420b438c2 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -70,6 +70,15 @@ the use of the workflow as [required status check]. It is configured to run on any "non-terraform" changes, so the required workflow will always guaranteed to terminate. +### Configurations workflow ([configurations.yaml]) + +The `configurations` workflow checks the correctness of the Sandbox configurations. It includes: + +* yaml linting +* validation of the configuration's yaml files vs. schema that is defined following JSON schema [draft2020] +* json linting +* testing configuration vs. expected terraform plan to make sure that all components are built using the "right" provider and resource definitions + ### Required workflows The workflows triggered by pull request modifications (excluding a closure of the request) @@ -114,3 +123,4 @@ For information about the customized workflow, see [workfows/README] [convention]: https://www.conventionalcommits.org/en/v1.0.0/ [snippets]: https://github.com/googleapis/repo-automation-bots/tree/main/packages/snippet-bot [trusted contributors]: https://github.com/googleapis/repo-automation-bots/tree/main/packages/trusted-contribution +[draft2020]: https://json-schema.org/draft/2020-12/release-notes.html diff --git a/.github/workflows/cli.yaml b/.github/workflows/cli.yaml index add901672..0bf30f558 100644 --- a/.github/workflows/cli.yaml +++ b/.github/workflows/cli.yaml @@ -13,6 +13,10 @@ # limitations under the License. 
name: CLI +concurrency: + group: "cli-${{ github.workflow }}-${{ github.ref }}" + cancel-in-progress: true + on: pull_request: types: [opened,synchronize,reopened] @@ -23,9 +27,6 @@ jobs: runs-on: ubuntu-latest permissions: contents: read - concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true steps: - name: Checkout source code diff --git a/.github/workflows/configurations.yaml b/.github/workflows/configurations.yaml new file mode 100644 index 000000000..9e0d2e82d --- /dev/null +++ b/.github/workflows/configurations.yaml @@ -0,0 +1,66 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: Configurations + +concurrency: + group: "configurations-${{ github.workflow }}-${{ github.ref }}" + cancel-in-progress: true + +on: + pull_request: + types: [opened,synchronize,reopened] + paths: + - 'configurations/**/*.yaml' + - 'provisioning/schemas/configuration/**/*.json' + + +jobs: + + validation: + runs-on: ubuntu-latest + permissions: + contents: read + + steps: + - name: Checkout source code + uses: actions/checkout@v3 + + - name: install ajv + run: npm install -g ajv-cli + + - name: Lint configurations + run: find "${{ github.workspace }}/configurations" -mindepth 2 -maxdepth 2 -type f \( -name "*.yaml" -o -name "*.yml" \) -exec echo {} + + + - name: Lint schemas + run: echo "JSON liniting will be here" + + - name: Schema validation + run: |- + cd "${{ github.workspace }}" + for CONFIG_FILE in $(find configurations -mindepth 2 -maxdepth 2 -type f -name "*.yaml"); do + FILENAME=$(basename -s ".yaml" "${CONFIG_FILE@L}") + case "${FILENAME}" in + alerts | dashboards | healthchecks | metrics | services | slos) + ;; + *) + echo "Unknown file ${CONFIG_FILE}" 1>&2 + continue + ;; + esac + ajv --spec=draft2020 -s "provisioning/schemas/configuration/${FILENAME}.json" -d "${CONFIG_FILE}" + done + + - name: Dry run + run: echo "Validation of Terraform plan for test configuration will be here" diff --git a/.github/workflows/non-terraform.yaml b/.github/workflows/non-terraform.yaml index a728c29f8..461569900 100644 --- a/.github/workflows/non-terraform.yaml +++ b/.github/workflows/non-terraform.yaml @@ -13,6 +13,10 @@ # limitations under the License. 
name: Terraform +concurrency: + group: "terraform-${{ github.workflow }}-${{ github.ref }}" + cancel-in-progress: true + on: pull_request: types: [opened,synchronize,reopened] @@ -21,13 +25,10 @@ on: jobs: - tflint: + validation: runs-on: ubuntu-latest permissions: contents: read - concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true steps: - run: 'echo "No work to do for non-terraform changes"' @@ -37,9 +38,6 @@ jobs: runs-on: ubuntu-latest permissions: contents: read - concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true steps: - run: 'echo "No work to do for non-terraform changes"' diff --git a/.github/workflows/terraform.yaml b/.github/workflows/terraform.yaml index 611b8be42..d61ffce07 100644 --- a/.github/workflows/terraform.yaml +++ b/.github/workflows/terraform.yaml @@ -13,6 +13,10 @@ # limitations under the License. name: Terraform +concurrency: + group: "terraform-${{ github.workflow }}-${{ github.ref }}" + cancel-in-progress: true + on: pull_request: types: [opened,synchronize,reopened] @@ -22,13 +26,10 @@ on: jobs: - tflint: + validation: runs-on: ubuntu-latest permissions: contents: read - concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true steps: - name: Checkout source code @@ -69,9 +70,6 @@ jobs: permissions: contents: 'read' id-token: 'write' - concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true steps: - name: Checkout source code diff --git a/configurations/test/alerts.yaml b/configurations/test/alerts.yaml new file mode 100644 index 000000000..053ab109a --- /dev/null +++ b/configurations/test/alerts.yaml @@ -0,0 +1,47 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +app-id: "test" +version: "0.0.1" +alerts: +- name: test-alert-with-notifications + display-name: "Test alert with notification channel" + documentation: "Lorem ipsum Lorem ipsum Lorem ipsum" + notification-channels: [sms-test-channel, pubsub-test-channel] + conditions: + - display-name: "test condition 1" + condition: + filter: "metric.type=\"monitoring.googleapis.com/uptime_check/check_passed\"" + threshold: 3 + duration: "300s" + comparison: "COMPARISON_GT" +channels: +- name: sms-test-channel + display-name: "SMS test channel" + channel: + type: sms + labels: + number: "1234567890" +- name: email-test-channel + display-name: "E-mail test channel" + channel: + type: email + labels: + email_address: "johnsmith@example.com" +- name: pubsub-test-channel + display-name: "PubSub test channel" + channel: + type: pubsub + labels: + topic: "projects/unknown/topics/unknown" diff --git a/configurations/test/dashboards.yaml b/configurations/test/dashboards.yaml new file mode 100644 index 000000000..7cdd56085 --- /dev/null +++ b/configurations/test/dashboards.yaml @@ -0,0 +1,31 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +app-id: "test" +version: "0.0.1" +dashboards: +- name: test-dashboard-1 + display-name: "Test Dashboard 1" + widgets: + - title: "CPU Usage" + x-axis-label: "Time" + y-axis-label: "%" + datasets: + - time-series: + filter: + query: "metric.type=\"compute.googleapis.com/instance/cpu/usage_time\" resource.type=\"gce_instance\"" + alignment: ALIGN_PERCENTILE_99 + unit-override: "cpu" + + min-alignment-period: "60s" diff --git a/configurations/test/healthchecks.yaml b/configurations/test/healthchecks.yaml new file mode 100644 index 000000000..98d79bfa9 --- /dev/null +++ b/configurations/test/healthchecks.yaml @@ -0,0 +1,32 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +app-id: "test" +version: "0.0.1" +checks: +- name: test-uptime-alert-1 + display-name: "HTTP uptime check" + timeout: 10 + resource: + type: k8s_service + labels: + cluster_name: "test-cluster-1" + content: + content: "ok" + matcher: CONTAINS_STRING + type: + request-method: "GET" + path: "/healtz" + port: 8008 + diff --git a/configurations/test/metrics.yaml b/configurations/test/metrics.yaml new file mode 100644 index 000000000..d9f0e215a --- /dev/null +++ b/configurations/test/metrics.yaml @@ -0,0 +1,25 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +app-id: "test" +version: "0.0.1" +metrics: +- name: test-log-metric-1 + description: "Count audit resources" + filter: "resource.type=\"audited_resource\"" + labels: + - key: resource + extractor: "EXTRACT(resource.type)" + - key: project + extractor: "EXTRACT(resource.labels.project_id)" diff --git a/configurations/test/services.yaml b/configurations/test/services.yaml new file mode 100644 index 000000000..1a9a08221 --- /dev/null +++ b/configurations/test/services.yaml @@ -0,0 +1,32 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +app-id: "test" +version: "0.0.1" +services: +- name: custom-test-service-1 + display-name: "Custom service 1" + labels: + abc1: value1 + abc2: value2 +- name: custom-test-service-2 + display-name: "Custom service 2" + basic-service: + service-type: CLOUD_ENDPOINTS + labels: + endpoint1: value1 + endpoint2: value2 + labels: + bce1: value1 + bce2: value2 diff --git a/configurations/test/slos.yaml b/configurations/test/slos.yaml new file mode 100644 index 000000000..e1d99b4a9 --- /dev/null +++ b/configurations/test/slos.yaml @@ -0,0 +1,33 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +app-id: "test" +version: "0.0.1" +slos: +- name: test-service-slo-1 + display-name: "Test availability SLO for basic service" + goal: 0.5 + period: WEEK + service: "gae:test-service" + sli: + availability: true + +- name: test-service-slo-2 + display-name: "Test latency SLO for basic service" + goal: 0.5 + period: WEEK + service: "gae:test-service" + sli: + latency: + threshold: 10s diff --git a/provisioning/schemas/configuration/alerts.json b/provisioning/schemas/configuration/alerts.json new file mode 100644 index 000000000..cbe806680 --- /dev/null +++ b/provisioning/schemas/configuration/alerts.json @@ -0,0 +1,256 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "Monitoring alerts and notification channels schema", + "type": "object", + "required": [ + "app-id", + "version", + "alerts" + ], + "properties": { + "app-id": { + "type": "string", + "pattern": "^[^\\/\\.]+$", + "description": "Unique identifier of the application" + }, + "version": { + "const": "0.0.1", + "description": "Schema version" + }, + "alerts": { + "type": "array", + "description": "A collection of alert policy definitions", + "items": { + "$ref": "#/$defs/alert" + } + }, + "channels": { + "type": "array", + "description": "A collection of notification channel definitions", + "items": { + "$ref": "#/$defs/channel" + } + } + }, + "$defs": { + "alert": { + "type": "object", + "description": "Alert policy", + "required": [ + "name", + "display-name", + "conditions" + ], + "properties": { + "name": { + "type": "string", + "description": "Unique name of the alert policy resource" + }, + "display-name": { + "type": "string", + "description": "A short name describing the alert" + }, + "documentation": { + "type": "string", + "description": "The body of the alert message. 
This text can be templatized using https://cloud.google.com/monitoring/alerts/doc-variables" + }, + "notification-channels": { + "type": "array", + "description": "The notification channels to which notifications should be sent when incidents are opened or closed or when new violations occur on an already opened incident.", + "items": { + "type": "string" + } + }, + "combiner": { + "enum": [ + "COMBINE_UNSPECIFIED", + "AND", + "OR", + "AND_WITH_MATCHING_RESOURCE" + ], + "description": "Method to combine the results of multiple conditions to determine if an incident should be opened.", + "default": "AND" + }, + "conditions": { + "type": "array", + "description": "A collection of policy conditions", + "minItems": 1, + "items": { + "$ref": "#/$defs/condition" + } + } + } + }, + "channel": { + "type": "object", + "description": "", + "required": [ + "name" + ], + "properties": { + "name": { + "type": "string", + "description": "A unique resource name for this notification channel", + "maxLength": 30 + }, + "display-name": { + "type": "string", + "description": "A human-readable name for this notification channel", + "maxLength": 512 + }, + "channel": { + "oneOf": [ + { + "$ref": "#/$defs/smsChannel" + }, + { + "$ref": "#/$defs/emailChannel" + }, + { + "$ref": "#/$defs/pubsubChannel" + } + ] + } + } + }, + "condition": { + "type": "object", + "description": "A true/false test that determines when an alerting policy should open an incident", + "required": [ + "condition" + ], + "properties": { + "display-name": { + "type": "string", + "description": "A short name or phrase used to identify the condition" + }, + "condition": { + "description": "Condition can be only one of the following:", + "anyOf": [ + { + "$ref": "#/$defs/conditionThreshold" + }, + { + "$ref": "#/$defs/conditionAbsent" + }, + { + "$ref": "#/$defs/conditionMatchedLog" + }, + { + "$ref": "#/$defs/conditionMonitoringQueryLanguage" + } + ] + } + } + }, + "conditionThreshold": { + "type": "object", + 
"description": "A condition that compares a collection of time series against a threshold", + "required": [ + "filter", + "threshold", + "comparison" + ], + "properties": { + "filter": { + "type": "string", + "description": "A filter that identifies which time series should be compared with the threshold" + }, + "threshold": { + "type": "number", + "description": "A value against which to compare the time series" + }, + "duration": { + "type": "string", + "description": "The amount of time that a time series must violate the threshold to be considered failing. only values that are a multiple of a minute--e.g., 0, 60, 120, or 300 seconds--are supported", + "pattern": "^[0-9]+s$" + }, + "comparison": { + "enum": [ + "COMPARISON_LT", + "COMPARISON_GT" + ], + "description": "The comparison to apply between the time series (indicated by filter)" + } + } + }, + "conditionAbsent": {}, + "conditionMatchedLog": {}, + "conditionMonitoringQueryLanguage": {}, + "emailChannel": { + "type": "object", + "required": [ + "type", + "labels" + ], + "properties": { + "type": { + "const": "email" + }, + "labels": { + "type": "object", + "description": "Configuration for email notification channel", + "required": [ + "email_address" + ], + "properties": { + "email_address": { + "type": "string", + "description": "An address to send email" + } + } + } + } + }, + "pubsubChannel": { + "type": "object", + "required": [ + "type", + "labels" + ], + "properties": { + "type": { + "const": "pubsub" + }, + "labels": { + "type": "object", + "description": "Configuration for PubSub notification channel", + "required": [ + "topic" + ], + "properties": { + "topic": { + "type": "string", + "description": "A fully qualified resource name of the Pub/Sub topic to post notifications" + } + } + } + } + }, + "smsChannel": { + "type": "object", + "required": [ + "type", + "labels" + ], + "properties": { + "type": { + "const": "sms" + }, + "labels": { + "type": "object", + "description": "Configuration for 
SMS notification channel", + "required": [ + "number" + ], + "properties": { + "number": { + "type": "string", + "description": "A phone number to text notifications" + } + } + } + } + } + } +} \ No newline at end of file diff --git a/provisioning/schemas/configuration/dashboards.json b/provisioning/schemas/configuration/dashboards.json new file mode 100644 index 000000000..52979216e --- /dev/null +++ b/provisioning/schemas/configuration/dashboards.json @@ -0,0 +1,148 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "Monitoring dashboard schema", + "type": "object", + "required": [ + "app-id", + "version", + "dashboards" + ], + "properties": { + "app-id": { + "type": "string", + "pattern": "^[^\\/\\.]+$", + "description": "Unique identifier of the application" + }, + "version": { + "const": "0.0.1", + "description": "Schema version" + }, + "dashboards": { + "type": "array", + "description": "A collection of dashboards", + "items": { + "$ref": "#/$defs/dashboard" + } + } + }, + "$defs": { + "dashboard": { + "type": "object", + "description": "Monitoring dashboard properties using GridLayout (https://cloud.google.com/monitoring/api/ref_v3/rest/v1/projects.dashboards#gridlayout)", + "required": [ + "name", + "display-name", + "widgets" + ], + "properties": { + "name": { + "type": "string", + "description": "Unique name of the dashboard resource" + }, + "display-name": { + "type": "string", + "description": "Human friendly name of dashboard to be displayed in Cloud Console" + }, + "widgets": { + "type": "array", + "description": "A collection of GridLayout widgets", + "items": { + "$ref": "#/$defs/widget" + } + } + } + }, + "widget": { + "type": "object", + "description": "A widget chart that displays data on a 2D (X and Y axes) plane (https://cloud.google.com/monitoring/api/ref_v3/rest/v1/projects.dashboards#xychart)", + "required": [ + "datasets" + ], + "properties": { + "title": { + "type": "string", + "description": "Human friendly name 
of widget" + }, + "x-axis-label": { + "type": "string", + "description": "Human friendly label of axis X" + }, + "y-axis-label": { + "type": "string", + "description": "Human friendly label of axis Y" + }, + "datasets": { + "type": "array", + "description": "The data displayed in this chart.", + "minItems": 1, + "items": { + "$ref": "#/$defs/dataset" + } + } + } + }, + "dataset": { + "type": "object", + "description": "Groups a time series query definition with charting options (https://cloud.google.com/monitoring/api/ref_v3/rest/v1/projects.dashboards#dataset)", + "required": [ + "time-series" + ], + "properties": { + "time-series": { + "type": "object", + "description": "A set of parameters for querying time series data", + "required": [ + "filter" + ], + "properties": { + "filter": { + "type": "object", + "description": "A set of parameters for querying time series data", + "properties": { + "query": { + "type": "string", + "description": "Identifies the metric types, resources, and projects to query" + }, + "alignment": { + "enum": [ + "ALIGN_NONE", + "ALIGN_DELTA", + "ALIGN_RATE", + "ALIGN_INTERPOLATE", + "ALIGN_NEXT_OLDER", + "ALIGN_MIN", + "ALIGN_MAX", + "ALIGN_MEAN", + "ALIGN_COUNT", + "ALIGN_SUM", + "ALIGN_STDDEV", + "ALIGN_COUNT_TRUE", + "ALIGN_COUNT_FALSE", + "ALIGN_FRACTION_TRUE", + "ALIGN_PERCENTILE_99", + "ALIGN_PERCENTILE_95", + "ALIGN_PERCENTILE_50", + "ALIGN_PERCENTILE_05", + "ALIGN_PERCENT_CHANGE" + ], + "description": "Mathematical method to group data points together into a single time series", + "default": "ALIGN_MEAN" + } + } + }, + "unit-override": { + "type": "string", + "description": "Label for unit of data contained in fetched time series", + "default": "1" + } + } + }, + "min-alignment-period": { + "type": "string", + "description": "The lower bound on data point frequency for this data set", + "pattern": "^[0-9]+(s|m|h)?$" + } + } + } + } +} \ No newline at end of file diff --git a/provisioning/schemas/configuration/healthchecks.json 
b/provisioning/schemas/configuration/healthchecks.json new file mode 100644 index 000000000..6c667ef08 --- /dev/null +++ b/provisioning/schemas/configuration/healthchecks.json @@ -0,0 +1,176 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "Uptime check schema", + "type": "object", + "required": [ + "app-id", + "version", + "checks" + ], + "properties": { + "app-id": { + "type": "string", + "pattern": "^[^\\/\\.]+$", + "description": "Unique identifier of the application" + }, + "version": { + "const": "0.0.1", + "description": "Schema version" + }, + "checks": { + "type": "array", + "description": "A collection of uptime checks", + "items": { + "$ref": "#/$defs/check" + } + } + }, + "$defs": { + "check": { + "type": "object", + "description": "Check configuration to monitor resource/service for availability", + "required": [ + "name", + "display-name", + "timeout", + "resource" + ], + "properties": { + "name": { + "type": "string", + "description": "A unique resource name for this Uptime check configuration", + "maxLength": 30 + }, + "display-name": { + "type": "string", + "description": "A human-readable name for this Uptime check configuration", + "maxLength": 512 + }, + "content": { + "type": "object", + "description": "Allows matching based on substrings and regular expressions", + "properties": { + "content": { + "type": "string", + "description": "String, regex or JSON content to match", + "maxLength": 1024 + }, + "matcher": { + "description": "Options to perform content matching", + "enum": [ + "CONTENT_MATCHER_OPTION_UNSPECIFIED", + "CONTAINS_STRING", + "NOT_CONTAINS_STRING", + "MATCHES_REGEX", + "NOT_MATCHES_REGEX" + ], + "default": "CONTENT_MATCHER_OPTION_UNSPECIFIED" + } + } + }, + "period": { + "type": "string", + "description": "How often, in seconds, the Uptime check is performed", + "default": "60s", + "pattern": "^[0-9]+s$" + }, + "resource": { + "type": "object", + "description": "The monitored resource associated 
with the configuration", + "required": [ + "type", + "labels" + ], + "properties": { + "type": { + "description": "The monitored resource type", + "enum": [ + "uptime_url", + "gce_instance", + "gae_app", + "aws_ec2_instance", + "aws_elb_load_balancer", + "k8s_service", + "servicedirectory_service", + "cloudRunRevision" + ] + }, + "labels": { + "description": "Values for all of the labels listed in the associated monitored resource descriptor", + "$ref": "#/$defs/labels" + } + } + }, + "timeout": { + "type": "number", + "description": "A maximum amount of time to wait for the request to complete", + "minimum": 1, + "maximum": 60 + }, + "type": { + "description": "One of the following Uptime check types:", + "anyOf": [ + { + "$ref": "#/$defs/httpCheck" + }, + { + "$ref": "#/$defs/tcpCheck" + } + ] + } + } + }, + "httpCheck": { + "type": "object", + "description": "Information involved in an HTTP/HTTPS Uptime check request", + "properties": { + "request-method": { + "enum": [ + "METHOD_UNSPECIFIED", + "GET", + "POST" + ], + "default": "METHOD_UNSPECIFIED" + }, + "path": { + "type": "string", + "default": "/" + }, + "port": { + "type": "number", + "default": 80 + } + } + }, + "labels": { + "type": "object", + "description": "", + "patternProperties": { + "^[a-zA-Z][a-zA-Z0-9_\\-]*$": { + "type": "string", + "minLength": 0, + "maxLength": 63 + } + }, + "minProperties": 0, + "maxProperties": 64 + }, + "tcpCheck": { + "type": "object", + "description": "Information involved in a TCP Uptime check request", + "required": [ + "port" + ], + "properties": { + "port": { + "type": "number" + }, + "pingsCount": { + "type": "number", + "description": "Number of ICMP pings", + "default": 3 + } + } + } + } +} \ No newline at end of file diff --git a/provisioning/schemas/configuration/metrics.json b/provisioning/schemas/configuration/metrics.json new file mode 100644 index 000000000..827db5b3e --- /dev/null +++ b/provisioning/schemas/configuration/metrics.json @@ -0,0 +1,115 @@ +{ + 
"$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "Log-based metric schema", + "type": "object", + "required": [ + "app-id", + "version", + "metrics" + ], + "properties": { + "app-id": { + "type": "string", + "pattern": "^[^\\/\\.]+$", + "description": "Unique identifier of the application" + }, + "version": { + "const": "0.0.1", + "description": "Schema version" + }, + "metrics": { + "type": "array", + "description": "A collection of log-based metric", + "items": { + "$ref": "#/$defs/metric" + } + } + }, + "$defs": { + "metric": { + "type": "object", + "description": "Describes a logs-based metric", + "required": [ + "name", + "filter" + ], + "properties": { + "name": { + "type": "string", + "description": "A metric identifier", + "pattern": "^[A-Za-z0-9_\\-]+$", + "maxLength": 100 + }, + "description": { + "type": "string", + "description": "A description of this metric, which is used in documentation", + "maxLength": 8000 + }, + "filter": { + "type": "string", + "description": "A logs filter which is used to match log entries (https://cloud.google.com/logging/docs/view/advanced_filters)" + }, + "bucket-name": { + "type": "string", + "description": "A fully qualified resource name of the Log Bucket that owns the Log Metric. The bucket has to be in the same project as the metric" + }, + "metric-descriptor": { + "type": "object", + "description": "A metric descriptor associated with the logs-based metric. 
If unspecified, it uses a default metric descriptor with a DELTA metric kind, INT64 value type, with no labels and a unit of '1'", + "properties": { + "kind": { + "const": "DELTA", + "description": "Fixed way to report data" + }, + "value-type": { + "enum": [ + "INT64", + "DISTRIBUTION" + ], + "description": "Types of the reported metric supported for log based metrics", + "default": "INT64" + }, + "unit": { + "const": "1", + "description": "Fixed as dimensionless" + } + } + }, + "extractor": { + "type": "string", + "description": "Method to extract metric values for distribution value type of the metric (https://cloud.google.com/logging/docs/reference/v2/rest/v2/projects.metrics#LogMetric.FIELDS.value_extractor)" + }, + "labels": { + "type": "array", + "description": "Collection of labels extracted together with the metric", + "items": { + "$ref": "#/$defs/metric-label" + } + } + } + }, + "metric-label": { + "type": "object", + "description": "A pair of label key and extractor expression to load label's value from logs", + "required": [ + "key", + "extractor" + ], + "properties": { + "key": { + "type": "string", + "description": "Unique label key", + "pattern": "^[a-z_\\-]+$" + }, + "description": { + "type": "string", + "description": "A human-readable description for the label" + }, + "extractor": { + "type": "string", + "description": "Same as #/$defs/metric/properties/extractor" + } + } + } + } +} \ No newline at end of file diff --git a/provisioning/schemas/configuration/services.json b/provisioning/schemas/configuration/services.json new file mode 100644 index 000000000..990703306 --- /dev/null +++ b/provisioning/schemas/configuration/services.json @@ -0,0 +1,87 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "Monitored custom service schema", + "type": "object", + "required": [ + "app-id", + "version", + "services" + ], + "properties": { + "app-id": { + "type": "string", + "pattern": "^[^\\/\\.]+$", + "description": 
"Unique identifier of the application" + }, + "version": { + "const": "0.0.1", + "description": "Schema version" + }, + "services": { + "type": "array", + "description": "A collection of services", + "items": { + "$ref": "#/$defs/service" + } + } + }, + "$defs": { + "service": { + "type": "object", + "description": "A monitored custom service", + "required": [ + "name" + ], + "properties": { + "name": { + "type": "string", + "description": "Unique service ID" + }, + "display-name": { + "type": "string", + "description": "Human friendly name of monitored service" + }, + "labels": { + "description": "Labels which have been used to annotate the service.", + "$ref": "#/$defs/labels" + }, + "basic-service": { + "type": "object", + "description": "A well-known service type, defined by its service type and service labels", + "required": [ + "service-type" + ], + "properties": { + "service-type": { + "description": "The type of service that this basic service defines", + "enum": [ + "APP_ENGINE", + "CLOUD_ENDPOINTS", + "CLUSTER_ISTIO", + "ISTIO_CANONICAL_SERVICE", + "CLOUD_RUN" + ] + }, + "labels": { + "description": "Labels that specify the resource that emits the monitoring data which is used for SLO reporting of this Service.", + "$ref": "#/$defs/labels" + } + } + } + } + }, + "labels": { + "type": "object", + "description": "A map of label keys to string values", + "patternProperties": { + "^[a-zA-Z][a-zA-Z0-9_\\-]*$": { + "type": "string", + "minLength": 0, + "maxLength": 63 + } + }, + "minProperties": 0, + "maxProperties": 64 + } + } +} \ No newline at end of file diff --git a/provisioning/schemas/configuration/slos.json b/provisioning/schemas/configuration/slos.json new file mode 100644 index 000000000..9209ffce3 --- /dev/null +++ b/provisioning/schemas/configuration/slos.json @@ -0,0 +1,177 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "Service's SLO schema", + "type": "object", + "required": [ + "app-id", + "version", + "slos" + ], + "properties": { + "app-id": { + "type": "string", + "pattern": "^[^\\/\\.]+$", + "description": "Unique identifier of the 
application" + }, + "version": { + "const": "0.0.1", + "description": "Schema version" + }, + "slos": { + "type": "array", + "description": "A collection of service SLOs", + "items": { + "$ref": "#/$defs/slo" + } + } + }, + "$defs": { + "slo": { + "type": "object", + "description": "A Service-Level Objective of desired good service", + "required": [ + "name", + "service", + "goal", + "period", + "sli" + ], + "properties": { + "name": { + "type": "string", + "description": "A unique SLO resource name for the service" + }, + "display-name": { + "type": "string", + "description": "A name used for UI elements listing this SLO" + }, + "goal": { + "type": "number", + "description": "The fraction of service that must be good in order for this objective to be met", + "minimum": 0, + "maximum": 0.999 + }, + "period": { + "description": "The time period over which the objective will be evaluated as one of the following:", + "anyOf": [ + { + "description": "A calendar period, semantically 'since the start of the current <calendarPeriod>'", + "enum": [ + "DAY", + "WEEK", + "FORTNIGHT", + "MONTH" + ] + }, + { + "type": "number", + "description": "A rolling time period, semantically 'in the past <rollingPeriod>' measured in days", + "minimum": 1, + "maximum": 30 + } + ] + }, + "service": { + "type": "string", + "description": "A unique service resource name in the project" + }, + "sli": { + "description": "The definition of good service, used to measure and calculate the quality of the Service's performance with respect to a single aspect of service quality", + "anyOf": [ + { + "$ref": "#/$defs/basicSLI" + }, + { + "$ref": "#/$defs/requestSLI" + } + ] + } + } + }, + "basicSLI": { + "description": "Basic SLI on a well-known service type", + "oneOf": [ + { + "type": "object", + "description": "Good service is defined to be the count of requests made to this service that return successfully", + "required": [ + "availability" + ], + "properties": { + "availability": { + "const": true + } + } + }, + { + "type": 
"object", + "description": "Good service is defined to be the count of requests made to this service that are fast enough with respect to `latency.threshold`", + "required": [ + "latency" + ], + "properties": { + "latency": { + "type": "object", + "description": "Good service is defined to be the count of requests made to this service that are fast enough with respect to `latency.threshold`", + "required": [ + "threshold" + ], + "properties": { + "threshold": { + "type": "string", + "description": "A duration string, e.g. 10s. Good service is defined to be the count of requests made to this service that return in no more than threshold.", + "pattern": "^[0-9]+s$" + } + } + } + } + } + ] + }, + "requestSLI": { + "description": "Request-based SLI", + "anyOf": [ + { + "type": "object", + "description": "Ratio of good service to total service computed from two time series (https://cloud.google.com/monitoring/api/ref_v3/rest/v3/services.serviceLevelObjectives#timeseriesratio)", + "properties": { + "goodServiceFilter": { + "type": "string", + "description": "A monitoring filter quantifying good service (see object's description)" + }, + "badServiceFilter": { + "type": "string", + "description": "A monitoring filter quantifying bad service (see object's description)" + }, + "totalServiceFilter": { + "type": "string", + "description": "A monitoring filter quantifying total service provided (see object's description)" + } + } + }, + { + "type": "object", + "description": "Ratio of good service as a count of values aggregated in a Distribution that fall into a good range (https://cloud.google.com/monitoring/api/ref_v3/rest/v3/services.serviceLevelObjectives#distributioncut)", + "properties": { + "filter": { + "type": "string", + "description": "A monitoring filter specifying a TimeSeries aggregating values (see object's description)" + }, + "range": { + "type": "object", + "description": "Range of values considered 'good'. 
For a one-sided range, set one bound to an infinite value", + "properties": { + "min": { + "type": "number" + }, + "max": { + "type": "number" + } + } + } + } + } + ] + } + } +} \ No newline at end of file