From 301f699c35c251eed3edc751658d9216019a1372 Mon Sep 17 00:00:00 2001 From: Hubert Bugaj Date: Wed, 8 May 2024 14:48:21 +0200 Subject: [PATCH] cleanup terraform leftovers --- .github/workflows/deploy-new-relic.yml | 37 -- README.md | 101 +-- composite-action/terraform/action.yml | 172 ----- terraform/Makefile | 41 -- terraform/new-relic/.terraform.lock.hcl | 28 - terraform/new-relic/forest.json | 834 ------------------------ terraform/new-relic/main.tf | 227 ------- terraform/new-relic/variable.tf | 23 - 8 files changed, 1 insertion(+), 1462 deletions(-) delete mode 100644 .github/workflows/deploy-new-relic.yml delete mode 100644 composite-action/terraform/action.yml delete mode 100644 terraform/Makefile delete mode 100644 terraform/new-relic/.terraform.lock.hcl delete mode 100644 terraform/new-relic/forest.json delete mode 100644 terraform/new-relic/main.tf delete mode 100644 terraform/new-relic/variable.tf diff --git a/.github/workflows/deploy-new-relic.yml b/.github/workflows/deploy-new-relic.yml deleted file mode 100644 index 49b381593..000000000 --- a/.github/workflows/deploy-new-relic.yml +++ /dev/null @@ -1,37 +0,0 @@ -name: New-Relic -concurrency: ci-${{ github.ref }} - -on: - pull_request: - branches: - - main - paths: - - 'terraform/new-relic/**' - push: - branches: - - main - paths: - - 'terraform/new-relic/**' - workflow_dispatch: - -jobs: - deploy-newrelic: - name: Deploy - runs-on: ubuntu-latest - permissions: write-all - steps: - - name: Checkout the code - uses: actions/checkout@v4 - - # Using Custom Composite action in ./composite-action/terraform folder - - name: Composite Action for Deploying Terraform Resources - uses: ./composite-action/terraform - with: - do_token: ${{ secrets.DO_TOKEN }} - aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - ssh_private_key: ${{ secrets.SSH_PRIVATE_KEY }} - new_relic_api_key: ${{ secrets.NEW_RELIC_API_KEY }} - new_relic_account_id: ${{ 
secrets.NEW_RELIC_ACCOUNT_ID }} - working_directory: terraform/new-relic - environment: New Relic diff --git a/README.md b/README.md index fa517f955..d33141c7c 100644 --- a/README.md +++ b/README.md @@ -1,105 +1,6 @@ # 🌲 Forest IaC -This repository contains machine-readable specifications for the auxillilary services that [Forest](https://github.com/ChainSafe/forest) project running smoothly. The services include daily uploads of network snapshots, and automated testing of Forest's capabilities. - -# 🔧 Desired properties - - - Require minimal setup. Ideally any server with [docker](https://www.docker.com/) installed should be sufficient. - - Automatic and error-proof re-deployment when new infrastructure code is available. - - Runs without human intervention. The services should stay running unless explicitly stopped. - - Fault tolerant. - - Use a consistent strategy for uploading/storing logs and reporting errors. - - Is idempotent. Multiple instances can run without adverse effect. - - Is sanity checked. Shell scripts with `shellcheck`, Ruby scripts with `RuboCop`, Rust with `clippy`. - -# ⚡ Services - -- [x] Daily calibnet snapshots. -- [x] Sync testing for Forest docker image. -- [ ] Exhaustive RPC testing for Forest docker image. -- [ ] Sync testing for PRs. -- [ ] Export testing for PRs. - -## Pre-commit Hooks - -We've integrated several pre-commit hooks to enhance code quality and security. These hooks automatically analyze your code before each commit, ensuring it adheres to best practices and doesn't contain any sensitive secrets, especially important as you plan to run the forest-iac service in this repository. - -## Installation - -To use the pre-commit hooks in this repository, follow these steps: - -- **Install Pip**: If you don't have Pip installed on your system, you can find installation instructions [here](https://pip.pypa.io/en/stable/installation/). 
- -- **Install Pre-commit**: Run the following command to install Pre-commit - ```bash - pip install pre-commit - ``` - -- **Install the Pre-commit Hooks**: Run the following command in your project's directory to install the hooks: - - ```bash - pre-commit install - ``` - -- **(optional) Run against all the files**: it's usually a good idea to run the hooks against all of the files when adding new hooks (usually pre-commit will only run on the changed files during git hooks) - ```bash - pre-commit run --all-files - ``` - -That's it! From now on, every time you commit changes to your project, these hooks will automatically check your code. - -# 🛠️ Forest Cloud Infrastructure In DigitalOcean - -## Overview - -The Terraform folder contains terraform scripts to automate the setup of droplets on DigitalOcean. These scripts enable the configuration of essential infrastructure required for running Forest Mainnet or Calibnet Filecoin node. The script automates several steps, including: - -- Booting up a New Droplet: It initializes a new droplet with specified parameters such as image, name, region, and size. - -- Volume Attachment (optional): The script can optionally attach a storage volume to the droplet if the user specifies so (attach_volume variable set to false). To ensure compliance with device identifier restrictions on DigitalOcean, any "-" characters in the volume name are automatically replaced with "_" when mounting the volume on the droplet. - -- Running Initialization Script: The `user-data.sh` script is executed during the droplet's initialization. This script is powered by the Terraform engine and allows dynamic insertion of variables from the `terraform.tfvars` file. It handles crucial tasks such as creating a new user, configuring SSH settings, restricting SSH access, and managing Docker-related setups. Its purpose is to specifically run the Mainnet or Calibnet chain based on the specifications provided in the Terraform script. 
Additionally, it initializes Watchtower to ensure the Forest images are up to date and configures the New Relic infrastructure agent and Openmetrics New Relic container exclusively on the forest nodes. - -## Requirements -The droplet requirements to run Forest Mainnet or Calibnet nodes include: -- RAM: 8GB -- VCPU: 1 -- Disk Size: >100 GB - -The user's local machine requirements include the following: -- Install [Terraform](https://developer.hashicorp.com/terraform/downloads) -- Install `make` -- Basic DigitalOcean knowledge - -To implement the infrastructure, run the following: -- Create an `ssh-key` to be added to the DigitalOcean list and store the fingerprint for use in the next few steps; you can check more details [here](https://docs.digitalocean.com/products/droplets/how-to/add-ssh-keys/to-team/) - -- Create a space on DigitalOcean with any preferred unique name, then add the bucket name and endpoint to the `backend.tf` file located in the `forest-mainnet` or `forest-calibnet` directory, depending on which one you plan to run. - -- Generate `digitalocean_api_token` from DigitalOcean console; you can check [here](https://docs.digitalocean.com/reference/api/create-personal-access-token/) for more details. - -If you need to run this locally, you first need to set the following environment variables (you will be prompted later if you don't put these variables): - -```bash -# DigitalOcean personal access token -export TF_VAR_do_token= -# S3 access keys used by terraform. Can be generated here: https://cloud.digitalocean.com/account/api/spaces -export AWS_ACCESS_KEY_ID= -export AWS_SECRET_ACCESS_KEY= - -# Optional, only if you want install new relic agent -# New Relic details used, Can be gotten here: https://one.eu.newrelic.com/admin-portal/api-keys/home -export TF_VAR_NEW_RELIC_API_KEY= -export TF_VAR_NEW_RELIC_ACCOUNT_ID= -export TF_VAR_NR_LICENSE_KEY= -``` -Then save the file and restart the terminal for the changes to take effect. 
- -- Navigate to the terraform directory and run `make init_calib` for calibnet or `make init_main` for mainnet to initialize and verify variables. - -- Run `make plan_calib` for calibnet, or `make plan_main` for mainnet, or `make plan_lt_main` in the terraform directory to view all the configured resources. - -- To create the infrastructure, run `make apply_calib` for calibnet, or `make apply_main` for mainnet in the terraform directory. +This repository contains machine-readable specifications for the auxiliary services that keep the [Forest](https://github.com/ChainSafe/forest) project running smoothly. The services include periodic uploads of network snapshots, and automated testing of Forest's capabilities. ## Collaborators Feel free to contribute to the codebase by resolving any open issues, refactoring, adding new features, writing test cases, or any other way to make the project better and helpful to the community. Feel free to fork and send pull requests. diff --git a/composite-action/terraform/action.yml b/composite-action/terraform/action.yml deleted file mode 100644 index 452310f20..000000000 --- a/composite-action/terraform/action.yml +++ /dev/null @@ -1,172 +0,0 @@ -name: Custom Composite action to deploy terraform resources - -description: | - This action deploys the Forest infrastructure with Terraform - -inputs: - environment: - description: 'The terraform plan for the the environment infrastructure to be deployed' - required: true - do_token: - description: 'The DigitalOcean access token to use for deploying the infrastructure' - required: true - aws_access_key_id: - description: 'S3 access keys id used by terraform and service like sync check, Deploy Snapshot Service etc' - required: true - aws_secret_access_key: - description: 'S3 secret access keys used by terraform and service like sync check, Deploy Snapshot Service etc' - required: true - working_directory: - description: 'The working Directory' - required: true - ssh_private_key: - description:
'The SSH private key to use for connecting to Droplets via SSH' - slack_token: - description: 'The slack token secret used to connect the Infrastructure to Slack' - new_relic_api_key: - description: 'The New Relic API KEY' - nr_license_key: - description: 'The New Relic Access Token' - new_relic_account_id: - description: 'The New Relic Platform Region' - r2_access_key: - description: 'CloudFlare R2 access key id' - r2_secret_key: - description: 'CloudFlare R2 private access key' - -runs: - using: "composite" - steps: - - name: Setup Terraform - uses: hashicorp/setup-terraform@v2 - with: - terraform_version: v1.6.3 - - - name: Terraform Init - run: terraform init - shell: bash - working-directory: ${{ inputs.working_directory }} - env: - AWS_ACCESS_KEY_ID: ${{ inputs.aws_access_key_id }} - AWS_SECRET_ACCESS_KEY: ${{ inputs.aws_secret_access_key }} - - - name: Terraform Validate - shell: bash - run: terraform validate -no-color - working-directory: ${{ inputs.working_directory }} - - - name: Terraform Plan - shell: bash - if: github.event_name == 'pull_request' - id: plan - run: | - terraform plan -detailed-exitcode -out=tfplan -no-color -input=false || echo "Terraform plan exit code: $?" 
- continue-on-error: true - working-directory: ${{ inputs.working_directory }} - env: - TF_VAR_do_token: ${{ inputs.do_token }} - TF_VAR_AWS_ACCESS_KEY_ID: ${{ inputs.aws_access_key_id }} - TF_VAR_AWS_SECRET_ACCESS_KEY: ${{ inputs.aws_secret_access_key }} - TF_VAR_R2_ACCESS_KEY: ${{ inputs.r2_access_key }} - TF_VAR_R2_SECRET_KEY: ${{ inputs.r2_secret_key }} - AWS_ACCESS_KEY_ID: ${{ inputs.aws_access_key_id }} - AWS_SECRET_ACCESS_KEY: ${{ inputs.aws_secret_access_key }} - TF_VAR_slack_token: ${{ inputs.slack_token }} - TF_VAR_NEW_RELIC_API_KEY: ${{ inputs.new_relic_api_key }} - TF_VAR_NR_LICENSE_KEY: ${{ inputs.nr_license_key }} - TF_VAR_NEW_RELIC_ACCOUNT_ID: ${{ inputs.new_relic_account_id }} - - - name: Find Comment - if: github.event.pull_request.draft == false && - github.event_name == 'pull_request' - uses: peter-evans/find-comment@v2 - id: fc - with: - issue-number: ${{ github.event.pull_request.number }} - comment-author: 'github-actions[bot]' - body-regex: "^### Forest: ${{ inputs.environment }} Infrastructure Plan" - - - - name: Create or Update Comment - if: github.event.pull_request.draft == false && - github.event_name == 'pull_request' && - !contains(steps.plan.outputs.stdout, 'No changes. Your infrastructure matches the configuration.') - uses: peter-evans/create-or-update-comment@v2 - with: - comment-id: ${{ steps.fc.outputs.comment-id }} - issue-number: ${{ github.event.pull_request.number }} - body: | - ### Forest: ${{ inputs.environment }} Infrastructure Plan: ${{ steps.plan.outcome }} - -
Show Plan - - ``` - ${{ steps.plan.outputs.stdout }} - ``` - -
- edit-mode: replace - - - name: Delete Comment - uses: detomarco/delete-comments@v1.0.4 - if: github.event.pull_request.draft == false && - github.event_name == 'pull_request' && - contains(steps.plan.outputs.stdout, 'No changes. Your infrastructure matches the configuration.') - with: - comment-id: ${{ steps.fc.outputs.comment-id }} - - - name: Terraform Plan Status - shell: bash - if: steps.plan.outcome == 'failure' - run: exit 1 - - - name: Configure ssh-agent - if: github.ref == 'refs/heads/main' && ( github.event_name == 'push' || github.event_name == 'workflow_dispatch' ) - uses: webfactory/ssh-agent@v0.8.0 - with: - ssh-private-key: ${{ inputs.ssh_private_key }} - - - name: Terraform Apply - if: github.ref == 'refs/heads/main' && github.event_name == 'push' - run: | - if grep -q 'No changes.' tfplan; then - echo "No changes detected." - else - echo "Changes detected. Redeploying everything..." - terraform destroy -auto-approve -input=false - terraform apply -auto-approve -input=false - fi - shell: bash - working-directory: ${{ inputs.working_directory }} - env: - TF_VAR_do_token: ${{ inputs.do_token }} - TF_VAR_AWS_ACCESS_KEY_ID: ${{ inputs.aws_access_key_id }} - TF_VAR_AWS_SECRET_ACCESS_KEY: ${{ inputs.aws_secret_access_key }} - AWS_ACCESS_KEY_ID: ${{ inputs.aws_access_key_id }} - AWS_SECRET_ACCESS_KEY: ${{ inputs.aws_secret_access_key }} - TF_VAR_slack_token: ${{ inputs.slack_token }} - TF_VAR_R2_ACCESS_KEY: ${{ inputs.r2_access_key }} - TF_VAR_R2_SECRET_KEY: ${{ inputs.r2_secret_key }} - TF_VAR_NEW_RELIC_API_KEY: ${{ inputs.NEW_RELIC_API_KEY }} - TF_VAR_NR_LICENSE_KEY: ${{ inputs.NR_LICENSE_KEY }} - TF_VAR_NEW_RELIC_ACCOUNT_ID: ${{ inputs.new_relic_account_id }} - - - name: Terraform Force Apply - if: github.ref == 'refs/heads/main' && github.event_name == 'workflow_dispatch' - shell: bash - working-directory: ${{ inputs.working_directory }} - env: - TF_VAR_do_token: ${{ inputs.do_token }} - TF_VAR_AWS_ACCESS_KEY_ID: ${{ inputs.aws_access_key_id }} - 
TF_VAR_AWS_SECRET_ACCESS_KEY: ${{ inputs.aws_secret_access_key }} - AWS_ACCESS_KEY_ID: ${{ inputs.aws_access_key_id }} - AWS_SECRET_ACCESS_KEY: ${{ inputs.aws_secret_access_key }} - TF_VAR_R2_ACCESS_KEY: ${{ inputs.r2_access_key }} - TF_VAR_R2_SECRET_KEY: ${{ inputs.r2_secret_key }} - TF_VAR_slack_token: ${{ inputs.slack_token }} - TF_VAR_NEW_RELIC_API_KEY: ${{ inputs.new_relic_api_key }} - TF_VAR_NR_LICENSE_KEY: ${{ inputs.nr_license_key }} - TF_VAR_NEW_RELIC_ACCOUNT_ID: ${{ inputs.new_relic_account_id }} - run: | - terraform destroy -auto-approve -input=false - terraform apply -auto-approve -input=false diff --git a/terraform/Makefile b/terraform/Makefile deleted file mode 100644 index 9cd3cf25a..000000000 --- a/terraform/Makefile +++ /dev/null @@ -1,41 +0,0 @@ -# Define variables for Terraform -TF = terraform -TF_MAIN_DIR = forest-mainnet -TF_CALIB_DIR = forest-calibnet - -# Define the default target -.DEFAULT_GOAL := help - -help: - @echo "Usage: make [target]" - @echo "" - @echo "Targets:" - @echo " init_calib or init_main Initialize Terraform for either Forest mainnet or calibnet" - @echo " plan_calib or plan_main Generate and show an execution plan for either Forest mainnet or calibnet" - @echo " apply_main or apply_calib Apply the changes for either Forest mainnet or calibnet" - @echo " destroy_main or destroy_calib Destroy the Terraform-managed infrastructure for either Forest mainnet or calibnet" - @echo " help Shows this help message" - -init_calib: - @cd $(TF_CALIB_DIR) && $(TF) init - -plan_calib: - @cd $(TF_CALIB_DIR) && $(TF) plan - -apply_calib: - @cd $(TF_CALIB_DIR) && $(TF) apply --auto-approve - -destroy_calib: - @cd $(TF_CALIB_DIR) && $(TF) destroy - -init_main: - @cd $(TF_MAIN_DIR) && $(TF) init - -plan_main: - @cd $(TF_MAIN_DIR) && $(TF) plan - -apply_main: - @cd $(TF_MAIN_DIR) && $(TF) apply --auto-approve - -destroy_main: - @cd $(TF_MAIN_DIR) && $(TF) destroy diff --git a/terraform/new-relic/.terraform.lock.hcl 
b/terraform/new-relic/.terraform.lock.hcl deleted file mode 100644 index 62a874fef..000000000 --- a/terraform/new-relic/.terraform.lock.hcl +++ /dev/null @@ -1,28 +0,0 @@ -# This file is maintained automatically by "terraform init". -# Manual edits may be lost in future updates. - -provider "registry.terraform.io/newrelic/newrelic" { - version = "3.26.0" - constraints = "~> 3.0" - hashes = [ - "h1:qNPVaUx9wRLlgrOVG/F4cWWr6C+xE1GX5lDQP3zaYa8=", - "zh:15923cfd57b34446476bff43076d45d678dc32621542a067f83f1729d40075e4", - "zh:1e83dac5cbbe1639013abf6991b79e9eb1094a1c43326b5874ba67edf88b63af", - "zh:2883a3a796dfb111e40307a2d2def5506a6f429ac3feee46c10c73cef1348e28", - "zh:5cdfb56b7f897532c31d822a72361e6c8942fc558ee847f92f0307e3bd278e35", - "zh:7194a6a2335410fcc48894b0db753cf245f392e09cca2738b018c9f36500653f", - "zh:81e554f6f90c4a2fe4ee837fd1813ddcbda4d21e79369172935605dad155b1cf", - "zh:89087f7a386d5109585fb81e1916c2ec655cb074907a2646ecb6449845f633e4", - "zh:8dee21af840093830d863145556cc768780dd1b29e0d6942f91788af398351be", - "zh:9b8ff1c2168b2747f92dde2c2a21a8622fdad48ba4c293df280b26a1ebc5424e", - "zh:a32b31800689d61fb1ca40479cdc6951511a4e0e6dd18a95d20e432fccf16cb2", - "zh:c351fbb794494288b0dcc9b92328ed72113fd0428682fc9b3107eb07b228ddaa", - "zh:c88715070115aefe34efdeec9461f87fc514c425a73e347d0a629ec82206b388", - "zh:d7b623f86234308dadd664a81805d770b5e21f724c413b86312d84cd711643ca", - "zh:dead9da259a08100e3147d9145e363a071a5d90b0b3b90c216abda59b808be54", - "zh:e525ea50abac3b90940f2d41deafa93956bd396bb58c91e8c81d665bb048f161", - "zh:e5f07df8b10bd2367e4a272a8ee04b7553e95b12e83740de6befb63beff89710", - "zh:f171b2cf5d16d22dd402ee06139839ee0e65b674e565aa9af7d7f9d28f2287b2", - "zh:fbd1fee2c9df3aa19cf8851ce134dea6e45ea01cb85695c1726670c285797e25", - ] -} diff --git a/terraform/new-relic/forest.json b/terraform/new-relic/forest.json deleted file mode 100644 index 0a35637ce..000000000 --- a/terraform/new-relic/forest.json +++ /dev/null @@ -1,834 +0,0 @@ -{ - "name": "${name}", - 
"description": "This dashboard provides comprehensive insights into the performance and status of Forest nodes in our network. It helps in monitoring node health, database size, process time, and other key metrics.", - "permissions": "PUBLIC_READ_WRITE", - "pages": [ - { - "name": "Overview", - "description": null, - "widgets": [ - { - "title": "Head Epoch", - "layout": { - "column": 1, - "row": 1, - "width": 6, - "height": 3 - }, - "linkedEntityGuids": null, - "visualization": { - "id": "viz.billboard" - }, - "rawConfiguration": { - "facet": { - "showOtherSeries": false - }, - "nrqlQueries": [ - { - "accountIds": [ - "${account_id}" - ], - "query": "SELECT latest(head_epoch) FROM Metric WHERE clusterName = '${name}' SINCE 1 minutes ago" - } - ], - "platformOptions": { - "ignoreTimeRange": false - } - } - }, - { - "title": "", - "layout": { - "column": 7, - "row": 1, - "width": 6, - "height": 3 - }, - "linkedEntityGuids": null, - "visualization": { - "id": "viz.billboard" - }, - "rawConfiguration": { - "facet": { - "showOtherSeries": false - }, - "nrqlQueries": [ - { - "accountId": "${account_id}", - "query": "SELECT (((aggregationendtime() / 1000) - latest(process_start_time_seconds)) / 3600) as 'Process Uptime Hour' FROM Metric WHERE scrapedTargetURL = 'http://${name}:6116/metrics' SINCE 1440 minutes AGO UNTIL NOW" - } - ], - "platformOptions": { - "ignoreTimeRange": false - } - } - }, - { - "title": "", - "layout": { - "column": 1, - "row": 4, - "width": 4, - "height": 3 - }, - "linkedEntityGuids": null, - "visualization": { - "id": "viz.billboard" - }, - "rawConfiguration": { - "facet": { - "showOtherSeries": false - }, - "nrqlQueries": [ - { - "accountIds": [ - "${account_id}" - ], - "query": "SELECT derivative(head_epoch, 1 minute) AS `Tipsets Validated Per Minute` FROM Metric WHERE clusterName = '${name}' SINCE 1 day ago" - } - ], - "platformOptions": { - "ignoreTimeRange": false - } - } - }, - { - "title": "", - "layout": { - "column": 5, - "row": 4, - 
"width": 8, - "height": 3 - }, - "linkedEntityGuids": null, - "visualization": { - "id": "viz.line" - }, - "rawConfiguration": { - "facet": { - "showOtherSeries": false - }, - "legend": { - "enabled": true - }, - "nrqlQueries": [ - { - "accountIds": [ - "${account_id}" - ], - "query": "SELECT latest(full_peers) AS 'Full Peers' FROM Metric WHERE clusterName = '${name}' TIMESERIES AUTO " - } - ], - "platformOptions": { - "ignoreTimeRange": false - }, - "yAxisLeft": { - "zero": true - } - } - }, - { - "title": "Forest Host Cpu Useage ", - "layout": { - "column": 1, - "row": 7, - "width": 6, - "height": 3 - }, - "linkedEntityGuids": null, - "visualization": { - "id": "viz.line" - }, - "rawConfiguration": { - "facet": { - "showOtherSeries": false - }, - "legend": { - "enabled": true - }, - "nrqlQueries": [ - { - "accountIds": [ - "${account_id}" - ], - "query": "SELECT (average(host.cpuPercent)/ 100) AS 'Host Cpu Useage %' FROM Metric WHERE host.hostname = '${name}' TIMESERIES AUTO " - } - ], - "nullValues": { - "nullValue": "preserve" - }, - "platformOptions": { - "ignoreTimeRange": false - }, - "units": { - "unit": "PERCENTAGE" - }, - "yAxisLeft": { - "zero": true - } - } - }, - { - "title": "Process Resident Memory", - "layout": { - "column": 7, - "row": 7, - "width": 6, - "height": 3 - }, - "linkedEntityGuids": null, - "visualization": { - "id": "viz.line" - }, - "rawConfiguration": { - "facet": { - "showOtherSeries": false - }, - "legend": { - "enabled": true - }, - "nrqlQueries": [ - { - "accountIds": [ - "${account_id}" - ], - "query": "SELECT (latest(process_resident_memory_bytes) / 1073741824) as 'Process Resident Memory GB' FROM Metric WHERE scrapedTargetURL = 'http://${name}:6116/metrics' SINCE 1 day ago UNTIL NOW TIMESERIES auto" - } - ], - "platformOptions": { - "ignoreTimeRange": false - }, - "yAxisLeft": { - "zero": true - } - } - }, - { - "title": "", - "layout": { - "column": 1, - "row": 10, - "width": 4, - "height": 3 - }, - "linkedEntityGuids": null, 
- "visualization": { - "id": "viz.billboard" - }, - "rawConfiguration": { - "facet": { - "showOtherSeries": false - }, - "nrqlQueries": [ - { - "accountIds": [ - "${account_id}" - ], - "query": "SELECT latest(host.disk.totalBytes/ 1073741824) as 'Host Disk Size GB' FROM Metric WHERE hostname = '${name}' " - } - ], - "platformOptions": { - "ignoreTimeRange": false - } - } - }, - { - "title": "", - "layout": { - "column": 5, - "row": 10, - "width": 4, - "height": 3 - }, - "linkedEntityGuids": null, - "visualization": { - "id": "viz.line" - }, - "rawConfiguration": { - "facet": { - "showOtherSeries": false - }, - "legend": { - "enabled": true - }, - "nrqlQueries": [ - { - "accountIds": [ - "${account_id}" - ], - "query": "SELECT (latest(host.disk.usedPercent) / 100) as 'Host Disk Used %' FROM Metric WHERE hostname = '${name}' TIMESERIES AUTO SINCE 1 day ago" - } - ], - "platformOptions": { - "ignoreTimeRange": false - }, - "units": { - "unit": "PERCENTAGE" - }, - "yAxisLeft": { - "zero": true - } - } - }, - { - "title": "Forest Host Disk Used", - "layout": { - "column": 9, - "row": 10, - "width": 4, - "height": 3 - }, - "linkedEntityGuids": null, - "visualization": { - "id": "viz.billboard" - }, - "rawConfiguration": { - "facet": { - "showOtherSeries": false - }, - "nrqlQueries": [ - { - "accountIds": [ - "${account_id}" - ], - "query": "SELECT latest(host.disk.usedBytes / 1073741824) as 'Host Disk Used GB' FROM Metric WHERE hostname = '${name}'" - } - ], - "platformOptions": { - "ignoreTimeRange": false - } - } - } - ] - }, - { - "name": "Peers", - "description": null, - "widgets": [ - { - "title": "Bad Peers", - "layout": { - "column": 1, - "row": 1, - "width": 6, - "height": 3 - }, - "linkedEntityGuids": null, - "visualization": { - "id": "viz.line" - }, - "rawConfiguration": { - "facet": { - "showOtherSeries": false - }, - "legend": { - "enabled": true - }, - "nrqlQueries": [ - { - "accountIds": [ - "${account_id}" - ], - "query": "SELECT latest(bad_peers) FROM 
Metric WHERE clusterName = '${name}' TIMESERIES AUTO " - } - ], - "platformOptions": { - "ignoreTimeRange": false - }, - "yAxisLeft": { - "zero": true - } - } - }, - { - "title": "Peer Disconnected P2P Events", - "layout": { - "column": 7, - "row": 1, - "width": 6, - "height": 3 - }, - "linkedEntityGuids": null, - "visualization": { - "id": "viz.line" - }, - "rawConfiguration": { - "facet": { - "showOtherSeries": false - }, - "legend": { - "enabled": true - }, - "nrqlQueries": [ - { - "accountIds": [ - "${account_id}" - ], - "query": "SELECT latest(libp2p_messsage_total) FROM Metric WHERE (libp2p_message_kind = 'peer_disconnected' and clusterName = '${name}' ) SINCE 1 day ago UNTIL NOW FACET dimensions() LIMIT 100 TIMESERIES AUTO " - } - ], - "platformOptions": { - "ignoreTimeRange": false - }, - "yAxisLeft": { - "zero": true - } - } - }, - { - "title": "Hello P2P Events", - "layout": { - "column": 1, - "row": 4, - "width": 6, - "height": 3 - }, - "linkedEntityGuids": null, - "visualization": { - "id": "viz.line" - }, - "rawConfiguration": { - "facet": { - "showOtherSeries": false - }, - "legend": { - "enabled": true - }, - "nrqlQueries": [ - { - "accountIds": [ - "${account_id}" - ], - "query": "SELECT latest(libp2p_messsage_total) FROM Metric WHERE clusterName = '${name}' SINCE 1 day ago UNTIL NOW TIMESERIES AUTO" - } - ], - "platformOptions": { - "ignoreTimeRange": false - }, - "yAxisLeft": { - "zero": true - } - } - }, - { - "title": "Block P2P Events", - "layout": { - "column": 7, - "row": 4, - "width": 6, - "height": 3 - }, - "linkedEntityGuids": null, - "visualization": { - "id": "viz.line" - }, - "rawConfiguration": { - "facet": { - "showOtherSeries": false - }, - "legend": { - "enabled": true - }, - "nrqlQueries": [ - { - "accountIds": [ - "${account_id}" - ], - "query": "SELECT latest(libp2p_messsage_total) FROM Metric WHERE (libp2p_message_kind = 'peer_disconnected' and clusterName = '${name}' ) SINCE 1 day ago UNTIL NOW FACET dimensions() LIMIT 100 
TIMESERIES AUTO " - } - ], - "platformOptions": { - "ignoreTimeRange": false - }, - "units": { - "unit": "MS" - }, - "yAxisLeft": { - "zero": true - } - } - }, - { - "title": "Peer Connected P2P Events", - "layout": { - "column": 1, - "row": 7, - "width": 6, - "height": 3 - }, - "linkedEntityGuids": null, - "visualization": { - "id": "viz.line" - }, - "rawConfiguration": { - "facet": { - "showOtherSeries": false - }, - "legend": { - "enabled": true - }, - "nrqlQueries": [ - { - "accountIds": [ - "${account_id}" - ], - "query": "SELECT latest(libp2p_messsage_total) FROM Metric WHERE (libp2p_message_kind = 'peer_connected' and clusterName = '${name}' ) SINCE 1 day ago UNTIL NOW FACET dimensions() LIMIT 100 TIMESERIES AUTO " - } - ], - "platformOptions": { - "ignoreTimeRange": false - }, - "yAxisLeft": { - "zero": true - } - } - }, - { - "title": "Message P2P Events", - "layout": { - "column": 7, - "row": 7, - "width": 6, - "height": 3 - }, - "linkedEntityGuids": null, - "visualization": { - "id": "viz.line" - }, - "rawConfiguration": { - "facet": { - "showOtherSeries": false - }, - "legend": { - "enabled": true - }, - "nrqlQueries": [ - { - "accountIds": [ - "${account_id}" - ], - "query": "SELECT latest(libp2p_messsage_total) FROM Metric WHERE (libp2p_message_kind = 'pubsub_message_message' and clusterName = '${name}' ) SINCE 1 day ago UNTIL NOW FACET dimensions() LIMIT 100 TIMESERIES AUTO" - } - ], - "platformOptions": { - "ignoreTimeRange": false - }, - "yAxisLeft": { - "zero": true - } - } - }, - { - "title": "Peer Disconnected P2P Events", - "layout": { - "column": 1, - "row": 10, - "width": 4, - "height": 3 - }, - "linkedEntityGuids": null, - "visualization": { - "id": "viz.line" - }, - "rawConfiguration": { - "facet": { - "showOtherSeries": false - }, - "legend": { - "enabled": true - }, - "nrqlQueries": [ - { - "accountIds": [ - "${account_id}" - ], - "query": "SELECT latest(libp2p_messsage_total) FROM Metric WHERE (libp2p_message_kind = 'peer_disconnected' 
and clusterName = '${name}' ) SINCE 24 hours ago UNTIL NOW FACET dimensions() LIMIT 100 TIMESERIES AUTO" - } - ], - "platformOptions": { - "ignoreTimeRange": false - }, - "yAxisLeft": { - "zero": true - } - } - }, - { - "title": "Failed Peer Requests", - "layout": { - "column": 5, - "row": 10, - "width": 4, - "height": 3 - }, - "linkedEntityGuids": null, - "visualization": { - "id": "viz.line" - }, - "rawConfiguration": { - "facet": { - "showOtherSeries": false - }, - "legend": { - "enabled": true - }, - "nrqlQueries": [ - { - "accountIds": [ - "${account_id}" - ], - "query": "SELECT latest(peer_failure_total) FROM Metric WHERE clusterName = '${name}' SINCE 1 day ago UNTIL NOW FACET dimensions() LIMIT 100 TIMESERIES AUTO " - } - ], - "platformOptions": { - "ignoreTimeRange": false - }, - "units": { - "unit": "APDEX" - }, - "yAxisLeft": { - "zero": false - } - } - }, - { - "title": "Bitswap Block P2P Events", - "layout": { - "column": 9, - "row": 10, - "width": 4, - "height": 3 - }, - "linkedEntityGuids": null, - "visualization": { - "id": "viz.line" - }, - "rawConfiguration": { - "facet": { - "showOtherSeries": false - }, - "legend": { - "enabled": true - }, - "nrqlQueries": [ - { - "accountIds": [ - "${account_id}" - ], - "query": "SELECT latest(libp2p_messsage_total) FROM Metric WHERE (libp2p_message_kind = 'bitswap_block' and clusterName = '${name}' ) SINCE 1 day ago UNTIL NOW FACET dimensions() LIMIT 100 TIMESERIES AUTO " - } - ], - "platformOptions": { - "ignoreTimeRange": false - }, - "yAxisLeft": { - "zero": true - } - } - } - ] - }, - { - "name": "Stats", - "description": null, - "widgets": [ - { - "title": "Process CPU Time", - "layout": { - "column": 1, - "row": 1, - "width": 4, - "height": 3 - }, - "linkedEntityGuids": null, - "visualization": { - "id": "viz.area" - }, - "rawConfiguration": { - "facet": { - "showOtherSeries": false - }, - "legend": { - "enabled": true - }, - "nrqlQueries": [ - { - "accountIds": [ - "${account_id}" - ], - "query": "SELECT 
((latest(process_cpu_seconds_total) / 60) / 60) FROM Metric WHERE scrapedTargetURL = 'http://${name}:6116/metrics' SINCE 1 day ago UNTIL NOW FACET dimensions() LIMIT 100 TIMESERIES AUTO" - } - ], - "platformOptions": { - "ignoreTimeRange": false - } - } - }, - { - "title": "Process Virtual Memory", - "layout": { - "column": 5, - "row": 1, - "width": 4, - "height": 3 - }, - "linkedEntityGuids": null, - "visualization": { - "id": "viz.line" - }, - "rawConfiguration": { - "facet": { - "showOtherSeries": false - }, - "legend": { - "enabled": true - }, - "nrqlQueries": [ - { - "accountIds": [ - "${account_id}" - ], - "query": "SELECT (latest(process_virtual_memory_bytes) / 1073741824) as 'Process Virtual Memory' FROM Metric WHERE scrapedTargetURL = 'http://${name}:6116/metrics' SINCE 1 day ago UNTIL NOW TIMESERIES AUTO" - } - ], - "platformOptions": { - "ignoreTimeRange": false - }, - "yAxisLeft": { - "zero": true - } - } - }, - { - "title": "Database Size", - "layout": { - "column": 9, - "row": 1, - "width": 4, - "height": 3 - }, - "linkedEntityGuids": null, - "visualization": { - "id": "viz.line" - }, - "rawConfiguration": { - "facet": { - "showOtherSeries": false - }, - "legend": { - "enabled": true - }, - "nrqlQueries": [ - { - "accountIds": [ - "${account_id}" - ], - "query": "SELECT (latest(forest_db_size) / 1073741824) as 'Forest Db Size GB ' FROM Metric WHERE clusterName = '${name}' SINCE 1 day ago UNTIL NOW TIMESERIES AUTO" - } - ], - "platformOptions": { - "ignoreTimeRange": false - }, - "yAxisLeft": { - "zero": true - } - } - }, - { - "title": "Range Sync Failure Count", - "layout": { - "column": 1, - "row": 4, - "width": 4, - "height": 3 - }, - "linkedEntityGuids": null, - "visualization": { - "id": "viz.line" - }, - "rawConfiguration": { - "facet": { - "showOtherSeries": false - }, - "legend": { - "enabled": true - }, - "nrqlQueries": [ - { - "accountIds": [ - "${account_id}" - ], - "query": "SELECT latest(tipset_range_sync_failure_total) FROM Metric WHERE 
clusterName = '${name}' SINCE 1 day ago UNTIL NOW FACET dimensions() LIMIT 100 TIMESERIES AUTO" - } - ], - "platformOptions": { - "ignoreTimeRange": false - }, - "yAxisLeft": { - "zero": true - } - } - }, - { - "title": "Open File Descriptors", - "layout": { - "column": 5, - "row": 4, - "width": 8, - "height": 3 - }, - "linkedEntityGuids": null, - "visualization": { - "id": "viz.line" - }, - "rawConfiguration": { - "facet": { - "showOtherSeries": false - }, - "legend": { - "enabled": true - }, - "nrqlQueries": [ - { - "accountIds": [ - "${account_id}" - ], - "query": "SELECT latest(process_open_fds) FROM Metric WHERE scrapedTargetURL = 'http://${name}:6116/metrics' SINCE 1 day ago UNTIL NOW FACET dimensions() LIMIT 100 TIMESERIES AUTO " - } - ], - "platformOptions": { - "ignoreTimeRange": false - }, - "yAxisLeft": { - "zero": true - } - } - }, - { - "title": "Forest logs", - "layout": { - "column": 1, - "row": 7, - "width": 12, - "height": 3 - }, - "linkedEntityGuids": null, - "visualization": { - "id": "logger.log-table-widget" - }, - "rawConfiguration": { - "nrqlQueries": [ - { - "accountIds": [ - "${account_id}" - ], - "query": "SELECT `log_severity`,`timestamp`,`message` FROM Log WHERE `hostname` = '${name}'" - } - ] - } - } - ] - } - ], - "variables": [] -} diff --git a/terraform/new-relic/main.tf b/terraform/new-relic/main.tf deleted file mode 100644 index b058a26b5..000000000 --- a/terraform/new-relic/main.tf +++ /dev/null @@ -1,227 +0,0 @@ -# This Terraform script configures an environment to use New Relic for infrastructure monitoring -# and alerting, including setting up alert policies and a notification channel for Slack. 
- -terraform { - required_version = "~> 1.3" - required_providers { - newrelic = { - source = "newrelic/newrelic" - version = "~> 3.0" - } - } - backend "s3" { - bucket = "forest-iac" - key = "new_relic/terraform.tfstate" - region = "us-west-1" - endpoints = { - s3 = "https://fra1.digitaloceanspaces.com" - } - skip_credentials_validation = true - skip_metadata_api_check = true - skip_requesting_account_id = true - skip_s3_checksum = true - } -} - -# Configure the New Relic provider -provider "newrelic" { - account_id = var.NEW_RELIC_ACCOUNT_ID - api_key = var.NEW_RELIC_API_KEY - region = "EU" # Valid regions are US and EU -} - -# This block of code uses Terraform's data source to fetch details of an existing New Relic -# alert policy named "Golden Signals". The "Golden Signals" are a set of monitoring parameters -# that originate from the Google SRE (Site Reliability Engineering) Handbook. They provide -# a high level overview of a system's health and are typically included in most monitoring setups. -# -# In the context of New Relic, the "Golden Signals" alert policy is created by default -# when a new New Relic account is created. This policy includes a set of predefined alert conditions based -# on the Google's Golden Signals concept. -# -# By fetching this policy using the data source, we can integrate these conditions with other -# resources managed in this script, such as linking it with a notification channel or adding it -# to a workflow. - -data "newrelic_alert_policy" "golden_signals" { - name = "Golden Signals" -} - -# Creation of a new New Relic alert policy for infrastructure or Contianer downtime -resource "newrelic_alert_policy" "alert" { - name = "Infrastruture Downtime Alert" -} - -# NRQL alert conditions for events such as host down, high disk/memory use, -# and container down, each with defined criteria and thresholds. 
- -resource "newrelic_nrql_alert_condition" "disk_space" { - policy_id = newrelic_alert_policy.alert.id - type = "static" - name = "High Disk Utilization" - description = "Alert when disk space usage is high on any host" - enabled = true - violation_time_limit_seconds = 3600 - - nrql { - query = "SELECT latest(diskUsedPercent) FROM StorageSample FACET hostname, mountPoint" - } - - critical { - operator = "above" - threshold = 85.0 - threshold_duration = 300 - threshold_occurrences = "ALL" - } - - warning { - operator = "above" - threshold = 70.0 - threshold_duration = 300 - threshold_occurrences = "ALL" - } -} - -resource "newrelic_nrql_alert_condition" "container_issue" { - policy_id = newrelic_alert_policy.alert.id - type = "static" - name = "Container Issue" - description = "Alert when any container on any host is restarting for more than 5 minutes" - enabled = true - violation_time_limit_seconds = 3600 - - nrql { - query = "SELECT count(*) FROM ContainerSample WHERE state = 'restarting' FACET containerName, entityName" - } - - critical { - operator = "above" - threshold = 0 - threshold_duration = 300 - threshold_occurrences = "all" - } - - fill_option = "none" - aggregation_window = 60 - aggregation_method = "event_flow" - aggregation_delay = 120 -} - -# This resource block defines a New Relic alert condition to monitor for host downtime. -# The NRQL query counts 'SystemSample' events from each host. -# If a host does not report any such events for a continuous 5-minute period (threshold_duration), it indicates the host might be down. -# The alert condition is critical and opens a violation when no events are detected from a host for the specified duration. -# This approach provides a proactive alerting mechanism to ensure system reliability. 
-resource "newrelic_nrql_alert_condition" "host_down" { - policy_id = newrelic_alert_policy.alert.id - type = "static" - name = "Host Down" - - description = <<-EOT - Host Down' alert indicates no SystemSample events from a host for 5 minutes. Action needed to avoid possible issues - EOT - - enabled = true - violation_time_limit_seconds = 259200 - - nrql { - query = "SELECT count(*) FROM SystemSample FACET entityName" - } - - critical { - operator = "below_or_equals" - threshold = 0 - threshold_duration = 300 - threshold_occurrences = "all" - } - fill_option = "none" - aggregation_window = 60 - aggregation_method = "event_flow" - aggregation_delay = 120 - expiration_duration = 600 - open_violation_on_expiration = true - close_violations_on_expiration = true -} - -resource "newrelic_nrql_alert_condition" "forestmainnet_not_working" { - policy_id = newrelic_alert_policy.alert.id - type = "static" - name = "Forest not working" - - description = <<-EOT - Error: forest is currently not functioning properly. The issue appears to be that the Epoch Count has fallen to zero. Please verify all necessary configurations and requirements. - EOT - - enabled = true - violation_time_limit_seconds = 21600 - - nrql { - query = "SELECT latest(head_epoch) FROM Metric WHERE clusterName = 'forest-mainnet' or clusterName = 'forest-calibnet'" - } - - critical { - operator = "below_or_equals" - threshold = 0 - threshold_duration = 300 - threshold_occurrences = "all" - } - fill_option = "none" - aggregation_window = 60 - aggregation_method = "event_flow" - aggregation_delay = 120 -} - -# Setting up a Slack channel as the notification channel for alerts -resource "newrelic_notification_channel" "slack-channel" { - name = "slack" - type = "SLACK" - destination_id = var.slack_destination_id - product = "IINT" - - property { - key = "channelId" - value = var.slack_channel_id - } - property { - key = "customDetailsSlack" - value = <<-EOT - 'The '{{ annotations.description }}' has been activated. 
The condition has exceeded the defined threshold. Kindly examine this issue on the New Relic dashboard for more extensive data and potential mitigation steps.' - EOT - } -} - - -# Creation of a New Relic workflow that includes issues filtered by the policy IDs -# and sends notifications to the configured Slack channel -resource "newrelic_workflow" "slack_workflow" { - name = "Slack Workflow" - muting_rules_handling = "NOTIFY_ALL_ISSUES" - - issues_filter { - name = "Filter-name" - type = "FILTER" - - predicate { - attribute = "labels.policyIds" - operator = "EXACTLY_MATCHES" - values = [newrelic_alert_policy.alert.id, data.newrelic_alert_policy.golden_signals.id] - } - } - - destination { - channel_id = newrelic_notification_channel.slack-channel.id - } -} - -locals { - name = split(",", "forest-mainnet,forest-calibnet") -} - -resource "newrelic_one_dashboard_json" "forest_dashboard" { - for_each = { for name in local.name : name => name } - - json = templatefile("forest.json", { - name = each.value - account_id = var.NEW_RELIC_ACCOUNT_ID - }) -} diff --git a/terraform/new-relic/variable.tf b/terraform/new-relic/variable.tf deleted file mode 100644 index c2c9f2c07..000000000 --- a/terraform/new-relic/variable.tf +++ /dev/null @@ -1,23 +0,0 @@ -variable "NEW_RELIC_ACCOUNT_ID" { - type = string - description = "The New Relic Account ID" - sensitive = true -} - -variable "NEW_RELIC_API_KEY" { - description = "The New Relic API KEY" - type = string - sensitive = true -} - -variable "slack_destination_id" { - description = "The unique identifier for the Slack workspace where notifications will be sent." - default = "f902e020-5993-4425-9ae3-133084fc870d" - type = string -} - -variable "slack_channel_id" { - description = "The unique identifier for the Slack channel(forest-notifications), where notifications will be posted." - type = string - default = "C036TCEF0CU" -}