From 2d30a5893fd9552f52f405f4a88426d666767500 Mon Sep 17 00:00:00 2001
From: Reinaldy Rafli
Date: Sun, 28 Jan 2024 21:59:35 +0700
Subject: [PATCH] feat: monitoring

---
Review notes (everything between "---" and the first "diff --git" is
ignored by git am):

* What the patch does: adds a monitoring/ Compose project (Prometheus and
  Grafana on the external "monitoring" network), adds a postgres-exporter
  service to captcha/, and enables a Traefik Prometheus metrics entry
  point on :8082 and attaches Traefik to the "monitoring" network.
* Layout: the patch text is restored to one entry per line. YAML
  indentation was rebuilt with 2-space steps; the context lines of the
  captcha/ and traefik/ hunks are assumed to follow the same layout --
  TODO confirm against the target tree before applying.
* GF_USERS_ALLOW_SIGN_UP is now the quoted string "false" instead of a
  bare YAML boolean inside an environment mapping.
* withCredentials in the Grafana datasource is now an explicit false
  instead of an empty (null) value.
* Only in-place edits and trailing comments were made inside hunks, so the
  "@@" line counts and the diffstat below are unchanged. The blob ids on
  the "index" lines of edited files no longer describe the edited content.
* Not changed, please confirm: image tags are unpinned (latest / no tag);
  restart_policy.condition "unless-stopped" in captcha/; empty basic_auth
  credentials for the Uptime Kuma scrape job.

 captcha/docker-compose.yml        | 32 +++++++++++
 monitoring/.env.example           |  2 +
 monitoring/docker-compose.yml     | 88 +++++++++++++++++++++++++++++++
 monitoring/grafana-datasource.yml | 50 ++++++++++++++++++
 monitoring/prometheus.yml         | 35 ++++++++++++
 traefik/docker-compose.yml        |  6 +++
 6 files changed, 213 insertions(+)
 create mode 100644 monitoring/.env.example
 create mode 100644 monitoring/docker-compose.yml
 create mode 100644 monitoring/grafana-datasource.yml
 create mode 100644 monitoring/prometheus.yml

diff --git a/captcha/docker-compose.yml b/captcha/docker-compose.yml
index 304eea3..afecbae 100644
--- a/captcha/docker-compose.yml
+++ b/captcha/docker-compose.yml
@@ -35,6 +35,36 @@ services:
       options:
         max-size: 10M
         max-file: 3
+
+  postgres-exporter:  # scraped by the "Captcha Postgres" job in monitoring/prometheus.yml
+    hostname: captcha.postgres-exporter
+    image: quay.io/prometheuscommunity/postgres-exporter
+    platform: linux/amd64
+    environment:
+      DATA_SOURCE_NAME: "${POSTGRES_URL}"
+    networks:
+      - monitoring
+      - internal
+    deploy:
+      mode: replicated
+      replicas: 1
+      restart_policy:
+        condition: unless-stopped  # NOTE(review): Compose docs list none/on-failure/any here - confirm this value is accepted
+        delay: 30s
+        window: 120s
+      resources:
+        limits:
+          memory: 200MB
+          cpus: '1'
+        reservations:
+          memory: 50MB
+          cpus: '0.10'
+    logging:
+      driver: local
+      options:
+        max-size: 1M
+        max-file: 3
+
   application:
     image: ghcr.io/teknologi-umum/captcha:edge
     environment:
@@ -127,3 +157,5 @@ networks:
     ipam:
       config:
         - subnet: 172.16.22.0/28
+  monitoring:
+    external: true
diff --git a/monitoring/.env.example b/monitoring/.env.example
new file mode 100644
index 0000000..ed96233
--- /dev/null
+++ b/monitoring/.env.example
@@ -0,0 +1,2 @@
+GF_SECURITY_ADMIN_USER=
+GF_SECURITY_ADMIN_PASSWORD=
diff --git a/monitoring/docker-compose.yml b/monitoring/docker-compose.yml
new file mode 100644
index 0000000..5a62040
--- /dev/null
+++ b/monitoring/docker-compose.yml
@@ -0,0 +1,88 @@
+services:
+  prometheus:  # scrape configuration is bind-mounted from ./prometheus.yml
+    image: prom/prometheus:latest
+    volumes:
+      - ./prometheus.yml:/etc/prometheus/prometheus.yml
+      - prometheus-data:/prometheus
+    deploy:
+      mode: replicated
+      replicas: 1
+      restart_policy:
+        condition: on-failure
+        delay: 30s
+        max_attempts: 10
+        window: 120s
+      resources:
+        limits:
+          memory: 250MB
+          cpus: '1'
+        reservations:
+          memory: 50MB
+          cpus: '0.10'
+    healthcheck:
+      test: "wget -O - -T 5 -S http://localhost:9090/api/v1/status/runtimeinfo"
+      interval: 30s
+      timeout: 10s
+      retries: 5
+    networks:
+      - monitoring
+
+  grafana:  # published through Traefik via the labels below; waits for a healthy prometheus
+    image: grafana/grafana:latest
+    user: '472'
+    deploy:
+      mode: replicated
+      replicas: 1
+      restart_policy:
+        condition: on-failure
+        delay: 30s
+        max_attempts: 10
+        window: 120s
+      resources:
+        limits:
+          memory: 250MB
+          cpus: '1'
+        reservations:
+          memory: 50MB
+          cpus: '0.10'
+    environment:
+      GF_INSTALL_PLUGINS: 'grafana-clock-panel,grafana-simple-json-datasource'
+      GF_SECURITY_ADMIN_USER:  # intentionally valueless: resolved from the environment Compose runs in (see .env.example)
+      GF_SECURITY_ADMIN_PASSWORD:  # intentionally valueless: resolved from the environment Compose runs in (see .env.example)
+      GF_USERS_ALLOW_SIGN_UP: "false"  # quoted so the value stays a string rather than a YAML boolean
+    healthcheck:
+      test: "wget -O - -T 5 -S http://localhost:3000/api/health"
+      interval: 30s
+      timeout: 10s
+      retries: 5
+    volumes:
+      - grafana-data:/var/lib/grafana
+      - ./grafana-datasource.yml:/etc/grafana/provisioning/datasources/datasource.yml
+    labels:
+      - "traefik.enable=true"
+      - "traefik.http.routers.grafana.entrypoints=web,websecure"
+      - "traefik.http.routers.grafana.rule=Host(`grafana.teknologiumum.com`)"
+      - "traefik.http.routers.grafana.tls.certresolver=tlsresolver"
+      - "traefik.http.routers.grafana.middlewares=grafana-rate,grafana-redirectscheme"
+      - "traefik.http.services.grafana.loadbalancer.server.port=3000"
+      - "traefik.http.services.grafana.loadbalancer.server.scheme=http"
+      - "traefik.http.services.grafana.loadbalancer.healthcheck.interval=30s"
+      - "traefik.http.services.grafana.loadbalancer.healthcheck.path=/api/health"
+      - "traefik.http.middlewares.grafana-rate.ratelimit.average=100"
+      - "traefik.http.middlewares.grafana-rate.ratelimit.burst=20"
+      - "traefik.http.middlewares.grafana-rate.ratelimit.period=1s"
+      - "traefik.http.middlewares.grafana-redirectscheme.redirectscheme.scheme=https"
+      - "traefik.http.middlewares.grafana-redirectscheme.redirectscheme.permanent=true"
+    depends_on:
+      prometheus:
+        condition: service_healthy
+    networks:
+      - monitoring
+
+volumes:
+  prometheus-data:
+  grafana-data:
+
+networks:
+  monitoring:
+    external: true
diff --git a/monitoring/grafana-datasource.yml b/monitoring/grafana-datasource.yml
new file mode 100644
index 0000000..55836e6
--- /dev/null
+++ b/monitoring/grafana-datasource.yml
@@ -0,0 +1,50 @@
+# config file version
+apiVersion: 1
+
+# list of datasources that should be deleted from the database
+deleteDatasources:
+  - name: Prometheus
+    orgId: 1
+
+# list of datasources to insert/update depending
+# on what's available in the database
+datasources:
+  # name of the datasource. Required
+  - name: Prometheus
+    # datasource type. Required
+    type: prometheus
+    # access mode. direct or proxy. Required
+    access: proxy
+    # org id. will default to orgId 1 if not specified
+    orgId: 1
+    # url
+    url: http://prometheus:9090
+    # database password, if used
+    # password:
+    # database user, if used
+    # user:
+    # database name, if used
+    # database:
+    # enable/disable basic auth
+    basicAuth: false
+    # basic auth username, if used
+    # basicAuthUser:
+    # basic auth password, if used
+    # basicAuthPassword:
+    # enable/disable with credentials headers
+    withCredentials: false  # explicit value instead of an empty (null) scalar
+    # mark as default datasource. Max one per org
+    isDefault: true
+    # fields that will be converted to json and stored in json_data
+    jsonData:
+      graphiteVersion: "1.1"
+      tlsAuth: false
+      tlsAuthWithCACert: false
+    # json object of data that will be encrypted.
+    # secureJsonData:
+    #   tlsCACert: "..."
+    #   tlsClientCert: "..."
+    #   tlsClientKey: "..."
+    version: 1
+    # allow users to edit datasources from the UI.
+    editable: false
diff --git a/monitoring/prometheus.yml b/monitoring/prometheus.yml
new file mode 100644
index 0000000..b2d778d
--- /dev/null
+++ b/monitoring/prometheus.yml
@@ -0,0 +1,35 @@
+global:
+  # How frequently to scrape targets by default.
+  scrape_interval: 30s
+
+  # How long until a scrape request times out.
+  scrape_timeout: 15s
+
+  # How frequently to evaluate rules.
+  evaluation_interval: 10m
+
+# A list of scrape configurations.
+scrape_configs:
+  - job_name: Traefik
+    static_configs:
+      - labels:
+          application: Traefik
+        targets:
+          - traefik:8082  # port of the "metrics" entry point added in traefik/docker-compose.yml
+
+  - job_name: Uptime Kuma
+    static_configs:
+      - labels:
+          application: Uptime Kuma
+        targets:
+          - 192.168.193.71:43241
+    basic_auth:
+      username:  # NOTE(review): empty in the committed file - presumably filled in at deploy time; confirm
+      password:  # NOTE(review): empty in the committed file - presumably filled in at deploy time; confirm
+
+  - job_name: Captcha Postgres
+    static_configs:
+      - labels:
+          application: PostgreSQL
+        targets:
+          - captcha.postgres-exporter:9187  # hostname set on postgres-exporter in captcha/docker-compose.yml
diff --git a/traefik/docker-compose.yml b/traefik/docker-compose.yml
index 8dbde0c..bf32899 100644
--- a/traefik/docker-compose.yml
+++ b/traefik/docker-compose.yml
@@ -19,6 +19,9 @@ services:
       - "--entrypoints.websecure.http3"
       - "--global.sendanonymoususage=true"
       - "--log.level=INFO"
+      - "--metrics.prometheus=true"
+      - "--entryPoints.metrics.address=:8082"
+      - "--metrics.prometheus.entryPoint=metrics"
     environment:
       - TZ=UTC
     ports:
@@ -79,6 +82,7 @@ services:
       - projects
       - conference
       - gold
+      - monitoring
 
 networks:
   pesto:
@@ -120,3 +124,5 @@ networks:
     external: true
   gold:
     external: true
+  monitoring:
+    external: true