charts/cloudnative-pg/values.yaml

#
# Copyright The CloudNativePG Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Default values for CloudNativePG.
# This is a YAML-formatted file.
# Please declare variables to be passed to your templates.

replicaCount: 1

image:
  repository: ghcr.io/cloudnative-pg/cloudnative-pg
  pullPolicy: IfNotPresent
  # -- Overrides the image tag whose default is the chart appVersion.
  tag: ""

imagePullSecrets: []
nameOverride: ""
fullnameOverride: ""

crds:
  # -- Specifies whether the CRDs should be created when installing the chart.
  create: true

# -- The webhook configuration.
webhook:
  port: 9443
  mutating:
    create: true
    failurePolicy: Fail
  validating:
    create: true
    failurePolicy: Fail
  livenessProbe:
    initialDelaySeconds: 3
  readinessProbe:
    initialDelaySeconds: 3

# -- Operator configuration.
config:
  # -- Specifies whether the secret should be created.
  create: true
  # -- The name of the configmap/secret to use.
  name: cnpg-controller-manager-config
  # -- Specifies whether it should be stored in a secret, instead of a configmap.
  secret: false
  # -- The content of the configmap/secret, see
  # https://cloudnative-pg.io/documentation/current/operator_conf/#available-options
  # for all the available options.
  data: {}
  # INHERITED_ANNOTATIONS: categories
  # INHERITED_LABELS: environment, workload, app
  # WATCH_NAMESPACE: namespace-a,namespace-b

# -- Additinal arguments to be added to the operator's args list.
additionalArgs: []

# -- Array containing extra environment variables which can be templated.
# For example:
#  - name: RELEASE_NAME
#    value: "{{ .Release.Name }}"
#  - name: MY_VAR
#    value: "mySpecialKey"
additionalEnv: []

serviceAccount:
  # -- Specifies whether the service account should be created.
  create: true
  # -- The name of the service account to use.
  # If not set and create is true, a name is generated using the fullname template.
  name: ""

rbac:
  # -- Specifies whether ClusterRole and ClusterRoleBinding should be created.
  create: true
  # -- Aggregate ClusterRoles to Kubernetes default user-facing roles.
  # Ref: https://kubernetes.io/docs/reference/access-authn-authz/rbac/#user-facing-roles
  aggregateClusterRoles: false

# -- Annotations to be added to all other resources.
commonAnnotations: {}
# -- Annotations to be added to the pod.
podAnnotations: {}
# -- Labels to be added to the pod.
podLabels: {}

# -- Container Security Context.
containerSecurityContext:
  allowPrivilegeEscalation: false
  readOnlyRootFilesystem: true
  runAsUser: 10001
  runAsGroup: 10001
  seccompProfile:
    type: RuntimeDefault
  capabilities:
    drop:
      - "ALL"

# -- Security Context for the whole pod.
podSecurityContext:
  runAsNonRoot: true
  seccompProfile:
    type: RuntimeDefault
  # fsGroup: 2000

# -- Priority indicates the importance of a Pod relative to other Pods.
priorityClassName: ""

service:
  type: ClusterIP
  # -- DO NOT CHANGE THE SERVICE NAME as it is currently used to generate the certificate
  # and can not be configured
  name: cnpg-webhook-service
  port: 443

resources: {}
  # If you want to specify resources, uncomment the following
  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
  #
  # limits:
  #   cpu: 100m
  #   memory: 200Mi
  # requests:
  #   cpu: 100m
  #   memory: 100Mi

# -- Nodeselector for the operator to be installed.
nodeSelector: {}

# -- Tolerations for the operator to be installed.
tolerations: []

# -- Affinity for the operator to be installed.
affinity: {}

monitoring:

  # -- Specifies whether the monitoring should be enabled. Requires Prometheus Operator CRDs.
  podMonitorEnabled: false
  # -- Additional labels for the podMonitor
  podMonitorAdditionalLabels: {}

  grafanaDashboard:
    create: false
    # -- Allows overriding the namespace where the ConfigMap will be created, defaulting to the same one as the Release.
    namespace: ""
    # -- The name of the ConfigMap containing the dashboard.
    configMapName: "cnpg-grafana-dashboard"
    # -- Label that ConfigMaps should have to be loaded as dashboards.  DEPRECATED: Use labels instead.
    sidecarLabel: "grafana_dashboard"
    # -- Label value that ConfigMaps should have to be loaded as dashboards.  DEPRECATED: Use labels instead.
    sidecarLabelValue: "1"
    # -- Labels that ConfigMaps should have to get configured in Grafana.
    labels: {}
    # -- Annotations that ConfigMaps can have to get configured in Grafana.
    annotations: {}

# Default monitoring queries
monitoringQueriesConfigMap:
  # -- The name of the default monitoring configmap.
  name: cnpg-default-monitoring
  # -- A string representation of a YAML defining monitoring queries.
  queries: |
    backends:
      query: |
       SELECT sa.datname
           , sa.usename
           , sa.application_name
           , states.state
           , COALESCE(sa.count, 0) AS total
           , COALESCE(sa.max_tx_secs, 0) AS max_tx_duration_seconds
           FROM ( VALUES ('active')
               , ('idle')
               , ('idle in transaction')
               , ('idle in transaction (aborted)')
               , ('fastpath function call')
               , ('disabled')
               ) AS states(state)
           LEFT JOIN (
               SELECT datname
                   , state
                   , usename
                   , COALESCE(application_name, '') AS application_name
                   , COUNT(*)
                   , COALESCE(EXTRACT (EPOCH FROM (max(now() - xact_start))), 0) AS max_tx_secs
               FROM pg_catalog.pg_stat_activity
               GROUP BY datname, state, usename, application_name
           ) sa ON states.state = sa.state
           WHERE sa.usename IS NOT NULL
      metrics:
        - datname:
            usage: "LABEL"
            description: "Name of the database"
        - usename:
            usage: "LABEL"
            description: "Name of the user"
        - application_name:
            usage: "LABEL"
            description: "Name of the application"
        - state:
            usage: "LABEL"
            description: "State of the backend"
        - total:
            usage: "GAUGE"
            description: "Number of backends"
        - max_tx_duration_seconds:
            usage: "GAUGE"
            description: "Maximum duration of a transaction in seconds"

    backends_waiting:
      query: |
       SELECT count(*) AS total
       FROM pg_catalog.pg_locks blocked_locks
       JOIN pg_catalog.pg_locks blocking_locks
         ON blocking_locks.locktype = blocked_locks.locktype
         AND blocking_locks.database IS NOT DISTINCT FROM blocked_locks.database
         AND blocking_locks.relation IS NOT DISTINCT FROM blocked_locks.relation
         AND blocking_locks.page IS NOT DISTINCT FROM blocked_locks.page
         AND blocking_locks.tuple IS NOT DISTINCT FROM blocked_locks.tuple
         AND blocking_locks.virtualxid IS NOT DISTINCT FROM blocked_locks.virtualxid
         AND blocking_locks.transactionid IS NOT DISTINCT FROM blocked_locks.transactionid
         AND blocking_locks.classid IS NOT DISTINCT FROM blocked_locks.classid
         AND blocking_locks.objid IS NOT DISTINCT FROM blocked_locks.objid
         AND blocking_locks.objsubid IS NOT DISTINCT FROM blocked_locks.objsubid
         AND blocking_locks.pid != blocked_locks.pid
       JOIN pg_catalog.pg_stat_activity blocking_activity ON blocking_activity.pid = blocking_locks.pid
       WHERE NOT blocked_locks.granted
      metrics:
        - total:
            usage: "GAUGE"
            description: "Total number of backends that are currently waiting on other queries"

    pg_database:
      query: |
        SELECT datname
          , pg_catalog.pg_database_size(datname) AS size_bytes
          , pg_catalog.age(datfrozenxid) AS xid_age
          , pg_catalog.mxid_age(datminmxid) AS mxid_age
        FROM pg_catalog.pg_database
      metrics:
        - datname:
            usage: "LABEL"
            description: "Name of the database"
        - size_bytes:
            usage: "GAUGE"
            description: "Disk space used by the database"
        - xid_age:
            usage: "GAUGE"
            description: "Number of transactions from the frozen XID to the current one"
        - mxid_age:
            usage: "GAUGE"
            description: "Number of multiple transactions (Multixact) from the frozen XID to the current one"

    pg_postmaster:
      query: |
        SELECT EXTRACT(EPOCH FROM pg_postmaster_start_time) AS start_time
        FROM pg_catalog.pg_postmaster_start_time()
      metrics:
        - start_time:
            usage: "GAUGE"
            description: "Time at which postgres started (based on epoch)"

    pg_replication:
      query: "SELECT CASE WHEN (
                NOT pg_catalog.pg_is_in_recovery()
                OR pg_catalog.pg_last_wal_receive_lsn() = pg_catalog.pg_last_wal_replay_lsn())
              THEN 0
              ELSE GREATEST (0,
                EXTRACT(EPOCH FROM (now() - pg_catalog.pg_last_xact_replay_timestamp())))
              END AS lag,
              pg_catalog.pg_is_in_recovery() AS in_recovery,
              EXISTS (TABLE pg_stat_wal_receiver) AS is_wal_receiver_up,
              (SELECT count(*) FROM pg_catalog.pg_stat_replication) AS streaming_replicas"
      metrics:
        - lag:
            usage: "GAUGE"
            description: "Replication lag behind primary in seconds"
        - in_recovery:
            usage: "GAUGE"
            description: "Whether the instance is in recovery"
        - is_wal_receiver_up:
            usage: "GAUGE"
            description: "Whether the instance wal_receiver is up"
        - streaming_replicas:
            usage: "GAUGE"
            description: "Number of streaming replicas connected to the instance"

    pg_replication_slots:
      query: |
        SELECT slot_name,
          slot_type,
          database,
          active,
          (CASE pg_catalog.pg_is_in_recovery()
            WHEN TRUE THEN pg_catalog.pg_wal_lsn_diff(pg_catalog.pg_last_wal_receive_lsn(), restart_lsn)
            ELSE pg_catalog.pg_wal_lsn_diff(pg_catalog.pg_current_wal_lsn(), restart_lsn)
          END) as pg_wal_lsn_diff
        FROM pg_catalog.pg_replication_slots
        WHERE NOT temporary
      metrics:
        - slot_name:
            usage: "LABEL"
            description: "Name of the replication slot"
        - slot_type:
            usage: "LABEL"
            description: "Type of the replication slot"
        - database:
            usage: "LABEL"
            description: "Name of the database"
        - active:
            usage: "GAUGE"
            description: "Flag indicating whether the slot is active"
        - pg_wal_lsn_diff:
            usage: "GAUGE"
            description: "Replication lag in bytes"

    pg_stat_archiver:
      query: |
        SELECT archived_count
          , failed_count
          , COALESCE(EXTRACT(EPOCH FROM (now() - last_archived_time)), -1) AS seconds_since_last_archival
          , COALESCE(EXTRACT(EPOCH FROM (now() - last_failed_time)), -1) AS seconds_since_last_failure
          , COALESCE(EXTRACT(EPOCH FROM last_archived_time), -1) AS last_archived_time
          , COALESCE(EXTRACT(EPOCH FROM last_failed_time), -1) AS last_failed_time
          , COALESCE(CAST(CAST('x'||pg_catalog.right(pg_catalog.split_part(last_archived_wal, '.', 1), 16) AS pg_catalog.bit(64)) AS pg_catalog.int8), -1) AS last_archived_wal_start_lsn
          , COALESCE(CAST(CAST('x'||pg_catalog.right(pg_catalog.split_part(last_failed_wal, '.', 1), 16) AS pg_catalog.bit(64)) AS pg_catalog.int8), -1) AS last_failed_wal_start_lsn
          , EXTRACT(EPOCH FROM stats_reset) AS stats_reset_time
        FROM pg_catalog.pg_stat_archiver
      metrics:
        - archived_count:
            usage: "COUNTER"
            description: "Number of WAL files that have been successfully archived"
        - failed_count:
            usage: "COUNTER"
            description: "Number of failed attempts for archiving WAL files"
        - seconds_since_last_archival:
            usage: "GAUGE"
            description: "Seconds since the last successful archival operation"
        - seconds_since_last_failure:
            usage: "GAUGE"
            description: "Seconds since the last failed archival operation"
        - last_archived_time:
            usage: "GAUGE"
            description: "Epoch of the last time WAL archiving succeeded"
        - last_failed_time:
            usage: "GAUGE"
            description: "Epoch of the last time WAL archiving failed"
        - last_archived_wal_start_lsn:
            usage: "GAUGE"
            description: "Archived WAL start LSN"
        - last_failed_wal_start_lsn:
            usage: "GAUGE"
            description: "Last failed WAL LSN"
        - stats_reset_time:
            usage: "GAUGE"
            description: "Time at which these statistics were last reset"

    pg_stat_bgwriter:
      runonserver: "<17.0.0"
      query: |
        SELECT checkpoints_timed
          , checkpoints_req
          , checkpoint_write_time
          , checkpoint_sync_time
          , buffers_checkpoint
          , buffers_clean
          , maxwritten_clean
          , buffers_backend
          , buffers_backend_fsync
          , buffers_alloc
        FROM pg_catalog.pg_stat_bgwriter
      metrics:
        - checkpoints_timed:
            usage: "COUNTER"
            description: "Number of scheduled checkpoints that have been performed"
        - checkpoints_req:
            usage: "COUNTER"
            description: "Number of requested checkpoints that have been performed"
        - checkpoint_write_time:
            usage: "COUNTER"
            description: "Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds"
        - checkpoint_sync_time:
            usage: "COUNTER"
            description: "Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds"
        - buffers_checkpoint:
            usage: "COUNTER"
            description: "Number of buffers written during checkpoints"
        - buffers_clean:
            usage: "COUNTER"
            description: "Number of buffers written by the background writer"
        - maxwritten_clean:
            usage: "COUNTER"
            description: "Number of times the background writer stopped a cleaning scan because it had written too many buffers"
        - buffers_backend:
            usage: "COUNTER"
            description: "Number of buffers written directly by a backend"
        - buffers_backend_fsync:
            usage: "COUNTER"
            description: "Number of times a backend had to execute its own fsync call (normally the background writer handles those even when the backend does its own write)"
        - buffers_alloc:
            usage: "COUNTER"
            description: "Number of buffers allocated"

    pg_stat_database:
      query: |
        SELECT datname
          , xact_commit
          , xact_rollback
          , blks_read
          , blks_hit
          , tup_returned
          , tup_fetched
          , tup_inserted
          , tup_updated
          , tup_deleted
          , conflicts
          , temp_files
          , temp_bytes
          , deadlocks
          , blk_read_time
          , blk_write_time
        FROM pg_catalog.pg_stat_database
      metrics:
        - datname:
            usage: "LABEL"
            description: "Name of this database"
        - xact_commit:
            usage: "COUNTER"
            description: "Number of transactions in this database that have been committed"
        - xact_rollback:
            usage: "COUNTER"
            description: "Number of transactions in this database that have been rolled back"
        - blks_read:
            usage: "COUNTER"
            description: "Number of disk blocks read in this database"
        - blks_hit:
            usage: "COUNTER"
            description: "Number of times disk blocks were found already in the buffer cache, so that a read was not necessary (this only includes hits in the PostgreSQL buffer cache, not the operating system's file system cache)"
        - tup_returned:
            usage: "COUNTER"
            description: "Number of rows returned by queries in this database"
        - tup_fetched:
            usage: "COUNTER"
            description: "Number of rows fetched by queries in this database"
        - tup_inserted:
            usage: "COUNTER"
            description: "Number of rows inserted by queries in this database"
        - tup_updated:
            usage: "COUNTER"
            description: "Number of rows updated by queries in this database"
        - tup_deleted:
            usage: "COUNTER"
            description: "Number of rows deleted by queries in this database"
        - conflicts:
            usage: "COUNTER"
            description: "Number of queries canceled due to conflicts with recovery in this database"
        - temp_files:
            usage: "COUNTER"
            description: "Number of temporary files created by queries in this database"
        - temp_bytes:
            usage: "COUNTER"
            description: "Total amount of data written to temporary files by queries in this database"
        - deadlocks:
            usage: "COUNTER"
            description: "Number of deadlocks detected in this database"
        - blk_read_time:
            usage: "COUNTER"
            description: "Time spent reading data file blocks by backends in this database, in milliseconds"
        - blk_write_time:
            usage: "COUNTER"
            description: "Time spent writing data file blocks by backends in this database, in milliseconds"

    pg_stat_replication:
      primary: true
      query: |
       SELECT usename
         , COALESCE(application_name, '') AS application_name
         , COALESCE(client_addr::text, '') AS client_addr
         , COALESCE(client_port::text, '') AS client_port
         , EXTRACT(EPOCH FROM backend_start) AS backend_start
         , COALESCE(pg_catalog.age(backend_xmin), 0) AS backend_xmin_age
         , pg_catalog.pg_wal_lsn_diff(pg_catalog.pg_current_wal_lsn(), sent_lsn) AS sent_diff_bytes
         , pg_catalog.pg_wal_lsn_diff(pg_catalog.pg_current_wal_lsn(), write_lsn) AS write_diff_bytes
         , pg_catalog.pg_wal_lsn_diff(pg_catalog.pg_current_wal_lsn(), flush_lsn) AS flush_diff_bytes
         , COALESCE(pg_catalog.pg_wal_lsn_diff(pg_catalog.pg_current_wal_lsn(), replay_lsn),0) AS replay_diff_bytes
         , COALESCE((EXTRACT(EPOCH FROM write_lag)),0)::float AS write_lag_seconds
         , COALESCE((EXTRACT(EPOCH FROM flush_lag)),0)::float AS flush_lag_seconds
         , COALESCE((EXTRACT(EPOCH FROM replay_lag)),0)::float AS replay_lag_seconds
       FROM pg_catalog.pg_stat_replication
      metrics:
        - usename:
            usage: "LABEL"
            description: "Name of the replication user"
        - application_name:
            usage: "LABEL"
            description: "Name of the application"
        - client_addr:
            usage: "LABEL"
            description: "Client IP address"
        - client_port:
            usage: "LABEL"
            description: "Client TCP port"
        - backend_start:
            usage: "COUNTER"
            description: "Time when this process was started"
        - backend_xmin_age:
            usage: "COUNTER"
            description: "The age of this standby's xmin horizon"
        - sent_diff_bytes:
            usage: "GAUGE"
            description: "Difference in bytes from the last write-ahead log location sent on this connection"
        - write_diff_bytes:
            usage: "GAUGE"
            description: "Difference in bytes from the last write-ahead log location written to disk by this standby server"
        - flush_diff_bytes:
            usage: "GAUGE"
            description: "Difference in bytes from the last write-ahead log location flushed to disk by this standby server"
        - replay_diff_bytes:
            usage: "GAUGE"
            description: "Difference in bytes from the last write-ahead log location replayed into the database on this standby server"
        - write_lag_seconds:
            usage: "GAUGE"
            description: "Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written it"
        - flush_lag_seconds:
            usage: "GAUGE"
            description: "Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written and flushed it"
        - replay_lag_seconds:
            usage: "GAUGE"
            description: "Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written, flushed and applied it"

    pg_settings:
      query: |
        SELECT name,
        CASE setting WHEN 'on' THEN '1' WHEN 'off' THEN '0' ELSE setting END AS setting
        FROM pg_catalog.pg_settings
        WHERE vartype IN ('integer', 'real', 'bool')
        ORDER BY 1
      metrics:
        - name:
            usage: "LABEL"
            description: "Name of the setting"
        - setting:
            usage: "GAUGE"
            description: "Setting value"