Skip to content

Commit

Permalink
Adds query_config_path to mssql integration to allow for custom metri…
Browse files Browse the repository at this point in the history
…cs through custom exporter config file
  • Loading branch information
StefanKurek committed Nov 14, 2023
1 parent c2c40f8 commit c692725
Show file tree
Hide file tree
Showing 6 changed files with 306 additions and 2 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ Main (unreleased)
- Make component list sortable in web UI. (@hainenber)

- Adds new metrics (`mssql_server_total_memory_bytes`, `mssql_server_target_memory_bytes`,
and `mssql_available_commit_memory_bytes`) for `mssql` integration.
and `mssql_available_commit_memory_bytes`) for `mssql` integration (@StefanKurek).

- Grafana Agent Operator: `config-reloader` container no longer runs as root.
(@rootmout)
Expand All @@ -101,6 +101,8 @@ Main (unreleased)

- Improved resilience of graph evaluation in presence of slow components. (@thampiotr)

- Adds new optional config parameter `query_config_path` to `mssql` integration to allow for custom metrics (@StefanKurek)

### Bugfixes

- Set exit code 1 on grafana-agentctl non-runnable command. (@fgouteroux)
Expand Down
223 changes: 223 additions & 0 deletions docs/sources/static/configuration/integrations/mssql-config.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,4 +77,227 @@ Full reference of options:
# The timeout for scraping metrics from the mssql instance.
[timeout: <duration> | default = "10s"]

# The file path for a YAML config file which allows for custom queries/metrics
# for the mssql instance
[query_config_path: <string>]

```

## Custom metrics
It is possible to retrieve custom prometheus metrics for a mssql instance using the optional `query_config_path` parameter.

This parameter should point to a YAML config file defined [here](https://github.com/burningalchemist/sql_exporter#collectors). If it does, it will use the new config to query your mssql instance and create whatever metrics are defined.
If you want additional metrics on top of the default provided ones, the default config should be used as a base.

The default config file used by this integration is as follows:
```
collector_name: mssql_standard
metrics:
- metric_name: mssql_local_time_seconds
type: gauge
help: 'Local time in seconds since epoch (Unix time).'
values: [unix_time]
query: |
SELECT DATEDIFF(second, '19700101', GETUTCDATE()) AS unix_time
- metric_name: mssql_connections
type: gauge
help: 'Number of active connections.'
key_labels:
- db
values: [count]
query: |
SELECT DB_NAME(sp.dbid) AS db, COUNT(sp.spid) AS count
FROM sys.sysprocesses sp
GROUP BY DB_NAME(sp.dbid)
#
# Collected from sys.dm_os_performance_counters
#
- metric_name: mssql_deadlocks_total
type: counter
help: 'Number of lock requests that resulted in a deadlock.'
values: [cntr_value]
query: |
SELECT cntr_value
FROM sys.dm_os_performance_counters WITH (NOLOCK)
WHERE counter_name = 'Number of Deadlocks/sec' AND instance_name = '_Total'
- metric_name: mssql_user_errors_total
type: counter
help: 'Number of user errors.'
values: [cntr_value]
query: |
SELECT cntr_value
FROM sys.dm_os_performance_counters WITH (NOLOCK)
WHERE counter_name = 'Errors/sec' AND instance_name = 'User Errors'
- metric_name: mssql_kill_connection_errors_total
type: counter
help: 'Number of severe errors that caused SQL Server to kill the connection.'
values: [cntr_value]
query: |
SELECT cntr_value
FROM sys.dm_os_performance_counters WITH (NOLOCK)
WHERE counter_name = 'Errors/sec' AND instance_name = 'Kill Connection Errors'
- metric_name: mssql_page_life_expectancy_seconds
type: gauge
help: 'The minimum number of seconds a page will stay in the buffer pool on this node without references.'
values: [cntr_value]
query: |
SELECT top(1) cntr_value
FROM sys.dm_os_performance_counters WITH (NOLOCK)
WHERE counter_name = 'Page life expectancy'
- metric_name: mssql_batch_requests_total
type: counter
help: 'Number of command batches received.'
values: [cntr_value]
query: |
SELECT cntr_value
FROM sys.dm_os_performance_counters WITH (NOLOCK)
WHERE counter_name = 'Batch Requests/sec'
- metric_name: mssql_log_growths_total
type: counter
help: 'Number of times the transaction log has been expanded, per database.'
key_labels:
- db
values: [cntr_value]
query: |
SELECT rtrim(instance_name) AS db, cntr_value
FROM sys.dm_os_performance_counters WITH (NOLOCK)
WHERE counter_name = 'Log Growths' AND instance_name <> '_Total'
- metric_name: mssql_buffer_cache_hit_ratio
type: gauge
help: 'Ratio of requests that hit the buffer cache'
values: [BufferCacheHitRatio]
query: |
SELECT (a.cntr_value * 1.0 / b.cntr_value) * 100.0 as BufferCacheHitRatio
FROM sys.dm_os_performance_counters a
JOIN (SELECT cntr_value, OBJECT_NAME
FROM sys.dm_os_performance_counters
WHERE counter_name = 'Buffer cache hit ratio base'
AND OBJECT_NAME = 'SQLServer:Buffer Manager') b ON a.OBJECT_NAME = b.OBJECT_NAME
WHERE a.counter_name = 'Buffer cache hit ratio'
AND a.OBJECT_NAME = 'SQLServer:Buffer Manager'
- metric_name: mssql_checkpoint_pages_sec
type: gauge
help: 'Checkpoint Pages Per Second'
values: [cntr_value]
query: |
SELECT cntr_value
FROM sys.dm_os_performance_counters
WHERE [counter_name] = 'Checkpoint pages/sec'
#
# Collected from sys.dm_io_virtual_file_stats
#
- metric_name: mssql_io_stall_seconds_total
type: counter
help: 'Stall time in seconds per database and I/O operation.'
key_labels:
- db
value_label: operation
values:
- read
- write
query_ref: mssql_io_stall
#
# Collected from sys.dm_os_process_memory
#
- metric_name: mssql_resident_memory_bytes
type: gauge
help: 'SQL Server resident memory size (AKA working set).'
values: [resident_memory_bytes]
query_ref: mssql_process_memory
- metric_name: mssql_virtual_memory_bytes
type: gauge
help: 'SQL Server committed virtual memory size.'
values: [virtual_memory_bytes]
query_ref: mssql_process_memory
- metric_name: mssql_available_commit_memory_bytes
type: gauge
help: 'SQL Server available to be committed memory size.'
values: [available_commit_limit_bytes]
query_ref: mssql_process_memory
- metric_name: mssql_memory_utilization_percentage
type: gauge
help: 'The percentage of committed memory that is in the working set.'
values: [memory_utilization_percentage]
query_ref: mssql_process_memory
- metric_name: mssql_page_fault_count_total
type: counter
help: 'The number of page faults that were incurred by the SQL Server process.'
values: [page_fault_count]
query_ref: mssql_process_memory
#
# Collected from sys.dm_os_sys_info
#
- metric_name: mssql_server_total_memory_bytes
type: gauge
help: 'SQL Server committed memory in the memory manager.'
values: [committed_memory_bytes]
query_ref: mssql_os_sys_info
- metric_name: mssql_server_target_memory_bytes
type: gauge
help: 'SQL Server target committed memory set for the memory manager.'
values: [committed_memory_target_bytes]
query_ref: mssql_os_sys_info
#
# Collected from sys.dm_os_sys_memory
#
- metric_name: mssql_os_memory
type: gauge
help: 'OS physical memory, used and available.'
value_label: 'state'
values: [used, available]
query: |
SELECT
(total_physical_memory_kb - available_physical_memory_kb) * 1024 AS used,
available_physical_memory_kb * 1024 AS available
FROM sys.dm_os_sys_memory
- metric_name: mssql_os_page_file
type: gauge
help: 'OS page file, used and available.'
value_label: 'state'
values: [used, available]
query: |
SELECT
(total_page_file_kb - available_page_file_kb) * 1024 AS used,
available_page_file_kb * 1024 AS available
FROM sys.dm_os_sys_memory
queries:
# Populates `mssql_io_stall` and `mssql_io_stall_total`
- query_name: mssql_io_stall
query: |
SELECT
cast(DB_Name(a.database_id) as varchar) AS [db],
sum(io_stall_read_ms) / 1000.0 AS [read],
sum(io_stall_write_ms) / 1000.0 AS [write]
FROM
sys.dm_io_virtual_file_stats(null, null) a
INNER JOIN sys.master_files b ON a.database_id = b.database_id AND a.file_id = b.file_id
GROUP BY a.database_id
# Populates `mssql_resident_memory_bytes`, `mssql_virtual_memory_bytes`, mssql_available_commit_memory_bytes,
# and `mssql_memory_utilization_percentage`, and `mssql_page_fault_count_total`
- query_name: mssql_process_memory
query: |
SELECT
physical_memory_in_use_kb * 1024 AS resident_memory_bytes,
virtual_address_space_committed_kb * 1024 AS virtual_memory_bytes,
available_commit_limit_kb * 1024 AS available_commit_limit_bytes,
memory_utilization_percentage,
page_fault_count
FROM sys.dm_os_process_memory
# Populates `mssql_server_total_memory_bytes` and `mssql_server_target_memory_bytes`.
- query_name: mssql_os_sys_info
query: |
SELECT
committed_kb * 1024 AS committed_memory_bytes,
committed_target_kb * 1024 AS committed_memory_target_bytes
FROM sys.dm_os_sys_info
```
2 changes: 1 addition & 1 deletion pkg/integrations/mssql/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import (
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
dto "github.com/prometheus/client_model/go"
"gopkg.in/yaml.v2"
"gopkg.in/yaml.v3"
)

// Embedded config.CollectorConfig as yaml.
Expand Down
29 changes: 29 additions & 0 deletions pkg/integrations/mssql/sql_exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@ import (
"errors"
"fmt"
"net/url"
"os"
"time"

"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
config_util "github.com/prometheus/common/config"
"gopkg.in/yaml.v3"

"github.com/burningalchemist/sql_exporter"
"github.com/burningalchemist/sql_exporter/config"
Expand All @@ -31,6 +33,7 @@ type Config struct {
MaxIdleConnections int `yaml:"max_idle_connections,omitempty"`
MaxOpenConnections int `yaml:"max_open_connections,omitempty"`
Timeout time.Duration `yaml:"timeout,omitempty"`
QueryConfigPath string `yaml:"query_config_path,omitempty"`
}

func (c Config) validate() error {
Expand Down Expand Up @@ -59,6 +62,20 @@ func (c Config) validate() error {
return errors.New("timeout must be positive")
}

if c.QueryConfigPath != "" {
_, err := os.Stat(c.QueryConfigPath)

if err == nil {
return nil
}

if errors.Is(err, os.ErrNotExist) {
return errors.New("query_config_path must be a valid path of a YAML config file")
} else {
return errors.New("query_config_path config not in correct format")
}
}

return nil
}

Expand Down Expand Up @@ -96,6 +113,18 @@ func (c *Config) NewIntegration(l log.Logger) (integrations.Integration, error)
return nil, fmt.Errorf("failed to validate config: %w", err)
}

// Initialize collectorConfig from file if needed
if c.QueryConfigPath != "" {
yamlFile, err := os.ReadFile(c.QueryConfigPath)
if err != nil {
return nil, fmt.Errorf("failed to create mssql target: problem reading query_config_path: %w", err)
}
err = yaml.Unmarshal(yamlFile, &collectorConfig)
if err != nil {
return nil, fmt.Errorf("failed to create mssql target: query_config_path file not in correct format: %w", err)
}
}

t, err := sql_exporter.NewTarget(
"mssqlintegration",
"",
Expand Down
49 changes: 49 additions & 0 deletions pkg/integrations/mssql/sql_exporter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package mssql

import (
"os"
"path/filepath"
"testing"
"time"

Expand All @@ -11,6 +12,11 @@ import (
)

func TestConfig_validate(t *testing.T) {
badQueryPath, err := filepath.Abs("./test/bad_query_config.yaml")
goodQueryPath, err := filepath.Abs("./collector_config.yaml")
if err != nil {

}
testCases := []struct {
name string
input Config
Expand Down Expand Up @@ -84,6 +90,49 @@ func TestConfig_validate(t *testing.T) {
},
err: "timeout must be positive",
},
{
name: "bad query config path",
input: Config{
ConnectionString: "sqlserver://user:pass@localhost:1433",
MaxIdleConnections: 3,
MaxOpenConnections: 3,
Timeout: 10 * time.Second,
QueryConfigPath: "doesnotexist.YAML",
},
err: "query_config_path must be a valid path of a YAML config file",
},
{
name: "bad query config path",
input: Config{
ConnectionString: "sqlserver://user:pass@localhost:1433",
MaxIdleConnections: 3,
MaxOpenConnections: 3,
Timeout: 10 * time.Second,
QueryConfigPath: "doesnotexist.YAML",
},
err: "query_config_path must be a valid path of a YAML config file",
},
{
name: "bad query config file",
input: Config{
ConnectionString: "sqlserver://user:pass@localhost:1433",
MaxIdleConnections: 3,
MaxOpenConnections: 3,
Timeout: 10 * time.Second,
QueryConfigPath: badQueryPath,
},
err: "query_config_path config not in correct format",
},
{
name: "good query config file",
input: Config{
ConnectionString: "sqlserver://user:pass@localhost:1433",
MaxIdleConnections: 3,
MaxOpenConnections: 3,
Timeout: 10 * time.Second,
QueryConfigPath: goodQueryPath,
},
},
}

for _, tc := range testCases {
Expand Down
1 change: 1 addition & 0 deletions pkg/integrations/mssql/test/bad_query_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is not a valid config file.

0 comments on commit c692725

Please sign in to comment.