From 90204b3888d033ef2e7c5cc563c041032ed92fdc Mon Sep 17 00:00:00 2001 From: Rory Z <16801068+Rory-Z@users.noreply.github.com> Date: Tue, 30 Apr 2024 17:02:20 +0800 Subject: [PATCH] chore: sync grafana dashboard and EMQX dashboard fix: https://github.com/emqx/emqx-exporter/issues/88 Signed-off-by: Rory Z <16801068+Rory-Z@users.noreply.github.com> --- collector/collector_rule_engine.go | 6 +-- grafana-dashboard/grafanalib/Makefile | 44 +++++++++++-------- grafana-dashboard/grafanalib/README.md | 26 ----------- grafana-dashboard/grafanalib/metrics.py | 10 ++--- grafana-dashboard/grafanalib/submetrics.py | 6 +-- .../template/emqx-4/overview.json | 8 ++-- .../template/emqx-4/rule-engine-count.json | 10 ++--- .../template/emqx-enterprise-5/overview.json | 8 ++-- .../emqx-enterprise-5/rule-engine-count.json | 10 ++--- 9 files changed, 55 insertions(+), 73 deletions(-) diff --git a/collector/collector_rule_engine.go b/collector/collector_rule_engine.go index c075c5a..3e8ef14 100644 --- a/collector/collector_rule_engine.go +++ b/collector/collector_rule_engine.go @@ -33,9 +33,9 @@ const ( ruleTopicHitCount = "topic_hit_count" ruleExecPassCount = "exec_pass_count" - ruleExecFailureCount = "exec_failure_count" - ruleNoResultCount = "exec_no_result_count" - ruleExecExceptionCount = "exec_exception_count" + ruleExecFailureCount = "exec_failure_count" // failure count = no result count + exec exception count; it is not shown in the EMQX dashboard + ruleNoResultCount = "exec_no_result_count" // shown in the EMQX dashboard + ruleExecExceptionCount = "exec_exception_count" // shown in the EMQX dashboard ruleExecRate = "exec_rate" ruleExecLast5mRate = "exec_last5m_rate" ruleExecMaxRate = "exec_max_rate" diff --git a/grafana-dashboard/grafanalib/Makefile b/grafana-dashboard/grafanalib/Makefile index 7f0136b..d7f067d 100644 --- a/grafana-dashboard/grafanalib/Makefile +++ b/grafana-dashboard/grafanalib/Makefile @@ -1,20 +1,33 @@ # Makefile to run Python scripts with customizable arguments - -# 
Default values for arguments -# EDITION_ARG can be ee or ce -EDITION_ARG ?= ee -# VERSION_ARG can be 4 or 5 -VERSION_ARG ?= 5 PROJECT_DIR := $(shell dirname $(abspath $(lastword $(MAKEFILE_LIST)))) -JSON_DIR = $(PROJECT_DIR)/json # Phony target to handle all tasks -.PHONY: all clean -ifeq ($(VERSION_ARG),4) -all: $(JSON_DIR) overview.json client-events.json messages.json rule-engine-rate.json rule-engine-count.json -else -all: $(JSON_DIR) overview.json authentication.json authorization.json client-events.json messages.json rule-engine-rate.json rule-engine-count.json -endif +.PHONY: all +all: emqx_4 emqx_ee_4 emqx_5 emqx_ee_5 + +.PHONY: emqx_4 +emqx_4: EDITION_ARG=ce +emqx_4: VERSION_ARG=4 +emqx_4: JSON_DIR = $(PROJECT_DIR)/../template/emqx-4 +emqx_4: $(JSON_DIR) overview.json client-events.json messages.json rule-engine-rate.json rule-engine-count.json + +.PHONY: emqx_ee_4 +emqx_ee_4: EDITION_ARG=ee +emqx_ee_4: VERSION_ARG=4 +emqx_ee_4: JSON_DIR = $(PROJECT_DIR)/../template/emqx-enterprise-4 +emqx_ee_4: $(JSON_DIR) overview.json client-events.json messages.json rule-engine-rate.json rule-engine-count.json + +.PHONY: emqx_5 +emqx_5: EDITION_ARG=ce +emqx_5: VERSION_ARG=5 +emqx_5: JSON_DIR = $(PROJECT_DIR)/../template/emqx-5 +emqx_5: $(JSON_DIR) overview.json authentication.json authorization.json client-events.json messages.json rule-engine-rate.json rule-engine-count.json + +.PHONY: emqx_ee_5 +emqx_ee_5: EDITION_ARG=ee +emqx_ee_5: VERSION_ARG=5 +emqx_ee_5: JSON_DIR = $(PROJECT_DIR)/../template/emqx-enterprise-5 +emqx_ee_5: $(JSON_DIR) overview.json authentication.json authorization.json client-events.json messages.json rule-engine-rate.json rule-engine-count.json # Create json directory if it doesn't exist $(JSON_DIR): @@ -41,8 +54,3 @@ rule-engine-rate.json: rule-engine-count.json: ./rule-engine-count.py > $(JSON_DIR)/rule-engine-count.json - -# Clean up by removing the json directory -clean: - rm -rf $(JSON_DIR) - diff --git 
a/grafana-dashboard/grafanalib/README.md b/grafana-dashboard/grafanalib/README.md index ceb2fbb..aeab7fe 100644 --- a/grafana-dashboard/grafanalib/README.md +++ b/grafana-dashboard/grafanalib/README.md @@ -24,32 +24,6 @@ pipenv shell ## Generate Dashboard JSON -- Generate EMQX 5.x Enterprise Dashboard - ```shell make ``` - -- Generate EMQX 5.x Community Edition Dashboard - -```shell -make EDITION_ARG=ce VERSION_ARG=5 -``` - -- Generate EMQX 4.x Enterprise Dashboard - -```shell -make EDITION_ARG=ee VERSION_ARG=4 -``` - -- Generate EMQX 4.x Community Edition Dashboard - -```shell -make EDITION_ARG=ce VERSION_ARG=4 -``` - -## Remove Dashboard JSON - -```shell -make clean -``` diff --git a/grafana-dashboard/grafanalib/metrics.py b/grafana-dashboard/grafanalib/metrics.py index 200aabb..452e8a1 100644 --- a/grafana-dashboard/grafanalib/metrics.py +++ b/grafana-dashboard/grafanalib/metrics.py @@ -264,12 +264,12 @@ "format": "timeseries" }, "rule_engine_exec_failure": { - "title": "Rule Engine Exec Failure", + "title": "Rule Engine Exec Exception", "subchart_links": (subcharts["rule-engine-count"], ["subchart_rule_engine_exec_failure"]), "targets": [ { "legendFormat": "{{rule}}", - "expr": "sum by(rule) (irate(emqx_rule_exec_failure_count{cluster=\"$cluster\", node=~\".*\"}[$__rate_interval]))" + "expr": "sum by(rule) (irate(emqx_rule_exec_exception_count{cluster=\"$cluster\", node=~\".*\"}[$__rate_interval]))" } ], "format": "timeseries" @@ -1270,7 +1270,7 @@ }, { "legendFormat": "Exec Exception last 15m", - "expr": "sum by(node, rule) (irate(emqx_rule_exec_failure_count{cluster=\"$cluster\", node=~\"$node\", rule=~\"$rule\"}[15m]))", + "expr": "sum by(node, rule) (irate(emqx_rule_exec_exception_count{cluster=\"$cluster\", node=~\"$node\", rule=~\"$rule\"}[15m]))", "thresholds": { "mode": "absolute", "steps": [ @@ -1341,11 +1341,11 @@ "format": "timeseries" }, "subchart_rule_engine_exec_failure": { - "title": "Rule Engine Exec Failure", + "title": "Rule Engine Exec 
Exception", "targets": [ { "legendFormat": "{{ node }}:{{ rule }}", - "expr": "sum by(rule) (irate(emqx_rule_exec_failure_count{cluster=\"$cluster\", node=~\"$node\", rule=~\"$rule\"}[$__rate_interval]))", + "expr": "sum by(rule) (irate(emqx_rule_exec_exception_count{cluster=\"$cluster\", node=~\"$node\", rule=~\"$rule\"}[$__rate_interval]))", } ], diff --git a/grafana-dashboard/grafanalib/submetrics.py b/grafana-dashboard/grafanalib/submetrics.py index a1d0bb2..93d0a5e 100644 --- a/grafana-dashboard/grafanalib/submetrics.py +++ b/grafana-dashboard/grafanalib/submetrics.py @@ -317,7 +317,7 @@ }, { "legendFormat": "Exec Exception last 15m", - "expr": "sum by(node, rule) (irate(emqx_rule_exec_failure_count{cluster=\"$cluster\", node=~\"$node\", rule=~\"$rule\"}[15m]))", + "expr": "sum by(node, rule) (irate(emqx_rule_exec_exception_count{cluster=\"$cluster\", node=~\"$node\", rule=~\"$rule\"}[15m]))", "thresholds": { "mode": "absolute", "steps": [ @@ -388,11 +388,11 @@ "format": "timeseries" }, "subchart_rule_engine_exec_failure": { - "title": "Rule Engine Exec Failure", + "title": "Rule Engine Exec Exception", "targets": [ { "legendFormat": "{{ node }}:{{ rule }}", - "expr": "sum by(rule) (irate(emqx_rule_exec_failure_count{cluster=\"$cluster\", node=~\"$node\", rule=~\"$rule\"}[$__rate_interval]))", + "expr": "sum by(rule) (irate(emqx_rule_exec_exception_count{cluster=\"$cluster\", node=~\"$node\", rule=~\"$rule\"}[$__rate_interval]))", } ], diff --git a/grafana-dashboard/template/emqx-4/overview.json b/grafana-dashboard/template/emqx-4/overview.json index b6f7fb5..1b8833a 100644 --- a/grafana-dashboard/template/emqx-4/overview.json +++ b/grafana-dashboard/template/emqx-4/overview.json @@ -1486,7 +1486,7 @@ { "targetBlank": true, "title": "Show Rule Engine Exec Failure Node Detail", - "url": "/d/rule-engine-count/Rule Engine Exec Failure?orgId=1&refresh=10s&var-node=All&viewPanel=3" + "url": "/d/rule-engine-count/Rule Engine Exec 
Exception?orgId=1&refresh=10s&var-node=All&viewPanel=3" } ], "maxDataPoints": 100, @@ -1508,7 +1508,7 @@ "targets": [ { "datasource": null, - "expr": "sum by(rule) (irate(emqx_rule_exec_failure_count{cluster=\"$cluster\", node=~\".*\"}[$__rate_interval]))", + "expr": "sum by(rule) (irate(emqx_rule_exec_exception_count{cluster=\"$cluster\", node=~\".*\"}[$__rate_interval]))", "format": "timeseries", "hide": false, "instant": false, @@ -1516,7 +1516,7 @@ "intervalFactor": 1, "legendFormat": "{{rule}}", "metric": "", - "query": "sum by(rule) (irate(emqx_rule_exec_failure_count{cluster=\"$cluster\", node=~\".*\"}[$__rate_interval]))", + "query": "sum by(rule) (irate(emqx_rule_exec_exception_count{cluster=\"$cluster\", node=~\".*\"}[$__rate_interval]))", "refId": "{{rule}}", "step": 10, "target": "" @@ -1524,7 +1524,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Rule Engine Exec Failure", + "title": "Rule Engine Exec Exception", "transformations": [], "transparent": false, "type": "timeseries" diff --git a/grafana-dashboard/template/emqx-4/rule-engine-count.json b/grafana-dashboard/template/emqx-4/rule-engine-count.json index 0304020..da96433 100644 --- a/grafana-dashboard/template/emqx-4/rule-engine-count.json +++ b/grafana-dashboard/template/emqx-4/rule-engine-count.json @@ -287,7 +287,7 @@ }, { "datasource": null, - "expr": "sum by(node, rule) (irate(emqx_rule_exec_failure_count{cluster=\"$cluster\", node=~\"$node\", rule=~\"$rule\"}[15m]))", + "expr": "sum by(node, rule) (irate(emqx_rule_exec_exception_count{cluster=\"$cluster\", node=~\"$node\", rule=~\"$rule\"}[15m]))", "format": "table", "hide": false, "instant": true, @@ -295,7 +295,7 @@ "intervalFactor": 1, "legendFormat": "Exec Exception last 15m", "metric": "", - "query": "sum by(node, rule) (irate(emqx_rule_exec_failure_count{cluster=\"$cluster\", node=~\"$node\", rule=~\"$rule\"}[15m]))", + "query": "sum by(node, rule) (irate(emqx_rule_exec_exception_count{cluster=\"$cluster\", node=~\"$node\", 
rule=~\"$rule\"}[15m]))", "refId": "exec_exception_last_15m", "step": 10, "target": "" @@ -583,7 +583,7 @@ "targets": [ { "datasource": null, - "expr": "sum by(rule) (irate(emqx_rule_exec_failure_count{cluster=\"$cluster\", node=~\"$node\", rule=~\"$rule\"}[$__rate_interval]))", + "expr": "sum by(rule) (irate(emqx_rule_exec_exception_count{cluster=\"$cluster\", node=~\"$node\", rule=~\"$rule\"}[$__rate_interval]))", "format": "timeseries", "hide": false, "instant": false, @@ -591,7 +591,7 @@ "intervalFactor": 1, "legendFormat": "{{ node }}:{{ rule }}", "metric": "", - "query": "sum by(rule) (irate(emqx_rule_exec_failure_count{cluster=\"$cluster\", node=~\"$node\", rule=~\"$rule\"}[$__rate_interval]))", + "query": "sum by(rule) (irate(emqx_rule_exec_exception_count{cluster=\"$cluster\", node=~\"$node\", rule=~\"$rule\"}[$__rate_interval]))", "refId": "{{_node_}}:{{_rule_}}", "step": 10, "target": "" @@ -599,7 +599,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Rule Engine Exec Failure", + "title": "Rule Engine Exec Exception", "transformations": [], "transparent": false, "type": "timeseries" diff --git a/grafana-dashboard/template/emqx-enterprise-5/overview.json b/grafana-dashboard/template/emqx-enterprise-5/overview.json index 90b574f..ccecf9a 100644 --- a/grafana-dashboard/template/emqx-enterprise-5/overview.json +++ b/grafana-dashboard/template/emqx-enterprise-5/overview.json @@ -1857,7 +1857,7 @@ { "targetBlank": true, "title": "Show Rule Engine Exec Failure Node Detail", - "url": "/d/rule-engine-count/Rule Engine Exec Failure?orgId=1&refresh=10s&var-node=All&viewPanel=3" + "url": "/d/rule-engine-count/Rule Engine Exec Exception?orgId=1&refresh=10s&var-node=All&viewPanel=3" } ], "maxDataPoints": 100, @@ -1879,7 +1879,7 @@ "targets": [ { "datasource": null, - "expr": "sum by(rule) (irate(emqx_rule_exec_failure_count{cluster=\"$cluster\", node=~\".*\"}[$__rate_interval]))", + "expr": "sum by(rule) 
(irate(emqx_rule_exec_exception_count{cluster=\"$cluster\", node=~\".*\"}[$__rate_interval]))", "format": "timeseries", "hide": false, "instant": false, @@ -1887,7 +1887,7 @@ "intervalFactor": 1, "legendFormat": "{{rule}}", "metric": "", - "query": "sum by(rule) (irate(emqx_rule_exec_failure_count{cluster=\"$cluster\", node=~\".*\"}[$__rate_interval]))", + "query": "sum by(rule) (irate(emqx_rule_exec_exception_count{cluster=\"$cluster\", node=~\".*\"}[$__rate_interval]))", "refId": "{{rule}}", "step": 10, "target": "" @@ -1895,7 +1895,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Rule Engine Exec Failure", + "title": "Rule Engine Exec Exception", "transformations": [], "transparent": false, "type": "timeseries" diff --git a/grafana-dashboard/template/emqx-enterprise-5/rule-engine-count.json b/grafana-dashboard/template/emqx-enterprise-5/rule-engine-count.json index 0304020..da96433 100644 --- a/grafana-dashboard/template/emqx-enterprise-5/rule-engine-count.json +++ b/grafana-dashboard/template/emqx-enterprise-5/rule-engine-count.json @@ -287,7 +287,7 @@ }, { "datasource": null, - "expr": "sum by(node, rule) (irate(emqx_rule_exec_failure_count{cluster=\"$cluster\", node=~\"$node\", rule=~\"$rule\"}[15m]))", + "expr": "sum by(node, rule) (irate(emqx_rule_exec_exception_count{cluster=\"$cluster\", node=~\"$node\", rule=~\"$rule\"}[15m]))", "format": "table", "hide": false, "instant": true, @@ -295,7 +295,7 @@ "intervalFactor": 1, "legendFormat": "Exec Exception last 15m", "metric": "", - "query": "sum by(node, rule) (irate(emqx_rule_exec_failure_count{cluster=\"$cluster\", node=~\"$node\", rule=~\"$rule\"}[15m]))", + "query": "sum by(node, rule) (irate(emqx_rule_exec_exception_count{cluster=\"$cluster\", node=~\"$node\", rule=~\"$rule\"}[15m]))", "refId": "exec_exception_last_15m", "step": 10, "target": "" @@ -583,7 +583,7 @@ "targets": [ { "datasource": null, - "expr": "sum by(rule) (irate(emqx_rule_exec_failure_count{cluster=\"$cluster\", node=~\"$node\", 
rule=~\"$rule\"}[$__rate_interval]))", + "expr": "sum by(rule) (irate(emqx_rule_exec_exception_count{cluster=\"$cluster\", node=~\"$node\", rule=~\"$rule\"}[$__rate_interval]))", "format": "timeseries", "hide": false, "instant": false, @@ -591,7 +591,7 @@ "intervalFactor": 1, "legendFormat": "{{ node }}:{{ rule }}", "metric": "", - "query": "sum by(rule) (irate(emqx_rule_exec_failure_count{cluster=\"$cluster\", node=~\"$node\", rule=~\"$rule\"}[$__rate_interval]))", + "query": "sum by(rule) (irate(emqx_rule_exec_exception_count{cluster=\"$cluster\", node=~\"$node\", rule=~\"$rule\"}[$__rate_interval]))", "refId": "{{_node_}}:{{_rule_}}", "step": 10, "target": "" @@ -599,7 +599,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Rule Engine Exec Failure", + "title": "Rule Engine Exec Exception", "transformations": [], "transparent": false, "type": "timeseries"