From 786c769d1fd4373ade6f16a04f7d38b90ca2b92d Mon Sep 17 00:00:00 2001 From: Robert Gildein Date: Tue, 19 Nov 2024 11:17:03 +0100 Subject: [PATCH] Update TF plan and use history-server rev33 Update TF plan to integrate grafana-agent and integration-hub via logging endpoint. Update history server to rev33. Signed-off-by: Robert Gildein --- python/tests/integration/test_spark_job.py | 144 +++++++++--------- releases/3.4/terraform/base/applications.tf | 122 +++++++-------- releases/3.4/terraform/cos/integrations.tf | 70 +++++---- .../3.4/yaml/bundle-azure-storage.yaml.j2 | 2 +- releases/3.4/yaml/bundle.yaml.j2 | 2 +- releases/3.5/terraform/base/applications.tf | 122 +++++++-------- releases/3.5/terraform/cos/integrations.tf | 70 +++++---- .../3.5/yaml/bundle-azure-storage.yaml.j2 | 2 +- releases/3.5/yaml/bundle.yaml.j2 | 2 +- 9 files changed, 262 insertions(+), 274 deletions(-) diff --git a/python/tests/integration/test_spark_job.py b/python/tests/integration/test_spark_job.py index 45396092..064d5dbf 100644 --- a/python/tests/integration/test_spark_job.py +++ b/python/tests/integration/test_spark_job.py @@ -295,75 +295,75 @@ async def test_spark_logforwaring_to_loki( assert len(query["data"]["result"]) != 0, "no logs was found" -# @pytest.mark.abort_on_fail -# @pytest.mark.asyncio -# async def test_history_server_metrics_in_cos(ops_test: OpsTest, cos): -# if not cos: -# pytest.skip("Not possible to test without cos") - -# cos_model_name = cos -# # Prometheus data is being published by the app -# assert await all_prometheus_exporters_data( -# ops_test, check_field="jmx_scrape_duration_seconds", app_name=HISTORY_SERVER -# ) - -# # We should leave time for Prometheus data to be published -# for attempt in Retrying(stop=stop_after_attempt(5), wait=wait_fixed(30)): -# with attempt: - -# cos_address = await get_cos_address(ops_test, cos_model_name=cos_model_name) -# assert published_prometheus_data( -# ops_test, cos_model_name, cos_address, "jmx_scrape_duration_seconds" -# ) - -# # Alerts got published to Prometheus -# alerts_data = published_prometheus_alerts( -# ops_test, cos_model_name, cos_address -# ) -# logger.info(f"Alerts data: {alerts_data}") - -# logger.info("Rules: ") -# for group in alerts_data["data"]["groups"]: -# for rule in group["rules"]: -# logger.info(f"Rule: {rule['name']}") -# logger.info("End of rules.") - -# for alert in [ -# "Spark History Server Missing", -# "Spark History Server Threads Dead Locked", -# ]: -# assert any( -# rule["name"] == alert -# for group in alerts_data["data"]["groups"] -# for rule in group["rules"] -# ) - -# # Grafana dashboard got published -# dashboards_info = await published_grafana_dashboards( -# ops_test, cos_model_name -# ) -# logger.info(f"Dashboard info {dashboards_info}") -# assert any( -# board["title"] == "Spark History Server JMX Dashboard" -# for board in dashboards_info -# ) - -# # Loki logs are ingested -# logs = await published_loki_logs( -# ops_test, -# cos_model_name, -# cos_address, -# "juju_application", -# HISTORY_SERVER, -# ) -# logger.info(f"Retrieved logs: {logs}") - -# # check for non empty logs -# assert len(logs) > 0 -# # check if startup messages are there -# c = 0 -# for timestamp, message in logs.items(): -# if "INFO HistoryServer" in message: -# c = c + 1 -# logger.info(f"Number of line found: {c}") -# assert c > 0 +@pytest.mark.abort_on_fail +@pytest.mark.asyncio +async def test_history_server_metrics_in_cos(ops_test: OpsTest, cos): + if not cos: + pytest.skip("Not possible to test without cos") + + cos_model_name = cos + # Prometheus data is being published by the app + assert await all_prometheus_exporters_data( + ops_test, check_field="jmx_scrape_duration_seconds", app_name=HISTORY_SERVER + ) + + # We should leave time for Prometheus data to be published + for attempt in Retrying(stop=stop_after_attempt(5), wait=wait_fixed(30)): + with attempt: + + cos_address = await get_cos_address(ops_test, cos_model_name=cos_model_name) + assert published_prometheus_data( + ops_test, cos_model_name, cos_address, "jmx_scrape_duration_seconds" + ) + + # Alerts got published to Prometheus + alerts_data = published_prometheus_alerts( + ops_test, cos_model_name, cos_address + ) + logger.info(f"Alerts data: {alerts_data}") + + logger.info("Rules: ") + for group in alerts_data["data"]["groups"]: + for rule in group["rules"]: + logger.info(f"Rule: {rule['name']}") + logger.info("End of rules.") + + for alert in [ + "Spark History Server Missing", + "Spark History Server Threads Dead Locked", + ]: + assert any( + rule["name"] == alert + for group in alerts_data["data"]["groups"] + for rule in group["rules"] + ) + + # Grafana dashboard got published + dashboards_info = await published_grafana_dashboards( + ops_test, cos_model_name + ) + logger.info(f"Dashboard info {dashboards_info}") + assert any( + board["title"] == "Spark History Server JMX Dashboard" + for board in dashboards_info + ) + + # Loki logs are ingested + logs = await published_loki_logs( + ops_test, + cos_model_name, + cos_address, + "juju_application", + HISTORY_SERVER, + ) + logger.info(f"Retrieved logs: {logs}") + + # check for non empty logs + assert len(logs) > 0 + # check if startup messages are there + c = 0 + for timestamp, message in logs.items(): + if "INFO HistoryServer" in message: + c = c + 1 + logger.info(f"Number of line found: {c}") + assert c > 0 diff --git a/releases/3.4/terraform/base/applications.tf b/releases/3.4/terraform/base/applications.tf index 98469e4e..0202f96a 100644 --- a/releases/3.4/terraform/base/applications.tf +++ b/releases/3.4/terraform/base/applications.tf @@ -2,86 +2,75 @@ # See LICENSE file for licensing details. resource "juju_application" "history_server" { - name = "history-server" - - model = var.model + name = "history-server" + model = var.model charm { - name = "spark-history-server-k8s" - channel = "3.4/edge" - revision = 30 + name = "spark-history-server-k8s" + channel = "3.4/edge" + revision = 33 } resources = { - spark-history-server-image = 17 # 3.4.2 + spark-history-server-image = 17 # 3.4.2 } - units = 1 - + units = 1 constraints = "arch=amd64" - } resource "juju_application" "s3" { - name = "s3" - - model = var.model + name = "s3" + model = var.model charm { - name = "s3-integrator" - channel = "latest/edge" + name = "s3-integrator" + channel = "latest/edge" revision = 17 } config = { - path = "spark-events" - bucket = var.s3.bucket - endpoint = var.s3.endpoint + path = "spark-events" + bucket = var.s3.bucket + endpoint = var.s3.endpoint } - units = 1 - + units = 1 constraints = "arch=amd64" - } resource "juju_application" "kyuubi" { - - name = "kyuubi" - - model = var.model + name = "kyuubi" + model = var.model charm { - name = "kyuubi-k8s" - channel = "latest/edge" + name = "kyuubi-k8s" + channel = "latest/edge" revision = 27 } resources = { - kyuubi-image = "ghcr.io/canonical/charmed-spark-kyuubi@sha256:9268d19a6eef91914e874734b320fab64908faf0f7adb8856be809bc60ecd1d0" + kyuubi-image = "ghcr.io/canonical/charmed-spark-kyuubi@sha256:9268d19a6eef91914e874734b320fab64908faf0f7adb8856be809bc60ecd1d0" } config = { - namespace = var.model + namespace = var.model service-account = var.kyuubi_user } - units = 3 - trust = true - + units = 3 + trust = true constraints = "arch=amd64" } resource "juju_application" "zookeeper" { - - name = "zookeeper" - - model = var.model + name = "zookeeper" + model = var.model charm { - name = "zookeeper-k8s" - channel = "3/edge" + name = "zookeeper-k8s" + channel = "3/edge" revision = 59 } @@ -89,72 +78,63 @@ resource "juju_application" "zookeeper" { zookeeper-image = 31 } - units = 3 + units = 3 constraints = "arch=amd64" } resource "juju_application" "kyuubi_users" { - name = "kyuubi-users" - - model = var.model + name = "kyuubi-users" + model = var.model charm { - name = "postgresql-k8s" - channel = "14/stable" + name = "postgresql-k8s" + channel = "14/stable" revision = 281 } resources = { - postgresql-image = 159 + postgresql-image = 159 } - units = 1 - trust = true - + units = 1 + trust = true constraints = "arch=amd64" - } resource "juju_application" "metastore" { - name = "metastore" - - model = var.model + name = "metastore" + model = var.model charm { - name = "postgresql-k8s" - channel = "14/stable" + name = "postgresql-k8s" + channel = "14/stable" revision = 281 } resources = { - postgresql-image = 159 + postgresql-image = 159 } - units = 1 - trust = true - + units = 1 + trust = true constraints = "arch=amd64" - } resource "juju_application" "hub" { - name = "integration-hub" - - model = var.model + name = "integration-hub" + model = var.model charm { - name = "spark-integration-hub-k8s" - channel = "latest/edge" - revision = 20 + name = "spark-integration-hub-k8s" + channel = "latest/edge" + revision = 22 } resources = { - integration-hub-image = 3 + integration-hub-image = 3 } - units = 1 - trust = true - + units = 1 + trust = true constraints = "arch=amd64" - -} \ No newline at end of file +} diff --git a/releases/3.4/terraform/cos/integrations.tf b/releases/3.4/terraform/cos/integrations.tf index e77e081f..06dad337 100644 --- a/releases/3.4/terraform/cos/integrations.tf +++ b/releases/3.4/terraform/cos/integrations.tf @@ -15,66 +15,66 @@ data "juju_offer" "loki" { resource "juju_integration" "cos_configuration_agent" { - model = var.model + model = var.model application { - name = juju_application.cos_configuration.name + name = juju_application.cos_configuration.name endpoint = "grafana-dashboards" } application { - name = juju_application.agent.name + name = juju_application.agent.name endpoint = "grafana-dashboards-consumer" } } resource "juju_integration" "pushgateway_scrape_config" { - model = var.model + model = var.model application { - name = juju_application.pushgateway.name + name = juju_application.pushgateway.name endpoint = "metrics-endpoint" } application { - name = juju_application.scrape_config.name + name = juju_application.scrape_config.name endpoint = "configurable-scrape-jobs" } } resource "juju_integration" "scrape_config_agent" { - model = var.model + model = var.model application { - name = juju_application.scrape_config.name + name = juju_application.scrape_config.name endpoint = "metrics-endpoint" } application { - name = juju_application.agent.name + name = juju_application.agent.name endpoint = "metrics-endpoint" } } resource "juju_integration" "pushgateway_integration_hub" { - model = var.model + model = var.model application { - name = juju_application.pushgateway.name + name = juju_application.pushgateway.name endpoint = "push-endpoint" } application { - name = var.integration_hub + name = var.integration_hub endpoint = "cos" } } resource "juju_integration" "agent_grafana_dashboards" { - model = var.model + model = var.model application { - name = juju_application.agent.name + name = juju_application.agent.name endpoint = "grafana-dashboards-provider" } @@ -84,10 +84,10 @@ resource "juju_integration" "agent_grafana_dashboards" { } resource "juju_integration" "agent_prometheus" { - model = var.model + model = var.model application { - name = juju_application.agent.name + name = juju_application.agent.name endpoint = "send-remote-write" } @@ -97,56 +97,70 @@ resource "juju_integration" "agent_prometheus" { } resource "juju_integration" "history_server_agent_dashboard" { - model = var.model + model = var.model application { - name = var.history_server + name = var.history_server endpoint = "grafana-dashboard" } application { - name = juju_application.agent.name + name = juju_application.agent.name endpoint = "grafana-dashboards-consumer" } } resource "juju_integration" "history_server_agent_logging" { - model = var.model + model = var.model application { - name = var.history_server + name = var.history_server endpoint = "logging" } application { - name = juju_application.agent.name + name = juju_application.agent.name endpoint = "logging-provider" } } resource "juju_integration" "history_server_agent_metrics" { - model = var.model + model = var.model application { - name = var.history_server + name = var.history_server endpoint = "metrics-endpoint" } application { - name = juju_application.agent.name + name = juju_application.agent.name endpoint = "metrics-endpoint" } } resource "juju_integration" "agent_loki" { - model = var.model + model = var.model application { - name = juju_application.agent.name + name = juju_application.agent.name endpoint = "logging-consumer" } application { offer_url = data.juju_offer.loki.url } -} \ No newline at end of file +} + +resource "juju_integration" "integration_hub_logging" { + model = var.model + + application { + name = juju_application.agent.name + endpoint = "logging-provider" + } + + application { + name = var.integration_hub + endpoint = "logging" + } +} diff --git a/releases/3.4/yaml/bundle-azure-storage.yaml.j2 b/releases/3.4/yaml/bundle-azure-storage.yaml.j2 index 5857be81..cace484b 100644 --- a/releases/3.4/yaml/bundle-azure-storage.yaml.j2 +++ b/releases/3.4/yaml/bundle-azure-storage.yaml.j2 @@ -51,7 +51,7 @@ applications: history-server: charm: spark-history-server-k8s channel: 3.4/edge - revision: 30 + revision: 33 resources: spark-history-server-image: ghcr.io/canonical/charmed-spark@sha256:321b6deb13f10c045028c9b25264b8113c6fdcbe4f487ff472a06fd7bdcb2758 # 3.4.2 scale: 1 diff --git a/releases/3.4/yaml/bundle.yaml.j2 b/releases/3.4/yaml/bundle.yaml.j2 index a51fd82a..8db006b1 100644 --- a/releases/3.4/yaml/bundle.yaml.j2 +++ b/releases/3.4/yaml/bundle.yaml.j2 @@ -58,7 +58,7 @@ applications: history-server: charm: spark-history-server-k8s channel: 3.4/edge - revision: 30 + revision: 33 resources: spark-history-server-image: ghcr.io/canonical/charmed-spark@sha256:321b6deb13f10c045028c9b25264b8113c6fdcbe4f487ff472a06fd7bdcb2758 # 3.4.2 scale: 1 diff --git a/releases/3.5/terraform/base/applications.tf b/releases/3.5/terraform/base/applications.tf index be2e6abb..0a03c8ae 100644 --- a/releases/3.5/terraform/base/applications.tf +++ b/releases/3.5/terraform/base/applications.tf @@ -2,86 +2,75 @@ # See LICENSE file for licensing details. resource "juju_application" "history_server" { - name = "history-server" - - model = var.model + name = "history-server" + model = var.model charm { - name = "spark-history-server-k8s" - channel = "3.4/edge" - revision = 30 + name = "spark-history-server-k8s" + channel = "3.4/edge" + revision = 33 } resources = { - spark-history-server-image = 17 # 3.4.2 + spark-history-server-image = 17 # 3.4.2 } - units = 1 - + units = 1 constraints = "arch=amd64" - } resource "juju_application" "s3" { - name = "s3" - - model = var.model + name = "s3" + model = var.model charm { - name = "s3-integrator" - channel = "latest/edge" + name = "s3-integrator" + channel = "latest/edge" revision = 17 } config = { - path = "spark-events" - bucket = var.s3.bucket - endpoint = var.s3.endpoint + path = "spark-events" + bucket = var.s3.bucket + endpoint = var.s3.endpoint } - units = 1 - + units = 1 constraints = "arch=amd64" - } resource "juju_application" "kyuubi" { - - name = "kyuubi" - - model = var.model + name = "kyuubi" + model = var.model charm { - name = "kyuubi-k8s" - channel = "latest/edge" + name = "kyuubi-k8s" + channel = "latest/edge" revision = 27 } resources = { - kyuubi-image = "ghcr.io/canonical/charmed-spark-kyuubi@sha256:9268d19a6eef91914e874734b320fab64908faf0f7adb8856be809bc60ecd1d0" + kyuubi-image = "ghcr.io/canonical/charmed-spark-kyuubi@sha256:9268d19a6eef91914e874734b320fab64908faf0f7adb8856be809bc60ecd1d0" } config = { - namespace = var.model + namespace = var.model service-account = var.kyuubi_user } - units = 3 - trust = true - + units = 3 + trust = true constraints = "arch=amd64" } resource "juju_application" "zookeeper" { - - name = "zookeeper" - - model = var.model + name = "zookeeper" + model = var.model charm { - name = "zookeeper-k8s" - channel = "3/edge" + name = "zookeeper-k8s" + channel = "3/edge" revision = 59 } @@ -89,73 +78,64 @@ resource "juju_application" "zookeeper" { zookeeper-image = 31 } - units = 3 + units = 3 constraints = "arch=amd64" } resource "juju_application" "kyuubi_users" { - name = "kyuubi-users" - - model = var.model + name = "kyuubi-users" + model = var.model charm { - name = "postgresql-k8s" - channel = "14/stable" + name = "postgresql-k8s" + channel = "14/stable" revision = 281 } resources = { - postgresql-image = 159 + postgresql-image = 159 } - units = 1 - trust = true - + units = 1 + trust = true constraints = "arch=amd64" - } resource "juju_application" "metastore" { - name = "metastore" - - model = var.model + name = "metastore" + model = var.model charm { - name = "postgresql-k8s" - channel = "14/stable" + name = "postgresql-k8s" + channel = "14/stable" revision = 281 } resources = { - postgresql-image = 159 + postgresql-image = 159 } - units = 1 - trust = true - + units = 1 + trust = true constraints = "arch=amd64" - } resource "juju_application" "hub" { - name = "integration-hub" - - model = var.model + name = "integration-hub" + model = var.model charm { - name = "spark-integration-hub-k8s" - channel = "latest/edge" - revision = 20 + name = "spark-integration-hub-k8s" + channel = "latest/edge" + revision = 22 } resources = { - integration-hub-image = 3 + integration-hub-image = 3 } - units = 1 - trust = true - + units = 1 + trust = true constraints = "arch=amd64" - -} \ No newline at end of file +} diff --git a/releases/3.5/terraform/cos/integrations.tf b/releases/3.5/terraform/cos/integrations.tf index e77e081f..06dad337 100644 --- a/releases/3.5/terraform/cos/integrations.tf +++ b/releases/3.5/terraform/cos/integrations.tf @@ -15,66 +15,66 @@ data "juju_offer" "loki" { resource "juju_integration" "cos_configuration_agent" { - model = var.model + model = var.model application { - name = juju_application.cos_configuration.name + name = juju_application.cos_configuration.name endpoint = "grafana-dashboards" } application { - name = juju_application.agent.name + name = juju_application.agent.name endpoint = "grafana-dashboards-consumer" } } resource "juju_integration" "pushgateway_scrape_config" { - model = var.model + model = var.model application { - name = juju_application.pushgateway.name + name = juju_application.pushgateway.name endpoint = "metrics-endpoint" } application { - name = juju_application.scrape_config.name + name = juju_application.scrape_config.name endpoint = "configurable-scrape-jobs" } } resource "juju_integration" "scrape_config_agent" { - model = var.model + model = var.model application { - name = juju_application.scrape_config.name + name = juju_application.scrape_config.name endpoint = "metrics-endpoint" } application { - name = juju_application.agent.name + name = juju_application.agent.name endpoint = "metrics-endpoint" } } resource "juju_integration" "pushgateway_integration_hub" { - model = var.model + model = var.model application { - name = juju_application.pushgateway.name + name = juju_application.pushgateway.name endpoint = "push-endpoint" } application { - name = var.integration_hub + name = var.integration_hub endpoint = "cos" } } resource "juju_integration" "agent_grafana_dashboards" { - model = var.model + model = var.model application { - name = juju_application.agent.name + name = juju_application.agent.name endpoint = "grafana-dashboards-provider" } @@ -84,10 +84,10 @@ resource "juju_integration" "agent_grafana_dashboards" { } resource "juju_integration" "agent_prometheus" { - model = var.model + model = var.model application { - name = juju_application.agent.name + name = juju_application.agent.name endpoint = "send-remote-write" } @@ -97,56 +97,70 @@ resource "juju_integration" "agent_prometheus" { } resource "juju_integration" "history_server_agent_dashboard" { - model = var.model + model = var.model application { - name = var.history_server + name = var.history_server endpoint = "grafana-dashboard" } application { - name = juju_application.agent.name + name = juju_application.agent.name endpoint = "grafana-dashboards-consumer" } } resource "juju_integration" "history_server_agent_logging" { - model = var.model + model = var.model application { - name = var.history_server + name = var.history_server endpoint = "logging" } application { - name = juju_application.agent.name + name = juju_application.agent.name endpoint = "logging-provider" } } resource "juju_integration" "history_server_agent_metrics" { - model = var.model + model = var.model application { - name = var.history_server + name = var.history_server endpoint = "metrics-endpoint" } application { - name = juju_application.agent.name + name = juju_application.agent.name endpoint = "metrics-endpoint" } } resource "juju_integration" "agent_loki" { - model = var.model + model = var.model application { - name = juju_application.agent.name + name = juju_application.agent.name endpoint = "logging-consumer" } application { offer_url = data.juju_offer.loki.url } -} \ No newline at end of file +} + +resource "juju_integration" "integration_hub_logging" { + model = var.model + + application { + name = juju_application.agent.name + endpoint = "logging-provider" + } + + application { + name = var.integration_hub + endpoint = "logging" + } +} diff --git a/releases/3.5/yaml/bundle-azure-storage.yaml.j2 b/releases/3.5/yaml/bundle-azure-storage.yaml.j2 index 8fbaa5a3..c6f1c983 100644 --- a/releases/3.5/yaml/bundle-azure-storage.yaml.j2 +++ b/releases/3.5/yaml/bundle-azure-storage.yaml.j2 @@ -51,7 +51,7 @@ applications: history-server: charm: spark-history-server-k8s channel: 3.4/edge - revision: 30 + revision: 33 resources: spark-history-server-image: ghcr.io/canonical/charmed-spark@sha256:321b6deb13f10c045028c9b25264b8113c6fdcbe4f487ff472a06fd7bdcb2758 # 3.4.2 scale: 1 diff --git a/releases/3.5/yaml/bundle.yaml.j2 b/releases/3.5/yaml/bundle.yaml.j2 index 9e6d91ab..4cc95cfa 100644 --- a/releases/3.5/yaml/bundle.yaml.j2 +++ b/releases/3.5/yaml/bundle.yaml.j2 @@ -58,7 +58,7 @@ applications: history-server: charm: spark-history-server-k8s channel: 3.4/edge - revision: 30 + revision: 33 resources: # We use 3.4.2 here because we don't have metrics added in version 3.5.1 of the rock image spark-history-server-image: ghcr.io/canonical/charmed-spark@sha256:321b6deb13f10c045028c9b25264b8113c6fdcbe4f487ff472a06fd7bdcb2758 # 3.4.2