diff --git a/.config/nextest.toml b/.config/nextest.toml index ee7117506a..f5c58a5224 100644 --- a/.config/nextest.toml +++ b/.config/nextest.toml @@ -23,6 +23,13 @@ fail-fast = false # invocations of nextest happen. command = 'cargo run -p crdb-seed --profile test' +[[profile.default.scripts]] +filter = 'package(omicron-clickhouse-admin)' +setup = 'clickhouse-cluster' + +[script.clickhouse-cluster] +command = 'cargo run -p clickhouse-cluster-dev' + [test-groups] # The ClickHouse cluster tests currently rely on a hard-coded set of ports for # the nodes in the cluster. We would like to relax this in the future, at which @@ -39,7 +46,7 @@ live-tests = { max-threads = 1 } default-filter = 'all() - package(omicron-live-tests) - package(end-to-end-tests)' [[profile.default.overrides]] -filter = 'package(oximeter-db) and test(replicated)' +filter = 'package(oximeter-db) and test(replicated) + package(omicron-clickhouse-admin)' test-group = 'clickhouse-cluster' [[profile.default.overrides]] diff --git a/Cargo.lock b/Cargo.lock index 1abfb1400f..7f8bdad908 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1375,6 +1375,17 @@ dependencies = [ "slog", ] +[[package]] +name = "clickhouse-admin-test-utils" +version = "0.1.0" +dependencies = [ + "camino", + "clickhouse-admin-types", + "clickward", + "dropshot", + "omicron-workspace-hack", +] + [[package]] name = "clickhouse-admin-types" version = "0.1.0" @@ -1397,6 +1408,20 @@ dependencies = [ "slog-term", ] +[[package]] +name = "clickhouse-cluster-dev" +version = "0.1.0" +dependencies = [ + "anyhow", + "clickhouse-admin-test-utils", + "clickward", + "omicron-workspace-hack", + "oximeter-db", + "oximeter-test-utils", + "slog", + "tokio", +] + [[package]] name = "clickward" version = "0.1.0" @@ -6438,13 +6463,13 @@ dependencies = [ "chrono", "clap", "clickhouse-admin-api", + "clickhouse-admin-test-utils", "clickhouse-admin-types", "clickward", "dropshot", "expectorate", "http", "illumos-utils", - "nexus-test-utils", "omicron-common", 
"omicron-test-utils", "omicron-uuid-kinds", @@ -6460,6 +6485,7 @@ dependencies = [ "slog-async", "slog-dtrace", "slog-error-chain", + "slog-term", "subprocess", "thiserror", "tokio", @@ -7824,6 +7850,7 @@ dependencies = [ "clickward", "omicron-test-utils", "omicron-workspace-hack", + "oximeter-db", "oximeter-macro-impl", "oximeter-types", "slog", diff --git a/Cargo.toml b/Cargo.toml index 5093ccec29..7b6e62a6bb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,7 @@ members = [ "certificates", "clickhouse-admin", "clickhouse-admin/api", + "clickhouse-admin/test-utils", "clients/bootstrap-agent-client", "clients/clickhouse-admin-keeper-client", "clients/clickhouse-admin-server-client", @@ -25,6 +26,7 @@ members = [ "cockroach-admin/types", "common", "dev-tools/cert-dev", + "dev-tools/clickhouse-cluster-dev", "dev-tools/ch-dev", "dev-tools/crdb-seed", "dev-tools/db-dev", @@ -130,6 +132,7 @@ default-members = [ "clickhouse-admin", "clickhouse-admin/api", "clickhouse-admin/types", + "clickhouse-admin/test-utils", "clients/bootstrap-agent-client", "clients/clickhouse-admin-keeper-client", "clients/clickhouse-admin-server-client", @@ -150,6 +153,7 @@ default-members = [ "cockroach-admin/types", "common", "dev-tools/cert-dev", + "dev-tools/clickhouse-cluster-dev", "dev-tools/ch-dev", "dev-tools/crdb-seed", "dev-tools/db-dev", @@ -324,6 +328,7 @@ clickhouse-admin-api = { path = "clickhouse-admin/api" } clickhouse-admin-keeper-client = { path = "clients/clickhouse-admin-keeper-client" } clickhouse-admin-server-client = { path = "clients/clickhouse-admin-server-client" } clickhouse-admin-types = { path = "clickhouse-admin/types" } +clickhouse-admin-test-utils = { path = "clickhouse-admin/test-utils" } clickward = { git = "https://github.com/oxidecomputer/clickward", rev = "a1b342c2558e835d09e6e39a40d3de798a29c2f" } cockroach-admin-api = { path = "cockroach-admin/api" } cockroach-admin-client = { path = "clients/cockroach-admin-client" } diff --git 
a/clickhouse-admin/Cargo.toml b/clickhouse-admin/Cargo.toml index 84b04f6caa..3a8d45dd64 100644 --- a/clickhouse-admin/Cargo.toml +++ b/clickhouse-admin/Cargo.toml @@ -31,15 +31,16 @@ omicron-workspace-hack.workspace = true [dev-dependencies] clickward.workspace = true +clickhouse-admin-test-utils.workspace = true dropshot.workspace = true expectorate.workspace = true -nexus-test-utils.workspace = true omicron-test-utils.workspace = true oximeter-db.workspace = true oximeter-test-utils.workspace = true openapi-lint.workspace = true openapiv3.workspace = true serde_json.workspace = true +slog-term.workspace = true subprocess.workspace = true url.workspace = true diff --git a/clickhouse-admin/test-utils/Cargo.toml b/clickhouse-admin/test-utils/Cargo.toml new file mode 100644 index 0000000000..8ce2966206 --- /dev/null +++ b/clickhouse-admin/test-utils/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "clickhouse-admin-test-utils" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[lints] +workspace = true + +[dependencies] +camino.workspace = true +clickhouse-admin-types.workspace = true +clickward.workspace = true +dropshot.workspace = true + +omicron-workspace-hack.workspace = true \ No newline at end of file diff --git a/clickhouse-admin/test-utils/src/lib.rs b/clickhouse-admin/test-utils/src/lib.rs new file mode 100644 index 0000000000..7bc73a43a8 --- /dev/null +++ b/clickhouse-admin/test-utils/src/lib.rs @@ -0,0 +1,43 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Integration testing facilities for clickhouse-admin + +use camino::Utf8PathBuf; +use clickhouse_admin_types::OXIMETER_CLUSTER; +use clickward::{BasePorts, Deployment, DeploymentConfig}; +use dropshot::test_util::{log_prefix_for_test, LogContext}; +use dropshot::{ConfigLogging, ConfigLoggingLevel}; + +pub const DEFAULT_CLICKHOUSE_ADMIN_BASE_PORTS: BasePorts = BasePorts { + keeper: 29000, + raft: 29100, + clickhouse_tcp: 29200, + clickhouse_http: 29300, + clickhouse_interserver_http: 29400, +}; + +pub fn default_clickhouse_cluster_test_deployment( + path: Utf8PathBuf, +) -> Deployment { + let config = DeploymentConfig { + path, + base_ports: DEFAULT_CLICKHOUSE_ADMIN_BASE_PORTS, + cluster_name: OXIMETER_CLUSTER.to_string(), + }; + + Deployment::new(config) +} + +pub fn default_clickhouse_log_ctx_and_path() -> (LogContext, Utf8PathBuf) { + let logctx = LogContext::new( + "clickhouse_cluster", + &ConfigLogging::StderrTerminal { level: ConfigLoggingLevel::Info }, + ); + + let (parent_dir, _prefix) = log_prefix_for_test("clickhouse_cluster"); + let path = parent_dir.join("clickward_test"); + + (logctx, path) +} diff --git a/clickhouse-admin/tests/integration_test.rs b/clickhouse-admin/tests/integration_test.rs index eb26bec668..7c0d12b74b 100644 --- a/clickhouse-admin/tests/integration_test.rs +++ b/clickhouse-admin/tests/integration_test.rs @@ -2,130 +2,89 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-use anyhow::Context; use camino::Utf8PathBuf; +use clickhouse_admin_test_utils::{ + default_clickhouse_cluster_test_deployment, + default_clickhouse_log_ctx_and_path, DEFAULT_CLICKHOUSE_ADMIN_BASE_PORTS, +}; use clickhouse_admin_types::{ ClickhouseHost, ClickhouseKeeperClusterMembership, KeeperId, KeeperServerInfo, KeeperServerType, RaftConfig, }; -use clickward::{BasePorts, Deployment, DeploymentConfig}; -use dropshot::test_util::log_prefix_for_test; use omicron_clickhouse_admin::ClickhouseCli; -use omicron_test_utils::dev::test_setup_log; -use oximeter_test_utils::wait_for_keepers; -use slog::info; +use slog::{info, o, Drain}; +use slog_term::{FullFormat, PlainDecorator, TestStdoutWriter}; use std::collections::BTreeSet; use std::net::{Ipv6Addr, SocketAddrV6}; use std::str::FromStr; +fn log() -> slog::Logger { + let decorator = PlainDecorator::new(TestStdoutWriter); + let drain = FullFormat::new(decorator).build().fuse(); + let drain = slog_async::Async::new(drain).build().fuse(); + slog::Logger::root(drain, o!()) +} + +// In Clickward, keeper server ports are assigned by adding i to each +// base port. Keeper IDs are also assigned with consecutive numbers +// starting with 1. 
+fn get_keeper_server_port(keeper_id: KeeperId) -> u16 { + let raw_id = keeper_id.0; + // We can safely unwrap raw_id as the Keeper IDs we use for testing are + // all in the single digits + DEFAULT_CLICKHOUSE_ADMIN_BASE_PORTS.keeper + u16::try_from(raw_id).unwrap() +} + +fn get_keeper_raft_port(keeper_id: KeeperId) -> u16 { + let raw_id = keeper_id.0; + DEFAULT_CLICKHOUSE_ADMIN_BASE_PORTS.raft + u16::try_from(raw_id).unwrap() +} + #[tokio::test] async fn test_lgif_parsing() -> anyhow::Result<()> { - let logctx = test_setup_log("test_lgif_parsing"); - let log = logctx.log.clone(); - - let (parent_dir, prefix) = log_prefix_for_test(logctx.test_name()); - let path = parent_dir.join(format!("{prefix}-oximeter-clickward-test")); - std::fs::create_dir(&path)?; - - // We spin up several replicated clusters and must use a - // separate set of ports in case the tests run concurrently. - let base_ports = BasePorts { - keeper: 29000, - raft: 29100, - clickhouse_tcp: 29200, - clickhouse_http: 29300, - clickhouse_interserver_http: 29400, - }; - - let config = DeploymentConfig { - path: path.clone(), - base_ports, - cluster_name: "oximeter_cluster".to_string(), - }; - - let mut deployment = Deployment::new(config); - - // We only need a single keeper to test the lgif command - let num_keepers = 1; - let num_replicas = 1; - deployment - .generate_config(num_keepers, num_replicas) - .context("failed to generate config")?; - deployment.deploy().context("failed to deploy")?; - - wait_for_keepers(&log, &deployment, vec![clickward::KeeperId(1)]).await?; + let log = log(); let clickhouse_cli = ClickhouseCli::new( - Utf8PathBuf::from_str("clickhouse").unwrap(), - SocketAddrV6::new(Ipv6Addr::LOCALHOST, 29001, 0, 0), + Utf8PathBuf::from_str("clickhouse")?, + SocketAddrV6::new( + Ipv6Addr::LOCALHOST, + get_keeper_server_port(KeeperId(1)), + 0, + 0, + ), ) - .with_log(log.clone()); + .with_log(log); let lgif = clickhouse_cli.lgif().await.unwrap(); // The first log index from a newly 
created cluster should always be 1 assert_eq!(lgif.first_log_idx, 1); - info!(&log, "Cleaning up test"); - deployment.teardown()?; - std::fs::remove_dir_all(path)?; - logctx.cleanup_successful(); Ok(()) } #[tokio::test] async fn test_raft_config_parsing() -> anyhow::Result<()> { - let logctx = test_setup_log("test_raft_config_parsing"); - let log = logctx.log.clone(); - - let (parent_dir, prefix) = log_prefix_for_test(logctx.test_name()); - let path = parent_dir.join(format!("{prefix}-oximeter-clickward-test")); - std::fs::create_dir(&path)?; - - // We spin up several replicated clusters and must use a - // separate set of ports in case the tests run concurrently. - let base_ports = BasePorts { - keeper: 29500, - raft: 29600, - clickhouse_tcp: 29700, - clickhouse_http: 29800, - clickhouse_interserver_http: 29900, - }; - - let config = DeploymentConfig { - path: path.clone(), - base_ports, - cluster_name: "oximeter_cluster".to_string(), - }; - - let mut deployment = Deployment::new(config); - - let num_keepers = 3; - let num_replicas = 1; - deployment - .generate_config(num_keepers, num_replicas) - .context("failed to generate config")?; - deployment.deploy().context("failed to deploy")?; - - wait_for_keepers( - &log, - &deployment, - (1..=num_keepers).map(clickward::KeeperId).collect(), - ) - .await?; + let log = log(); let clickhouse_cli = ClickhouseCli::new( Utf8PathBuf::from_str("clickhouse").unwrap(), - SocketAddrV6::new(Ipv6Addr::LOCALHOST, 29501, 0, 0), + SocketAddrV6::new( + Ipv6Addr::LOCALHOST, + get_keeper_server_port(KeeperId(1)), + 0, + 0, + ), ) - .with_log(log.clone()); + .with_log(log); let raft_config = clickhouse_cli.raft_config().await.unwrap(); let mut keeper_servers = BTreeSet::new(); + let num_keepers = 3; for i in 1..=num_keepers { - let raft_port = u16::try_from(29600 + i).unwrap(); + let raft_port = get_keeper_raft_port(KeeperId(i)); keeper_servers.insert(KeeperServerInfo { server_id: clickhouse_admin_types::KeeperId(i), host: 
ClickhouseHost::Ipv6("::1".parse().unwrap()), @@ -139,121 +98,54 @@ async fn test_raft_config_parsing() -> anyhow::Result<()> { assert_eq!(raft_config, expected_raft_config); - info!(&log, "Cleaning up test"); - deployment.teardown()?; - std::fs::remove_dir_all(path)?; - logctx.cleanup_successful(); Ok(()) } #[tokio::test] async fn test_keeper_conf_parsing() -> anyhow::Result<()> { - let logctx = test_setup_log("test_keeper_conf_parsing"); - let log = logctx.log.clone(); - - let (parent_dir, prefix) = log_prefix_for_test(logctx.test_name()); - let path = parent_dir.join(format!("{prefix}-oximeter-clickward-test")); - std::fs::create_dir(&path)?; - - // We spin up several replicated clusters and must use a - // separate set of ports in case the tests run concurrently. - let base_ports = BasePorts { - keeper: 30000, - raft: 30100, - clickhouse_tcp: 30200, - clickhouse_http: 30300, - clickhouse_interserver_http: 30400, - }; - - let config = DeploymentConfig { - path: path.clone(), - base_ports, - cluster_name: "oximeter_cluster".to_string(), - }; - - let mut deployment = Deployment::new(config); - - // We only need a single keeper to test the conf command - let num_keepers = 1; - let num_replicas = 1; - deployment - .generate_config(num_keepers, num_replicas) - .context("failed to generate config")?; - deployment.deploy().context("failed to deploy")?; - - wait_for_keepers(&log, &deployment, vec![clickward::KeeperId(1)]).await?; + let log = log(); let clickhouse_cli = ClickhouseCli::new( Utf8PathBuf::from_str("clickhouse").unwrap(), - SocketAddrV6::new(Ipv6Addr::LOCALHOST, 30001, 0, 0), + SocketAddrV6::new( + Ipv6Addr::LOCALHOST, + get_keeper_server_port(KeeperId(1)), + 0, + 0, + ), ) - .with_log(log.clone()); + .with_log(log); let conf = clickhouse_cli.keeper_conf().await.unwrap(); - assert_eq!(conf.server_id, clickhouse_admin_types::KeeperId(1)); + assert_eq!(conf.server_id, KeeperId(1)); - info!(&log, "Cleaning up test"); - deployment.teardown()?; - 
std::fs::remove_dir_all(path)?; - logctx.cleanup_successful(); Ok(()) } #[tokio::test] async fn test_keeper_cluster_membership() -> anyhow::Result<()> { - let logctx = test_setup_log("test_keeper_cluster_membership"); - let log = logctx.log.clone(); - - let (parent_dir, prefix) = log_prefix_for_test(logctx.test_name()); - let path = parent_dir.join(format!("{prefix}-oximeter-clickward-test")); - std::fs::create_dir(&path)?; - - // We spin up several replicated clusters and must use a - // separate set of ports in case the tests run concurrently. - let base_ports = BasePorts { - keeper: 30500, - raft: 30600, - clickhouse_tcp: 30700, - clickhouse_http: 30800, - clickhouse_interserver_http: 30900, - }; - - let config = DeploymentConfig { - path: path.clone(), - base_ports, - cluster_name: "oximeter_cluster".to_string(), - }; - - let mut deployment = Deployment::new(config); - - let num_keepers = 3; - let num_replicas = 1; - deployment - .generate_config(num_keepers, num_replicas) - .context("failed to generate config")?; - deployment.deploy().context("failed to deploy")?; - - wait_for_keepers( - &log, - &deployment, - (1..=num_keepers).map(clickward::KeeperId).collect(), - ) - .await?; + let log = log(); let clickhouse_cli = ClickhouseCli::new( Utf8PathBuf::from_str("clickhouse").unwrap(), - SocketAddrV6::new(Ipv6Addr::LOCALHOST, 30501, 0, 0), + SocketAddrV6::new( + Ipv6Addr::LOCALHOST, + get_keeper_server_port(KeeperId(1)), + 0, + 0, + ), ) - .with_log(log.clone()); + .with_log(log); let keeper_cluster_membership = clickhouse_cli.keeper_cluster_membership().await.unwrap(); let mut raft_config = BTreeSet::new(); + let num_keepers = 3; for i in 1..=num_keepers { - raft_config.insert(clickhouse_admin_types::KeeperId(i)); + raft_config.insert(KeeperId(i)); } let expected_keeper_cluster_membership = @@ -273,9 +165,19 @@ async fn test_keeper_cluster_membership() -> anyhow::Result<()> { expected_keeper_cluster_membership.raft_config ); - info!(&log, "Cleaning up test"); + 
Ok(()) +} + +#[tokio::test] +async fn test_teardown() -> anyhow::Result<()> { + let (logctx, path) = default_clickhouse_log_ctx_and_path(); + + info!(&logctx.log, "Tearing down ClickHouse cluster"; "path" => ?path); + + let deployment = default_clickhouse_cluster_test_deployment(path.clone()); deployment.teardown()?; std::fs::remove_dir_all(path)?; logctx.cleanup_successful(); + Ok(()) } diff --git a/dev-tools/clickhouse-cluster-dev/Cargo.toml b/dev-tools/clickhouse-cluster-dev/Cargo.toml new file mode 100644 index 0000000000..2e23b7d52c --- /dev/null +++ b/dev-tools/clickhouse-cluster-dev/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "clickhouse-cluster-dev" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" +readme = "README.md" + +[lints] +workspace = true + +[dependencies] +anyhow.workspace = true +clickhouse-admin-test-utils.workspace = true +clickward.workspace = true +slog.workspace = true +tokio.workspace = true +omicron-workspace-hack.workspace = true +oximeter-db.workspace = true +oximeter-test-utils.workspace = true diff --git a/dev-tools/clickhouse-cluster-dev/src/main.rs b/dev-tools/clickhouse-cluster-dev/src/main.rs new file mode 100644 index 0000000000..54714ac368 --- /dev/null +++ b/dev-tools/clickhouse-cluster-dev/src/main.rs @@ -0,0 +1,62 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Sets up a 3 keeper 2 replica ClickHouse cluster for clickhouse-admin +//! integration tests. +//! +//! NB: This should only be used for testing that doesn't write data to +//! ClickHouse. Otherwise, it may result in flaky tests. 
+ +use anyhow::{Context, Result}; +use clickhouse_admin_test_utils::{ + default_clickhouse_cluster_test_deployment, + default_clickhouse_log_ctx_and_path, +}; +use clickward::KeeperId; +use oximeter_db::Client; +use oximeter_test_utils::{wait_for_keepers, wait_for_ping}; +use std::time::Duration; + +#[tokio::main] +async fn main() -> Result<()> { + let request_timeout = Duration::from_secs(15); + let (logctx, path) = default_clickhouse_log_ctx_and_path(); + std::fs::create_dir(&path)?; + + slog::info!(logctx.log, "Setting up a ClickHouse cluster"); + + let mut deployment = + default_clickhouse_cluster_test_deployment(path.clone()); + + let num_keepers = 3; + let num_replicas = 2; + deployment + .generate_config(num_keepers, num_replicas) + .context("failed to generate config")?; + deployment.deploy().context("failed to deploy")?; + + let client1 = Client::new_with_request_timeout( + deployment.http_addr(1.into()), + deployment.native_addr(1.into()), + &logctx.log, + request_timeout, + ); + let client2 = Client::new_with_request_timeout( + deployment.http_addr(2.into()), + deployment.native_addr(2.into()), + &logctx.log, + request_timeout, + ); + + wait_for_ping(&logctx.log, &client1).await?; + wait_for_ping(&logctx.log, &client2).await?; + wait_for_keepers( + &logctx.log, + &deployment, + (1..=num_keepers).map(KeeperId).collect(), + ) + .await?; + + Ok(()) +} diff --git a/oximeter/db/tests/integration_test.rs b/oximeter/db/tests/integration_test.rs index b34c962881..14246b6345 100644 --- a/oximeter/db/tests/integration_test.rs +++ b/oximeter/db/tests/integration_test.rs @@ -455,6 +455,7 @@ async fn wait_for_num_points( Ok(()) } +// TODO: Use the function in the other package /// Try to ping the server until it responds. 
async fn wait_for_ping(log: &Logger, client: &Client) -> anyhow::Result<()> { poll::wait_for_condition( diff --git a/oximeter/test-utils/Cargo.toml b/oximeter/test-utils/Cargo.toml index 0bff56583e..adca1a51c8 100644 --- a/oximeter/test-utils/Cargo.toml +++ b/oximeter/test-utils/Cargo.toml @@ -13,6 +13,7 @@ chrono.workspace = true clickward.workspace = true omicron-workspace-hack.workspace = true omicron-test-utils.workspace = true +oximeter-db.workspace = true oximeter-macro-impl.workspace = true oximeter-types.workspace = true slog.workspace =true diff --git a/oximeter/test-utils/src/lib.rs b/oximeter/test-utils/src/lib.rs index 02f928abc0..01d32576b0 100644 --- a/oximeter/test-utils/src/lib.rs +++ b/oximeter/test-utils/src/lib.rs @@ -17,6 +17,7 @@ extern crate self as oximeter; use anyhow::Context; use clickward::{Deployment, KeeperClient, KeeperError, KeeperId}; use omicron_test_utils::dev::poll; +use oximeter_db::Client; use oximeter_macro_impl::{Metric, Target}; use oximeter_types::histogram; use oximeter_types::histogram::{Histogram, Record}; @@ -185,6 +186,29 @@ pub async fn wait_for_keepers( Ok(()) } +/// Try to ping the server until it responds. +pub async fn wait_for_ping( + log: &Logger, + client: &Client, +) -> anyhow::Result<()> { + poll::wait_for_condition( + || async { + client + .ping() + .await + .map_err(|_| poll::CondCheckError::<oximeter_db::Error>::NotYet) + }, + &Duration::from_millis(100), + &Duration::from_secs(30), + ) + .await + .with_context(|| { + format!("failed to ping clickhouse server: {}", client.url()) + })?; + info!(log, "Clickhouse server ready: {}", client.url()); + Ok(()) +} + #[cfg(test)] mod tests { use chrono::Utc;