diff --git a/mgmt_cli_test.py b/mgmt_cli_test.py index 726bddfad24..9f950ebecde 100644 --- a/mgmt_cli_test.py +++ b/mgmt_cli_test.py @@ -28,7 +28,6 @@ import boto3 import yaml -from docker.errors import InvalidArgument from invoke import exceptions @@ -46,6 +45,7 @@ from sdcm.nemesis import MgmtRepair from sdcm.utils.adaptive_timeouts import adaptive_timeout, Operations from sdcm.utils.common import reach_enospc_on_node, clean_enospc_on_node +from sdcm.utils.decorators import latency_calculator_decorator from sdcm.utils.issues import SkipPerIssues from sdcm.utils.loader_utils import LoaderUtilsMixin from sdcm.utils.time_utils import ExecutionTimer @@ -1656,7 +1656,7 @@ def report_to_argus(self, report_type: ManagerReportType, data: dict, label: str table = ManagerBackupBenchmarkResult(sut_timestamp=mgmt.get_scylla_manager_tool( manager_node=self.monitors.nodes[0]).sctool.client_version_timestamp) else: - raise InvalidArgument("Unknown report type") + raise ValueError("Unknown report type") for key, value in data.items(): table.add_result(column=key, value=value, row=label, status=Status.UNSET) @@ -1678,53 +1678,35 @@ def create_backup_and_report(self, mgr_cluster, label: str): self.report_to_argus(ManagerReportType.BACKUP, backup_report, label) return task - def run_read_stress_and_report(self, label): - stress_queue = [] - - for command in self.params.get('stress_read_cmd'): - stress_queue.append(self.run_stress_thread(command, round_robin=True, stop_test_on_failure=False)) + @latency_calculator_decorator + def mixed_latency_load(self): + stress_load = self.run_stress_thread(self.params.get('stress_cmd')) + self.get_stress_results(queue=stress_load) - with ExecutionTimer() as stress_timer: - for stress in stress_queue: - assert self.verify_stress_thread(cs_thread_pool=stress), "Read stress command" - InfoEvent(message=f'Read stress duration: {stress_timer.duration}s.').publish() - - read_stress_report = { - "read time": int(stress_timer.duration.total_seconds()), - } - self.report_to_argus(ManagerReportType.READ, read_stress_report, label) - - def test_backup_benchmark(self): + def test_backup_benchmark_mixed(self): self.log.info("Executing test_backup_restore_benchmark...") self.log.info("Write data to table") self.run_prepare_write_cmd() - self.log.info("Disable clusterwide compaction") - compaction_ops = CompactionOps(cluster=self.db_cluster) - # Disable keyspace autocompaction cluster-wide since we dont want it to interfere with our restore timing - for node in self.db_cluster.nodes: - compaction_ops.disable_autocompaction_on_ks_cf(node=node) - manager_tool = mgmt.get_scylla_manager_tool(manager_node=self.monitors.nodes[0]) mgr_cluster = self.ensure_and_get_cluster(manager_tool) + self.log.info("Run read test") + self.mixed_latency_load() + self.log.info("Create and report backup time") backup_task = self.create_backup_and_report(mgr_cluster, "Backup") self.log.info("Remove backup") backup_task.delete_backup_snapshot() - self.log.info("Run read test") - self.run_read_stress_and_report("Read stress") - self.log.info("Create and report backup time during read stress") backup_thread = threading.Thread(target=self.create_backup_and_report, kwargs={"mgr_cluster": mgr_cluster, "label": "Backup during read stress"}) - read_stress_thread = threading.Thread(target=self.run_read_stress_and_report, - kwargs={"label": "Read stress during backup"}) + read_stress_thread = threading.Thread(target=self.mixed_latency_load) backup_thread.start() read_stress_thread.start() diff --git a/test-cases/manager/manager-backup-restore-baseline.yaml b/test-cases/manager/manager-backup-restore-baseline.yaml new file mode 100644 index 00000000000..d239a069098 --- /dev/null +++ b/test-cases/manager/manager-backup-restore-baseline.yaml @@ -0,0 +1,27 @@ +test_duration: 120 + +round_robin: true + +# 100GB dataset +prepare_write_cmd: [ "cassandra-stress write cl=ALL n=26214400 -schema 'replication(strategy=NetworkTopologyStrategy,replication_factor=3)' -mode cql3 native -rate threads=500 -col 'size=FIXED(1024) n=FIXED(1)' -pop seq=1..26214400", + "cassandra-stress write cl=ALL n=26214400 -schema 'replication(strategy=NetworkTopologyStrategy,replication_factor=3)' -mode cql3 native -rate threads=500 -col 'size=FIXED(1024) n=FIXED(1)' -pop seq=26214401..52428800", + "cassandra-stress write cl=ALL n=26214400 -schema 'replication(strategy=NetworkTopologyStrategy,replication_factor=3)' -mode cql3 native -rate threads=500 -col 'size=FIXED(1024) n=FIXED(1)' -pop seq=52428801..78643200", + "cassandra-stress write cl=ALL n=26214400 -schema 'replication(strategy=NetworkTopologyStrategy,replication_factor=3)' -mode cql3 native -rate threads=500 -col 'size=FIXED(1024) n=FIXED(1)' -pop seq=78643201..104857600" ] + +stress_cmd: "cassandra-stress mixed cl=QUORUM duration=10m -schema 'replication(strategy=NetworkTopologyStrategy,replication_factor=3)' -mode cql3 native -rate 'threads=100 fixed=50000/s' -col 'size=FIXED(1024) n=FIXED(1)'" + + +instance_type_db: 'i4i.4xlarge' +instance_type_loader: 'c6i.xlarge' + +region_name: us-east-1 +n_db_nodes: 3 +n_loaders: 4 +n_monitor_nodes: 1 + +post_behavior_db_nodes: "destroy" +post_behavior_loader_nodes: "destroy" +post_behavior_monitor_nodes: "destroy" + +user_prefix: 'manager-backup-restore-baseline' +use_hdr_cs_histogram: true