From 450d793c3334338c511f51c5aa03df3630b524f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5vard=20Berland?= Date: Thu, 23 Jan 2025 12:16:51 +0100 Subject: [PATCH] Install correct dependency and adjust slurm config Dramatic speedup in test time (~10-fold) --- .github/workflows/test_ert_with_slurm.yml | 9 ++++++--- tests/ert/unit_tests/scheduler/test_slurm_driver.py | 6 +----- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/.github/workflows/test_ert_with_slurm.yml b/.github/workflows/test_ert_with_slurm.yml index e90d430aaac..80041c70bad 100644 --- a/.github/workflows/test_ert_with_slurm.yml +++ b/.github/workflows/test_ert_with_slurm.yml @@ -34,7 +34,7 @@ jobs: run: | set -e - sudo apt install slurmd slurmctld -y + sudo apt install libpmix-dev slurmd slurmctld -y sudo mkdir /var/spool/slurm sudo chown slurm /var/spool/slurm @@ -43,9 +43,11 @@ jobs: ClusterName=localcluster SlurmUser=slurm SlurmctldHost=localhost + SchedulerType=sched/builtin # Avoid default backfill scheduler which adds delays SelectType=select/cons_tres # Select nodes based on consumable resources SelectTypeParameters=CR_Core # Cores are the consumable resource StateSaveLocation=/var/spool/slurm + PriorityType=priority/basic # Tests depend on FIFO scheduling ProctrackType=proctrack/linuxproc # Use /proc to track processes PartitionName=LocalQ Nodes=ALL Default=YES MaxTime=INFINITE State=UP PartitionName=AlternativeQ Nodes=ALL Default=YES MaxTime=INFINITE State=UP @@ -59,12 +61,13 @@ jobs: sudo mv slurm.conf /etc/slurm/ sudo systemctl start slurmd # The compute node slurm daemon sudo systemctl start slurmctld # The slurm controller daemon - + sleep 1 systemctl status slurmd systemctl status slurmctld # Show partition and node information configured: sinfo + scontrol show nodes - name: Verify slurm cluster works # Timeout is set low to catch a misconfigured cluster where srun will hang. @@ -77,7 +80,7 @@ jobs: run: | set -e export _ERT_TESTS_ALTERNATIVE_QUEUE=AlternativeQ - pytest tests/ert/unit_tests/scheduler/test_{generic,slurm}_driver.py --slurm \ + pytest tests/ert/unit_tests/scheduler/test_{generic,slurm}_driver.py -sv --slurm \ -n 8 --durations=10 -k "not (LsfDriver or LocalDriver or OpenPBSDriver)" scontrol show job diff --git a/tests/ert/unit_tests/scheduler/test_slurm_driver.py b/tests/ert/unit_tests/scheduler/test_slurm_driver.py index 0152608ca63..83a3cdbf7fe 100644 --- a/tests/ert/unit_tests/scheduler/test_slurm_driver.py +++ b/tests/ert/unit_tests/scheduler/test_slurm_driver.py @@ -373,11 +373,7 @@ async def test_kill_before_submit_is_finished( ): os.chdir(tmp_path) - if pytestconfig.getoption("slurm"): - # Allow more time when tested on a real compute cluster to avoid false positives. - job_kill_window = 5 - test_grace_time = 10 - elif sys.platform.startswith("darwin"): + if sys.platform.startswith("darwin"): # Mitigate flakiness on low-power test nodes job_kill_window = 5 test_grace_time = 10