Skip to content

Commit 66219a9

Browse files
authored
Merge pull request #682 from casparvl/fix_missing_num_cores_per_numa_node_in_test_step
Use ReFrame's CPU autodetect in test step
2 parents 9ce35c2 + c6e0cc2 commit 66219a9

File tree

2 files changed

+7
-42
lines changed

2 files changed

+7
-42
lines changed

reframe_config_bot.py.tmpl

Lines changed: 2 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -15,19 +15,13 @@ site_configuration = {
1515
'modules_system': 'lmod',
1616
'partitions': [
1717
{
18-
'name': 'default',
18+
'name': '__RFM_PARTITION__',
1919
'scheduler': 'local',
2020
'launcher': 'mpirun',
2121
'environs': ['default'],
2222
'features': [
2323
FEATURES[CPU]
2424
] + list(SCALES.keys()),
25-
'processor': {
26-
'num_cpus': __NUM_CPUS__,
27-
'num_sockets': __NUM_SOCKETS__,
28-
'num_cpus_per_core': __NUM_CPUS_PER_CORE__,
29-
'num_cpus_per_socket': __NUM_CPUS_PER_SOCKET__,
30-
},
3125
'resources': [
3226
{
3327
'name': 'memory',
@@ -56,8 +50,7 @@ site_configuration = {
5650
{
5751
'purge_environment': True,
5852
'resolve_module_conflicts': False, # avoid loading the module before submitting the job
59-
# disable automatic detection of CPU architecture (since we're using local scheduler)
60-
'remote_detect': False,
53+
'remote_detect': True,
6154
}
6255
],
6356
'logging': common_logging_config(),

test_suite.sh

Lines changed: 5 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -141,34 +141,6 @@ export RFM_PREFIX=$PWD/reframe_runs
141141
echo "Configured reframe with the following environment variables:"
142142
env | grep "RFM_"
143143

144-
# Inject correct CPU/memory properties into the ReFrame config file
145-
echo "Collecting system-specific input for the ReFrame configuration file"
146-
cpuinfo=$(lscpu)
147-
if [[ "${cpuinfo}" =~ CPU\(s\):[^0-9]*([0-9]+) ]]; then
148-
cpu_count=${BASH_REMATCH[1]}
149-
echo "Detected CPU count: ${cpu_count}"
150-
else
151-
fatal_error "Failed to get the number of CPUs for the current test hardware with lscpu."
152-
fi
153-
if [[ "${cpuinfo}" =~ Socket\(s\):[^0-9]*([0-9]+) ]]; then
154-
socket_count=${BASH_REMATCH[1]}
155-
echo "Detected socket count: ${socket_count}"
156-
else
157-
fatal_error "Failed to get the number of sockets for the current test hardware with lscpu."
158-
fi
159-
if [[ "${cpuinfo}" =~ (Thread\(s\) per core:[^0-9]*([0-9]+)) ]]; then
160-
threads_per_core=${BASH_REMATCH[2]}
161-
echo "Detected threads per core: ${threads_per_core}"
162-
else
163-
fatal_error "Failed to get the number of threads per core for the current test hardware with lscpu."
164-
fi
165-
if [[ "${cpuinfo}" =~ (Core\(s\) per socket:[^0-9]*([0-9]+)) ]]; then
166-
cores_per_socket=${BASH_REMATCH[2]}
167-
echo "Detected cores per socket: ${cores_per_socket}"
168-
else
169-
fatal_error "Failed to get the number of cores per socket for the current test hardware with lscpu."
170-
fi
171-
172144
# The /sys inside the container is not the same as the /sys of the host
173145
# We want to extract the memory limit from the cgroup on the host (which is typically set by SLURM).
174146
# Thus, bot/test.sh bind-mounts the host's /sys/fs/cgroup into /hostsys/fs/cgroup
@@ -201,13 +173,13 @@ else
201173
fi
202174
echo "Detected available memory: ${cgroup_mem_mib} MiB"
203175

204-
echo "Replacing detected system information in template ReFrame config file..."
205176
cp ${RFM_CONFIG_FILE_TEMPLATE} ${RFM_CONFIG_FILES}
206-
sed -i "s/__NUM_CPUS__/${cpu_count}/g" $RFM_CONFIG_FILES
207-
sed -i "s/__NUM_SOCKETS__/${socket_count}/g" $RFM_CONFIG_FILES
208-
sed -i "s/__NUM_CPUS_PER_CORE__/${threads_per_core}/g" $RFM_CONFIG_FILES
209-
sed -i "s/__NUM_CPUS_PER_SOCKET__/${cores_per_socket}/g" $RFM_CONFIG_FILES
177+
echo "Replacing memory limit in the ReFrame config file with the detected CGROUP memory limit: ${cgroup_mem_mib} MiB"
210178
sed -i "s/__MEM_PER_NODE__/${cgroup_mem_mib}/g" $RFM_CONFIG_FILES
179+
RFM_PARTITION="${SLURM_JOB_PARTITION}"
180+
echo "Replacing partition name in the template ReFrame config file: ${RFM_PARTITION}"
181+
sed -i "s/__RFM_PARTITION__/${RFM_PARTITION}/g" $RFM_CONFIG_FILES
182+
211183
# Make debugging easier by printing the final config file:
212184
echo "Final config file (after replacements):"
213185
cat "${RFM_CONFIG_FILES}"

0 commit comments

Comments
 (0)