Skip to content

Commit c7360eb

Browse files
committed
undo phoenix stuff
1 parent 93e0fac commit c7360eb

File tree

5 files changed

+200
-1
lines changed

5 files changed

+200
-1
lines changed

.github/workflows/bench.yml

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,43 @@ on:
66
workflow_dispatch:
77

88
jobs:
9+
file-changes:
10+
name: Detect File Changes
11+
runs-on: 'ubuntu-latest'
12+
outputs:
13+
checkall: ${{ steps.changes.outputs.checkall }}
14+
steps:
15+
- name: Clone
16+
uses: actions/checkout@v4
17+
18+
- name: Detect Changes
19+
uses: dorny/paths-filter@v3
20+
id: changes
21+
with:
22+
filters: ".github/file-filter.yml"
23+
924
self:
1025
name: ${{ matrix.name }} (${{ matrix.device }})
11-
if: github.repository == 'MFlowCode/MFC'
26+
# The whole condition is wrapped in a single ${{ }} expression. Mixing bare
# expression text with an embedded ${{ }} makes the value a template string
# instead of a boolean, so the job would not be gated as intended.
# NOTE(review): with the condition now effective, runs whose event carries no
# approved review (e.g. workflow_dispatch) are skipped - confirm this is intended.
if: ${{ github.repository == 'MFlowCode/MFC' && needs.file-changes.outputs.checkall == 'true' && github.event.review.state == 'approved' }}
27+
needs: file-changes
1228
strategy:
1329
fail-fast: false
1430
matrix:
1531
include:
32+
- cluster: phoenix
33+
name: Georgia Tech | Phoenix (NVHPC)
34+
group: phoenix
35+
labels: gt
36+
flag: p
37+
device: cpu
38+
build_script: ""
39+
- cluster: phoenix
40+
name: Georgia Tech | Phoenix (NVHPC)
41+
group: phoenix
42+
labels: gt
43+
flag: p
44+
device: gpu
45+
build_script: ""
1646
- cluster: frontier
1747
name: Oak Ridge | Frontier (CCE)
1848
group: phoenix

.github/workflows/phoenix/bench.sh

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#!/bin/bash
#
# Runs `./mfc.sh bench` with the phoenix-bench configuration.
#
# Reads two variables that this script does not set itself:
#   job_device - "gpu" selects the GPU configuration; any other value
#                (including unset) selects the CPU configuration
#   job_slug   - base name of the YAML results file ("$job_slug.yaml")
#
# ./mfc.sh is invoked by relative path, so the working directory must
# contain it.

# Prints the value passed to `mfc.sh bench --mem` for the device named in $1:
# 12 for "gpu", 1 for anything else.
bench_mem_for_device() {
    if [ "$1" == "gpu" ]; then
        echo 12
    else
        echo 1
    fi
}

n_ranks=12
device_opts=""  # stays empty on the CPU path instead of leaking in from outside

if [ "$job_device" == "gpu" ]; then
    n_ranks=$(nvidia-smi -L | wc -l)          # number of GPUs on node
    gpu_ids=$(seq -s ' ' 0 $((n_ranks - 1)))  # 0 1 2 ... gpu_count-1
    device_opts="--gpu -g $gpu_ids"
fi

# Point TMPDIR at scratch storage for the duration of the benchmark run.
mkdir -p /storage/scratch1/6/sbryngelson3/mytmp_build
export TMPDIR=/storage/scratch1/6/sbryngelson3/mytmp_build

# $device_opts is deliberately unquoted: it holds several separate arguments.
./mfc.sh bench --mem "$(bench_mem_for_device "$job_device")" -j "$(nproc)" \
    -o "$job_slug.yaml" -- -c phoenix-bench $device_opts -n "$n_ranks"

unset TMPDIR
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
#!/bin/bash
#
# Wraps a shell script in a Slurm batch job and submits it with `sbatch`.
#
# Arguments:
#   $1 - path to the script whose contents become the body of the job
#   $2 - "cpu" or "gpu"; selects the #SBATCH resource directives and is
#        passed on to the job as job_device and to `mfc.sh load -m`
#
# The job is submitted with `#SBATCH -W`, so this script does not return
# until the job terminates. Job output goes to "<job_slug>.out".
# Exits 1 (after printing usage or an error) on bad arguments.

set -e

usage() {
    echo "Usage: $0 [script.sh] [cpu|gpu]" >&2
}

if [ -z "$1" ]; then
    usage
    exit 1
fi

if [ ! -r "$1" ]; then
    echo "$0: cannot read script '$1'" >&2
    exit 1
fi

# The script text is pasted verbatim into the generated job script below.
sbatch_script_contents=$(cat -- "$1")

# The trailing backslashes keep the values free of leading/trailing newlines.
sbatch_cpu_opts="\
#SBATCH -p cpu-small # partition
#SBATCH --ntasks-per-node=24 # Number of cores per node required
#SBATCH --mem-per-cpu=2G # Memory per core\
"

sbatch_gpu_opts="\
#SBATCH -CL40S
#SBATCH --ntasks-per-node=4 # Number of cores per node required
#SBATCH -G2\
"

case "$2" in
    cpu) sbatch_device_opts="$sbatch_cpu_opts" ;;
    gpu) sbatch_device_opts="$sbatch_gpu_opts" ;;
    *)
        usage
        exit 1
        ;;
esac

# Script base name without ".sh", every non-alphanumeric character replaced
# by "-", then suffixed with the device.
job_slug="$(basename -- "$1" | sed -e 's/\.sh$//' -e 's/[^a-zA-Z0-9]/-/g')-$2"

# The heredoc is unquoted, so unescaped $... is expanded here at submission
# time. Anything escaped with a backslash (\$SLURM_SUBMIT_DIR, \$(pwd)) is
# left for the job itself to evaluate.
sbatch <<EOT
#!/bin/bash
#SBATCH -Jshb-$job_slug # Job name
#SBATCH --account=gts-sbryngelson3 # charge account
#SBATCH -N1 # Number of nodes required
$sbatch_device_opts
#SBATCH -t 02:00:00 # Duration of the job (Ex: 15 mins)
#SBATCH -q embers # QOS Name
#SBATCH -o$job_slug.out # Combined output and error messages file
#SBATCH -W # Do not exit until the submitted job terminates.

set -e
set -x

cd "\$SLURM_SUBMIT_DIR"
echo "Running in \$(pwd):"

job_slug="$job_slug"
job_device="$2"

. ./mfc.sh load -c p -m $2

$sbatch_script_contents

EOT
64+

.github/workflows/phoenix/submit.sh

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
#!/bin/bash
#
# Wraps a shell script in a Slurm batch job and submits it with `sbatch`.
#
# Arguments:
#   $1 - path to the script whose contents become the body of the job
#   $2 - "cpu" or "gpu"; selects the #SBATCH resource directives and is
#        passed on to the job as job_device and to `mfc.sh load -m`
#
# The job is submitted with `#SBATCH -W`, so this script does not return
# until the job terminates. Job output goes to "<job_slug>.out".
# Exits 1 (after printing usage or an error) on bad arguments.

set -e

usage() {
    echo "Usage: $0 [script.sh] [cpu|gpu]" >&2
}

if [ -z "$1" ]; then
    usage
    exit 1
fi

if [ ! -r "$1" ]; then
    echo "$0: cannot read script '$1'" >&2
    exit 1
fi

# The script text is pasted verbatim into the generated job script below.
sbatch_script_contents=$(cat -- "$1")

# The trailing backslashes keep the values free of leading/trailing newlines.
sbatch_cpu_opts="\
#SBATCH -p cpu-small # partition
#SBATCH --ntasks-per-node=24 # Number of cores per node required
#SBATCH --mem-per-cpu=2G # Memory per core\
"

sbatch_gpu_opts="\
#SBATCH -p gpu-v100,gpu-a100,gpu-h100,gpu-l40s
#SBATCH --ntasks-per-node=4 # Number of cores per node required
#SBATCH -G2\
"

case "$2" in
    cpu) sbatch_device_opts="$sbatch_cpu_opts" ;;
    gpu) sbatch_device_opts="$sbatch_gpu_opts" ;;
    *)
        usage
        exit 1
        ;;
esac

# Script base name without ".sh", every non-alphanumeric character replaced
# by "-", then suffixed with the device.
job_slug="$(basename -- "$1" | sed -e 's/\.sh$//' -e 's/[^a-zA-Z0-9]/-/g')-$2"

# The heredoc is unquoted, so unescaped $... is expanded here at submission
# time. Anything escaped with a backslash (\$SLURM_SUBMIT_DIR, \$(pwd)) is
# left for the job itself to evaluate.
sbatch <<EOT
#!/bin/bash
#SBATCH -Jshb-$job_slug # Job name
#SBATCH --account=gts-sbryngelson3 # charge account
#SBATCH -N1 # Number of nodes required
$sbatch_device_opts
#SBATCH -t 03:00:00 # Duration of the job (Ex: 15 mins)
#SBATCH -q embers # QOS Name
#SBATCH -o$job_slug.out # Combined output and error messages file
#SBATCH -W # Do not exit until the submitted job terminates.

set -e
set -x

cd "\$SLURM_SUBMIT_DIR"
echo "Running in \$(pwd):"

job_slug="$job_slug"
job_device="$2"

. ./mfc.sh load -c p -m $2

$sbatch_script_contents

EOT
64+

.github/workflows/phoenix/test.sh

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#!/bin/bash
#
# Runs the MFC test suite with the "phoenix" configuration: first a
# `--dry-run` pass, then the real run with up to 3 attempts.
#
# Reads one variable that this script does not set itself:
#   job_device - "gpu" adds the GPU options; any other value (including
#                unset) runs the CPU configuration
#
# ./mfc.sh is invoked by relative path, so the working directory must
# contain it.

build_opts=""
device_opts=""  # stays empty on the CPU path instead of leaking in from outside
n_test_threads=8

if [ "$job_device" == "gpu" ]; then
    build_opts="--gpu"
fi

# $build_opts is deliberately unquoted so an empty value adds no argument.
./mfc.sh test --dry-run -j 8 $build_opts

if [ "$job_device" == "gpu" ]; then
    gpu_count=$(nvidia-smi -L | wc -l)          # number of GPUs on node
    gpu_ids=$(seq -s ' ' 0 $((gpu_count - 1)))  # 0 1 2 ... gpu_count-1
    device_opts="-g $gpu_ids"
    # Shell arithmetic instead of `expr`: expr exits with status 1 when the
    # result is 0, which would abort a caller running under `set -e`.
    n_test_threads=$((gpu_count * 2))
fi

# $device_opts is deliberately unquoted: it holds several separate arguments.
./mfc.sh test --max-attempts 3 -a -j "$n_test_threads" $device_opts -- -c phoenix
20+
21+

0 commit comments

Comments
 (0)