Skip to content

Commit

Permalink
test cleanup
Browse files Browse the repository at this point in the history
Signed-off-by: Terry Kong <[email protected]>
  • Loading branch information
terrykong committed Nov 7, 2024
1 parent 96b4f01 commit 8e06e03
Show file tree
Hide file tree
Showing 10 changed files with 75 additions and 20 deletions.
4 changes: 3 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -131,17 +131,19 @@ git fetch -a
# 0deaf6716cb4f20766c995ce25d129795f1ae200: fix[export]: update API for disabling device reassignment in TRTLLM for Aligner NeMo#10863
# (superseded by 10863) 148543d6e9c66ff1f8562e84484448202249811d: feat: Migrate GPTSession refit path in Nemo export to ModelRunner for Aligner NeMo#10654
# ba8edbd2063f3349c40c9c73e5bae46abbe65f94: fix: regular torch optims (e.g., sgd) no longer error with closure spec NeMo#11189
# 35a7f718237cf011215db9e92273ed7236d0e8b1: Fix for crash with LoRA + tp_overlap_comm=false + sequence_parallel=true NeMo#10920
# Apply a fixed list of NeMo pull requests on top of the current HEAD.
# Each list entry is a single string: "<PR number> <head commit of that PR>".
# NOTE(review): the 10920 entry below uses commit 53cf6527..., while the
# comment list just before this loop names 35a7f718... for NeMo#10920 --
# confirm which hash is the intended one.
for pr_and_commit in \
"10651 0c92fe17df4642ffc33d5d8c0c83fda729e3910c" \
"10652 60e677423667c029dd05875da72bf0719774f844" \
"10863 0deaf6716cb4f20766c995ce25d129795f1ae200" \
"11189 ba8edbd2063f3349c40c9c73e5bae46abbe65f94" \
"10920 53cf6527571b29379188c8bb0dba8e507db3cca1" \
; do
# Split the entry on the space: field 1 is the PR number, field 2 the commit.
pr=$(cut -f1 -d' ' <<<"$pr_and_commit")
head_pr_commit=$(cut -f2 -d' ' <<<"$pr_and_commit")
# Fetch that commit from origin into a local branch named PR-<pr>.
git fetch origin $head_pr_commit:PR-${pr}
# Cherry-pick every commit between the merge base with origin/main and the
# top of the PR branch; --allow-empty keeps going when a pick yields no change.
# NOTE(review): two cherry-pick invocations over the same range appear here
# (without and with "-m 1"); they look like the removed/added pair of this
# commit's diff -- confirm that only the "-m 1" form is meant to run.
# "-m 1" selects parent 1 as the mainline when a picked commit is a merge.
git cherry-pick --allow-empty $(git merge-base origin/main PR-${pr})..PR-${pr}
git cherry-pick -m 1 --allow-empty $(git merge-base origin/main PR-${pr})..PR-${pr}
# Tag the resulting HEAD as cherry-pick-PR-<pr> to help identify what was applied.
git tag cherry-pick-PR-${pr}
done
Expand Down
Empty file modified tests/functional/dpo.sh
100644 → 100755
Empty file.
Empty file modified tests/functional/ppo.sh
100644 → 100755
Empty file.
Empty file modified tests/functional/rm.sh
100644 → 100755
Empty file.
7 changes: 5 additions & 2 deletions tests/functional/test_cases/dpo-llama3
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#!/bin/bash
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.

# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -19,4 +20,6 @@ cd $SCRIPT_DIR
set -eoux pipefail

PRETRAINED_CHECKPOINT_NEMO_FILE=${ALIGNER_CI_DIR}/checkpoints/tiny-llama3-results-nlayers2-hidden128-ffn448-nhead4-qgroup2-megatron_gpt.nemo \
bash ../dpo.sh
bash ../dpo.sh \
++model.optim.name=mcore_distributed_optim \
2>&1 | tee $(basename $0).log
19 changes: 17 additions & 2 deletions tests/functional/test_cases/dpo-mixtral-ep
Original file line number Diff line number Diff line change
@@ -1,12 +1,27 @@
#!/bin/bash

# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Resolve the directory containing this script and make it the working
# directory, so the relative ../dpo.sh path below resolves from here.
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
cd $SCRIPT_DIR

# -e: exit on error, -u: error on unset variables, -x: trace commands,
# -o pipefail: a pipeline fails when any of its stages fails.
set -eoux pipefail

# Run ../dpo.sh with PRETRAINED_CHECKPOINT_NEMO_FILE set for that one command,
# passing the mcore_distributed_optim optimizer and expert_model_parallel_size=2
# overrides, and copy its combined stdout/stderr to a .log file via tee.
# NOTE(review): two checkpoint assignments and two tee lines appear below; they
# look like the removed/added pairs of this commit's diff (hard-coded home path
# vs. $ALIGNER_CI_DIR, "basename $0 .sh" vs. "basename $0") -- confirm that only
# the second line of each pair belongs in the script.
PRETRAINED_CHECKPOINT_NEMO_FILE=/home/terryk/saved_experiments/tiny-mixtral-nlayers2-hidden128-ffn448-nhead4-qgroup2.nemo \
PRETRAINED_CHECKPOINT_NEMO_FILE=$ALIGNER_CI_DIR/checkpoints/tiny-mixtral-nlayers2-hidden128-ffn448-nhead4-qgroup2.nemo \
bash ../dpo.sh \
++model.optim.name=mcore_distributed_optim \
++model.expert_model_parallel_size=2 \
2>&1 | tee $(basename $0 .sh).log
2>&1 | tee $(basename $0).log

31 changes: 31 additions & 0 deletions tests/functional/test_cases/dpo-mixtral-peft-tp-sp
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/bin/bash

# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Functional test case: DPO on the tiny Mixtral checkpoint with LoRA PEFT,
# tensor parallel size 2, expert parallel size 1 and sequence parallelism.
# Requires ALIGNER_CI_DIR to point at the CI asset directory.

# Resolve the directory containing this script and run from there, so the
# relative ../dpo.sh path works no matter where the test is launched from.
# Strict mode is enabled only afterwards, so guard the cd explicitly; the
# quotes keep a path containing spaces or glob characters intact.
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
cd -- "$SCRIPT_DIR" || exit 1

# -e: exit on error, -u: error on unset variables (e.g. ALIGNER_CI_DIR),
# -x: trace commands, -o pipefail: the pipeline into tee fails if dpo.sh fails.
set -eoux pipefail

# Launch the shared DPO script with the checkpoint path exported for this one
# command only, and mirror its combined stdout/stderr into <script name>.log
# in this directory.
PRETRAINED_CHECKPOINT_NEMO_FILE="${ALIGNER_CI_DIR}/checkpoints/tiny-mixtral-nlayers2-hidden128-ffn448-nhead4-qgroup2.nemo" \
bash ../dpo.sh \
    ++model.optim.name=mcore_distributed_optim \
    ++model.tensor_model_parallel_size=2 \
    ++model.expert_model_parallel_size=1 \
    ++model.sequence_parallel=True \
    ++model.tp_comm_overlap_disable_qkv=True \
    model.data.pad_length_to_multiple_of=2 \
    model.peft.peft_scheme=lora \
    2>&1 | tee "$(basename -- "$0").log"
13 changes: 0 additions & 13 deletions tests/functional/test_cases/dpo-mixtral-sp

This file was deleted.

3 changes: 2 additions & 1 deletion tests/functional/test_cases/ppo-llama3-pp2-reshard
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,5 @@ GBS=2 \
RESHARD=True \
RM_NEMO_FILE=${ALIGNER_CI_DIR}/checkpoints/llama3--nlayers4-hidden64-ffn224-dummy_rm-megatron_gpt.nemo \
ACTOR_NEMO_FILE=${ALIGNER_CI_DIR}/checkpoints/tiny-llama3-results-nlayers2-hidden128-ffn448-nhead4-qgroup2-megatron_gpt.nemo \
bash ../ppo.sh
bash ../ppo.sh \
2>&1 | tee $(basename $0).log
18 changes: 17 additions & 1 deletion tests/functional/test_cases/rm-llama3
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,8 +1,24 @@
#!/bin/bash

# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Resolve the directory containing this script and make it the working
# directory, so the relative ../rm.sh path below resolves from here.
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
cd $SCRIPT_DIR

# -e: exit on error, -u: error on unset variables, -x: trace commands,
# -o pipefail: a pipeline fails when any of its stages fails.
set -eoux pipefail

# Run ../rm.sh with PRETRAINED_CHECKPOINT_NEMO_FILE set to the tiny llama3
# checkpoint under ALIGNER_CI_DIR.
# NOTE(review): "bash ../rm.sh" appears twice below; this looks like the
# removed/added pair of this commit's diff (plain invocation vs. invocation
# with output copied to <script name>.log via tee) -- confirm that only the
# tee form belongs in the script.
PRETRAINED_CHECKPOINT_NEMO_FILE=${ALIGNER_CI_DIR}/checkpoints/tiny-llama3-results-nlayers2-hidden128-ffn448-nhead4-qgroup2-megatron_gpt.nemo \
bash ../rm.sh
bash ../rm.sh \
2>&1 | tee $(basename $0).log

0 comments on commit 8e06e03

Please sign in to comment.