[PyTorch] Reorganize L1 tests (#1255)
* Reorganize PyTorch L1 tests

Signed-off-by: Tim Moon <[email protected]>

* Move ONNX tests to L1

Signed-off-by: Tim Moon <[email protected]>

* Move FA version test to L3

Signed-off-by: Tim Moon <[email protected]>

* Limit parallel build jobs in FA version test

Signed-off-by: Tim Moon <[email protected]>

---------

Signed-off-by: Tim Moon <[email protected]>
timmoon10 authored Oct 18, 2024
1 parent a488b8b commit 41fe1e5
Showing 7 changed files with 45 additions and 24 deletions.
7 changes: 0 additions & 7 deletions qa/L0_pytorch_unittest/test.sh
@@ -21,11 +21,4 @@ pytest -v -s $TE_PATH/tests/pytorch/test_gqa.py
pytest -v -s $TE_PATH/tests/pytorch/test_fused_optimizer.py
pytest -v -s $TE_PATH/tests/pytorch/test_multi_tensor.py
pytest -v -s $TE_PATH/tests/pytorch/test_fusible_ops.py
-pytest -v -s $TE_PATH/tests/pytorch/test_fusible_ops_distributed.py
pytest -v -s $TE_PATH/tests/pytorch/test_permutation.py
-
-# Build custom ONNX extensions for ONNX export test
-pip install onnxruntime==1.19.2
-export CUSTOM_ORT_OPS_PATH=$TE_PATH/tests/pytorch/custom_ort_ops
-bash $CUSTOM_ORT_OPS_PATH/build.sh
-NVTE_TORCH_COMPILE=0 pytest -v -s $TE_PATH/tests/pytorch/test_onnx_export.py
10 changes: 0 additions & 10 deletions qa/L1_pytorch_context_parallel_test/test.sh

This file was deleted.

12 changes: 5 additions & 7 deletions qa/L1_pytorch_distributed_unittest/test.sh
@@ -5,11 +5,9 @@
set -e

: ${TE_PATH:=/opt/transformerengine}
-pytest -v -s $TE_PATH/tests/pytorch/distributed/test_comm_gemm_overlap.py

-pip install prettytable
-git clone https://github.com/NVIDIA/Megatron-LM.git
-cd Megatron-LM
-git checkout b3375a0e38c10e2300ef4be031f7dcabab52b448
-pytest -v -s $TE_PATH/tests/pytorch/distributed/test_convergence.py
-python $TE_PATH/tests/pytorch/distributed/print_logs.py
+pip install pytest==8.2.1
+pytest -v -s $TE_PATH/tests/pytorch/distributed/test_numerics.py
+pytest -v -s $TE_PATH/tests/pytorch/distributed/test_comm_gemm_overlap.py
+pytest -v -s $TE_PATH/tests/pytorch/distributed/test_fusible_ops.py
+pytest -v -s $TE_PATH/tests/pytorch/fused_attn/test_fused_attn_with_cp.py
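
As an aside, a minimal sketch of how this reorganized L1 suite might be run against a local checkout; /path/to/TransformerEngine is a placeholder, and TE_PATH falls back to /opt/transformerengine when unset, as in the script itself.

# Hypothetical local invocation (path is a placeholder)
export TE_PATH=/path/to/TransformerEngine
bash $TE_PATH/qa/L1_pytorch_distributed_unittest/test.sh
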
16 changes: 16 additions & 0 deletions qa/L1_pytorch_onnx_test/test.sh
@@ -0,0 +1,16 @@
# Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# See LICENSE for license information.

set -e

: ${TE_PATH:=/opt/transformerengine}

pip install pytest==8.2.1 onnxruntime==1.19.2

# Build custom ONNX Runtime operators
export CUSTOM_ORT_OPS_PATH=$TE_PATH/tests/pytorch/custom_ort_ops
bash $CUSTOM_ORT_OPS_PATH/build.sh

# Run tests
NVTE_TORCH_COMPILE=0 pytest -v -s $TE_PATH/tests/pytorch/test_onnx_export.py
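
When iterating locally on the export test, the custom-op build need not be repeated each run; a rough sketch, assuming the test only needs CUSTOM_ORT_OPS_PATH to point at a directory that already contains the built library.

# Sketch: reuse a previously built custom ONNX Runtime op library (path is a placeholder)
export CUSTOM_ORT_OPS_PATH=/path/to/prebuilt/custom_ort_ops
NVTE_TORCH_COMPILE=0 pytest -v -s $TE_PATH/tests/pytorch/test_onnx_export.py
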
Flash Attention versions test script (moved from L1 to L3)
@@ -7,9 +7,16 @@ set -e
: ${TE_PATH:=/opt/transformerengine}

pip install pytest==8.2.1
+
+# Limit parallel build jobs to avoid overwhelming system resources
+export MAX_JOBS=4
+
+# Iterate over Flash Attention versions
FA_versions=(2.1.1 2.3.0 2.4.0.post1 2.4.1 2.5.7 2.6.3 3.0.0b1)
for fa_version in "${FA_versions[@]}"
do
+
+# Build Flash Attention
if [ "${fa_version}" \< "3.0.0" ]
then
pip install flash-attn==${fa_version}
@@ -19,5 +26,8 @@ do
mkdir -p $python_path/flashattn_hopper
wget -P $python_path/flashattn_hopper https://raw.githubusercontent.com/Dao-AILab/flash-attention/main/hopper/flash_attn_interface.py
fi
+
+# Run tests
NVTE_TORCH_COMPILE=0 pytest -v -s $TE_PATH/tests/pytorch/fused_attn/test_fused_attn.py
+
done
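
For context on the MAX_JOBS export: installing flash-attn from pip generally triggers a source build, and the PyTorch C++-extension machinery it uses reads MAX_JOBS to cap parallel compile jobs, which is what the added export relies on. A rough sketch of a single loop iteration run by hand, with the 2.6.3 pin taken from the version list above purely as an example.

# Rough single-iteration equivalent for a pre-3.0 version (illustrative; not part of the script)
export MAX_JOBS=4        # cap parallel compilation so the source build does not exhaust CPU/RAM
pip install flash-attn==2.6.3
NVTE_TORCH_COMPILE=0 pytest -v -s $TE_PATH/tests/pytorch/fused_attn/test_fused_attn.py
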
14 changes: 14 additions & 0 deletions qa/L3_pytorch_convergence_test/test.sh
@@ -0,0 +1,14 @@
# Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# See LICENSE for license information.

set -e

: ${TE_PATH:=/opt/transformerengine}

pip install prettytable
git clone https://github.com/NVIDIA/Megatron-LM.git
cd Megatron-LM
git checkout b3375a0e38c10e2300ef4be031f7dcabab52b448
pytest -v -s $TE_PATH/tests/pytorch/distributed/test_convergence.py
python $TE_PATH/tests/pytorch/distributed/print_logs.py
