Skip to content

Commit

Permalink
Debug Mcore integration test
Browse files Browse the repository at this point in the history
Avoid FP8 on Ampere and older. Generate synthetic data instead of depending on external data.

Signed-off-by: Tim Moon <[email protected]>
  • Loading branch information
timmoon10 committed Dec 5, 2024
1 parent 64126aa commit 101ec84
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 4 deletions.
2 changes: 2 additions & 0 deletions qa/L1_pytorch_mcore_integration/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Megatron-LM
vocab.json
1 change: 1 addition & 0 deletions qa/L1_pytorch_mcore_integration/merges.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#version: 0.2
22 changes: 18 additions & 4 deletions qa/L1_pytorch_mcore_integration/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,27 @@ set -e
: ${TE_PATH:=/opt/transformerengine}
: ${MCORE_PATH:=${TE_PATH}/qa/L1_pytorch_mcore_integration/Megatron-LM}

# Check whether FP8 is supported
DEVICE_ARCH=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader | head -n 1 | sed 's/[^0-9]//g')
if [[ ${DEVICE_ARCH} -ge 89 ]]; then
WITH_FP8=1
fi

# Download Megatron-LM if needed
if [ ! -d "${MCORE_PATH}" ]; then
pushd $(dirname ${MCORE_PATH})
git clone -b core_r0.9.0 https://github.com/NVIDIA/Megatron-LM.git Megatron-LM
popd
fi

# Create mock vocab
VOCAB_FILE=${TE_PATH}/qa/L1_pytorch_mcore_integration/vocab.json
printf "" > ${VOCAB_FILE}
printf "{" >> ${VOCAB_FILE}
printf "\"<|endoftext|>\": 0" >> ${VOCAB_FILE}
seq 1 4095 | awk '{ printf(", \"%d\": %d", $1, $1) }' >> ${VOCAB_FILE}
printf "}" >> ${VOCAB_FILE}

# Megatron-LM invocation
COMMAND="
NVTE_TORCH_COMPILE=0
Expand All @@ -40,17 +54,17 @@ ${MCORE_PATH}/pretrain_gpt.py
--hidden-size 128
--num-attention-heads 8
--seq-length 128
--max-position-embeddings 2048
--max-position-embeddings 128
--micro-batch-size 1
--global-batch-size 8
--train-iters 10
--eval-iters 10
--lr 1e-4
--mock-data
--vocab-file /data/gpt3/pile-cc1-cc2-shuf/bpe/gpt2-vocab.json
--merge-file /data/gpt3/pile-cc1-cc2-shuf/bpe/gpt2-merges.txt
--vocab-file ${VOCAB_FILE}
--merge-file ${TE_PATH}/qa/L1_pytorch_mcore_integration/merges.txt
--transformer-impl transformer_engine
--fp8-format hybrid
${WITH_FP8:+--fp8-format hybrid}
"
COMMAND=$(echo "${COMMAND}" | tr '\n' ' ')

Expand Down

0 comments on commit 101ec84

Please sign in to comment.