: ${TE_PATH:=/opt/transformerengine}
: ${MCORE_PATH:=${TE_PATH}/qa/L1_pytorch_mcore_integration/Megatron-LM}

+ # Check whether FP8 is supported
+ DEVICE_ARCH=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader | head -n 1 | sed 's/[^0-9]//g')
+ if [[ ${DEVICE_ARCH} -ge 89 ]]; then
+     WITH_FP8=1
+ fi
+
# Download Megatron-LM if needed
if [ ! -d "${MCORE_PATH}" ]; then
    pushd $(dirname ${MCORE_PATH})
    git clone -b core_r0.9.0 https://github.com/NVIDIA/Megatron-LM.git Megatron-LM
    popd
fi

+ # Create mock vocab
+ VOCAB_FILE=${TE_PATH}/qa/L1_pytorch_mcore_integration/vocab.json
+ printf "" > ${VOCAB_FILE}
+ printf "{" >> ${VOCAB_FILE}
+ printf "\"<|endoftext|>\": 0" >> ${VOCAB_FILE}
+ seq 1 4095 | awk '{ printf(", \"%d\": %d", $1, $1) }' >> ${VOCAB_FILE}
+ printf "}" >> ${VOCAB_FILE}
+
# Megatron-LM invocation
COMMAND="
NVTE_TORCH_COMPILE=0
@@ -40,17 +54,17 @@ ${MCORE_PATH}/pretrain_gpt.py
--hidden-size 128
--num-attention-heads 8
--seq-length 128
- --max-position-embeddings 2048
+ --max-position-embeddings 128
--micro-batch-size 1
--global-batch-size 8
--train-iters 10
--eval-iters 10
--lr 1e-4
--mock-data
- --vocab-file /data/gpt3/pile-cc1-cc2-shuf/bpe/gpt2-vocab.json
- --merge-file /data/gpt3/pile-cc1-cc2-shuf/bpe/gpt2-merges.txt
+ --vocab-file ${VOCAB_FILE}
+ --merge-file ${TE_PATH}/qa/L1_pytorch_mcore_integration/merges.txt
--transformer-impl transformer_engine
- --fp8-format hybrid
+ ${WITH_FP8:+--fp8-format hybrid}
"
COMMAND=$(echo "${COMMAND}" | tr '\n' ' ')
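A quick sanity check of the two additions, as a sketch (it assumes the script has already run so that VOCAB_FILE is set and the file exists; the python one-liner is illustrative, not part of the script): the mock vocab is a single-line JSON object with 4096 entries, and ${WITH_FP8:+--fp8-format hybrid} expands to the FP8 flag only when WITH_FP8 was set by the compute-capability check (FP8 is first supported on compute capability 8.9, i.e. Ada-class GPUs and newer).

# Mock vocab: valid JSON, 4096 entries, "<|endoftext|>" mapped to 0
python -c "import json; v = json.load(open('${VOCAB_FILE}')); print(len(v), v['<|endoftext|>'])"   # expect: 4096 0

# ${VAR:+word} expands to word only when VAR is set and non-empty
WITH_FP8=1;     echo "${WITH_FP8:+--fp8-format hybrid}"   # prints: --fp8-format hybrid
unset WITH_FP8; echo "${WITH_FP8:+--fp8-format hybrid}"   # prints nothing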