1
1
.test_rules :
2
2
rules :
3
- - if : $CI_PIPELINE_SOURCE == 'merge_request_event' && $CI_MERGE_REQUEST_TARGET_BRANCH_PROTECTED != "true"
3
+ - if : $UNIT_TEST == 'yes' && $ CI_PIPELINE_SOURCE == 'merge_request_event' && $CI_MERGE_REQUEST_TARGET_BRANCH_PROTECTED != "true"
4
4
allow_failure : true
5
5
when : on_success
6
6
- when : on_success
@@ -46,7 +46,7 @@ test:build_image:
46
46
47
47
ADDITIONAL_PARAMS=()
48
48
49
- if [[ "$CI_COMMIT_BRANCH" == "$CI_DEFAULT_BRANCH " ]]; then
49
+ if [[ "$CI_COMMIT_BRANCH" == "ci-rebuild-mcore-nemo-image " ]]; then
50
50
ADDITIONAL_PARAMS+=("--pull")
51
51
ADDITIONAL_PARAMS+=("--cache-to type=registry,ref=${IMAGE}-buildcache:main")
52
52
fi
@@ -118,10 +118,10 @@ test:build_image:
118
118
paths :
119
119
- coverage
120
120
rules :
121
- - if : $CI_PIPELINE_SOURCE == 'merge_request_event' && $CI_MERGE_REQUEST_TARGET_BRANCH_PROTECTED != "true" && $UNIT_TEST_REPEAT != '0'
121
+ - if : $UNIT_TEST == 'yes' && $ CI_PIPELINE_SOURCE == 'merge_request_event' && $CI_MERGE_REQUEST_TARGET_BRANCH_PROTECTED != "true" && $UNIT_TEST_REPEAT != '0'
122
122
allow_failure : true
123
123
when : on_success
124
- - if : $UNIT_TEST_REPEAT != '0'
124
+ - if : $UNIT_TEST == 'yes' && $ UNIT_TEST_REPEAT != '0'
125
125
when : on_success
126
126
127
127
test:pyt(LTS)_mcore(latest) :
@@ -135,6 +135,8 @@ test:pyt(LTS)_mcore(0.9.0):
135
135
variables :
136
136
TAG : core_r0.9.0
137
137
IMAGE : ${CI_MCORE_LTS_IMAGE}
138
+ UNIT_TEST_REPEAT : 1
139
+ UNIT_TEST_TIMEOUT : 15
138
140
139
141
test:pyt(DEV)_mcore(latest) :
140
142
extends : [.unit_tests]
@@ -147,8 +149,10 @@ test:pyt(DEV)_mcore(0.9.0):
147
149
variables :
148
150
TAG : core_r0.9.0
149
151
IMAGE : ${CI_MCORE_DEV_IMAGE}
152
+ UNIT_TEST_REPEAT : 1
153
+ UNIT_TEST_TIMEOUT : 15
150
154
151
- test:notify :
155
+ test:notify_unit_tests :
152
156
extends : [.test_rules]
153
157
image : ${CI_MCORE_LTS_IMAGE}:${CI_PIPELINE_ID}
154
158
needs :
@@ -229,4 +233,145 @@ test:secret_detection:
229
233
echo "Atleast one vulnerability has been found"
230
234
cat gl-secret-detection-report.json | jq '.'
231
235
exit 1
232
- fi
236
+ fi
237
+
238
# Builds the package with the CPython 3.10 and 3.11 interpreters shipped in the
# manylinux image, then runs `auditwheel repair` on the resulting wheels.
# Published artifacts: the (possibly patched) package_info.py and wheelhouse/.
test:pypi_build_wheel:
  extends: [.test_rules]
  image:
    name: quay.io/pypa/manylinux_2_28_x86_64
    # Empty entrypoint so the runner starts its own shell in the container.
    entrypoint: [""]
  tags: [mcore-docker-node-small]
  variables:
    # Must be exactly "yes" (no surrounding whitespace): the script compares
    # this value literally to decide whether the run is a dry run.
    PUBLISH_DRYRUN: "yes"
  script:
    - echo "$PUBLISH_DRYRUN"
    # Dry run only: replace the line starting with PATCH in package_info.py
    # with a random four-digit value (1000-9999). Presumably this keeps
    # repeated test uploads from colliding on the same version - TODO confirm.
    # Literal block scalar (|) keeps the if/fi on separate shell lines.
    - |
      if [ "$PUBLISH_DRYRUN" = "yes" ]; then
        sed -i "/^PATCH/c\PATCH = $((RANDOM % 9000 + 1000))" megatron/core/package_info.py
      fi
    - /opt/python/cp310-cp310/bin/python -m build
    - /opt/python/cp311-cp311/bin/python -m build
    - auditwheel repair dist/*.whl
  artifacts:
    paths:
      - megatron/core/package_info.py
      - wheelhouse/
259
+
260
# Installs the cp310 wheel produced by test:pypi_build_wheel and checks that
# the installed package reports the same version as the source tree.
test:pypi_test_wheel:
  extends: [.test_rules]
  image: nvcr.io/nvidia/pytorch:24.01-py3
  needs: [test:pypi_build_wheel]
  tags: [mcore-docker-node-small]
  variables:
    # Must be exactly "yes" (no surrounding whitespace) to be recognised by
    # scripts that compare against the literal string.
    PUBLISH_DRYRUN: "yes"
  script:
    # Version as reported by the source checkout in the working directory.
    - EXPECTED_RELEASE_NUMBER=$(python -c "from megatron import core; print(core.__version__)")
    # Remove the source tree so the next import resolves to the installed wheel.
    - rm -rf megatron
    - pip install wheelhouse/*cp310*.whl
    # Version as reported by the installed wheel.
    - RELEASE_NUMBER=$(python -c "from megatron import core; print(core.__version__)")
    # Log both values before asserting so a mismatch is visible in the job log.
    - >
      echo "$EXPECTED_RELEASE_NUMBER" == "$RELEASE_NUMBER"
    # POSIX `test` uses a single `=` for string equality; `==` only works in
    # some shells.
    - test "$EXPECTED_RELEASE_NUMBER" = "$RELEASE_NUMBER"
  artifacts:
    paths:
      - wheelhouse/
279
+
280
# Uploads everything in wheelhouse/ with twine. A dry run targets the
# `testpypi` repository; otherwise the upload goes to `pypi`.
test:pypi_push_wheel:
  extends: [.test_rules]
  image: python:3.10
  tags: [mcore-docker-node-small]
  needs: [test:pypi_test_wheel]
  variables:
    # Must be exactly "yes" (no surrounding whitespace). Any other value,
    # including " yes", selects the production branch below.
    PUBLISH_DRYRUN: "yes"
  script:
    # Select repository and credentials. Credentials are exported under the
    # names twine reads from the environment (TWINE_USERNAME / TWINE_PASSWORD)
    # so the password never appears on the command line.
    - |
      if [ "$PUBLISH_DRYRUN" = "yes" ]; then
        REPOSITORY=testpypi
        export TWINE_USERNAME=$TWINE_TEST_USERNAME
        export TWINE_PASSWORD=$TWINE_TEST_PASSWORD
      else
        REPOSITORY=pypi
        export TWINE_USERNAME=$TWINE_PROD_USERNAME
        export TWINE_PASSWORD=$TWINE_PROD_PASSWORD
      fi
    - pip install twine
    # --non-interactive: fail instead of prompting when credentials are missing.
    - twine upload --non-interactive --repository "$REPOSITORY" wheelhouse/*
300
+
301
# Creates a GitHub release for NVIDIA/Megatron-LM whose body is the matching
# section of CHANGELOG.md. In a dry run the request is printed, not sent.
test:gh_release:
  extends: [.test_rules]
  tags: [mcore-docker-node-small]
  image: nvcr.io/nvidia/pytorch:24.01-py3
  variables:
    # Must be exactly "yes" (no surrounding whitespace); any other value sends
    # the real request.
    PUBLISH_DRYRUN: "yes"
  script:
    - RELEASE_NUMBER=$(python -c "from megatron import core; print(core.__version__)")
    - NAME="NVIDIA Megatron Core $RELEASE_NUMBER"
    # Take the lines between the "## <NAME>" heading and the next "## " heading.
    - CHANGELOG=$(awk '/^## '"$NAME"'/{flag=1; next} /^## /{flag=0} flag' CHANGELOG.md)
    # Drop empty lines from the extracted section.
    - CHANGELOG=$(echo "$CHANGELOG" | sed '/./!d')
    # Build the JSON request body with jq so all values are escaped correctly.
    - |
      PAYLOAD=$(jq -nc \
        --arg CI_COMMIT_BRANCH "$CI_COMMIT_BRANCH" \
        --arg NAME "$NAME" \
        --arg BODY "$CHANGELOG" \
        '{
          "tag_name": $CI_COMMIT_BRANCH,
          "target_commitish": $CI_COMMIT_BRANCH,
          "name": $NAME,
          "body": $BODY,
          "draft": false,
          "prerelease": false,
          "generate_release_notes": false
        }'
      )
    # Keep the command as a bash array so each header and the JSON payload stay
    # single arguments. Flattening it through `echo` and re-parsing with `eval`
    # would split them on whitespace and re-interpret shell metacharacters.
    - |
      CMD=(
        curl -L
        -X POST
        -H "Accept: application/vnd.github+json"
        -H "Authorization: Bearer $GH_TOKEN"
        -H "X-GitHub-Api-Version: 2022-11-28"
        https://api.github.com/repos/NVIDIA/Megatron-LM/releases
        -d "$PAYLOAD"
      )

      if [[ "$PUBLISH_DRYRUN" == "yes" ]]; then
        # NOTE(review): this prints the Authorization header; confirm GH_TOKEN
        # is a masked CI variable.
        echo "${CMD[@]}"
      else
        "${CMD[@]}"
      fi
342
+
343
# Posts a release announcement to the webhook in MCORE_NOTIFICATION_HOOK_MAIN
# after the wheel upload and GitHub release jobs. In a dry run the request is
# printed, not sent.
test:notify_release:
  needs: [test:pypi_push_wheel, test:gh_release]
  extends: [.test_rules]
  image: nvcr.io/nvidia/pytorch:24.01-py3
  tags: [mcore-docker-node-small]
  variables:
    # Must be exactly "yes" (no surrounding whitespace); any other value sends
    # the real notification.
    PUBLISH_DRYRUN: "yes"
  script:
    - VERSION=$(python -c "from megatron import core; print(core.__version__)")
    # NOTE(review): the link assumes a tag named core_r<VERSION>, while
    # test:gh_release tags with $CI_COMMIT_BRANCH - confirm the two agree.
    - URL="https://github.com/NVIDIA/Megatron-LM/releases/tag/core_r$VERSION"
    # JSON body; $URL and $VERSION are spliced in between single-quoted parts
    # and double-quoted so they cannot be word-split.
    - |
      MESSAGE='{
        "blocks": [
          {
            "type": "section",
            "text": {
              "type": "mrkdwn",
              "text": "Releasebot 🤖: Megatron-Core released <'"$URL"'|core_r'"$VERSION"'> 🚀"
            }
          }
        ]
      }'
    - echo "$MESSAGE"
    # Keep the command as a bash array so the header and the JSON body stay
    # single arguments. Flattening through `echo` and re-parsing with `eval`
    # would split them on whitespace.
    - |
      CMD=(
        curl
        -X POST
        -H "Content-type: application/json"
        --data "$MESSAGE"
        "${MCORE_NOTIFICATION_HOOK_MAIN}"
      )

      if [[ "$PUBLISH_DRYRUN" == "yes" ]]; then
        echo "${CMD[@]}"
      else
        "${CMD[@]}"
      fi
0 commit comments