Skip to content

Commit

Permalink
Merge branch 'ko3n1g/ci/fix-inputs-to-nemo-ci' into 'main'
Browse files: browse the repository at this point in the history
ci: nemo-ci inputs

See merge request ADLR/megatron-lm!2522
  • Loading branch information
ko3n1g committed Jan 8, 2025
2 parents a26b93d + 67130c9 commit bafab5a
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 6 deletions.
6 changes: 3 additions & 3 deletions .gitlab/stages/02.functional-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,9 @@ functional:run_nemo:
variables: true
variables:
MCORE_COMMIT: $CI_COMMIT_SHA
TEST_LLM_MODULE: true
TEST_ALIGNER_MODULE: false
TEST_DATA_CURATOR_MODULE: false
TEST_LLM_MODULE: 'True'
TEST_ALIGNER_MODULE: 'False'
TEST_DATA_CURATOR_MODULE: 'False'
TESTS_TO_RUN_ON_THIS_COMMIT: nightly
rules:
- if: $FUNCTIONAL_TEST == "yes"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,4 +58,5 @@ MODEL_ARGS:
--data-cache-path: ${DATA_CACHE_PATH}
--bf16: true
--attention-backend: unfused
--log-memory-to-tensorboard: true
TEST_TYPE: regular
9 changes: 8 additions & 1 deletion tests/test_utils/python_scripts/launch_jet_workload.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,11 +259,18 @@ def main(
jet_log = main_job.get_logs()
logs = extract_logs_to_string(logs=jet_log)
download_job_assets(logs=jet_log, iteration=n_iteration)
no_log = False
break
except (requests.exceptions.ConnectionError, json.decoder.JSONDecodeError) as e:
print(e)
logger.error(e)
time.sleep((3**n_download_attempt) * 60)
n_download_attempt += 1
except KeyError as e:
logger.error(e)
no_log = True

if no_log:
continue

concat_logs = "\n".join(logs)
print(f"Logs:\n{concat_logs}")
Expand Down
5 changes: 3 additions & 2 deletions tests/unit_tests/test_model_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pytest
import yaml

YAML_DIR = pathlib.Path(__file__).parent / ".." / "tests/" / "functional_tests" / "test_cases"
YAML_DIR = pathlib.Path(__file__).parent / ".." / "functional_tests" / "test_cases"


def get_yaml_files(directory):
Expand All @@ -24,6 +24,7 @@ def load_yaml(file_path):
@pytest.mark.parametrize("yaml_file", get_yaml_files(YAML_DIR))
def test_model_config_tracks_memory(yaml_file, metric):
"""Test if each YAML file contains the required record."""
print("gpt3-nemo" in str(yaml_file) or "ckpt_converter" in str(yaml_file))
if "gpt3-nemo" in str(yaml_file) or "ckpt_converter" in str(yaml_file):
pytest.skip("Skipping for gpt-nemo")

Expand All @@ -33,4 +34,4 @@ def test_model_config_tracks_memory(yaml_file, metric):
"MODEL_ARGS" in model_config
and metric in model_config["MODEL_ARGS"]
and model_config["MODEL_ARGS"][metric] is True
), f"Please add {metric} to {yaml_file.parent.name}."
), f"Please add argument `{metric}` to `{yaml_file.parent.name}/model_config.yaml` that its metric gets tracked."

0 comments on commit bafab5a

Please sign in to comment.