Commit 772faca
Merge branch 'ko3n1g/ci/flaky-marker' into 'main'
tests: Use flaky instead of skip marker

See merge request ADLR/megatron-lm!2208
ko3n1g committed Oct 11, 2024
2 parents 47bb8d1 + 8c018ca commit 772faca
Showing 12 changed files with 19 additions and 17 deletions.
2 changes: 2 additions & 0 deletions .gitlab/stages/01.tests.yml
@@ -119,6 +119,8 @@ unit_tests:
 ARGS=()
 if [[ $TAG != latest ]]; then
   ARGS+=(-m "not internal")
+else
+  ARGS+=(-m "not flaky")
 fi
 if [[ $IMAGE == ${CI_MCORE_DEV_IMAGE} ]]; then
   ARGS+=(-m "experimental")
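With this change, the default (`TAG == latest`) unit-test job deselects any test carrying the `flaky` marker, while non-default images keep excluding `internal` tests. For `@pytest.mark.flaky` to be usable without a `PytestUnknownMarkWarning`, the marker has to be registered with pytest. A minimal sketch of one way to do that in a `conftest.py` — an illustrative assumption; the repository may instead declare the marker in its pytest configuration file or get it from a plugin:

# conftest.py -- hypothetical registration sketch, not necessarily how
# Megatron-LM actually declares the marker.
def pytest_configure(config):
    # Register the custom "flaky" marker so pytest recognizes
    # @pytest.mark.flaky and -m "not flaky" selections cleanly.
    config.addinivalue_line(
        "markers", "flaky: known-flaky test, deselected in CI via -m 'not flaky'"
    )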
2 changes: 1 addition & 1 deletion tests/unit_tests/data/test_bin_reader.py
@@ -89,7 +89,7 @@ class _LocalClientError(Exception):
     setattr(exceptions, "ClientError", _LocalClientError)
 
 
-@pytest.mark.skip(reason="Tests are flaky and need to be debugged")
+@pytest.mark.flaky
 def test_bin_reader():
     with tempfile.TemporaryDirectory() as temp_dir:
         # set the default nltk data path
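Unlike `skip`, the `flaky` marker leaves the test collectable: it still runs everywhere except under the CI job's `-m "not flaky"` selection, so it can be reproduced on demand while being debugged. A small sketch of driving that selection programmatically — `pytest.main` is standard pytest; the path is simply this repository's unit-test tree:

import sys

import pytest

# Run only the tests tagged as flaky, e.g. to debug them locally.
# Passing "-m" "not flaky" instead reproduces the CI job's selection.
if __name__ == "__main__":
    sys.exit(pytest.main(["-m", "flaky", "tests/unit_tests"]))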
2 changes: 1 addition & 1 deletion tests/unit_tests/data/test_gpt_dataset.py
@@ -26,7 +26,7 @@ def sample_N(dataset, N, randomize):
     return samples
 
 
-@pytest.mark.skip(reason="Tests are flaky and need to be debugged")
+@pytest.mark.flaky
 def test_mock_gpt_dataset():
     if torch.distributed.is_available():
         Utils.initialize_distributed()
4 changes: 2 additions & 2 deletions tests/unit_tests/data/test_preprocess_data.py
@@ -183,7 +183,7 @@ def gpt2_merge(odir):
     return path
 
 
-@pytest.mark.skip(reason="Tests are flaky and need to be debugged")
+@pytest.mark.flaky
 def test_preprocess_data_gpt():
     with tempfile.TemporaryDirectory() as temp_dir:
 
@@ -214,7 +214,7 @@ def bert_vocab(odir):
     return path
 
 
-@pytest.mark.skip(reason="Tests are flaky and need to be debugged")
+@pytest.mark.flaky
 def test_preprocess_data_bert():
     with tempfile.TemporaryDirectory() as temp_dir:
 
2 changes: 1 addition & 1 deletion tests/unit_tests/dist_checkpointing/models/test_mamba.py
@@ -74,7 +74,7 @@ class TestMambaReconfiguration:
             # (False, (1, 1, 4), (8, 1, 1), True),
         ],
     )
-    @pytest.mark.skip(reason="Flaky test; needs to be debugged")
+    @pytest.mark.flaky
     def test_parallel_reconfiguration_e2e(
         self, tmp_path_dist_ckpt, src_tp_pp_exp, dest_tp_pp_exp, use_glu, use_fpsl
     ):
2 changes: 1 addition & 1 deletion tests/unit_tests/dist_checkpointing/test_fp8.py
@@ -51,7 +51,7 @@ def get_ten(dtype: str = 'fp8'):
             (False, (2, 4), (2, 4), None),
         ],
     )
-    @pytest.mark.skip(reason="Flaky test")
+    @pytest.mark.flaky
     def test_fp8_save_load(
         self, tmp_path_dist_ckpt, use_fpsl, src_tp_pp, dest_tp_pp, load_exchange_algo
     ):
2 changes: 1 addition & 1 deletion tests/unit_tests/dist_checkpointing/test_fully_parallel.py
@@ -280,8 +280,8 @@ def test_load_distribution(self, parallelization_along_dp, tmp_path_dist_ckpt):
 
         assert loaded_state_dict.keys() == state_dict.keys()
 
-    @pytest.mark.skip(reason="Tests are flaky and need to be debugged")
     @pytest.mark.parametrize('state_dict_device', ['cpu', 'cuda'])
+    @pytest.mark.flaky
     def test_memory_usage(self, state_dict_device, tmp_path_dist_ckpt):
         Utils.initialize_model_parallel(2, 1)
 
4 changes: 2 additions & 2 deletions tests/unit_tests/dist_checkpointing/test_nonpersistent.py
@@ -29,7 +29,7 @@ def teardown_method(self, method):
         Utils.destroy_model_parallel()
 
     @pytest.mark.parametrize(('tp,pp'), [(2, 4)])
-    @pytest.mark.skip(reason="Flaky test")
+    @pytest.mark.flaky
     def test_basic_save_load_scenarios(self, tmp_path_dist_ckpt, tp, pp):
         Utils.initialize_model_parallel(tp, pp)
         num_floating_point_operations_so_far = 0
@@ -118,7 +118,7 @@ def test_basic_save_load_scenarios(self, tmp_path_dist_ckpt, tp, pp):
 
 class TestLegacySaveAndLoad:
     @pytest.mark.parametrize(('tp,pp'), [(2, 4)])
-    @pytest.mark.skip(reason="Flaky test")
+    @pytest.mark.flaky
     def test_basic_save_load_scenario(self, tmp_path_dist_ckpt, tp, pp):
         Utils.initialize_model_parallel(tp, pp)
         num_floating_point_operations_so_far = 0
10 changes: 5 additions & 5 deletions tests/unit_tests/dist_checkpointing/test_optimizer.py
@@ -178,7 +178,7 @@ def teardown_method(self, method):
             # ((2, 1), 2, 2),
         ],
     )
-    @pytest.mark.skip(reason="Tests are flaky and need to be debugged")
+    @pytest.mark.flaky
     def test_dp_sharding(self, tmp_path_dist_ckpt, tp_pp, src_dp, dest_dp, use_fpsl, initialize_fn):
         src_world_size = tp_pp[0] * tp_pp[1] * src_dp
         dest_world_size = tp_pp[0] * tp_pp[1] * dest_dp
@@ -256,7 +256,7 @@ def test_dp_sharding(self, tmp_path_dist_ckpt, tp_pp, src_dp, dest_dp, use_fpsl,
         ('src_tp_pp', 'dest_tp_pp', 'use_glu'),
         [((2, 2), (2, 4), False), ((1, 8), (4, 1), True), ((2, 4), (4, 2), False)],
     )
-    @pytest.mark.skip(reason="Tests are flaky and need to be debugged")
+    @pytest.mark.flaky
     def test_finetune_doesnt_load_optimizer(
         self, tmp_path_dist_ckpt, src_tp_pp, dest_tp_pp, use_glu
     ):
@@ -329,7 +329,7 @@ def test_finetune_doesnt_load_optimizer(
         assert not diffs[0] and not diffs[1] and diffs[2]
         assert not any(diff(optimizer.state_dict(), optim_unloaded_state_dict))
 
-    @pytest.mark.skip(reason="Tests are flaky and need to be debugged")
+    @pytest.mark.flaky
     def test_can_load_deprecated_bucket_space_format(self, tmp_path_dist_ckpt):
         # sync=True to make sure other ranks wait for rank 0 to finish creating directory.
         tp = 4
@@ -398,7 +398,7 @@ def teardown_method(self, method):
     @pytest.mark.parametrize(
         ('src_tp_pp', 'dest_tp_pp'), [((2, 4), (2, 4)), ((2, 4), (4, 2)), ((8, 1), (1, 2))]
     )
-    @pytest.mark.skip(reason="Tests are flaky and need to be debugged")
+    @pytest.mark.flaky
     def test_fp32_optimizer_resharding(self, tmp_path_dist_ckpt, src_tp_pp, dest_tp_pp):
         # sync=True to make sure other ranks wait for rank 0 to finish creating directory.
         Utils.initialize_model_parallel(*src_tp_pp)
@@ -465,7 +465,7 @@ def teardown_method(self, method):
         ('src_tp_pp', 'dest_tp_pp'),
         [((2, 4), (2, 4)), ((2, 4), (2, 2)), ((2, 4), (4, 2)), ((8, 1), (1, 2))],
     )
-    @pytest.mark.skip(reason="Tests are flaky and need to be debugged")
+    @pytest.mark.flaky
     def test_optimizer_resharding(
         self, tmp_path_dist_ckpt, src_tp_pp, dest_tp_pp, use_dist_opt, bf16
     ):
2 changes: 1 addition & 1 deletion tests/unit_tests/distributed/test_param_and_grad_buffer.py
@@ -58,7 +58,7 @@ def get_model_and_buffers(
 @pytest.mark.parametrize("use_distributed_optimizer", [False, True])
 @pytest.mark.parametrize("bias", [False, True])
 @pytest.mark.parametrize("shared_embedding", [False, True])
-@pytest.mark.skip(reason="Flaky test")
+@pytest.mark.flaky
 def test_bucket_sizes(
     bucket_size: Optional[int], use_distributed_optimizer: bool, bias: bool, shared_embedding: bool
 ):
2 changes: 1 addition & 1 deletion tests/unit_tests/transformer/moe/test_a2a_token_dispatcher.py
@@ -70,7 +70,7 @@ def test_capacity_forward_backward(self, tp_size, ep_size):
     @pytest.mark.internal
     @pytest.mark.timeout(120)
     @pytest.mark.parametrize("tp_size,ep_size", [(1, 8), (8, 1), (4, 2), (1, 1)])
-    @pytest.mark.skip(reason="Tests are flaky and need to be debugged")
+    @pytest.mark.flaky
     def test_capacity_padding_forward_backward(self, tp_size, ep_size):
         container = MoEModelTestContainer(
             tp_size=tp_size,
2 changes: 1 addition & 1 deletion tests/unit_tests/transformer/moe/test_upcycling.py
@@ -128,7 +128,7 @@ def teardown_method(self, method):
         destroy_num_microbatches_calculator()
 
     @pytest.mark.internal
-    @pytest.mark.skipif(True, reason="The test is flaky")  # TODO: Fix the test
+    @pytest.mark.flaky  # TODO: Fix the test
     @pytest.mark.parametrize(
         ('tp_pp_ep', 'enable_vp', 'enable_grouped_gemm'), [((1, 1, 2), (False), (False))]
     )
