From a41f2d73845e6d3990b33c6fe79f67e1cc9ab80b Mon Sep 17 00:00:00 2001 From: Jared Casper Date: Tue, 12 Sep 2023 16:32:05 -0700 Subject: [PATCH] Get normalization from the checkpoint when using checkpoint args. Needed for using checkpoint/util.py with RMSNorm. Also drop the --DDP-impl argument, which has since been removed, from llama2.md. --- docs/llama2.md | 1 - megatron/checkpointing.py | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/llama2.md b/docs/llama2.md index b70d7f28ed..9043a2b95d 100644 --- a/docs/llama2.md +++ b/docs/llama2.md @@ -86,7 +86,6 @@ If loading for either inference or finetuning, use the following arguments: --no-load-optim \ --no-load-rng \ --fp16 \ ---DDP-impl local \ --untie-embeddings-and-output-weights \ --use-rotary-position-embeddings \ --normalization RMSNorm \ diff --git a/megatron/checkpointing.py b/megatron/checkpointing.py index 1ee1ddf1a3..94725405ac 100644 --- a/megatron/checkpointing.py +++ b/megatron/checkpointing.py @@ -482,6 +482,7 @@ def _set_arg(arg_name, old_arg_name=None, force=False): _set_arg('swiglu', force=True) _set_arg('untie_embeddings_and_output_weights', force=True) _set_arg('apply_layernorm_1p', force=True) + _set_arg('normalization', force=True) _set_arg('tokenizer_type') _set_arg('padded_vocab_size') if checkpoint_version < 3.0: