Commit 3d2297e

Merge branch 'main' into 'main'
MCore generate: read vocab size from model, not tokenizer

See merge request ADLR/megatron-lm!2440
ericharper committed Dec 16, 2024
2 parents 71c394b + f33d9fe commit 3d2297e
Showing 1 changed file with 2 additions and 2 deletions.
@@ -305,7 +305,7 @@ def generate_all_output_tokens_static_batch(
             if self.model_is_pipeline_parallel:
                 context_length = context_end_position - context_start_position
                 logits = broadcast_from_last_pipeline_stage(
-                    [batch_size, context_length, self.tokenizer.vocab_size],
+                    [batch_size, context_length, self.inference_wrapped_model.model.vocab_size],
                     dtype=self.inference_wrapped_model.inference_wrapper_config.params_dtype,
                     tensor=logits,
                 )
@@ -316,7 +316,7 @@ def generate_all_output_tokens_static_batch(
             generation_started = prompt_lengths_in_batch <= context_end_position
             last_token_logits = logits[:, -1, :]
             sampled_logits = self.sample_from_logits(
-                last_token_logits, common_inference_params, self.tokenizer.vocab_size
+                last_token_logits, common_inference_params, self.inference_wrapped_model.model.vocab_size
             )

             # Substitute the sampled logits only for only the prompts that
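Why the change: Megatron pads the embedding table so the vocabulary splits evenly across tensor-parallel ranks, so the logits the model produces have the padded vocab size as their last dimension, which can be larger than what the tokenizer reports. Using self.tokenizer.vocab_size here handed broadcast_from_last_pipeline_stage a shape that did not match the logits tensor and passed the wrong bound to sample_from_logits. A minimal sketch of the padding arithmetic, assuming Megatron's default --make-vocab-size-divisible-by of 128 (the helper below is illustrative, not code from this commit):

# Illustrative helper, not part of the commit: rounds the tokenizer vocab up
# so the embedding table divides evenly across tensor-parallel ranks.
def padded_vocab_size(
    tokenizer_vocab_size: int,
    make_vocab_size_divisible_by: int = 128,
    tensor_model_parallel_size: int = 1,
) -> int:
    multiple = make_vocab_size_divisible_by * tensor_model_parallel_size
    # Round up to the nearest multiple of `multiple`.
    return ((tokenizer_vocab_size + multiple - 1) // multiple) * multiple

# Example: GPT-2's 50,257-token vocabulary padded for 8-way tensor parallelism.
print(padded_vocab_size(50257, 128, 8))  # 51200, not 50257

With the model's own vocab_size, the broadcast shape matches the logits tensor the last pipeline stage actually produced, and sampling is bounded by that same dimension.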
