Skip to content

Commit

Permalink
Merge branch 'lm-rope-bugfix' into 'main'
Browse files: browse the repository at this point in the history
change megatron-lm to use core rope api

See merge request ADLR/megatron-lm!842
  • Loading branch information
jaredcasper committed Oct 12, 2023
2 parents 8ca2d59 + 32749ea commit 3a8ff0f
Showing 1 changed file with 1 addition and 3 deletions.
4 changes: 1 addition & 3 deletions megatron/model/language_model.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -366,14 +366,12 @@ def __init__(self,
 rotary_dim = args.hidden_size // args.num_attention_heads \
     if args.kv_channels is None else args.kv_channels

-if args.rotary_percent < 1.0:
-    rotary_dim = int(rotary_dim * args.rotary_percent)
-
 # partial rotary embeddings, which is better than full rotary
 # Wang and Komatsuzaki et al
 # https://github.com/kingoflolz/mesh-transformer-jax/
 self.rotary_pos_emb = RotaryEmbedding(
     rotary_dim,
+    args.rotary_percent,
     seq_len_interpolation_factor=args.rotary_seq_len_interpolation_factor
 )

Expand Down

0 comments on commit 3a8ff0f

Please sign in to comment.