We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
2 parents ea0f289 + 3a8ff0f · commit 3f3b436 · Copy full SHA for 3f3b436
megatron/model/language_model.py
@@ -366,15 +366,12 @@ def __init__(self,
366
rotary_dim = args.hidden_size // args.num_attention_heads \
367
if args.kv_channels is None else args.kv_channels
368
369
- if args.rotary_percent < 1.0:
370
- rotary_dim = int(rotary_dim * args.rotary_percent)
371
-
372
# partial rotary embeddings, which is better than full rotary
373
# Wang and Komatsuzaki et al
374
# https://github.com/kingoflolz/mesh-transformer-jax/
375
self.rotary_pos_emb = RotaryEmbedding(
376
rotary_dim,
377
- rotary_percent=args.rotary_percent,
+ args.rotary_percent,
378
seq_len_interpolation_factor=args.rotary_seq_len_interpolation_factor
379
)
380
0 commit comments