We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
2 parents 8ca2d59 + 32749ea, commit 3a8ff0f (Copy full SHA for 3a8ff0f)
megatron/model/language_model.py
@@ -366,14 +366,12 @@ def __init__(self,
366
rotary_dim = args.hidden_size // args.num_attention_heads \
367
if args.kv_channels is None else args.kv_channels
368
369
- if args.rotary_percent < 1.0:
370
- rotary_dim = int(rotary_dim * args.rotary_percent)
371
-
372
# partial rotary embeddings, which is better than full rotary
373
# Wang and Komatsuzaki et al
374
# https://github.com/kingoflolz/mesh-transformer-jax/
375
self.rotary_pos_emb = RotaryEmbedding(
376
rotary_dim,
+ args.rotary_percent,
377
seq_len_interpolation_factor=args.rotary_seq_len_interpolation_factor
378
)
379
0 commit comments