Skip to content

Commit d4c65ac

Browse files
committed
make model wider, shorter
1 parent 0ef2a8a commit d4c65ac

File tree

1 file changed

+5
-2
lines changed

1 file changed

+5
-2
lines changed

src/olmo_core/nn/transformer/config.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -414,8 +414,11 @@ def olmo2_26B(cls, vocab_size: int, **kwargs) -> "TransformerConfig":
         """
         A 26B OLMo model config.
         """
-        return cls.llama2_26B(
-            vocab_size,
+        return cls.llama_like(
+            vocab_size=vocab_size,
+            d_model=6656,
+            n_layers=kwargs.pop("n_layers", 48),
+            n_heads=kwargs.pop("n_heads", 52),
             block_name=kwargs.pop("block_name", TransformerBlockType.reordered_norm),
             qk_norm=kwargs.pop("qk_norm", True),
             rope_theta=kwargs.pop("rope_theta", 500_000),

0 commit comments

Comments
 (0)