Skip to content

Commit cbfca94

Browse files
authored
Update gpt-oss configs (#14674)
* remove local attn constraint
  Signed-off-by: Chen Cui <[email protected]>
* fix
  Signed-off-by: Chen Cui <[email protected]>

---------

Signed-off-by: Chen Cui <[email protected]>
1 parent 56ddc45 commit cbfca94

File tree

1 file changed

+3
-3
lines changed

1 file changed

+3
-3
lines changed

nemo/collections/llm/gpt/model/gpt_oss.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,6 @@
2020
from typing import TYPE_CHECKING, Annotated, Callable, List, Literal, Optional, Tuple, Union
2121

2222
import torch
23-
from megatron.core.transformer.enums import AttnBackend
2423
from safetensors import safe_open
2524
from torch import nn
2625
from transformers import AutoModelForCausalLM, GenerationConfig
@@ -63,11 +62,13 @@ class GPTOSSConfig(GPTConfig):
6362

6463
position_embedding_type: str = "yarn"
6564
rotary_base: int = 150000
66-
rotary_scaling_factor: float = 32.0
65+
yarn_rotary_scaling_factor: float = 32.0
6766
yarn_original_max_position_embeddings: int = 4096
6867
yarn_beta_fast: float = 32.0
6968
yarn_beta_slow: float = 1.0
7069
yarn_correction_range_round_to_int: bool = False
70+
yarn_mscale: float = 1.0
71+
yarn_mscale_all_dim: float = 1.0
7172

7273
moe_router_topk: int = 4
7374
moe_router_pre_softmax: bool = False
@@ -83,7 +84,6 @@ class GPTOSSConfig(GPTConfig):
8384
glu_linear_offset: float = 1.0
8485
bias_activation_fusion: bool = True
8586
window_attn_skip_freq: Optional[Union[int, List[int]]] = 2 # alternative SWA/full
86-
attention_backend: AttnBackend = AttnBackend.local # currently only "local" is supported
8787
activation_func_clamp_value: Optional[float] = 7.0
8888

8989

0 commit comments

Comments
 (0)