@@ -2856,9 +2856,6 @@ class LLaDAModel(TextModel):
     model_arch = gguf.MODEL_ARCH.LLADA
     undo_permute = True
 
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-
     def get_vocab_base(self) -> tuple[list[str], list[int], str]:
         tokens: list[str] = []
         toktypes: list[int] = []
@@ -2897,14 +2894,7 @@ def get_vocab_base(self) -> tuple[list[str], list[int], str]:
         return tokens, toktypes, tokpre
 
     def set_vocab(self):
-        try:
-            self._set_vocab_sentencepiece()
-        except FileNotFoundError:
-            try:
-                self._set_vocab_llama_hf()
-            except (FileNotFoundError, TypeError):
-                # Llama 3
-                self._set_vocab_gpt2()
+        self._set_vocab_gpt2()
 
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
@@ -2942,14 +2932,6 @@ def set_gguf_parameters(self):
         # LLaDA models use non-causal attention for diffusion, similar to Dream
         self.gguf_writer.add_causal_attention(False)
         # Handle RoPE scaling similar to LlamaModel and Dream
-        rope_scaling = self.hparams.get("rope_scaling") or {}
-        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "linear" and "factor" in rope_scaling:
-            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
-            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
-        elif rope_scaling.get("rope_type", rope_scaling.get("type")) == "yarn" and "factor" in rope_scaling:
-            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
-            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
-            self.gguf_writer.add_rope_scaling_orig_ctx_len(rope_scaling["original_max_position_embeddings"])
 
         # Add LLaDA-specific parameters
         mask_token_id = self.hparams.get("mask_token_id")
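For reference, below is a minimal standalone sketch of the rope_scaling branching that the hunk above drops from LLaDAModel.set_gguf_parameters, showing how the HF rope_scaling hparams map to a scaling type, factor, and original context length. This is not part of convert_hf_to_gguf.py; parse_rope_scaling and the returned dict layout are hypothetical, introduced only for illustration.

```python
# Standalone sketch (hypothetical helper, not the converter's API) of the
# rope_scaling branch removed in the diff above.
from typing import Optional


def parse_rope_scaling(hparams: dict) -> Optional[dict]:
    """Return the RoPE scaling info that the removed branch would have
    written to the GGUF header, or None if nothing applies."""
    rope_scaling = hparams.get("rope_scaling") or {}
    # HF configs use either "rope_type" (newer) or "type" (older) as the key.
    scaling_type = rope_scaling.get("rope_type", rope_scaling.get("type"))
    if scaling_type == "linear" and "factor" in rope_scaling:
        return {"type": "linear", "factor": rope_scaling["factor"]}
    if scaling_type == "yarn" and "factor" in rope_scaling:
        return {
            "type": "yarn",
            "factor": rope_scaling["factor"],
            "orig_ctx_len": rope_scaling["original_max_position_embeddings"],
        }
    return None


if __name__ == "__main__":
    # Example: a YaRN-scaled config as it might appear in config.json.
    print(parse_rope_scaling({
        "rope_scaling": {
            "rope_type": "yarn",
            "factor": 4.0,
            "original_max_position_embeddings": 4096,
        }
    }))
```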