@@ -27,6 +27,7 @@ pub enum Tokenizer {
27
27
Python {
28
28
tokenizer_name : String ,
29
29
revision : Option < String > ,
30
+ trust_remote_code : bool ,
30
31
} ,
31
32
Rust ( tokenizers:: Tokenizer ) ,
32
33
}
@@ -38,15 +39,20 @@ impl<'a> PyTokenizer<'a> {
38
39
py : Python < ' a > ,
39
40
tokenizer_name : String ,
40
41
revision : Option < String > ,
42
+ trust_remote_code : bool ,
41
43
) -> PyResult < PyTokenizer < ' a > > {
42
44
let transformers = py. import_bound ( "transformers" ) ?;
43
45
let auto = transformers. getattr ( "AutoTokenizer" ) ?;
44
46
let from_pretrained = auto. getattr ( "from_pretrained" ) ?;
45
47
let args = ( tokenizer_name, ) ;
46
48
let kwargs = if let Some ( rev) = & revision {
47
- [ ( "revision" , rev. to_string ( ) ) ] . into_py_dict_bound ( py)
49
+ [
50
+ ( "revision" , rev. to_string ( ) . into_py ( py) ) ,
51
+ ( "trust_remote_code" , trust_remote_code. into_py ( py) ) ,
52
+ ]
53
+ . into_py_dict_bound ( py)
48
54
} else {
49
- pyo3 :: types :: PyDict :: new_bound ( py)
55
+ [ ( "trust_remote_code" , trust_remote_code . into_py ( py ) ) ] . into_py_dict_bound ( py)
50
56
} ;
51
57
let tokenizer = from_pretrained. call ( args, Some ( & kwargs) ) ?;
52
58
tracing:: info!( "Loaded a python tokenizer" ) ;
0 commit comments