File tree Expand file tree Collapse file tree 2 files changed +60
-0
lines changed
Tests/TokenizersTests/Resources/Offline Expand file tree Collapse file tree 2 files changed +60
-0
lines changed Original file line number Diff line number Diff line change 1+ {
2+ "version" : " 1.0" ,
3+ "truncation" : null ,
4+ "padding" : null ,
5+ "added_tokens" : [
6+ {
7+ "id" : 0 ,
8+ "content" : " <bos>"
9+ },
10+ {
11+ "id" : 1 ,
12+ "content" : " <pad>"
13+ },
14+ {
15+ "id" : 2 ,
16+ "content" : " <eos>"
17+ },
18+ {
19+ "id" : 3 ,
20+ "content" : " <unk>"
21+ }
22+ ],
23+ "model" : {
24+ "type" : " BPE" ,
25+ "vocab" : {
26+ "<bos>" : 0 ,
27+ "<pad>" : 1 ,
28+ "<eos>" : 2 ,
29+ "<unk>" : 3 ,
30+ "offline" : 4 ,
31+ "path" : 5 ,
32+ "_" : 6
33+ },
34+ "merges" : [
35+ " off line" ,
36+ " li ne" ,
37+ " pa th" ,
38+ " _ of" ,
39+ " _ pa"
40+ ],
41+ "continuing_subword_prefix" : " " ,
42+ "end_of_word_suffix" : " " ,
43+ "unk_token" : " <unk>"
44+ },
45+ "normalizer" : {
46+ "type" : " Lowercase"
47+ },
48+ "pre_tokenizer" : {
49+ "type" : " Whitespace"
50+ }
51+ }
Original file line number Diff line number Diff line change 1+ {
2+ "tokenizer_class" : " GPT2Tokenizer" ,
3+ "bos_token" : " <bos>" ,
4+ "eos_token" : " <eos>" ,
5+ "unk_token" : " <unk>" ,
6+ "pad_token" : " <pad>" ,
7+ "model_max_length" : 128 ,
8+ "do_lower_case" : false
9+ }
You can’t perform that action at this time.
0 commit comments