Skip to content

Commit d3f71c6

Browse files
committed
add fixtures
1 parent 94e9312 commit d3f71c6

File tree

2 files changed

+60
-0
lines changed

2 files changed

+60
-0
lines changed
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
{
2+
"version": "1.0",
3+
"truncation": null,
4+
"padding": null,
5+
"added_tokens": [
6+
{
7+
"id": 0,
8+
"content": "<bos>"
9+
},
10+
{
11+
"id": 1,
12+
"content": "<pad>"
13+
},
14+
{
15+
"id": 2,
16+
"content": "<eos>"
17+
},
18+
{
19+
"id": 3,
20+
"content": "<unk>"
21+
}
22+
],
23+
"model": {
24+
"type": "BPE",
25+
"vocab": {
26+
"<bos>": 0,
27+
"<pad>": 1,
28+
"<eos>": 2,
29+
"<unk>": 3,
30+
"offline": 4,
31+
"path": 5,
32+
"_": 6
33+
},
34+
"merges": [
35+
"off line",
36+
"li ne",
37+
"pa th",
38+
"_ of",
39+
"_ pa"
40+
],
41+
"continuing_subword_prefix": "",
42+
"end_of_word_suffix": "",
43+
"unk_token": "<unk>"
44+
},
45+
"normalizer": {
46+
"type": "Lowercase"
47+
},
48+
"pre_tokenizer": {
49+
"type": "Whitespace"
50+
}
51+
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{
2+
"tokenizer_class": "GPT2Tokenizer",
3+
"bos_token": "<bos>",
4+
"eos_token": "<eos>",
5+
"unk_token": "<unk>",
6+
"pad_token": "<pad>",
7+
"model_max_length": 128,
8+
"do_lower_case": false
9+
}

0 commit comments

Comments
 (0)