Skip to content

Commit d39944b

Browse files
authored
Release 1.2.0
2 parents 0448c73 + 852f5be commit d39944b

File tree

85 files changed

+3601
-2327
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

85 files changed

+3601
-2327
lines changed

deeppavlov/_meta.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
__version__ = '1.1.1'
1+
__version__ = '1.2.0'
22
__author__ = 'Neural Networks and Deep Learning lab, MIPT'
33
__description__ = 'An open source library for building end-to-end dialog systems and training chatbots.'
44
__keywords__ = ['NLP', 'NER', 'SQUAD', 'Intents', 'Chatbot']

deeppavlov/configs/classifiers/glue/glue_cola_cased_bert_torch.json renamed to deeppavlov/configs/classifiers/glue/glue_cola_roberta.json

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
{
22
"dataset_reader": {
33
"class_name": "huggingface_dataset_reader",
4-
"path": "glue",
5-
"name": "cola",
4+
"path": "{COMPETITION}",
5+
"name": "{TASK}",
66
"train": "train",
77
"valid": "validation",
88
"test": "test"
@@ -120,11 +120,19 @@
120120
},
121121
"metadata": {
122122
"variables": {
123+
"BASE_MODEL": "roberta-large",
123124
"ROOT_PATH": "~/.deeppavlov",
124125
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
125126
"MODELS_PATH": "{ROOT_PATH}/models",
126-
"MODEL_PATH": "{MODELS_PATH}/classifiers/glue_cola_torch_cased_bert",
127-
"BASE_MODEL": "bert-base-cased"
128-
}
127+
"COMPETITION": "glue",
128+
"TASK": "cola",
129+
"MODEL_PATH": "{MODELS_PATH}/{COMPETITION}/{TASK}/{BASE_MODEL}"
130+
},
131+
"download": [
132+
{
133+
"url": "http://files.deeppavlov.ai/v1/glue/glue_cola_roberta.tar.gz",
134+
"subdir": "{MODEL_PATH}"
135+
}
136+
]
129137
}
130138
}
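deeppavlov/configs/classifiers/glue/glue_mrpc_roberta.json (file header missing from this capture; name inferred from the download URL in this diff — confirm against the commit)

Original file line numberDiff line numberDiff line change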
@@ -1,8 +1,8 @@
11
{
22
"dataset_reader": {
33
"class_name": "huggingface_dataset_reader",
4-
"path": "glue",
5-
"name": "mrpc",
4+
"path": "{COMPETITION}",
5+
"name": "{TASK}",
66
"train": "train",
77
"valid": "validation",
88
"test": "test"
@@ -11,38 +11,46 @@
1111
"class_name": "huggingface_dataset_iterator",
1212
"features": ["sentence1", "sentence2"],
1313
"label": "label",
14-
"use_label_name": false,
1514
"seed": 42
1615
},
1716
"chainer": {
1817
"in": ["sentence1", "sentence2"],
19-
"in_y": ["y_ids"],
18+
"in_y": ["y"],
2019
"pipe": [
2120
{
2221
"class_name": "torch_transformers_preprocessor",
2322
"vocab_file": "{BASE_MODEL}",
2423
"do_lower_case": false,
25-
"max_seq_length": 100,
24+
"max_seq_length": 256,
2625
"in": ["sentence1", "sentence2"],
2726
"out": ["bert_features"]
2827
},
28+
{
29+
"id": "classes_vocab",
30+
"class_name": "simple_vocab",
31+
"fit_on": ["y"],
32+
"save_path": "{MODEL_PATH}/classes.dict",
33+
"load_path": "{MODEL_PATH}/classes.dict",
34+
"in": ["y"],
35+
"out": ["y_ids"]
36+
},
2937
{
3038
"in": ["y_ids"],
3139
"out": ["y_onehot"],
3240
"class_name": "one_hotter",
33-
"depth": 2,
41+
"depth": "#classes_vocab.len",
3442
"single_vector": true
3543
},
3644
{
3745
"class_name": "torch_transformers_classifier",
38-
"n_classes": 2,
46+
"n_classes": "#classes_vocab.len",
3947
"return_probas": true,
4048
"pretrained_bert": "{BASE_MODEL}",
4149
"save_path": "{MODEL_PATH}/model",
4250
"load_path": "{MODEL_PATH}/model",
4351
"optimizer": "AdamW",
4452
"optimizer_parameters": {
45-
"lr": 2e-05
53+
"lr": 1e-06
4654
},
4755
"learning_rate_drop_patience": 3,
4856
"learning_rate_drop_div": 2.0,
@@ -55,32 +63,42 @@
5563
"out": ["y_pred_ids"],
5664
"class_name": "proba2labels",
5765
"max_proba": true
66+
},
67+
{
68+
"in": ["y_pred_ids"],
69+
"out": ["y_pred_labels"],
70+
"ref": "classes_vocab"
5871
}
5972
],
60-
"out": ["y_pred_ids"]
73+
"out": ["y_pred_labels"]
6174
},
6275
"train": {
63-
"batch_size": 100,
64-
"metrics": [
65-
"f1",
66-
"accuracy"
67-
],
68-
"validation_patience": 10,
76+
"batch_size": 4,
77+
"metrics": ["accuracy"],
78+
"epochs": 2,
6979
"val_every_n_epochs": 1,
7080
"log_every_n_epochs": 1,
7181
"show_examples": false,
72-
"evaluation_targets": ["train", "valid"],
82+
"evaluation_targets": ["valid"],
7383
"class_name": "torch_trainer",
7484
"tensorboard_log_dir": "{MODEL_PATH}/",
7585
"pytest_max_batches": 2
7686
},
7787
"metadata": {
7888
"variables": {
89+
"BASE_MODEL": "roberta-large",
7990
"ROOT_PATH": "~/.deeppavlov",
8091
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
8192
"MODELS_PATH": "{ROOT_PATH}/models",
82-
"MODEL_PATH": "{MODELS_PATH}/classifiers/glue_mrpc_torch_cased_bert",
83-
"BASE_MODEL": "bert-base-cased"
84-
}
93+
"COMPETITION": "glue",
94+
"TASK": "mrpc",
95+
"MODEL_PATH": "{MODELS_PATH}/{COMPETITION}/{TASK}/{BASE_MODEL}"
96+
},
97+
"download": [
98+
{
99+
"url": "http://files.deeppavlov.ai/v1/glue/glue_mrpc_roberta.tar.gz",
100+
"subdir": "{MODEL_PATH}"
101+
}
102+
]
85103
}
86104
}

deeppavlov/configs/classifiers/glue/glue_qnli_cased_bert_torch.json renamed to deeppavlov/configs/classifiers/glue/glue_qnli_roberta.json

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
{
22
"dataset_reader": {
33
"class_name": "huggingface_dataset_reader",
4-
"path": "glue",
5-
"name": "qnli",
4+
"path": "{COMPETITION}",
5+
"name": "{TASK}",
66
"train": "train",
77
"valid": "validation",
88
"test": "test"
@@ -86,11 +86,19 @@
8686
},
8787
"metadata": {
8888
"variables": {
89+
"BASE_MODEL": "roberta-large",
8990
"ROOT_PATH": "~/.deeppavlov",
9091
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
9192
"MODELS_PATH": "{ROOT_PATH}/models",
92-
"MODEL_PATH": "{MODELS_PATH}/classifiers/glue_qnli_torch_cased_bert",
93-
"BASE_MODEL": "bert-base-cased"
94-
}
93+
"COMPETITION": "glue",
94+
"TASK": "qnli",
95+
"MODEL_PATH": "{MODELS_PATH}/{COMPETITION}/{TASK}/{BASE_MODEL}"
96+
},
97+
"download": [
98+
{
99+
"url": "http://files.deeppavlov.ai/v1/glue/glue_qnli_roberta.tar.gz",
100+
"subdir": "{MODEL_PATH}"
101+
}
102+
]
95103
}
96104
}

deeppavlov/configs/classifiers/glue/glue_qqp_cased_bert_torch.json renamed to deeppavlov/configs/classifiers/glue/glue_qqp_roberta.json

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
{
22
"dataset_reader": {
33
"class_name": "huggingface_dataset_reader",
4-
"path": "glue",
5-
"name": "qqp",
4+
"path": "{COMPETITION}",
5+
"name": "{TASK}",
66
"train": "train",
77
"valid": "validation",
88
"test": "test"
@@ -76,11 +76,19 @@
7676
},
7777
"metadata": {
7878
"variables": {
79+
"BASE_MODEL": "roberta-large",
7980
"ROOT_PATH": "~/.deeppavlov",
8081
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
8182
"MODELS_PATH": "{ROOT_PATH}/models",
82-
"MODEL_PATH": "{MODELS_PATH}/classifiers/glue_qqp_torch_cased_bert",
83-
"BASE_MODEL" : "bert-base-cased"
84-
}
83+
"COMPETITION": "glue",
84+
"TASK": "qqp",
85+
"MODEL_PATH": "{MODELS_PATH}/{COMPETITION}/{TASK}/{BASE_MODEL}"
86+
},
87+
"download": [
88+
{
89+
"url": "http://files.deeppavlov.ai/v1/glue/glue_qqp_roberta.tar.gz",
90+
"subdir": "{MODEL_PATH}"
91+
}
92+
]
8593
}
8694
}

deeppavlov/configs/classifiers/glue/glue_sst2_cased_bert_torch.json renamed to deeppavlov/configs/classifiers/glue/glue_sst2_roberta.json

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
{
22
"dataset_reader": {
33
"class_name": "huggingface_dataset_reader",
4-
"path": "glue",
5-
"name": "sst2",
4+
"path": "{COMPETITION}",
5+
"name": "{TASK}",
66
"train": "train",
77
"valid": "validation",
88
"test": "test"
@@ -120,11 +120,19 @@
120120
},
121121
"metadata": {
122122
"variables": {
123+
"BASE_MODEL": "roberta-large",
123124
"ROOT_PATH": "~/.deeppavlov",
124125
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
125126
"MODELS_PATH": "{ROOT_PATH}/models",
126-
"MODEL_PATH": "{MODELS_PATH}/classifiers/glue_sst2_torch_cased_bert",
127-
"BASE_MODEL": "bert-base-cased"
128-
}
127+
"COMPETITION": "glue",
128+
"TASK": "sst2",
129+
"MODEL_PATH": "{MODELS_PATH}/{COMPETITION}/{TASK}/{BASE_MODEL}"
130+
},
131+
"download": [
132+
{
133+
"url": "http://files.deeppavlov.ai/v1/glue/glue_sst2_roberta.tar.gz",
134+
"subdir": "{MODEL_PATH}"
135+
}
136+
]
129137
}
130138
}

deeppavlov/configs/classifiers/glue/glue_stsb_cased_bert_torch.json renamed to deeppavlov/configs/classifiers/glue/glue_stsb_roberta.json

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
{
22
"dataset_reader": {
33
"class_name": "huggingface_dataset_reader",
4-
"path": "glue",
5-
"name": "stsb",
4+
"path": "{COMPETITION}",
5+
"name": "{TASK}",
66
"train": "train",
77
"valid": "validation",
88
"test": "test"
@@ -47,7 +47,7 @@
4747
"out": ["y_pred"]
4848
},
4949
"train": {
50-
"batch_size": 128,
50+
"batch_size": 32,
5151
"metrics": [
5252
"pearson_correlation",
5353
"spearman_correlation"
@@ -63,11 +63,19 @@
6363
},
6464
"metadata": {
6565
"variables": {
66+
"BASE_MODEL": "roberta-large",
6667
"ROOT_PATH": "~/.deeppavlov",
6768
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
6869
"MODELS_PATH": "{ROOT_PATH}/models",
69-
"MODEL_PATH": "{MODELS_PATH}/classifiers/glue_stsb_torch_cased_bert",
70-
"BASE_MODEL": "bert-base-cased"
71-
}
70+
"COMPETITION": "glue",
71+
"TASK": "stsb",
72+
"MODEL_PATH": "{MODELS_PATH}/{COMPETITION}/{TASK}/{BASE_MODEL}"
73+
},
74+
"download": [
75+
{
76+
"url": "http://files.deeppavlov.ai/v1/glue/glue_stsb_roberta.tar.gz",
77+
"subdir": "{MODEL_PATH}"
78+
}
79+
]
7280
}
7381
}

deeppavlov/configs/entity_extraction/entity_detection_en.json

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,15 @@
55
{
66
"class_name": "ner_chunker",
77
"batch_size": 16,
8-
"max_chunk_len" : 180,
98
"max_seq_len" : 300,
109
"vocab_file": "{TRANSFORMER}",
1110
"in": ["x"],
1211
"out": ["x_chunk", "chunk_nums", "chunk_sentences_offsets", "chunk_sentences"]
1312
},
1413
{
15-
"thres_proba": 0.05,
14+
"thres_proba": 0.6,
1615
"o_tag": "O",
1716
"tags_file": "{NER_PATH}/tag.dict",
18-
"return_entities_with_tags": true,
1917
"class_name": "entity_detection_parser",
2018
"id": "edp"
2119
},

deeppavlov/configs/entity_extraction/entity_detection_ru.json

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
{
66
"class_name": "ner_chunker",
77
"batch_size": 16,
8-
"max_chunk_len" : 180,
98
"max_seq_len" : 300,
109
"vocab_file": "{TRANSFORMER}",
1110
"in": ["x"],
@@ -15,7 +14,6 @@
1514
"thres_proba": 0.05,
1615
"o_tag": "O",
1716
"tags_file": "{NER_PATH}/tag.dict",
18-
"return_entities_with_tags": true,
1917
"class_name": "entity_detection_parser",
2018
"id": "edp"
2119
},

deeppavlov/configs/entity_extraction/entity_extraction_en.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,11 @@
99
},
1010
{
1111
"config_path": "{CONFIGS_PATH}/entity_extraction/entity_linking_en.json",
12-
"in": ["entity_substr", "tags", "sentences", "entity_offsets", "sentences_offsets"],
13-
"out": ["entity_ids", "entity_conf", "entity_pages"]
12+
"in": ["entity_substr", "tags", "probas", "sentences", "entity_offsets", "sentences_offsets"],
13+
"out": ["entity_ids", "entity_conf", "entity_pages", "entity_labels"]
1414
}
1515
],
16-
"out": ["entity_substr", "tags", "entity_offsets", "entity_ids", "entity_conf", "entity_pages"]
16+
"out": ["entity_substr", "tags", "entity_offsets", "entity_ids", "entity_conf", "entity_pages", "entity_labels"]
1717
},
1818
"metadata": {
1919
"variables": {

0 commit comments

Comments
 (0)