Skip to content

Commit b301d12

Browse files
authored
Merge pull request #32 from wellcometrust/new-release
Preparing for new release
2 parents bbca141 + b51452c commit b301d12

File tree

3 files changed

+52
-6
lines changed

3 files changed

+52
-6
lines changed

Makefile

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,14 @@
11
.DEFAULT_GOAL := all
22

3+
# Determine OS (from https://gist.github.com/sighingnow/deee806603ec9274fd47)
4+
UNAME_S := $(shell uname -s)
5+
ifeq ($(UNAME_S),Linux)
6+
OSFLAG := linux
7+
endif
8+
ifeq ($(UNAME_S),Darwin)
9+
OSFLAG := macosx
10+
endif
11+
312
#
413
# Set file and version for embeddings and model, plus local paths
514
#
@@ -84,9 +93,9 @@ datasets = data/splitting/2019.12.0_splitting_train.tsv \
8493
data/parsing/2020.3.2_parsing_train.tsv \
8594
data/parsing/2020.3.2_parsing_test.tsv \
8695
data/parsing/2020.3.2_parsing_valid.tsv \
87-
data/multitask/2020.3.19_multitask_train.tsv \
88-
data/multitask/2020.3.19_multitask_test.tsv \
89-
data/multitask/2020.3.19_multitask_valid.tsv
96+
data/multitask/2020.3.18_multitask_train.tsv \
97+
data/multitask/2020.3.18_multitask_test.tsv \
98+
data/multitask/2020.3.18_multitask_valid.tsv
9099

91100

92101
rodrigues_datasets = data/rodrigues/clean_train.txt \
@@ -121,9 +130,10 @@ sync_model_to_s3:
121130
# artefacts otherwise they can make a mess of your build! Public access to
122131
# the wheel is granted with the --acl public-read flag.
123132

133+
124134
.PHONY: dist
125135
dist:
126-
-rm build/bin build/bdist.linux-x86_64 -r
136+
# Options must precede operands: BSD/macOS rm does not accept a trailing -r.
-rm -r build/lib build/bin build/bdist.$(OSFLAG)*
127137
# -r goes before the operands so the removal also works with BSD/macOS rm.
-rm -r deep_reference_parser-20*
128138
# -r goes before the operand so the removal also works with BSD/macOS rm.
-rm -r deep_reference_parser.egg-info
129139
-rm dist/*
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
__name__ = "deep_reference_parser"
2-
__version__ = "2020.3.3"
2+
__version__ = "2020.4.5"
33
__description__ = "Deep learning model for finding and parsing references"
44
__url__ = "https://github.com/wellcometrust/deep_reference_parser"
55
__author__ = "Wellcome Trust DataLabs Team"
66
__author_email__ = "[email protected]"
77
__license__ = "MIT"
88
__splitter_model_version__ = "2020.3.6_splitting"
99
__parser_model_version__ = "2020.3.8_parsing"
10-
__splitparser_model_version__ = "2020.3.19_multitask"
10+
__splitparser_model_version__ = "2020.4.5_multitask"
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
[DEFAULT]
2+
version = 2020.4.5_multitask
3+
description = Uses 2020.3.18 data
4+
deep_reference_parser_version = 9432b6e
5+
6+
[data]
7+
# Note that test_proportion and valid_proportion are only used for data creation steps,
8+
# not when running the train command.
9+
test_proportion = 0.25
10+
valid_proportion = 0.25
11+
data_path = data/
12+
respect_line_endings = 0
13+
respect_doc_endings = 1
14+
line_limit = 150
15+
policy_train = data/processed/annotated/deep_reference_parser/multitask/2020.3.18_multitask_train.tsv
16+
policy_test = data/processed/annotated/deep_reference_parser/multitask/2020.3.18_multitask_test.tsv
17+
policy_valid = data/processed/annotated/deep_reference_parser/multitask/2020.3.18_multitask_valid.tsv
18+
s3_slug = https://datalabs-public.s3.eu-west-2.amazonaws.com/deep_reference_parser/
19+
20+
[build]
21+
output_path = data/models/multitask/2020.4.5_multitask/
22+
output = crf
23+
word_embeddings = embeddings/2020.1.1-wellcome-embeddings-300.txt
24+
pretrained_embedding = 0
25+
dropout = 0.5
26+
lstm_hidden = 400
27+
word_embedding_size = 300
28+
char_embedding_size = 100
29+
char_embedding_type = BILSTM
30+
optimizer = adam
31+
32+
[train]
33+
epochs = 60
34+
batch_size = 100
35+
early_stopping_patience = 5
36+
metric = val_f1

0 commit comments

Comments
 (0)