File tree Expand file tree Collapse file tree 3 files changed +52
-6
lines changed Expand file tree Collapse file tree 3 files changed +52
-6
lines changed Original file line number Diff line number Diff line change 11.DEFAULT_GOAL := all
22
# Detect the host operating system.
# Adapted from https://gist.github.com/sighingnow/deee806603ec9274fd47
#
# UNAME_S holds the output of `uname -s`. OSFLAG becomes `linux` on Linux and
# `macosx` on Darwin; on any other system it is left unset.
#
# The variable reference must be written exactly as $(UNAME_S): whitespace
# inside the parentheses is part of the variable name, so `$(UNAME_S )` would
# expand a different (undefined) variable and neither branch would match.
# $(strip ...) guards against stray whitespace in the command output.
UNAME_S := $(shell uname -s)
ifeq ($(strip $(UNAME_S)),Linux)
OSFLAG := linux
endif
ifeq ($(strip $(UNAME_S)),Darwin)
OSFLAG := macosx
endif
11+
312#
413# Set file and version for embeddings and model, plus local paths
514#
@@ -84,9 +93,9 @@ datasets = data/splitting/2019.12.0_splitting_train.tsv \
8493 data/parsing/2020.3.2_parsing_train.tsv \
8594 data/parsing/2020.3.2_parsing_test.tsv \
8695 data/parsing/2020.3.2_parsing_valid.tsv \
87- data/multitask/2020.3.19_multitask_train .tsv \
88- data/multitask/2020.3.19_multitask_test .tsv \
89- data/multitask/2020.3.19_multitask_valid .tsv
96+ data/multitask/2020.3.18_multitask_train .tsv \
97+ data/multitask/2020.3.18_multitask_test .tsv \
98+ data/multitask/2020.3.18_multitask_valid .tsv
9099
91100
92101rodrigues_datasets = data/rodrigues/clean_train.txt \
@@ -121,9 +130,10 @@ sync_model_to_s3:
121130# artefacts otherwise they can make a mess of your build! Public access to
122131# the wheel is granted with the --acl public-read flag.
123132
133+
# dist: clear out artefacts left by earlier builds.
#
# - Every rm is prefixed with `-` so make carries on when rm fails (for
#   example because there is nothing to delete).
# - `-r` is placed BEFORE the paths. GNU rm accepts options after operands,
#   but BSD/macOS rm stops option parsing at the first operand and would
#   treat a trailing `-r` as a file name, leaving the directories in place.
# - The bdist directory is matched as build/bdist.$(OSFLAG)* with no spaces
#   inside the pattern: a space before `*` would turn it into a bare `*`
#   and match everything in the working directory.
.PHONY: dist
dist:
	-rm -r build/lib build/bin build/bdist.$(OSFLAG)*
	-rm -r deep_reference_parser-20*
	-rm -r deep_reference_parser.egg-info
	-rm dist/*
# Package metadata for deep_reference_parser: release version, descriptive
# fields, and the version tags of the splitter, parser and multitask
# (split + parse) models.
#
# Version strings carry no surrounding whitespace, so that
# __splitparser_model_version__ matches the `version` value and the
# output_path directory name used in the 2020.4.5_multitask model config.
__name__ = "deep_reference_parser"
__version__ = "2020.4.5"
__description__ = "Deep learning model for finding and parsing references"
__url__ = "https://github.com/wellcometrust/deep_reference_parser"
__author__ = "Wellcome Trust DataLabs Team"
# NOTE(review): this value looks like a redaction placeholder left by the tool
# that captured the file; restore the real contact address before release.
__author_email__ = "[email protected]"
__license__ = "MIT"
__splitter_model_version__ = "2020.3.6_splitting"
__parser_model_version__ = "2020.3.8_parsing"
__splitparser_model_version__ = "2020.4.5_multitask"
# Model configuration for the 2020.4.5_multitask run.

[DEFAULT]
version                       = 2020.4.5_multitask
description                   = Uses 2020.3.18 data
# NOTE(review): looks like a short commit hash of the code used - confirm.
deep_reference_parser_version = 9432b6e

[data]
# test_proportion and valid_proportion matter only during the data creation
# steps; the train command does not use them.
test_proportion      = 0.25
valid_proportion     = 0.25
data_path            = data/
respect_line_endings = 0
respect_doc_endings  = 1
line_limit           = 150
policy_train         = data/processed/annotated/deep_reference_parser/multitask/2020.3.18_multitask_train.tsv
policy_test          = data/processed/annotated/deep_reference_parser/multitask/2020.3.18_multitask_test.tsv
policy_valid         = data/processed/annotated/deep_reference_parser/multitask/2020.3.18_multitask_valid.tsv
s3_slug              = https://datalabs-public.s3.eu-west-2.amazonaws.com/deep_reference_parser/

[build]
# output_path ends in the same tag as `version` in [DEFAULT].
output_path          = data/models/multitask/2020.4.5_multitask/
output               = crf
word_embeddings      = embeddings/2020.1.1-wellcome-embeddings-300.txt
pretrained_embedding = 0
dropout              = 0.5
lstm_hidden          = 400
word_embedding_size  = 300
char_embedding_size  = 100
char_embedding_type  = BILSTM
optimizer            = adam

[train]
epochs                  = 60
batch_size              = 100
early_stopping_patience = 5
metric                  = val_f1
You can’t perform that action at this time.
0 commit comments