File tree Expand file tree Collapse file tree 5 files changed +21
-12
lines changed
Expand file tree Collapse file tree 5 files changed +21
-12
lines changed Original file line number Diff line number Diff line change @@ -9,4 +9,5 @@ deep_reference_parser/models/
99* .whl
1010embeddings /
1111models /
12- .tox /
12+ .tox /
13+ * __pycache__ /
Original file line number Diff line number Diff line change @@ -78,9 +78,12 @@ $(artefacts):
7878models : $(artefacts )
7979
8080
81- datasets = data/2019.12.0_train.tsv \
82- data/2019.12.0_test.tsv \
83- data/2019.12.0_valid.tsv
81+ datasets = data/splitting/2019.12.0_splitting_train.tsv \
82+ data/splitting/2019.12.0_splitting_test.tsv \
83+ data/splitting/2019.12.0_splitting_valid.tsv \
84+ data/splitting/2020.2.0_parsing_train.tsv \
85+ data/splitting/2020.2.0_parsing_test.tsv \
86+ data/splitting/2020.2.0_parsing_valid.tsv
8487
8588
8689rodrigues_datasets = data/rodrigues/clean_train.txt \
@@ -90,7 +93,7 @@ rodrigues_datasets = data/rodrigues/clean_train.txt \
9093RODRIGUES_DATA_URL = https://github.com/dhlab-epfl/LinkedBooksDeepReferenceParsing/raw/master/dataset/
9194
9295$(datasets ) :
93- @ mkdir -p data
96+ @ mkdir -p $( @D )
9497 curl -s $(S3_BUCKET_HTTP ) /$@ --output $@
9598
9699$(rodrigues_datasets ) :
Original file line number Diff line number Diff line change 55__author__ = "Wellcome Trust DataLabs Team"
66__author_email__ = "[email protected] " 77__license__ = "MIT"
8- __model_version__ = "2019.12.0 "
8+ __model_version__ = "2019.12.0_splitting "
Original file line number Diff line number Diff line change 11[DEFAULT]
2- version = 2019.12.0
2+ version = 2019.12.0_splitting
33
44[data]
55test_proportion = 0.25
@@ -8,13 +8,13 @@ data_path = data/
88respect_line_endings = 0
99respect_doc_endings = 1
1010line_limit = 250
11- policy_train = data/2019.12.0_train .tsv
12- policy_test = data/2019.12.0_test .tsv
13- policy_valid = data/2019.12.0_valid .tsv
11+ policy_train = data/splitting/ 2019.12.0_splitting_train .tsv
12+ policy_test = data/splitting/ 2019.12.0_splitting_test .tsv
13+ policy_valid = data/splitting/ 2019.12.0_splitting_valid .tsv
1414s3_slug = https://datalabs-public.s3.eu-west-2.amazonaws.com/deep_reference_parser/
1515
1616[build]
17- output_path = models/2019.12.0 /
17+ output_path = models/splitting/ 2019.12.0_splitting /
1818output = crf
1919word_embeddings = embeddings/2020.1.1-wellcome-embeddings-300.txt
2020pretrained_embedding = 0
Original file line number Diff line number Diff line change 1+ from .numbered_reference_annotator import (NumberedReferenceAnnotator ,
2+ annotate_numbered_references )
3+ from .prodigy_to_tsv import TokenLabelPairs , prodigy_to_tsv
4+ from .reach_to_prodigy import ReachToProdigy , reach_to_prodigy
5+ from .reference_to_token_annotations import (TokenTagger ,
6+ reference_to_token_annotations )
17from .spacy_doc_to_prodigy import SpacyDocToProdigy
2- from .reference_to_token_annotations import TokenTagger
You can’t perform that action at this time.
0 commit comments