-
Notifications
You must be signed in to change notification settings - Fork 0
/
example_config.yaml
113 lines (109 loc) · 6.62 KB
/
example_config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# ID used for the collective run of all tasks; also used as the name of the
# result folder
run_id: 0

# log level (one of 'INFO', 'DEBUG', ..)
log_level: 'INFO'

# maximum number of CPUs to be used
max_cpus: 1

# index of the GPU to use, or the literal string 'None' if no GPU should be
# used (quoted so it is unmistakably a string and not a YAML null)
gpu: 'None'

# file input format (one of: "tsv", "nt", "yago")
format: tsv
# Entity mappers, keyed by mapper name. Each entry names the container image
# that implements the mapper plus that mapper's own settings.
mapping:
  # mapper type: identity
  'identity':
    image: gitlab.dws.informatik.uni-mannheim.de:5050/nheist/kgreat/mapping/identity
    # target column to map against
    target: 'DBpedia16_URI'
    # the subjects of the triples in these files are used as the set of
    # entities of the KG
    known_entity_files: ['file1', 'file2', 'file3']
    # a prefix of the target column that should be removed before mapping
    remove_prefix: 'target-prefix-to-be-removed'
  # mapper type: same-as
  'same-as':
    image: gitlab.dws.informatik.uni-mannheim.de:5050/nheist/kgreat/mapping/same-as
    # prefix of the source KG to be mapped against
    kg_prefix: 'http://example.org/resource/'
    # files containing the same-as links
    input_files: ['file1', 'file2']
    # additional predicates (other than owl:sameAs) to use
    additional_predicates: ['pred1', 'pred2']
  # mapper type: label
  'label':
    image: gitlab.dws.informatik.uni-mannheim.de:5050/nheist/kgreat/mapping/label
    # files containing labels
    input_files: ['file1', 'file2']
    # predicates used to retrieve labels
    label_predicates: ['http://www.w3.org/2000/01/rdf-schema#label']
    # set to true if an entity should only be mapped if there is a single
    # exact match to its label
    definitive_matches_only: false
    # lower threshold of acceptable similarity between labels
    # (set to 1 for exact matches only)
    similarity_threshold: 0.7
    # number of entities mapped at once; increase for better run time at the
    # cost of more memory
    chunk_size: 1000
# Preprocessing steps, keyed by step name. Each entry names the container
# image that implements the step plus that step's own settings.
preprocessing:
  # link-prediction embedding generation
  # (TransE, TransR, DistMult, RESCAL, ComplEx)
  'embedding-lp':
    image: gitlab.dws.informatik.uni-mannheim.de:5050/nheist/kgreat/preprocessing/embedding-lp
    # files within the `data` folder that should be used to create embeddings
    input_files: ['file_1.ttl', 'file_2.ttl']
    # choose one or more
    models: ['TransE_l1', 'TransE_l2', 'TransR', 'DistMult', 'RESCAL', 'ComplEx']
    # number of epochs to train the embedding model for
    epochs: 1
    # batch size to use on GPU
    batch_size: 30000
  # rdf2vec embedding generation
  'embedding-rdf2vec':
    image: gitlab.dws.informatik.uni-mannheim.de:5050/nheist/kgreat/preprocessing/embedding-rdf2vec
    # files within the `data` folder that should be used to create embeddings
    input_files: ['file_1.ttl', 'file_2.ttl']
    # choose one or more - currently only the base variant is implemented
    models: ['RDF2vec']
    # number of epochs to train the embedding model for
    epochs: 5
  # optional step for speeding up embedding-based tasks
  'embedding-speedup':
    image: gitlab.dws.informatik.uni-mannheim.de:5050/nheist/kgreat/preprocessing/embedding-speedup
    # whether to create an Approximate Nearest Neighbor search index
    ann_index: true
    # whether to create an embedding file with entities of all datasets
    small_embeddings: true
# All tasks to be run, keyed by task name. Each entry gives the container
# image of the dataset and the type of task to evaluate on it.
task:
  # --- classification / regression ---
  'dm-aaup_classification':
    image: gitlab.dws.informatik.uni-mannheim.de:5050/nheist/kgreat/dataset/dm-aaup
    type: classification
  'dm-aaup_regression':
    image: gitlab.dws.informatik.uni-mannheim.de:5050/nheist/kgreat/dataset/dm-aaup
    type: regression
  'dm-cities_classification':
    image: gitlab.dws.informatik.uni-mannheim.de:5050/nheist/kgreat/dataset/dm-cities
    type: classification
  'dm-cities_regression':
    image: gitlab.dws.informatik.uni-mannheim.de:5050/nheist/kgreat/dataset/dm-cities
    type: regression
  'dm-forbes_classification':
    image: gitlab.dws.informatik.uni-mannheim.de:5050/nheist/kgreat/dataset/dm-forbes
    type: classification
  'dm-forbes_regression':
    image: gitlab.dws.informatik.uni-mannheim.de:5050/nheist/kgreat/dataset/dm-forbes
    type: regression
  'dm-metacriticalbums_classification':
    image: gitlab.dws.informatik.uni-mannheim.de:5050/nheist/kgreat/dataset/dm-metacriticalbums
    type: classification
  'dm-metacriticalbums_regression':
    image: gitlab.dws.informatik.uni-mannheim.de:5050/nheist/kgreat/dataset/dm-metacriticalbums
    type: regression
  'dm-metacriticmovies_classification':
    image: gitlab.dws.informatik.uni-mannheim.de:5050/nheist/kgreat/dataset/dm-metacriticmovies
    type: classification
  'dm-metacriticmovies_regression':
    image: gitlab.dws.informatik.uni-mannheim.de:5050/nheist/kgreat/dataset/dm-metacriticmovies
    type: regression
  # --- clustering ---
  'dm-cities2000andcountries_clustering':
    image: gitlab.dws.informatik.uni-mannheim.de:5050/nheist/kgreat/dataset/dm-cities2000andcountries
    type: clustering
  'dm-citiesandcountries_clustering':
    image: gitlab.dws.informatik.uni-mannheim.de:5050/nheist/kgreat/dataset/dm-citiesandcountries
    type: clustering
  'dm-citiesmoviesalbumscompaniesuni_clustering':
    image: gitlab.dws.informatik.uni-mannheim.de:5050/nheist/kgreat/dataset/dm-citiesmoviesalbumscompaniesuni
    type: clustering
  'dm-teams_clustering':
    image: gitlab.dws.informatik.uni-mannheim.de:5050/nheist/kgreat/dataset/dm-teams
    type: clustering
  # --- document similarity / entity relatedness ---
  'dm-lp50_documentSimilarity':
    image: gitlab.dws.informatik.uni-mannheim.de:5050/nheist/kgreat/dataset/dm-lp50
    type: documentSimilarity
  'dm-kore_entityRelatedness':
    image: gitlab.dws.informatik.uni-mannheim.de:5050/nheist/kgreat/dataset/dm-kore
    type: entityRelatedness
  # --- semantic analogies ---
  'dm-allcapitalcountryentities_semanticAnalogies':
    image: gitlab.dws.informatik.uni-mannheim.de:5050/nheist/kgreat/dataset/dm-allcapitalcountryentities
    type: semanticAnalogies
  'dm-capitalcountryentities_semanticAnalogies':
    image: gitlab.dws.informatik.uni-mannheim.de:5050/nheist/kgreat/dataset/dm-capitalcountryentities
    type: semanticAnalogies
  'dm-citystateentities_semanticAnalogies':
    image: gitlab.dws.informatik.uni-mannheim.de:5050/nheist/kgreat/dataset/dm-citystateentities
    type: semanticAnalogies
  'dm-currencyentities_semanticAnalogies':
    image: gitlab.dws.informatik.uni-mannheim.de:5050/nheist/kgreat/dataset/dm-currencyentities
    type: semanticAnalogies
  # --- recommendation ---
  'dm-movielens_recommendation':
    image: gitlab.dws.informatik.uni-mannheim.de:5050/nheist/kgreat/dataset/dm-movielens
    type: recommendation
  'dm-lastfm_recommendation':
    image: gitlab.dws.informatik.uni-mannheim.de:5050/nheist/kgreat/dataset/dm-lastfm
    type: recommendation
  'dm-librarything_recommendation':
    image: gitlab.dws.informatik.uni-mannheim.de:5050/nheist/kgreat/dataset/dm-librarything
    type: recommendation