-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscript.py
124 lines (106 loc) · 5.71 KB
/
script.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import sys
import os
import re
import MaRQ
def is_number(string):
    """Tell whether *string* can be parsed by ``float()``.

    Returns True when ``float(string)`` succeeds and False when it raises
    ``ValueError``. Any other exception propagates to the caller.
    """
    try:
        float(string)
    except ValueError:
        parsable = False
    else:
        parsable = True
    return parsable
def triple_pattern_to_sparql(pattern):
    """Render one triple pattern as a line of SPARQL followed by a comment.

    *pattern* is a mapping with the string entries 'subject', 'predicate',
    'object' and 'source'.

    When the predicate is the typing keyword ('a' or 'rdf:type') it is
    written bare and the object is wrapped in angle brackets; otherwise the
    predicate is wrapped in angle brackets and the object is written bare.
    The line ends with '.', a tab, and '#' followed by the pattern's source.
    """
    predicate = pattern['predicate']
    if predicate in ('a', 'rdf:type'):
        pieces = [pattern['subject'], ' ', predicate, ' <', pattern['object'], '>']
    else:
        pieces = [pattern['subject'], ' <', predicate, '> ', pattern['object']]
    pieces.extend(['.\t#', pattern['source']])
    return ''.join(pieces)
# Main
# Command line:
#   <script> [threshold] <mapping> <mapping> [<mapping> ...]
#   <script> [threshold] <directory containing the mappings>
# This section defines Jaccard_threshold, l_names and l_mapping for the rest
# of the script.

# An optional leading numeric argument is the threshold at which a join is
# accepted. Guard on the argument count so that running the script with no
# argument at all reaches the usage message below instead of an IndexError.
if len(sys.argv) > 1 and is_number(sys.argv[1]):
    Jaccard_threshold = float(sys.argv[1])
    # Written as a chained comparison so that NaN (which compares False to
    # everything) is rejected along with out-of-range values.
    if not 0 <= Jaccard_threshold <= 1:
        print('The treshold must be contained within 0-1 .')
        sys.exit(1)
    start_mappings = 2
else:
    Jaccard_threshold = 0.000001
    start_mappings = 1

mapping_arguments = sys.argv[start_mappings:]

# Always defined, so later sections never hit a NameError.
l_names = []    # display name of each mapping
l_mapping = []  # path of each mapping, same order as l_names

if len(mapping_arguments) > 1:
    # Each remaining argument is the path of one mapping.
    for mapping in mapping_arguments:
        # The display name is the last path component. Both '/' and '\' are
        # accepted as separators; a trailing separator is ignored and a path
        # with no separator at all is used as-is.
        name = re.split(r'[\\/]', mapping.rstrip('\\/'))[-1] or mapping
        l_names.append(name)
        l_mapping.append(mapping)
elif len(mapping_arguments) == 1:
    # The single remaining argument is the directory holding all mappings.
    mappings_directory = mapping_arguments[0]
    for mapping in os.listdir(mappings_directory):
        l_names.append(mapping)
        l_mapping.append(mappings_directory + '/' + mapping)
else:
    # No mapping was given: explain the expected arguments on stderr and
    # stop with a failure status.
    sys.exit('Usage: ' + sys.argv[0] + ' [threshold] <mappings directory | mapping mapping ...>')
# Run MaRQ.compare once for every unordered pair of mappings (each mapping is
# paired with every mapping that comes after it in the list). Each entry keeps
# the two display names next to the comparison result, from which the queries
# for the possible joins are created below.
MaRQ_results = []
for first_index, (first_name, first_path) in enumerate(zip(l_names, l_mapping)):
    later_names = l_names[first_index + 1:]
    later_paths = l_mapping[first_index + 1:]
    for second_name, second_path in zip(later_names, later_paths):
        MaRQ_results.append(dict(
            name1=first_name,
            name2=second_name,
            result=MaRQ.compare(first_path, second_path, Jaccard_threshold),
        ))
# Print the results

# One row per join kind read from a comparison result, in output order:
# (key in the result, role of the M1 template, role of the M2 template,
#  prefix of the variable counted by the generated query).
_JOIN_KINDS = (
    ('subject-subject', 'Subject', 'Subject', 'S'),
    ('object-object', 'Object', 'Object', 'O'),
    ('subject-object', 'Subject', 'Object', 'T'),
    ('object-subject', 'Object', 'Subject', 'T'),
)


def _print_join_queries(pair, join_kind, m1_role, m2_role, variable_prefix):
    """Print one commented counting query per join of *join_kind* in *pair*.

    *pair* is one entry of MaRQ_results: a dict with 'name1', 'name2' and
    'result'. ``pair['result'][join_kind]`` is read as three index-aligned
    sequences: 'templates' (dicts with 'M1' and 'M2' strings),
    'Jaccard_index' and 'triple_patterns' (a list of patterns per join).

    *m1_role* / *m2_role* ('Subject' or 'Object') label the two template
    lines, and *variable_prefix* names the counted variable, numbered from 1
    within the join kind.
    """
    joins = pair['result'][join_kind]
    # Object-object joins get two special cases below.
    is_object_join = join_kind == 'object-object'

    for k, template in enumerate(joins['templates']):
        jaccard_index = joins['Jaccard_index'][k]
        patterns = joins['triple_patterns'][k]

        print()
        # The header names the actual join kind; object-subject joins used
        # to be mislabelled as 'subject-object'.
        print('### ' + join_kind)
        print('#M1:', pair['name1'])
        print('#M2:', pair['name2'])
        print('#M1_' + m1_role + ' :\t' + template['M1'])
        print('#M2_' + m2_role + ' :\t' + template['M2'])

        jaccard_line = '#Jaccard index:\t' + str(jaccard_index)
        if is_object_join and jaccard_index == 0:
            # The emitted note is in French; it says the query is accepted
            # because the objects are the same character string.
            jaccard_line += '\t la query est acceptée car les objets sont la même chaine de character'
        print(jaccard_line)

        if is_object_join and len(patterns) == 1:
            # A single-pattern object-object query counts every solution
            # instead of a named variable.
            print('Select Count(*) Where {')
        else:
            print('Select Count(?' + variable_prefix + str(k + 1) + ') Where {')
        for pattern in patterns:
            print('\t' + triple_pattern_to_sparql(pattern))
        print('}')


print()
print('### This file contains queries deduced by the MaRQ tool available at https://github.com/Manoe-K/MaRQ')
for pair in MaRQ_results:
    for join_kind, m1_role, m2_role, variable_prefix in _JOIN_KINDS:
        _print_join_queries(pair, join_kind, m1_role, m2_role, variable_prefix)