-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathRdfDrawing.py
executable file
·214 lines (176 loc) · 7.57 KB
/
RdfDrawing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
from rdflib import Literal
from rdflib.namespace import RDFS
import rdflib
from rdflib.plugins.sparql import prepareQuery
import RDFNode as rdfnode
import Utils as Utils
import DrawIOUtils as DrawIOUtils
import logging
'''
This module defines RdfDrawing, which converts an RDF (Turtle) file into draw.io CSV import text.
'''
class RdfDrawing:
    '''
    Converts an RDF graph (parsed as Turtle) into draw.io CSV import text.

    The generated text consists of the contents of ``template/draw-io-template``,
    one connection directive per predicate, a CSV header row and one CSV row
    per node (subject or object) found in the graph.
    '''
    GRAPH = None
    # Fixed leading CSV columns; one extra column per predicate is appended.
    HEADERS = "id,name,fill,stroke,shape"
    SHAPE_OUTLINE_COLOR = "#000000"
    INSTANCE_COLOR = "#FFE6CC"
    INSTANCE_SHAPE = "rhombus"
    CLASS_COLOR = "#d5e8d4"
    CLASS_SHAPE = "rounded"
    LITERAL_COLOR = "#DAE8FC"
    LITERAL_SHAPE = "ellipse"
    # When True, rdfs:label triples are not drawn as edges/nodes.
    SKIP_RDF_LABEL = True
    UTILS = Utils.Utils()
    DRAW_IO_UTILS = DrawIOUtils.DrawIOUtils()
    # Prepared label-lookup query; built on first use and then reused so the
    # query file is not re-read and re-parsed for every URL.
    _LABEL_QUERY = None

    @staticmethod
    def _read_text(path):
        '''
        Read a whole text file and close it.
        :param path: path of the file to read
        :return: file contents
        '''
        with open(path, 'r') as handle:
            return handle.read()

    def _node_style(self, uri):
        '''
        Pick type, shape and fill color for a non-literal node.
        :param uri: node to classify
        :return: tuple (type, shape, color)
        '''
        if self.UTILS.is_uri_instance(self.GRAPH, uri):
            return RDFS.Resource, self.INSTANCE_SHAPE, self.INSTANCE_COLOR
        return RDFS.Class, self.CLASS_SHAPE, self.CLASS_COLOR

    def _incoming_connections(self, node, predicates_location):
        '''
        Build the per-predicate connection cells of one node.
        :param node: node whose connections are collected
        :param predicates_location: mapping predicate -> column index
        :return: list with one entry per predicate column; "NA" when the
                 column is empty, otherwise comma separated subjects
        '''
        connections = self.__get_connection_template__(predicates_location)
        # subject_predicates(node) is queried with node in the object position,
        # so each pair describes an edge pointing at this node.
        for subj, prede in self.GRAPH.subject_predicates(node):
            indx = predicates_location.get(prede)
            if indx is None:
                # Predicate has no column (e.g. skipped rdfs:label).
                continue
            if connections[indx] == "NA":
                connections[indx] = subj
            else:
                connections[indx] = connections[indx] + "," + subj
        return connections

    def get_draw_io_csv(self, input_file):
        '''
        Create draw.io csv.
        :param input_file: RDF file (parsed as text/turtle)
        :return: draw.io csv text.
        '''
        self.LABELS = {}
        self.LABELS_USED = {}
        self.GRAPH = rdflib.Graph()
        self.GRAPH.parse(input_file, format="text/turtle")

        # Output is accumulated line by line and joined once at the end.
        lines = [self._read_text('template/draw-io-template')]

        # Query to list all predicates
        predicates_location = {}
        q = prepareQuery(self._read_text('queries/list_predicates.rq'))
        for row in self.GRAPH.query(q):
            url = row[0]
            if url == RDFS.label and self.SKIP_RDF_LABEL:
                logging.info("skiping rdfs:label")
                continue
            if url in predicates_location:
                # A repeated predicate must not get a second column, otherwise
                # column indexes and the connection template go out of sync.
                continue
            predicate_label = self.__get_labels(url, True)
            lines.append(self.DRAW_IO_UTILS.get_connection_text(predicate_label))
            predicates_location[url] = len(predicates_location)

        # Header row: fixed columns followed by one column per predicate.
        header_cells = [self.HEADERS]
        for key in predicates_location:
            header_cells.append(self.UTILS.clean_label(self.__get_labels(key, True)))
        lines.append(",".join(header_cells))

        # Query to list all triples
        q = prepareQuery(self._read_text('queries/list_triples.rq'))
        nodes = {}
        for row in self.GRAPH.query(q):
            sub = row[0]
            pre = row[1]
            obj = row[2]

            if sub not in nodes:
                sub_label = self.__get_labels(sub, True)
                sub_type, sub_shape, sub_color = self._node_style(sub)
                connections = self._incoming_connections(sub, predicates_location)
                nodes[sub] = rdfnode.RDFNode(sub, sub_label, sub_type, connections, sub_color,
                                             self.SHAPE_OUTLINE_COLOR, sub_shape)

            if pre == RDFS.label and self.SKIP_RDF_LABEL:
                logging.info("skiping rdfs:label")
            elif obj not in nodes:
                if not isinstance(obj, Literal):
                    obj_label = self.__get_labels(obj, True)
                    obj_type, obj_shape, obj_color = self._node_style(obj)
                else:
                    obj_label = obj
                    obj_type = RDFS.Literal
                    obj_shape = self.LITERAL_SHAPE
                    obj_color = self.LITERAL_COLOR
                    # Literal text that repeats an already used label gets a
                    # numbered suffix. The counter must really advance so
                    # that successive duplicates receive distinct suffixes.
                    if obj_label not in self.LABELS_USED:
                        self.LABELS_USED[obj_label] = 1
                    else:
                        self.LABELS_USED[obj_label] = self.LABELS_USED[obj_label] + 1
                        obj_label = str(obj_label) + " [Duplicate " + str(self.LABELS_USED[obj_label]) + "]"
                connections = self._incoming_connections(obj, predicates_location)
                nodes[obj] = rdfnode.RDFNode(obj, obj_label, obj_type, connections, obj_color,
                                             self.SHAPE_OUTLINE_COLOR, obj_shape)

        logging.info("Num of nodes " + str(len(nodes)))

        # One CSV row per node: fixed cells, then the quoted connection cells.
        for node in nodes.values():
            fixed_cells = ",".join([node.ID, node.NAME, node.NODE_COLOR,
                                    node.NODE_OUTLINE_COLOR, node.NODE_SHAPE])
            connects_text = ",".join('"' + connect + '"' for connect in node.CONNECTION)
            lines.append(fixed_cells + "," + connects_text)

        return "\n".join(lines)

    def __get_connection_template__(self, predicates_location):
        '''
        This method creates default connection text
        :param predicates_location: mapping of predicates to column indexes
        :return: list holding "NA" once per predicate
        '''
        return ["NA"] * len(predicates_location)

    def __get_labels(self, url, use_ontobee):
        '''
        Get label of a url. Results are cached in self.LABELS.
        :param url: url
        :param use_ontobee: True or False (If ontobee should be used)
        :return: Label
        '''
        if url in self.LABELS:
            return self.LABELS[url]

        # Query to get labels from graph (prepared once, then reused)
        if self._LABEL_QUERY is None:
            self._LABEL_QUERY = prepareQuery(self._read_text('queries/get_label_from_graph.rq'))

        label = None
        # When several rows carry a label, the last one wins.
        for row in self.GRAPH.query(self._LABEL_QUERY, initBindings={'uri': url}):
            if row[1]:
                label = row[1]

        # Fallbacks: Ontobee lookup (if allowed), then the suffix of the url.
        if use_ontobee and not label:
            label = self.UTILS.get_label_from_ontobee(url)
        if not label:
            label = self.UTILS.get_suffix(url)

        # to handle duplicate labels
        if label not in self.LABELS_USED:
            self.LABELS_USED[label] = 0
        else:
            self.LABELS_USED[label] = self.LABELS_USED[label] + 1
            label = str(label) + " [Duplicate " + str(self.LABELS_USED[label]) + "]"

        # Skip prefix for instances
        if not self.UTILS.is_uri_instance(self.GRAPH, url):
            prefix = self.UTILS.get_prefix(url, self.UTILS.get_suffix(url))
            label = prefix + ":" + label

        self.LABELS[url] = label
        return label