Skip to content

Commit

Permalink
Refactoring sparql templates for harvest optimization
Browse files Browse the repository at this point in the history
  • Loading branch information
wwelling committed Aug 1, 2023
1 parent 189e6f1 commit 26a46a8
Show file tree
Hide file tree
Showing 432 changed files with 597 additions and 553 deletions.
12 changes: 5 additions & 7 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,11 @@

<dependencies>

<!-- https://mvnrepository.com/artifact/org.springframework.boot/spring-boot-devtools -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-devtools</artifactId>
</dependency>


<!-- <dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-devtools</artifactId>
<optional>true</optional>
</dependency> -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-configuration-processor</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,13 +75,52 @@ public Flux<AbstractIndexDocument> harvest() {
QueryExecution queryExecution = triplestore.createQueryExecution(query);
Iterator<Triple> tripleIterator = queryExecution.execConstructTriples();
Iterable<Triple> triples = () -> tripleIterator;

return Flux.fromIterable(triples)
.map(this::subject)
.map(this::harvest)
.doFinally(onFinally -> queryExecution.close());
}

public AbstractIndexDocument harvest(String subject) {
//

// sparql variable addition for jit inference

// scholars-discovery currently confined to adding only uri which can be a subject, predicate, or literal
// leaf nodes should have literals or pulling up intermediate nodes without any literals (flattening the predication)
// graph pruning process should strip all triples which do not go from a top level class identifier to property literal (sanitize and normalize types)

// Find language identifier API. provide text until it provides a confidence interval above 98%

// annotations within the discovery model encode the minimal knowledge required to provide the views requested by the product owner
// doing so provides the nested structure and index behavior

// top level classes of the VIVO ontology have been identified as the discovery model


// logical branch ?1
// regex capture {{}} and control group
// if within construct clause
// else within where clause


// refactor all construct sparql to use ?subject in its single construct clause
// regex extract single construct clause variable name
// use variable name prefix with ? for {{uri}}

// number of {{}} interpolations for logical constraints

// logical branch ?2
// if one occurrence of {{}} then {}
// else if two occurrences of {{}} then {}
// else if three occurrences of {{}} then {}
// else {}

// backward compatibility requires removing URI notation from template and wrapping inputs with it if necessary

// query for all of a property first
// by passing in the subject of the construct {{subject}} with ?subject
try {
return createDocument(subject);
} catch (Exception e) {
Expand Down Expand Up @@ -116,7 +155,12 @@ private void lookupProperties(AbstractIndexDocument document, String subject) {
try {
FieldSource source = typeOp.getPropertySource();
Model model = queryForModel(source, subject);




List<Object> values = lookupProperty(typeOp, source, model);

populate(document, typeOp.getField(), values);
} catch (Exception e) {
logger.error(String.format("Unable to populate document %s: %s", name(), parse(subject)));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,12 @@ public String template(String template, Object data) {
}
}

public String templateSparql(String name, String uri) {
public String templateSparql(String name, String value) {
String path = String.format("templates/sparql/%s.sparql", name);
Map<String, String> data = new HashMap<String, String>();
data.put("uri", uri);
// cost of increasing context variables?
data.put("uri", value);
data.put("subject", value);
Context context = Context.newBuilder(data).build();
try {
return handlebars.compileInline(resourceService.getTemplate(path)).apply(context);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
PREFIX vivo: <http://vivoweb.org/ontology/core#>

CONSTRUCT {
<{{uri}}> vivo:abbreviation ?abbreviation .
{{subject}} vivo:abbreviation ?abbreviation .
} WHERE {
<{{uri}}> vivo:abbreviation ?abbreviation .
{{subject}} vivo:abbreviation ?abbreviation .
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
PREFIX bibo: <http://purl.org/ontology/bibo/>

CONSTRUCT {
<{{uri}}> bibo:abstract ?abstract .
{{subject}} bibo:abstract ?abstract .
} WHERE {
<{{uri}}> bibo:abstract ?abstract .
{{subject}} bibo:abstract ?abstract .
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
CONSTRUCT {
?author rdfs:label ?labelWithId .
} WHERE {
<{{uri}}> vivo:relatedBy ?relatedBy .
{{subject}} vivo:relatedBy ?relatedBy .
?relatedBy rdf:type vivo:Authorship .
?relatedBy vivo:relates ?author .
?author rdf:type ?type .
Expand Down
4 changes: 2 additions & 2 deletions src/main/resources/templates/sparql/collection/doi.sparql
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
PREFIX bibo: <http://purl.org/ontology/bibo/>

CONSTRUCT {
<{{uri}}> bibo:doi ?doi .
{{subject}} bibo:doi ?doi .
} WHERE {
<{{uri}}> bibo:doi ?doi .
{{subject}} bibo:doi ?doi .
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
CONSTRUCT {
?editor rdfs:label ?labelWithId .
} WHERE {
<{{uri}}> vivo:relatedBy ?relatedBy .
{{subject}} vivo:relatedBy ?relatedBy .
?relatedBy a vivo:Editorship .
?relatedBy vivo:relates ?editor .
?editor rdf:type ?type .
Expand Down
4 changes: 2 additions & 2 deletions src/main/resources/templates/sparql/collection/eissn.sparql
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
PREFIX bibo: <http://purl.org/ontology/bibo/>

CONSTRUCT {
<{{uri}}> bibo:eissn ?eissn .
{{subject}} bibo:eissn ?eissn .
} WHERE {
<{{uri}}> bibo:eissn ?eissn .
{{subject}} bibo:eissn ?eissn .
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
CONSTRUCT {
?feature rdfs:label ?labelWithId .
} WHERE {
<{{uri}}> vivo:features ?feature .
{{subject}} vivo:features ?feature .
?feature rdfs:label ?label .
BIND( CONCAT( STR(?label), REPLACE(STR(?feature), "(^.*/)", "::") ) AS ?labelWithId ) .
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
CONSTRUCT {
?subjectArea rdfs:label ?labelWithId .
} WHERE {
<{{uri}}> vivo:hasSubjectArea ?subjectArea .
{{subject}} vivo:hasSubjectArea ?subjectArea .
?subjectArea rdfs:label ?label .
BIND( CONCAT( STR(?label), REPLACE(STR(?subjectArea), "(^.*/)", "::") ) AS ?labelWithId ) .
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
CONSTRUCT {
?isAbout rdfs:label ?labelWithId .
} WHERE {
<{{uri}}> obo:IAO_0000136 ?isAbout .
{{subject}} obo:IAO_0000136 ?isAbout .
?isAbout rdfs:label ?label .
BIND( CONCAT( STR(?label), REPLACE(STR(?isAbout), "(^.*/)", "::") ) AS ?labelWithId ) .
}
4 changes: 2 additions & 2 deletions src/main/resources/templates/sparql/collection/issn.sparql
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
PREFIX bibo: <http://purl.org/ontology/bibo/>

CONSTRUCT {
<{{uri}}> bibo:issn ?issn .
{{subject}} bibo:issn ?issn .
} WHERE {
<{{uri}}> bibo:issn ?issn .
{{subject}} bibo:issn ?issn .
}
4 changes: 2 additions & 2 deletions src/main/resources/templates/sparql/collection/keyword.sparql
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
PREFIX vivo: <http://vivoweb.org/ontology/core#>

CONSTRUCT {
<{{uri}}> vivo:freetextKeyword ?freetextKeyword .
{{subject}} vivo:freetextKeyword ?freetextKeyword .
} WHERE {
<{{uri}}> vivo:freetextKeyword ?freetextKeyword .
{{subject}} vivo:freetextKeyword ?freetextKeyword .
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
CONSTRUCT {
?mention rdfs:label ?labelWithId .
} WHERE {
<{{uri}}> obo:IAO_0000142 ?mention .
{{subject}} obo:IAO_0000142 ?mention .
?mention rdfs:label ?label .
BIND( CONCAT( STR(?label), REPLACE(STR(?mention), "(^.*/)", "::") ) AS ?labelWithId ) .
}
4 changes: 2 additions & 2 deletions src/main/resources/templates/sparql/collection/name.sparql
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

CONSTRUCT {
<{{uri}}> rdfs:label ?label .
{{subject}} rdfs:label ?label .
} WHERE {
<{{uri}}> rdfs:label ?label .
{{subject}} rdfs:label ?label .
FILTER (lang(?label) = '')
}
4 changes: 2 additions & 2 deletions src/main/resources/templates/sparql/collection/oclcnum.sparql
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
PREFIX bibo: <http://purl.org/ontology/bibo/>

CONSTRUCT {
<{{uri}}> bibo:oclcnum ?oclcnum .
{{subject}} bibo:oclcnum ?oclcnum .
} WHERE {
<{{uri}}> bibo:oclcnum ?oclcnum .
{{subject}} bibo:oclcnum ?oclcnum .
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
CONSTRUCT {
?outputOfProcessOrEvent rdfs:label ?labelWithId .
} WHERE {
<{{uri}}> obo:RO_0002353 ?outputOfProcessOrEvent .
{{subject}} obo:RO_0002353 ?outputOfProcessOrEvent .
?outputOfProcessOrEvent rdfs:label ?label .
BIND( CONCAT( STR(?label), REPLACE(STR(?outputOfProcessOrEvent), "(^.*/)", "::") ) AS ?labelWithId ) .
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
CONSTRUCT {
?participatesIn rdfs:label ?labelWithId .
} WHERE {
<{{uri}}> obo:RO_0000056 ?participatesIn .
{{subject}} obo:RO_0000056 ?participatesIn .
?participatesIn rdfs:label ?label .
BIND( CONCAT( STR(?label), REPLACE(STR(?participatesIn), "(^.*/)", "::") ) AS ?labelWithId ) .
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@ PREFIX vivo: <http://vivoweb.org/ontology/core#>
CONSTRUCT {
?publicationDate vivo:dateTime ?dateTime .
} WHERE {
<{{uri}}> vivo:dateTimeValue ?publicationDate .
{{subject}} vivo:dateTimeValue ?publicationDate .
?publicationDate vivo:dateTime ?dateTime .
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
CONSTRUCT {
?publicationVenueFor rdfs:label ?labelWithId .
} WHERE {
<{{uri}}> vivo:publicationVenueFor ?publicationVenueFor .
{{subject}} vivo:publicationVenueFor ?publicationVenueFor .
?publicationVenueFor rdfs:label ?label .
BIND( CONCAT( STR(?label), REPLACE(STR(?publicationVenueFor), "(^.*/)", "::") ) AS ?labelWithId ) .
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
CONSTRUCT {
?publisher rdfs:label ?labelWithId .
} WHERE {
<{{uri}}> vivo:publisher ?publisher .
{{subject}} vivo:publisher ?publisher .
?publisher rdfs:label ?label .
BIND( CONCAT( STR(?label), REPLACE(STR(?publisher), "(^.*/)", "::") ) AS ?labelWithId ) .
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ PREFIX vitro: <http://vitro.mannlib.cornell.edu/ns/vitro/0.7#>
CONSTRUCT {
?publisher vitro:mostSpecificType ?mostSpecificTypeWithId .
} WHERE {
<{{uri}}> vivo:publisher ?publisher .
{{subject}} vivo:publisher ?publisher .
?publisher vitro:mostSpecificType ?mostSpecificType .
BIND( CONCAT( STR(?mostSpecificType), REPLACE(STR(?publisher), "(^.*/)", "::") ) AS ?mostSpecificTypeWithId ) .
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
CONSTRUCT {
?specifiedOutputOf rdfs:label ?labelWithId .
} WHERE {
<{{uri}}> obo:OBI_0000312 ?specifiedOutputOf .
{{subject}} obo:OBI_0000312 ?specifiedOutputOf .
?specifiedOutputOf rdfs:label ?label .
BIND( CONCAT( STR(?label), REPLACE(STR(?specifiedOutputOf), "(^.*/)", "::") ) AS ?labelWithId ) .
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
CONSTRUCT {
?informationResource rdfs:label ?labelWithId .
} WHERE {
<{{uri}}> vivo:informationResourceSupportedBy ?informationResource .
{{subject}} vivo:informationResourceSupportedBy ?informationResource .
?informationResource rdfs:label ?label .
BIND( CONCAT( STR(?label), REPLACE(STR(?informationResource), "(^.*/)", "::") ) AS ?labelWithId ) .
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
CONSTRUCT {
?translator rdfs:label ?labelWithId .
} WHERE {
<{{uri}}> bibo:translator ?translator .
{{subject}} bibo:translator ?translator .
?translator rdfs:label ?label .
BIND( CONCAT( STR(?label), REPLACE(STR(?translator), "(^.*/)", "::") ) AS ?labelWithId ) .
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
CONSTRUCT {
?geographicFocus rdfs:label ?labelWithId .
} WHERE {
<{{uri}}> vivo:geographicFocus ?geographicFocus .
{{subject}} vivo:geographicFocus ?geographicFocus .
?geographicFocus rdfs:label ?label .
BIND( CONCAT( STR(?label), REPLACE(STR(?geographicFocus), "(^.*/)", "::") ) AS ?labelWithId ) .
}
2 changes: 1 addition & 1 deletion src/main/resources/templates/sparql/common/image.sparql
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ PREFIX vitro-public: <http://vitro.mannlib.cornell.edu/ns/vitro/public#>
CONSTRUCT {
?downloadLocation vitro-public:directDownloadUrl ?directDownloadUrl .
} WHERE {
<{{uri}}> vitro-public:mainImage ?mainImage .
{{subject}} vitro-public:mainImage ?mainImage .
?mainImage vitro-public:downloadLocation ?downloadLocation .
?downloadLocation vitro-public:directDownloadUrl ?directDownloadUrl .
}
4 changes: 2 additions & 2 deletions src/main/resources/templates/sparql/common/modTime.sparql
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
PREFIX vitro: <http://vitro.mannlib.cornell.edu/ns/vitro/0.7#>

CONSTRUCT {
<{{uri}}> vitro:modTime ?modTime .
{{subject}} vitro:modTime ?modTime .
} WHERE {
<{{uri}}> vitro:modTime ?modTime .
{{subject}} vitro:modTime ?modTime .
}
2 changes: 1 addition & 1 deletion src/main/resources/templates/sparql/common/sameAs.sparql
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
CONSTRUCT {
?sameAs rdfs:label ?labelWithId .
} WHERE {
<{{uri}}> owl:sameAs ?sameAs .
{{subject}} owl:sameAs ?sameAs .
?sameAs rdfs:label ?label .
BIND( CONCAT( STR(?label), REPLACE(STR(?sameAs), "(^.*/)", "::") ) AS ?labelWithId ) .
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ PREFIX vitro-public: <http://vitro.mannlib.cornell.edu/ns/vitro/public#>
CONSTRUCT {
?downloadLocation vitro-public:directDownloadUrl ?directDownloadUrl .
} WHERE {
<{{uri}}> vitro-public:mainImage ?mainImage .
{{subject}} vitro-public:mainImage ?mainImage .
?mainImage vitro-public:thumbnailImage ?thumbnailImage .
?thumbnailImage vitro-public:downloadLocation ?downloadLocation .
?downloadLocation vitro-public:directDownloadUrl ?directDownloadUrl .
Expand Down
4 changes: 2 additions & 2 deletions src/main/resources/templates/sparql/common/type.sparql
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
PREFIX vitro: <http://vitro.mannlib.cornell.edu/ns/vitro/0.7#>

CONSTRUCT {
<{{uri}}> vitro:mostSpecificType ?mostSpecificType .
{{subject}} vitro:mostSpecificType ?mostSpecificType .
} WHERE {
<{{uri}}> vitro:mostSpecificType ?mostSpecificType .
{{subject}} vitro:mostSpecificType ?mostSpecificType .
}
2 changes: 1 addition & 1 deletion src/main/resources/templates/sparql/common/website.sparql
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
CONSTRUCT {
?hasURL rdfs:label ?labelWithId .
} WHERE {
<{{uri}}> obo:ARG_2000028 ?vcard .
{{subject}} obo:ARG_2000028 ?vcard .
?vcard vcard:hasURL ?hasURL .
?hasURL rdfs:label ?label .
BIND( CONCAT( STR(?label), REPLACE(STR(?hasURL), "(^.*/)", "::") ) AS ?labelWithId ) .
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ PREFIX vcard: <http://www.w3.org/2006/vcard/ns#>
CONSTRUCT {
?hasURL vcard:url ?urlWithLabel .
} WHERE {
<{{uri}}> obo:ARG_2000028 ?vcard .
{{subject}} obo:ARG_2000028 ?vcard .
?vcard vcard:hasURL ?hasURL .
?hasURL vcard:url ?url .
BIND( CONCAT( STR(?url), REPLACE(STR(?hasURL), "(^.*/)", "::") ) AS ?urlWithLabel ) .
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
CONSTRUCT {
?organization rdfs:label ?labelWithId .
} WHERE {
<{{uri}}> vivo:researchAreaOf ?researchAreaOf .
{{subject}} vivo:researchAreaOf ?researchAreaOf .
?researchAreaOf a foaf:Person .
?researchAreaOf vivo:relates ?position .
?position a vivo:Position .
Expand Down
Loading

0 comments on commit 26a46a8

Please sign in to comment.