Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(RDF): Email/UUID should now be an IRI instead of String (+ code maintenance for easier implementation) #4323

Draft
wants to merge 30 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
f8d1d28
Moved namespace creation to own method + enum
svandenhoek Oct 7, 2024
c00ec14
auto-formatting
svandenhoek Oct 7, 2024
6e59519
simplified code
svandenhoek Oct 7, 2024
17743f7
minor renames for more clarity
svandenhoek Oct 7, 2024
d1f5574
Split ColumnType mapping knowledge to separate class
svandenhoek Oct 8, 2024
4c9b114
auto-formatting
svandenhoek Oct 8, 2024
369d452
Adjusted for equals check
svandenhoek Oct 8, 2024
98766bf
Fixed empty values check
svandenhoek Oct 8, 2024
89766d3
Added email fix
svandenhoek Oct 8, 2024
8f6bb6e
Fix to copy old behavior
svandenhoek Oct 8, 2024
40083ee
Added some notes
svandenhoek Oct 8, 2024
04270c1
Created dedicated function for String methods to remove some initiall…
svandenhoek Oct 8, 2024
64573d1
Minor javadoc changes
svandenhoek Oct 8, 2024
e03a5e1
Added a test that validates if all ColumnTypes are covered by a mapper
svandenhoek Oct 8, 2024
358a632
reverted an accidental change
svandenhoek Oct 8, 2024
8f4c2e1
auto-formatting
svandenhoek Oct 8, 2024
e3278d0
Split RDF row generating into separate functions
svandenhoek Oct 8, 2024
d852727
Merge branch 'master' into feat/improve_rdf_api
svandenhoek Oct 8, 2024
07c4886
added baseURL validation to ColumnTypeRdfMapper
svandenhoek Oct 10, 2024
7b4168b
Fixed UUIDs being represented as Strings
svandenhoek Oct 10, 2024
726b857
concept version of testing all ColumnType conversions
svandenhoek Oct 10, 2024
dd5ab96
Merge branch 'master' into feat/improve_rdf_api
svandenhoek Oct 15, 2024
cb7c9a9
Enabled previously failing test code
svandenhoek Oct 15, 2024
923e989
fixed a test
svandenhoek Oct 15, 2024
0315acf
Merge branch 'master' into feat/improve_rdf_api
svandenhoek Oct 15, 2024
9e7b174
fixed a bug for datetime_array
svandenhoek Oct 15, 2024
984287d
Updated tests
svandenhoek Oct 17, 2024
3e5290d
Fixed ColumnType.FILE IRI
svandenhoek Oct 17, 2024
d6a8d9e
ColumnType.HEAD correct implementation/test
svandenhoek Oct 17, 2024
73e37f3
Fixed test to check correctly for REFBACK (non-query retrieval exclud…
svandenhoek Oct 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,316 @@
package org.molgenis.emx2.rdf;

import static java.util.Map.entry;
import static org.eclipse.rdf4j.model.util.Values.literal;

import com.google.common.net.UrlEscapers;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.*;
import java.util.function.Function;
import java.util.stream.Collectors;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Namespace;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.base.CoreDatatype;
import org.eclipse.rdf4j.model.util.Values;
import org.molgenis.emx2.*;
import org.molgenis.emx2.utils.TypeUtils;
import org.molgenis.emx2.utils.URIUtils;

/**
* Used for functionalities that are {@link ColumnType} specific. This includes:
*
* <ul>
* <li>Retrieving the appropiate {@link CoreDatatype.XSD}
* <li>The method for extracting the {@link Value}'s for that specific {@link ColumnType} usable
* for generating the RDF
* </ul>
*/
public class ColumnTypeRdfMapper {
// Needed in some cases for Values retrieval.
private final String baseURL;

// All ColumnType mappings.
// mapping.keySet() should be equal to ColumnType.values()
private static final Map<ColumnType, RdfColumnType> mapping =
Map.ofEntries(
// SIMPLE
entry(ColumnType.BOOL, RdfColumnType.BOOLEAN),
entry(ColumnType.BOOL_ARRAY, RdfColumnType.BOOLEAN),
entry(ColumnType.UUID, RdfColumnType.UUID),
entry(ColumnType.UUID_ARRAY, RdfColumnType.UUID),
entry(ColumnType.FILE, RdfColumnType.FILE),

// STRING
entry(ColumnType.STRING, RdfColumnType.STRING),
entry(ColumnType.STRING_ARRAY, RdfColumnType.STRING),
entry(ColumnType.TEXT, RdfColumnType.STRING),
entry(ColumnType.TEXT_ARRAY, RdfColumnType.STRING),

// NUMERIC
entry(ColumnType.INT, RdfColumnType.INT),
entry(ColumnType.INT_ARRAY, RdfColumnType.INT),
entry(ColumnType.LONG, RdfColumnType.LONG),
entry(ColumnType.LONG_ARRAY, RdfColumnType.LONG),
entry(ColumnType.DECIMAL, RdfColumnType.DECIMAL),
entry(ColumnType.DECIMAL_ARRAY, RdfColumnType.DECIMAL),
entry(ColumnType.DATE, RdfColumnType.DATE),
entry(ColumnType.DATE_ARRAY, RdfColumnType.DATE),
entry(ColumnType.DATETIME, RdfColumnType.DATETIME),
entry(ColumnType.DATETIME_ARRAY, RdfColumnType.DATETIME),
entry(ColumnType.PERIOD, RdfColumnType.DURATION),
entry(ColumnType.PERIOD_ARRAY, RdfColumnType.DURATION),

// COMPOSITE
entry(ColumnType.JSONB, RdfColumnType.STRING),
entry(ColumnType.JSONB_ARRAY, RdfColumnType.STRING),

// RELATIONSHIP
entry(ColumnType.REF, RdfColumnType.REFERENCE),
entry(ColumnType.REF_ARRAY, RdfColumnType.REFERENCE),
entry(ColumnType.REFBACK, RdfColumnType.REFERENCE),

// LAYOUT and other constants
entry(ColumnType.HEADING, RdfColumnType.SKIP), // Should not be in RDF output.

// format flavors that extend a baseType
entry(ColumnType.AUTO_ID, RdfColumnType.UUID),
entry(ColumnType.ONTOLOGY, RdfColumnType.ONTOLOGY),
entry(ColumnType.ONTOLOGY_ARRAY, RdfColumnType.ONTOLOGY),
entry(ColumnType.EMAIL, RdfColumnType.EMAIL),
entry(ColumnType.EMAIL_ARRAY, RdfColumnType.EMAIL),
entry(ColumnType.HYPERLINK, RdfColumnType.URI),
entry(ColumnType.HYPERLINK_ARRAY, RdfColumnType.URI));

public ColumnTypeRdfMapper(String baseURL) {
String baseUrlTrim = baseURL.trim();
this.baseURL = baseUrlTrim.endsWith("/") ? baseUrlTrim : baseUrlTrim + "/";
}

/** Retrieve all {@link ColumnType}{@code 's} which have a mapping available. */
static Set<ColumnType> getMapperKeys() {
return mapping.keySet();
}

public static CoreDatatype.XSD getCoreDataType(Column column) {
return getCoreDataType(column.getColumnType());
}

public static CoreDatatype.XSD getCoreDataType(ColumnType columnType) {
return mapping.get(columnType).getCoreDatatype();
}

/**
* Returns the output for the defined cell:
*
* <ul>
* <li>If {@link ColumnType} should not be represented in RDF, returns an empty {@link Set}
* <li>If field is empty, returns an empty {@link Set}
* <li>If field has value(s), returns a filled {@link Set}
* </ul>
*/
public Set<Value> retrieveValues(final Row row, final Column column) {
if (row.getString(column.getName()) == null) {
return Set.of();
}
return mapping.get(column.getColumnType()).retrieveValues(baseURL, row, column);
}

private enum RdfColumnType {
BOOLEAN(CoreDatatype.XSD.BOOLEAN) {
@Override
Set<Value> retrieveValues(String baseURL, Row row, Column column) {
return basicRetrieval(row.getBooleanArray(column.getName()), Values::literal);
}
},
UUID(CoreDatatype.XSD.ANYURI) {
@Override
Set<Value> retrieveValues(String baseURL, Row row, Column column) {
return basicRetrievalString(
row.getStringArray(column.getName()), (i) -> URIUtils.encodedIRI("urn:uuid:" + i));
}
},
STRING(CoreDatatype.XSD.STRING) {
@Override
Set<Value> retrieveValues(String baseURL, Row row, Column column) {
return basicRetrievalString(row.getStringArray(column.getName()), Values::literal);
}
},
INT(CoreDatatype.XSD.INT) {
@Override
Set<Value> retrieveValues(String baseURL, Row row, Column column) {
return basicRetrieval(row.getIntegerArray(column.getName()), Values::literal);
}
},
LONG(CoreDatatype.XSD.LONG) {
@Override
Set<Value> retrieveValues(String baseURL, Row row, Column column) {
return basicRetrieval(row.getLongArray(column.getName()), Values::literal);
}
},
DECIMAL(CoreDatatype.XSD.DECIMAL) {
@Override
Set<Value> retrieveValues(String baseURL, Row row, Column column) {
return basicRetrieval(row.getDecimalArray(column.getName()), Values::literal);
}
},
DATE(CoreDatatype.XSD.DATE) {
@Override
Set<Value> retrieveValues(String baseURL, Row row, Column column) {
return basicRetrieval(
row.getDateArray(column.getName()), (i) -> literal(i.toString(), getCoreDatatype()));
}
},
DATETIME(CoreDatatype.XSD.DATETIME) {
@Override
Set<Value> retrieveValues(String baseURL, Row row, Column column) {
return basicRetrieval(
row.getDateTimeArray(column.getName()),
(i) -> literal(dateTimeFormatter.format((LocalDateTime) i), getCoreDatatype()));
}
},
DURATION(CoreDatatype.XSD.DURATION) {
@Override
Set<Value> retrieveValues(String baseURL, Row row, Column column) {
return basicRetrieval(row.getPeriodArray(column.getName()), Values::literal);
}
},

URI(CoreDatatype.XSD.ANYURI) {
@Override
Set<Value> retrieveValues(String baseURL, Row row, Column column) {
return basicRetrievalString(row.getStringArray(column.getName()), URIUtils::encodedIRI);
}
},
EMAIL(CoreDatatype.XSD.ANYURI) {
@Override
Set<Value> retrieveValues(String baseURL, Row row, Column column) {
return basicRetrievalString(
row.getStringArray(column.getName()), (i) -> URIUtils.encodedIRI("mailto:" + i));
}
},
FILE(CoreDatatype.XSD.ANYURI) {
@Override
Set<Value> retrieveValues(String baseURL, Row row, Column column) {
final String schemaPath =
UrlEscapers.urlPathSegmentEscaper().escape(column.getSchemaName());
final String tablePath = UrlEscapers.urlPathSegmentEscaper().escape(column.getTableName());
final String columnPath = UrlEscapers.urlPathSegmentEscaper().escape(column.getName());
final String fileName =
UrlEscapers.urlPathSegmentEscaper().escape(row.getString(column.getName()));
return Set.of(
Values.iri(
baseURL
+ schemaPath
+ "/api/file/"
+ tablePath
+ "/"
+ columnPath
+ "/"
+ fileName));
}
},
REFERENCE(CoreDatatype.XSD.ANYURI) {
@Override
Set<Value> retrieveValues(String baseURL, Row row, Column column) {
final TableMetadata target = column.getRefTable();
final String rootTableName =
UrlEscapers.urlPathSegmentEscaper().escape(target.getRootTable().getIdentifier());
final Namespace ns = getSchemaNamespace(baseURL, target.getRootTable().getSchema());

final Set<IRI> iris = new HashSet<>();
final Map<Integer, Map<String, String>> items = new HashMap<>();
for (final Reference reference : column.getReferences()) {
final String localColumn = reference.getName();
final String targetColumn = reference.getRefTo();
if (column.isArray()) {
final String[] values = row.getStringArray(localColumn);
if (values != null) {
for (int i = 0; i < values.length; i++) {
var keyValuePairs = items.getOrDefault(i, new LinkedHashMap<>());
keyValuePairs.put(targetColumn, values[i]);
items.put(i, keyValuePairs);
}
}
} else {
final String value = row.getString(localColumn);
if (value != null) {
var keyValuePairs = items.getOrDefault(0, new LinkedHashMap<>());
keyValuePairs.put(targetColumn, value);
items.put(0, keyValuePairs);
}
}
}

for (final var item : items.values()) {
PrimaryKey key = new PrimaryKey(item);
iris.add(Values.iri(ns, rootTableName + "?" + key.getEncodedValue()));
}
return Set.copyOf(iris);
}
},
ONTOLOGY(CoreDatatype.XSD.ANYURI) {
// TODO: Implement Ontology behavior where it also returns ontologyTermURI as Value.
@Override
Set<Value> retrieveValues(String baseURL, Row row, Column column) {
return RdfColumnType.REFERENCE.retrieveValues(baseURL, row, column);
}
},
SKIP(CoreDatatype.XSD.STRING) {
@Override
Set<Value> retrieveValues(String baseURL, Row row, Column column) {
return Set.of();
}
};

private static final DateTimeFormatter dateTimeFormatter =
DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss");

private final CoreDatatype.XSD coreDatatype;

public CoreDatatype.XSD getCoreDatatype() {
return coreDatatype;
}

RdfColumnType(CoreDatatype.XSD coreDatatype) {
this.coreDatatype = coreDatatype;
}

// TODO: Fix code duplicity with RDFService.
private static Namespace getSchemaNamespace(final String baseURL, final SchemaMetadata schema) {
final String schemaName = UrlEscapers.urlPathSegmentEscaper().escape(schema.getName());
final String url = baseURL + schemaName + "/api/rdf/";
final String prefix = TypeUtils.convertToPascalCase(schema.getName());
return Values.namespace(prefix, url);
}

/**
* Generic retrieval function. Can be used for {@link Values#literal(Object)} or any custom
* function which outputs a {@link Value}.
*/
private static Set<Value> basicRetrieval(Object[] object, Function<Object, Value> function) {
return Arrays.stream(object)
.map(value -> (Value) function.apply(value))
.collect(Collectors.toSet());
}

/**
* Similar to {@link #basicRetrieval(Object[], Function)}, but with some changes:
*
* <ul>
* <li>Enforces {@link Values#literal(String)} to be called when using it as {@code function}
* parameter
* <li>Removes the need for casting in custom functions that require a {@link String} as input
* </ul>
*/
private static Set<Value> basicRetrievalString(
String[] object, Function<String, Value> function) {
return Arrays.stream(object)
.map(value -> (Value) function.apply(value))
.collect(Collectors.toSet());
}

abstract Set<Value> retrieveValues(final String baseURL, final Row row, final Column column);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package org.molgenis.emx2.rdf;

import java.util.Arrays;
import java.util.stream.Stream;
import org.eclipse.rdf4j.model.Namespace;
import org.eclipse.rdf4j.model.impl.SimpleNamespace;

/**
 * Fixed set of prefix/IRI pairs, each exposed as an RDF4J {@link Namespace}.
 *
 * <p>Use {@link #getNamespace()} for a single entry or {@link #streamAll()} to iterate over all of
 * them in declaration order.
 */
public enum DefaultNamespace {
  RDF("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
  RDFS("rdfs", "http://www.w3.org/2000/01/rdf-schema#"),
  XSD("xsd", "http://www.w3.org/2001/XMLSchema#"),
  OWL("owl", "http://www.w3.org/2002/07/owl#"),
  SIO("sio", "http://semanticscience.org/resource/"),
  QB("qb", "http://purl.org/linked-data/cube#"),
  SKOS("skos", "http://www.w3.org/2004/02/skos/core#"),
  DCTERMS("dcterms", "http://purl.org/dc/terms/"),
  DCAT("dcat", "http://www.w3.org/ns/dcat#"),
  FOAF("foaf", "http://xmlns.com/foaf/0.1/"),
  VCARD("vcard", "http://www.w3.org/2006/vcard/ns#"),
  ORG("org", "http://www.w3.org/ns/org#"),
  FDP("fdp-o", "https://w3id.org/fdp/fdp-o#");

  // Built once per constant from the prefix and IRI given above.
  private final Namespace namespace;

  /**
   * @param prefix the short name of the namespace
   * @param iri the full namespace IRI the prefix stands for
   */
  DefaultNamespace(String prefix, String iri) {
    this.namespace = new SimpleNamespace(prefix, iri);
  }

  /** Returns the {@link Namespace} that belongs to this constant. */
  public Namespace getNamespace() {
    return namespace;
  }

  /** Streams the {@link Namespace} of every constant, in declaration order. */
  public static Stream<Namespace> streamAll() {
    final DefaultNamespace[] all = values();
    return Arrays.stream(all).map(item -> item.getNamespace());
  }
}
Loading
Loading