Skip to content

Commit

Permalink
fix for #156 more exceptions + changed jena to 4.10
Browse files Browse the repository at this point in the history
  • Loading branch information
manonthegithub committed Feb 15, 2024
1 parent b35cd21 commit 4a28db0
Show file tree
Hide file tree
Showing 10 changed files with 167 additions and 37 deletions.
2 changes: 1 addition & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ name := "gstore"
version := "0.2.0-SNAPSHOT"

val ScalatraVersion = "2.6.3"
val jenaVersion = "3.17.0"
val jenaVersion = "4.10.0"
val jettyVersion = "9.4.9.v20180320"

libraryDependencies ++= Seq(
Expand Down
7 changes: 6 additions & 1 deletion src/main/scala/org/dbpedia/databus/ApiImpl.scala
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import javax.servlet.http.HttpServletRequest
import org.apache.jena.rdf.model.Model
import org.apache.jena.riot.Lang
import org.apache.jena.shared.JenaException
import org.apache.jena.sys.JenaSystem
import org.dbpedia.databus.ApiImpl.Config
import org.dbpedia.databus.RdfConversions.{contextUrl, generateGraphId, graphToBytes, jenaJsonLdContextWithFallbackForLocalhost, mapContentType, readModel}
import org.dbpedia.databus.swagger.api.DatabusApi
Expand All @@ -26,8 +27,12 @@ class ApiImpl(config: Config) extends DatabusApi {
import ApiImpl._

private val client: GitClient = initGitClient(config)
private val defaultLang = Lang.JSONLD
private val defaultLang = Lang.JSONLD10
private lazy val sparqlClient: SparqlClient = SparqlClient.get(config)
init()

/** Initialises the Jena subsystems by delegating to `JenaSystem.init()`.
  * Also invoked once from the class body when an `ApiImpl` is constructed.
  */
def init(): Unit = JenaSystem.init()

/** Shuts the Jena subsystems down by delegating to `JenaSystem.shutdown()`. */
def stop(): Unit = JenaSystem.shutdown()


override def dataidSubgraph(body: String)(request: HttpServletRequest): Try[String] =
Expand Down
57 changes: 48 additions & 9 deletions src/main/scala/org/dbpedia/databus/SparqlClient.scala
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@ import com.github.jsonldjava.utils.JsonUtils
import com.mchange.v2.c3p0.ComboPooledDataSource
import org.apache.jena.atlas.json.JsonString
import org.apache.jena.graph.{Graph, Node}
import org.apache.jena.iri.ViolationCodes
import org.apache.jena.rdf.model.{Model, ModelFactory}
import org.apache.jena.riot.lang.JsonLDReader
import org.apache.jena.riot.lang.LangJSONLD10
import org.apache.jena.riot.system.{ErrorHandler, ErrorHandlerFactory, StreamRDFLib}
import org.apache.jena.riot.writer.JsonLDWriter
import org.apache.jena.riot.writer.JsonLD10Writer
import org.apache.jena.riot.{Lang, RDFDataMgr, RDFFormat, RDFLanguages, RDFParserBuilder, RDFWriter, RIOT}
import org.apache.jena.shacl.{ShaclValidator, Shapes, ValidationReport}
import org.apache.jena.sparql.util
Expand Down Expand Up @@ -187,7 +188,7 @@ object RdfConversions {
context.foreach(ctx => {
val jctx = jenaContext(CachingContext.parse(ctx.toString))
builder.context(jctx)
builder.set(JsonLDWriter.JSONLD_CONTEXT_SUBSTITUTION, new JsonString(ctx.toString))
builder.set(JsonLD10Writer.JSONLD_CONTEXT_SUBSTITUTION, new JsonString(ctx.toString))
})

builder
Expand All @@ -201,6 +202,7 @@ object RdfConversions {
ShaclValidator.get()
.validate(Shapes.parse(shacl), model.getGraph)
)

def validateWithShacl(file: Array[Byte], modelLang: Lang, shaclGraph: Graph, fileCtx: Option[util.Context]): Try[ValidationReport] =
for {
(model, _) <- readModel(file, modelLang, fileCtx)
Expand All @@ -222,7 +224,9 @@ object RdfConversions {
def langToFormat(lang: Lang): RDFFormat = lang match {
case RDFLanguages.TURTLE => RDFFormat.TURTLE_PRETTY
case RDFLanguages.TTL => RDFFormat.TTL
case RDFLanguages.JSONLD => RDFFormat.JSONLD_FLATTEN_PRETTY
case RDFLanguages.JSONLD => RDFFormat.JSONLD10
case RDFLanguages.JSONLD10 => RDFFormat.JSONLD10
case RDFLanguages.JSONLD11 => RDFFormat.JSONLD11
case RDFLanguages.TRIG => RDFFormat.TRIG_PRETTY
case RDFLanguages.RDFXML => RDFFormat.RDFXML_PRETTY
case RDFLanguages.RDFTHRIFT => RDFFormat.RDF_THRIFT
Expand All @@ -249,7 +253,7 @@ object RdfConversions {
case "text/turtle" => Lang.TURTLE
case "application/rdf+xml" => Lang.RDFXML
case "application/n-triples" => Lang.NTRIPLES
case "application/ld+json" => Lang.JSONLD
case "application/ld+json" => Lang.JSONLD10
case "text/trig" => Lang.TRIG
case "application/n-quads" => Lang.NQUADS
case "application/trix+xml" => Lang.TRIX
Expand Down Expand Up @@ -302,7 +306,7 @@ object RdfConversions {
}

def contextUrl(data: Array[Byte], lang: Lang): Option[URL] =
if (lang == Lang.JSONLD) {
if (lang == Lang.JSONLD10) {
jsonLdContextUrl(data)
.get
} else {
Expand Down Expand Up @@ -342,8 +346,8 @@ object RdfConversions {
// Builds the Jena context used for JSON-LD processing: a copy of the global RIOT
// context with `jsonLdCtx` registered under the JSON-LD context keys of the
// writer and reader classes.
private def jenaContext(jsonLdCtx: core.Context) = {
// Work on a copy so the global RIOT context itself is not modified.
val context: util.Context = RIOT.getContext.copy()
// Puts the result of getPrefixes(true) back into jsonLdCtx, i.e. the argument is mutated.
jsonLdCtx.putAll(jsonLdCtx.getPrefixes(true))
context.put(JsonLDWriter.JSONLD_CONTEXT, jsonLdCtx)
context.put(JsonLDReader.JSONLD_CONTEXT, jsonLdCtx)
context.put(JsonLD10Writer.JSONLD_CONTEXT, jsonLdCtx)
context.put(LangJSONLD10.JSONLD_CONTEXT, jsonLdCtx)
context
}

Expand Down Expand Up @@ -430,9 +434,44 @@ object RdfConversions {

import org.apache.jena.riot.SysRIOT.fmtMessage

// Markers of IRI problems that the parser reports only as warnings but that must be
// treated as errors (fix for https://github.com/dbpedia/databus/issues/156).
// Each entry is a substring that is searched for in the warning message.
//
// The explicit Set[String] type is deliberate: List has no `+` method, so writing
// `list.+("...")` silently resolves to string concatenation (Predef.any2stringadd),
// and a following `.toSet` produces a Set[Char] that matches almost every message.
//
// NOTE(review): entries are matched as plain substrings, so "Code: 1" would also match
// a message containing "Code: 10" - confirm the message format and tighten if needed.
private val reportAsError: Set[String] =
  List(
    ViolationCodes.ILLEGAL_CHARACTER,
    ViolationCodes.CONTROL_CHARACTER,
    ViolationCodes.NON_XML_CHARACTER,
    ViolationCodes.EMPTY_SCHEME,
    ViolationCodes.SCHEME_MUST_START_WITH_LETTER,
    ViolationCodes.BIDI_FORMATTING_CHARACTER,
    ViolationCodes.WHITESPACE,
    ViolationCodes.DOUBLE_WHITESPACE,
    ViolationCodes.NOT_XML_SCHEMA_WHITESPACE,
    ViolationCodes.NOT_DNS_NAME,
    ViolationCodes.ILLEGAL_PERCENT_ENCODING,
    ViolationCodes.LONE_SURROGATE,
    ViolationCodes.DNS_LABEL_DASH_START_OR_END,
    ViolationCodes.BAD_IDN,
    ViolationCodes.HAS_PASSWORD,
    ViolationCodes.UNREGISTERED_IANA_SCHEME,
    ViolationCodes.UNREGISTERED_NONIETF_SCHEME_TREE,
    ViolationCodes.DEPRECATED_UNICODE_CHARACTER,
    ViolationCodes.UNDEFINED_UNICODE_CHARACTER,
    ViolationCodes.PRIVATE_USE_CHARACTER,
    ViolationCodes.UNICODE_CONTROL_CHARACTER,
    ViolationCodes.UNICODE_WHITESPACE,
    ViolationCodes.COMPATIBILITY_CHARACTER,
    ViolationCodes.REQUIRED_COMPONENT_MISSING,
    ViolationCodes.PROHIBITED_COMPONENT_PRESENT,
    ViolationCodes.SCHEME_REQUIRES_LOWERCASE,
    ViolationCodes.SCHEME_PATTERN_MATCH_FAILED
  ).map(code => s"Code: $code").toSet +
    // Jena performs an additional, separate check for spaces in IRIs that does not
    // carry a violation code, see
    // org.apache.jena.riot.system.ParserProfileStd#internalMakeIRI
    "Spaces are not legal in URIs/IRIs."



override def warning(message: String, line: Long, col: Long): Unit =
// Fix for https://github.com/dbpedia/databus/issues/156: warnings that match one of the
// reportAsError markers are escalated to errors instead of being collected as warnings.
if (message.contains("Spaces are not legal in URIs/IRIs")) {
if (reportAsError.exists(s => message.contains(s))) {
error(message, line, col)
} else {
warnings = warnings :+ Warning(fmtMessage(message, line, col))
Expand Down
39 changes: 39 additions & 0 deletions src/test/resources/newline_in_iri.jsonld
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
{
"@context": "https://raw.githubusercontent.com/dbpedia/databus/master/server/app/common/res/context.jsonld",
"@graph": [
{
"@id": "https://databus.coypu.org/narndt/coypu",
"@type": "Group",
"title": "CoyPu"
},
{
"@id": "https://databus.coypu.org/narndt/coypu/countries",
"@type": "Artifact",
"title": "Raise VirtuosoException",
"abstract": "Counties and regions",
"description": "Counties and regions"
},
{
"@type": [
"Version",
"Dataset"
],
"@id": "https://databus.coypu.org/narndt/coypu/countries/2023-09-18T122214Z",
"hasVersion": "2023-09-18T122214Z",
"title": "Countries",
"abstract": "Countries\n2023-09-18T12:22:14Z",
"description": "Countries\n2023-09-18T12:22:14Z",
"license": "https://dalicc.net/licenselibrary/Cc010Universal",
"wasDerivedFrom": "https://metadata.coypu.org/dataset/wikidata-distribution\nWikidataQueryService\nhttps://query.wikidata.org/",
"distribution": [
{
"@type": "Part",
"formatExtension": "ttl",
"compression": "none",
"downloadURL": "https://databus.coypu.org/dav/narndt/coypu/countries/2023-09-18T122214Z/countries_freqency=static.ttl",
"dcv:frequency": "static"
}
]
}
]
}
File renamed without changes.
14 changes: 11 additions & 3 deletions src/test/scala/org/dbpedia/databus/CacheTests.scala
Original file line number Diff line number Diff line change
@@ -1,11 +1,19 @@
package org.dbpedia.databus

import java.util.UUID
import org.apache.jena.sys.JenaSystem

import java.util.UUID
import org.dbpedia.databus.CachingJsonldContext.ApproxSizeStringKeyCache
import org.scalatest.{FlatSpec, Matchers}
import org.scalatest.{BeforeAndAfter, FlatSpec, Matchers}

class CacheTests extends FlatSpec with Matchers with BeforeAndAfter {

class CacheTests extends FlatSpec with Matchers {
before {
JenaSystem.init()
}
after {
JenaSystem.shutdown()
}

"CacheKey" should "be sorted by time of creation" in {

Expand Down
34 changes: 27 additions & 7 deletions src/test/scala/org/dbpedia/databus/DatabusScalatraTest.scala
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package org.dbpedia.databus


import org.apache.jena.iri.ViolationCodes

import java.io.ByteArrayInputStream
import java.nio.file.{Files, Paths}
import org.apache.jena.rdf.model.ModelFactory
Expand All @@ -21,9 +23,11 @@ class DatabusScalatraTest extends ScalatraFlatSpec with BeforeAndAfter {
val dir = Files.createDirectories(Paths.get("target", "test_dir-git"))

before {
impl.init()
Files.createDirectories(Paths.get("target", "test_dir-git"))
}
after {
impl.stop()
Directory(Path.jfile2path(dir.toFile)).deleteRecursively()
}

Expand Down Expand Up @@ -63,8 +67,8 @@ class DatabusScalatraTest extends ScalatraFlatSpec with BeforeAndAfter {

get("/databus/graph/read?repo=kuckuck&path=pa/fl.jsonld") {
status should equal(200)
val respCtx = RdfConversions.contextUrl(bodyBytes, Lang.JSONLD)
respCtx should equal(RdfConversions.contextUrl(bytes, Lang.JSONLD))
val respCtx = RdfConversions.contextUrl(bodyBytes, Lang.JSONLD10)
respCtx should equal(RdfConversions.contextUrl(bytes, Lang.JSONLD10))
respCtx.get.toString.nonEmpty should equal(true)
}

Expand All @@ -73,26 +77,42 @@ class DatabusScalatraTest extends ScalatraFlatSpec with BeforeAndAfter {
"File save" should "report problems in input" in {


val file = "report_syntax_err.jsonld"
val file = "space_in_iri.jsonld"
val bytes = Files.readAllBytes(Paths.get(getClass.getClassLoader.getResource(file).getFile))

post("/databus/graph/save?repo=kuckuck&path=pa/syntax_err.jsonld", bytes) {
(status >= 400) should equal(true)
(status >= 400) should equal(true)
response.body.contains("Spaces are not legal in URIs/IRIs") should equal(true)
}

}

"Shacl validation" should "report problems in input" in {
"Shacl validation" should "report problems in input with spaces in IRIs" in {

val file = "space_in_iri.jsonld"
val sha = "test.shacl"
val bytes = Paths.get(getClass.getClassLoader.getResource(file).getFile).toFile
val shacl = Paths.get(getClass.getClassLoader.getResource(sha).getFile).toFile

post("/databus/shacl/validate", Map.empty, Map("shacl" -> shacl, "graph" -> bytes)) {
status should equal(400)
body should include("Bad IRI")
body should include(s"Spaces are not legal")
}

}

"Shacl validation" should "report problems in input with newlines in IRIs" in {

val file = "report_syntax_err.jsonld"
val file = "newline_in_iri.jsonld"
val sha = "test.shacl"
val bytes = Paths.get(getClass.getClassLoader.getResource(file).getFile).toFile
val shacl = Paths.get(getClass.getClassLoader.getResource(sha).getFile).toFile

post("/databus/shacl/validate", Map.empty, Map("shacl" -> shacl, "graph" -> bytes)) {
status should equal(400)
body should include("Bad IRI")
body should include(s"Code: ${ViolationCodes.CONTROL_CHARACTER}")
}

}
Expand Down Expand Up @@ -141,7 +161,7 @@ class DatabusScalatraTest extends ScalatraFlatSpec with BeforeAndAfter {

val model = ModelFactory.createDefaultModel()
val dataStream = new ByteArrayInputStream(version)
RDFDataMgr.read(model, dataStream, Lang.JSONLD)
RDFDataMgr.read(model, dataStream, Lang.JSONLD10)
val tr = Tractate.extract(model.getGraph, TractateV1.Version)
body should equal(tr.get.stringForSigning)
}
Expand Down
16 changes: 12 additions & 4 deletions src/test/scala/org/dbpedia/databus/TractateTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,28 @@ package org.dbpedia.databus

import java.io.ByteArrayInputStream
import java.nio.file.{Files, Paths}

import org.apache.jena.rdf.model.ModelFactory
import org.apache.jena.riot.{Lang, RDFDataMgr}
import org.scalatest.{FlatSpec, Matchers}
import org.apache.jena.sys.JenaSystem
import org.scalatest.{BeforeAndAfter, FlatSpec, Matchers}

class TractateTest extends FlatSpec with Matchers with BeforeAndAfter {

class TractateTest extends FlatSpec with Matchers {
before {
JenaSystem.init()
}

after {
JenaSystem.shutdown()
}

"Tractate" should "be extracted from dataid" in {

val file = "version.jsonld"
val bytes = Files.readAllBytes(Paths.get(getClass.getClassLoader.getResource(file).getFile))
val model = ModelFactory.createDefaultModel()
val dataStream = new ByteArrayInputStream(bytes)
RDFDataMgr.read(model, dataStream, Lang.JSONLD)
RDFDataMgr.read(model, dataStream, Lang.JSONLD10)
val t = Tractate.extract(model.getGraph, TractateV1.Version)
val expected =
"""Databus Tractate V1
Expand Down
16 changes: 11 additions & 5 deletions src/test/scala/org/dbpedia/databus/ValidationTest.scala
Original file line number Diff line number Diff line change
@@ -1,15 +1,21 @@
package org.dbpedia.databus

import java.nio.file.{Files, Paths}
import org.apache.jena.query.ARQ
import org.apache.jena.riot.Lang
import org.apache.jena.sys.JenaSystem
import org.dbpedia.databus.RdfConversions.{contextUrl, jenaJsonLdContextWithFallbackForLocalhost}
import org.scalatest.{FlatSpec, Matchers}
import org.scalatest.{BeforeAndAfter, FlatSpec, Matchers}

class ValidationTest extends FlatSpec with Matchers {
class ValidationTest extends FlatSpec with Matchers with BeforeAndAfter {

ARQ.init()
val lang = Lang.JSONLD
before {
JenaSystem.init()
}
after {
JenaSystem.shutdown()
}

val lang = Lang.JSONLD10

"SHACL validation" should "work for version" in {
val shacl = "https://raw.githubusercontent.com/dbpedia/databus-git-mockup/main/dev/dataid-shacl.ttl"
Expand Down
Loading

0 comments on commit 4a28db0

Please sign in to comment.