Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import eu.neverblink.jelly.core.proto.google.v1 as google
import eu.neverblink.jelly.core.proto.v1.*
import eu.neverblink.jelly.core.utils.IoUtils
import org.apache.jena.riot.system.StreamRDFWriter
import org.apache.jena.riot.{Lang, RIOT}
import org.apache.jena.riot.RIOT

import java.io.{BufferedReader, FileInputStream, InputStream, InputStreamReader, OutputStream}
import scala.util.Using
Expand Down Expand Up @@ -77,7 +77,7 @@ object RdfToJelly extends RdfSerDesCommand[RdfToJellyOptions, RdfFormat.Readable
lazy val printUtil: RdfCommandPrintUtil[RdfFormat.Readable] = RdfToJellyPrint

val defaultAction: (InputStream, OutputStream) => Unit =
langToJelly(RdfFormat.NQuads.jenaLang, _, _)
langToJelly(RdfFormat.NQuads, _, _)

private def loadOptionsFromFile(filename: String): RdfStreamOptions =
val inputStream = new FileInputStream(filename)
Expand Down Expand Up @@ -114,20 +114,20 @@ object RdfToJelly extends RdfSerDesCommand[RdfToJellyOptions, RdfFormat.Readable
override def matchFormatToAction(
format: RdfFormat.Readable,
): Option[(InputStream, OutputStream) => Unit] = format match {
case f: RdfFormat.Jena.Readable => Some(langToJelly(f.jenaLang, _, _))
case f: RdfFormat.Jena.Readable => Some(langToJelly(f, _, _))
case f: RdfFormat.JellyText.type => Some(jellyTextToJelly)
}

/** This method reads the file, rewrites it to Jelly and writes it to some output stream
* @param jenaLang
* @param format
* Language that should be converted to Jelly
* @param inputStream
* InputStream
* @param outputStream
* OutputStream
*/
private def langToJelly(
jenaLang: Lang,
format: RdfFormat.Jena,
inputStream: InputStream,
outputStream: OutputStream,
): Unit =
Expand Down Expand Up @@ -189,8 +189,8 @@ object RdfToJelly extends RdfSerDesCommand[RdfToJellyOptions, RdfFormat.Readable
JellyStreamWriter(JenaConverterFactory.getInstance(), variant, outputStream)

RiotParserUtil.parse(
getOptions.rdfPerformanceOptions.validateTerms.getOrElse(false),
jenaLang,
getOptions.rdfPerformanceOptions.resolveIris,
format,
inputStream,
jellyWriter,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -250,8 +250,8 @@ object RdfValidate extends JellyCommand[RdfValidateOptions]:
val output = StreamRdfCollector()
Using.resource(IoUtil.inputStream(fileName)) { is =>
RiotParserUtil.parse(
getOptions.rdfPerformanceOptions.validateTerms.getOrElse(true),
format.jenaLang,
getOptions.rdfPerformanceOptions.resolveIris,
format,
is,
output,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import org.apache.jena.riot.{Lang, RDFLanguages}
/** Common description of an RDF format known to the CLI. Concrete formats are the case objects
  * declared in the companion object.
  */
sealed trait RdfFormat:
/** Human-readable name of the format, e.g. "N-Quads". */
val fullName: String
/** Short names of the format, e.g. List("nq", "nquads"). Presumably the values accepted by
  * format-selecting CLI options such as `--in-format` (TODO confirm against the option parser).
  */
val cliOptions: List[String]
/** Whether the format supports a base IRI. RiotParserUtil.parse only uses the IRI-resolving
  * parser when this is true; for other formats the `resolveIris` option is ignored.
  */
val supportsBaseIri: Boolean

object RdfFormat:

Expand All @@ -29,6 +30,7 @@ object RdfFormat:
override val fullName: String = "N-Quads"
override val cliOptions: List[String] = List("nq", "nquads")
override val jenaLang: Lang = RDFLanguages.NQUADS
override val supportsBaseIri: Boolean = false

case object NTriples
extends RdfFormat.Jena.StreamWriteable,
Expand All @@ -37,16 +39,19 @@ object RdfFormat:
override val fullName: String = "N-Triples"
override val cliOptions: List[String] = List("nt", "ntriples")
override val jenaLang: Lang = RDFLanguages.NTRIPLES
override val supportsBaseIri: Boolean = false

/** Turtle, handled through Jena (`RDFLanguages.TURTLE`); marked as both readable and
  * stream-writeable.
  */
case object Turtle extends RdfFormat.Jena.StreamWriteable, RdfFormat.Jena.Readable:
override val fullName: String = "Turtle"
override val cliOptions: List[String] = List("ttl", "turtle")
override val jenaLang: Lang = RDFLanguages.TURTLE
// Turtle documents may declare a base IRI (e.g. `BASE <http://example.org/>`), so relative
// IRIs can be resolved against it when IRI resolution is requested.
override val supportsBaseIri: Boolean = true

/** TriG, handled through Jena (`RDFLanguages.TRIG`); marked as both readable and
  * stream-writeable.
  */
case object TriG extends RdfFormat.Jena.StreamWriteable, RdfFormat.Jena.Readable:
override val fullName: String = "TriG"
override val cliOptions: List[String] = List("trig")
override val jenaLang: Lang = RDFLanguages.TRIG
// Flagged as supporting a base IRI, so IRI resolution is applied when it is requested.
override val supportsBaseIri: Boolean = true

case object RdfProto
extends RdfFormat.Jena.StreamWriteable,
Expand All @@ -55,6 +60,7 @@ object RdfFormat:
override val fullName: String = "RDF Protobuf"
override val cliOptions: List[String] = List("jenaproto", "jena-proto")
override val jenaLang: Lang = RDFLanguages.RDFPROTO
override val supportsBaseIri: Boolean = false

case object Thrift
extends RdfFormat.Jena.StreamWriteable,
Expand All @@ -63,23 +69,27 @@ object RdfFormat:
override val fullName: String = "RDF Thrift"
override val cliOptions: List[String] = List("jenathrift", "jena-thrift")
override val jenaLang: Lang = RDFLanguages.RDFTHRIFT
override val supportsBaseIri: Boolean = false

/** RDF/XML, handled through Jena (`RDFLanguages.RDFXML`); marked as readable and
  * batch-writeable (not stream-writeable).
  */
case object RdfXml extends RdfFormat.Jena.Readable, RdfFormat.Jena.BatchWriteable:
override val fullName: String = "RDF/XML"
override val cliOptions: List[String] = List("rdfxml", "rdf-xml")
override val jenaLang: Lang = RDFLanguages.RDFXML
// Flagged as supporting a base IRI, so IRI resolution is applied when it is requested.
override val supportsBaseIri: Boolean = true

/** JSON-LD, handled through Jena (`RDFLanguages.JSONLD`); marked as readable and
  * batch-writeable (not stream-writeable).
  */
case object JsonLd extends RdfFormat.Jena.Readable, RdfFormat.Jena.BatchWriteable:
override val fullName: String = "JSON-LD"
override val cliOptions: List[String] = List("jsonld", "json-ld")
override val jenaLang: Lang = RDFLanguages.JSONLD
// Flagged as supporting a base IRI, so IRI resolution is applied when it is requested.
override val supportsBaseIri: Boolean = true

// The Jelly binary format, tied to Jena through JellyLanguage.JELLY.
// We never want to read from or write to Jelly when converting Jelly to Jelly, so this format
// is deliberately neither Writeable nor Readable; it is only marked as integrated into Jena.
case object JellyBinary extends RdfFormat.Jena, RdfFormat.SupportsGeneralizedRdf:
override val fullName: String = "Jelly binary"
override val cliOptions: List[String] = List("jelly")
override val jenaLang: Lang = JellyLanguage.JELLY
// Not flagged as supporting a base IRI, so the resolve-IRIs option has no effect for it.
override val supportsBaseIri: Boolean = false

case object JellyText
extends RdfFormat,
Expand All @@ -89,6 +99,7 @@ object RdfFormat:
override val fullName: String = "Jelly text"
override val cliOptions: List[String] = List("jelly-text")
val extension = ".jelly.txt"
override val supportsBaseIri: Boolean = false

private val rdfFormats: List[RdfFormat] =
List(NQuads, NTriples, JellyBinary, JellyText, Turtle, TriG, RdfProto, Thrift, RdfXml, JsonLd)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,14 @@ import caseapp.HelpMessage
*/
case class RdfPerformanceOptions(
@HelpMessage(
"Enable term validation and IRI resolution (slower). Default: false for all commands except 'rdf validate'.",
"Resolve IRIs with regard to the base specified in the input document. " +
"Disabling this will result in faster parsing of Turtle, JSON-LD and RDF/XML, but will " +
"also potentially result in relative IRIs in the output. " +
"Default: true (ignored for formats that don't support base IRIs).",
)
resolveIris: Boolean = true,
@HelpMessage(
"Enable term validation (slower). Default: false for all commands except 'rdf validate'.",
)
validateTerms: Option[Boolean] = None,
)
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
package eu.neverblink.jelly.cli.util.jena

import org.apache.jena.graph.impl.LiteralLabel
import org.apache.jena.irix.{IRIProviderAny, SystemIRIx}

import scala.util.Try

object JenaSystemOptions:
/** Enable faster parsing by disabling strict IRI and literal validation.
/** Enable faster parsing by disabling strict literal validation.
* @return
* A Success if the operation was successful, or a Failure with the exception if not. The
* operation may fail in environments where reflection is not supported. The failure can be
Expand All @@ -21,13 +20,9 @@ object JenaSystemOptions:
toggle(true)

private def toggle(enable: Boolean): Try[Unit] =
val valueMode = if enable then
SystemIRIx.reset()
"EAGER"
else
// Set the IRI provider to one that does no validation or resolving whatsoever
SystemIRIx.setProvider(IRIProviderAny.stringProvider())
"LAZY"
val valueMode =
if enable then "EAGER"
else "LAZY"

// Disable/enable eager computation of literal values, which does strict checking.
// This requires reflection as the field is private static final.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package eu.neverblink.jelly.cli.util.jena.riot

import org.apache.jena.riot.{Lang, RDFParser, RDFParserRegistry, RIOT}
import eu.neverblink.jelly.cli.command.rdf.util.RdfFormat
import org.apache.jena.riot.lang.LabelToNode
import org.apache.jena.riot.{RDFParser, RDFParserRegistry, RIOT}
import org.apache.jena.riot.system.StreamRDF

import java.io.InputStream
Expand All @@ -9,19 +11,24 @@ import java.io.InputStream
*/
object RiotParserUtil:
def parse(
enableTermValidation: Boolean,
lang: Lang,
resolveIris: Boolean,
format: RdfFormat.Jena,
source: InputStream,
output: StreamRDF,
): Unit =
if enableTermValidation then
// Standard parser with validation enabled
): Unit = {
// Only really enable IRI resolution if the format supports it
if resolveIris && format.supportsBaseIri then
// Parser with full IRI resolution
RDFParser.source(source)
.lang(lang)
.lang(format.jenaLang)
.labelToNode(LabelToNode.createUseLabelAsGiven())
.checking(false)
.strict(false)
.parse(output)
else
// Fast parser with validation disabled
RDFParserRegistry
.getFactory(lang)
.create(lang, FastParserProfile())
.read(source, "", lang.getContentType, output, RIOT.getContext)
.getFactory(format.jenaLang)
.create(format.jenaLang, FastParserProfile())
.read(source, "", format.jenaLang.getContentType, output, RIOT.getContext)
}
Original file line number Diff line number Diff line change
Expand Up @@ -883,4 +883,56 @@ class RdfToJellySpec extends AnyWordSpec with TestFixtureHelper with Matchers:
)
}
}

"handle IRI resolution" when {
  // Turtle document that declares a base IRI and uses relative IRIs for subject and object.
  val turtleWithBase =
    """BASE <http://example.org/>
      |<a> <http://example.org/p> <b> .
      |""".stripMargin

  // Feeds `rdf` to the command through stdin, converts it into the Jelly file at `target`
  // (with `extraArgs` placed before `--to`), and returns the statements read back from it.
  def convertAndRead(rdf: String, target: String, extraArgs: String*) = {
    RdfToJelly.setStdIn(ByteArrayInputStream(rdf.getBytes))
    RdfToJelly.runTestCommand(List("rdf", "to-jelly") ++ extraArgs ++ List("--to", target))
    translateJellyBack(new FileInputStream(target)).listStatements().asScala.toSeq
  }

  "IRI resolution enabled (default), input TTL stream" in withEmptyJellyFile { j =>
    // With the default settings, relative IRIs are resolved against the declared base.
    val statements = convertAndRead(turtleWithBase, j, "--in-format=ttl")
    statements.size should be(1)
    val only = statements.head
    only.getSubject.getURI should be("http://example.org/a")
    only.getPredicate.getURI should be("http://example.org/p")
    only.getObject.asResource().getURI should be("http://example.org/b")
  }

  "IRI resolution disabled, input TTL stream" in withEmptyJellyFile { j =>
    // With resolution switched off, relative IRIs pass through unchanged.
    val statements =
      convertAndRead(turtleWithBase, j, "--in-format=ttl", "--resolve-iris=false")
    statements.size should be(1)
    val only = statements.head
    only.getSubject.getURI should be("a")
    only.getPredicate.getURI should be("http://example.org/p")
    only.getObject.asResource().getURI should be("b")
  }

  "IRI resolution enabled (but ignored), input NT stream" in withEmptyJellyFile { j =>
    // NOTE(review): no --in-format is passed, so the command's default input handling is
    // used (presumably N-Quads, of which this line is also valid syntax) — confirm.
    val ntInput =
      """<a> <http://example.org/p> <b> .
        |""".stripMargin
    val statements = convertAndRead(ntInput, j)
    statements.size should be(1)
    val only = statements.head
    only.getSubject.getURI should be("a")
    only.getPredicate.getURI should be("http://example.org/p")
    only.getObject.asResource().getURI should be("b")
  }
}
}