Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/main/scala/eu/neverblink/jelly/cli/Exceptions.scala
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ case class JenaRiotException(e: RiotException)
extends CriticalException(s"Jena RDF I/O exception: ${e.getMessage}")
/** Raised when the input could not be decoded as protobuf, wrapping the decoder's own exception.
  *
  * @param e
  *   the underlying protobuf exception; its message is appended to this exception's message
  */
case class InvalidJellyFile(e: InvalidProtocolBufferException)
    extends CriticalException(
      s"Invalid Jelly file: ${e.getMessage}",
    )
/** Raised when the user requests a format that the command does not accept.
  *
  * @param format
  *   the option value exactly as supplied by the user
  * @param validFormats
  *   an already-rendered listing of the accepted values, embedded verbatim in the message
  */
case class InvalidFormatSpecified(format: String, validFormats: String)
    extends CriticalException(
      s"Invalid format option: \"$format\", needs to be one of $validFormats.",
    )

/** Carries the exit code the application is terminating with.
  *
  * @param code
  *   the exit code, also included in the exception message
  */
case class ExitException(code: Int)
    extends CriticalException(
      s"Exiting with code $code.",
    )

/** Common parent of the CLI exceptions above; each subclass supplies its own message.
  *
  * @param message
  *   human-readable description, forwarded to [[Exception]]
  */
class CriticalException(message: String) extends Exception(message)
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package eu.neverblink.jelly.cli.command.rdf

/** Mixin that renders a command's supported RDF formats as user-facing text.
  *
  * Implementors provide the list of accepted formats and the default one; the trait derives the
  * strings used in help output and error messages from them.
  */
trait RdfCommandPrintUtil:
  /** Formats the implementing command accepts. */
  val validFormats: List[RdfFormatOption]

  /** Format named as the default in [[helpMsg]]. */
  val defaultFormat: RdfFormatOption

  /** Comma-separated, human-readable listing of [[validFormats]].
    *
    * This only builds the string; nothing is printed here. It is lazy so that it is computed on
    * first access, after the implementor's `validFormats` has been initialised.
    */
  lazy val validFormatsString: String =
    validFormats.map(RdfFormatOption.optionString).mkString(", ")

  /** Help text listing the possible values and the default format. */
  lazy val helpMsg: String =
    // Plain `s` interpolation: there are no format specifiers, so `f` buys nothing here.
    s"Possible values: $validFormatsString. Default format: ${defaultFormat.fullName}"
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package eu.neverblink.jelly.cli.command.rdf

/** RDF formats that can be selected on the command line.
  *
  * @param cliOptions
  *   every spelling of the option value that selects this format
  * @param fullName
  *   display name used in user-facing text
  */
enum RdfFormatOption(val cliOptions: List[String], val fullName: String):
  case NQuads extends RdfFormatOption(List("nq", "nt", "nquads", "ntriples"), "N-Quads")
  case JellyBinary extends RdfFormatOption(List("jelly"), "Jelly binary format")
  case JellyText extends RdfFormatOption(List("jelly-text"), "Jelly text format")

object RdfFormatOption:
  /** Renders one format for display: its quoted spellings, then " for ", then its display name.
    *
    * @param option
    *   the format to describe
    * @return
    *   e.g. `"jelly-text" for Jelly text format`
    */
  def optionString(option: RdfFormatOption): String =
    val quotedSpellings = option.cliOptions.map(spelling => "\"" + spelling + "\"")
    s"${quotedSpellings.mkString(", ")} for ${option.fullName}"

  /** Looks up the format selected by a user-supplied option value.
    *
    * @param cliOption
    *   the option value; it must equal one of a format's spellings exactly
    * @return
    *   the first format listing that spelling, or `None` if no format does
    */
  def find(cliOption: String): Option[RdfFormatOption] =
    values.find(candidate => candidate.cliOptions.contains(cliOption))
100 changes: 94 additions & 6 deletions src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJelly.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,32 @@ package eu.neverblink.jelly.cli.command.rdf
import caseapp.*
import com.google.protobuf.InvalidProtocolBufferException
import eu.neverblink.jelly.cli.*
import eu.neverblink.jelly.cli.command.rdf.RdfFormatOption.*
import eu.neverblink.jelly.cli.util.IoUtil
import eu.ostrzyciel.jelly.convert.jena.riot.JellyLanguage
import eu.ostrzyciel.jelly.core.RdfProtoDeserializationError
import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamFrame
import eu.ostrzyciel.jelly.core.{IoUtils, RdfProtoDeserializationError}
import org.apache.jena.riot.system.StreamRDFWriter
import org.apache.jena.riot.{RDFLanguages, RDFParser, RiotException}

import java.io.{InputStream, OutputStream}

/** Format listing and default used by the `rdf from-jelly` command's help and error messages. */
object RdfFromJellyPrint extends RdfCommandPrintUtil:
  // Jelly binary is left out: the command reads Jelly binary, so converting it to itself
  // would be pointless.
  override val validFormats: List[RdfFormatOption] =
    RdfFormatOption.values.toList.filter(_ != JellyBinary)

  override val defaultFormat: RdfFormatOption = NQuads

/** Command-line options of `rdf from-jelly`.
  *
  * @param common
  *   options pulled in via `@Recurse` from `JellyOptions` (declared elsewhere)
  * @param outputFile
  *   output file path, also accepted as `--to`
  * @param outputFormat
  *   requested output format, also accepted as `--out-format`; the accepted values are listed in
  *   the help message
  */
case class RdfFromJellyOptions(
    @Recurse common: JellyOptions = JellyOptions(),
    @ExtraName("to") outputFile: Option[String] = None,
    @ValueDescription("Output format.")
    @HelpMessage(RdfFromJellyPrint.helpMsg)
    @ExtraName("out-format")
    outputFormat: Option[String] = None,
) extends HasJellyOptions

object RdfFromJelly extends JellyCommand[RdfFromJellyOptions]:
Expand All @@ -34,24 +48,98 @@ object RdfFromJelly extends JellyCommand[RdfFromJellyOptions]:
IoUtil.outputStream(fileName)
case None => getStdOut
}
doConversion(inputStream, outputStream)
doConversion(inputStream, outputStream, options.outputFormat)

/** Runs the conversion matching the requested output format and translates low-level failures
  * into the CLI's own exception types.
  *
  * @param inputStream
  *   stream the Jelly data is read from
  * @param outputStream
  *   stream the converted data is written to
  * @param format
  *   output format option as supplied by the user; N-Quads is produced when it is absent
  * @throws JellyDeserializationError
  *   when an `RdfProtoDeserializationError` is raised during conversion
  * @throws JenaRiotException
  *   when Jena raises a `RiotException`
  * @throws InvalidJellyFile
  *   when protobuf raises an `InvalidProtocolBufferException`
  * @throws InvalidFormatSpecified
  *   when `format` does not name one of the formats this command can write
  */
private def doConversion(
    inputStream: InputStream,
    outputStream: OutputStream,
    format: Option[String],
): Unit =
  try
    format match
      case None =>
        // No format given: fall back to N-Quads.
        jellyToNQuad(inputStream, outputStream)
      case Some(requested) =>
        RdfFormatOption.find(requested) match
          case Some(JellyText) => jellyBinaryToText(inputStream, outputStream)
          case Some(NQuads) => jellyToNQuad(inputStream, outputStream)
          case _ =>
            // Unknown spelling, or a known format this command cannot write.
            throw InvalidFormatSpecified(requested, RdfFromJellyPrint.validFormatsString)
  catch
    case e: RdfProtoDeserializationError =>
      throw JellyDeserializationError(e.getMessage)
    case e: RiotException =>
      throw JenaRiotException(e)
    case e: InvalidProtocolBufferException =>
      throw InvalidJellyFile(e)

/** Parses the input as Jelly and streams the result to the output as N-Quads.
  *
  * @param inputStream
  *   stream the Jelly data is read from
  * @param outputStream
  *   stream the N-Quads are written to
  */
private def jellyToNQuad(inputStream: InputStream, outputStream: OutputStream): Unit =
  val nQuadSink = StreamRDFWriter.getWriterStream(outputStream, RDFLanguages.NQUADS)
  val jellyParser = RDFParser.source(inputStream).lang(JellyLanguage.JELLY)
  jellyParser.parse(nQuadSink)

/** Reads the Jelly file frame by frame and writes each frame to the output in its protobuf text
  * form, preceded by a `# Frame N` comment line (N starts at 0).
  *
  * The output stream is flushed when the method finishes, whether or not writing succeeded.
  *
  * @param inputStream
  *   stream the Jelly data is read from
  * @param outputStream
  *   stream the text rendering is written to
  */
private def jellyBinaryToText(inputStream: InputStream, outputStream: OutputStream): Unit =
  import java.nio.charset.StandardCharsets

  // Encode explicitly as UTF-8 so the bytes written do not depend on the JVM's default charset.
  def writeText(text: String): Unit =
    outputStream.write(text.getBytes(StandardCharsets.UTF_8))

  def writeFrameToOutput(frame: RdfStreamFrame, frameIndex: Int): Unit =
    // A comment line goes before each frame so the frames can be told apart in the output.
    writeText(s"# Frame $frameIndex\n")
    // The proto string is treated as the Jelly text format as-is.
    writeText(frame.toProtoString)

  try
    iterateRdfStream(inputStream, outputStream).zipWithIndex.foreach {
      case (frame, frameIndex) => writeFrameToOutput(frame, frameIndex)
    }
  finally outputStream.flush()

/** Reads the Jelly file and exposes its frames as an iterator.
  *
  * The input is first passed through `IoUtils.autodetectDelimiting`, and all parsing uses the
  * stream it returns rather than `inputStream` itself. A non-delimited input yields exactly one
  * frame, parsed immediately. A delimited input yields frames parsed on demand until
  * `parseDelimitedFrom` returns `None` (presumably end of input; confirm against the library).
  *
  * @param inputStream
  *   stream the Jelly data is read from
  * @param outputStream
  *   not used by this method; kept so existing call sites keep compiling
  * @return
  *   iterator over the frames found in the input
  */
private def iterateRdfStream(
    inputStream: InputStream,
    outputStream: OutputStream,
): Iterator[RdfStreamFrame] =
  val (isDelimited, frameSource) = IoUtils.autodetectDelimiting(inputStream)
  if isDelimited then
    // Delimited Jelly file: several frames may follow one another.
    Iterator
      .continually(RdfStreamFrame.parseDelimitedFrom(frameSource))
      .takeWhile(_.isDefined)
      .collect { case Some(frame) => frame }
  else
    // Non-delimited Jelly file: only a single frame can be read.
    Iterator(RdfStreamFrame.parseFrom(frameSource))
132 changes: 92 additions & 40 deletions src/test/scala/eu/neverblink/jelly/cli/command/RdfFromJellySpec.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package eu.neverblink.jelly.cli.command

import com.google.protobuf.InvalidProtocolBufferException
import eu.neverblink.jelly.cli.*

import eu.neverblink.jelly.cli.command.helpers.*
import eu.neverblink.jelly.cli.command.rdf.*
import org.apache.jena.riot.RDFLanguages
Expand All @@ -16,51 +17,87 @@ import scala.util.Using
class RdfFromJellySpec extends AnyWordSpec with Matchers with CleanUpAfterTest:

"rdf from-jelly command" should {
"be able to convert a Jelly file to NTriples output stream" in {
val jellyFile = DataGenHelper.generateJellyFile(3)
val nQuadString = DataGenHelper.generateNQuadString(3)
val (out, err) =
RdfFromJelly.runTestCommand(List("rdf", "from-jelly", jellyFile))
val sortedOut = out.split("\n").map(_.trim).sorted
val sortedQuads = nQuadString.split("\n").map(_.trim).sorted
sortedOut should contain theSameElementsAs sortedQuads
}
"handle conversion of Jelly to NTriples" when {
"a file to output stream" in {
val jellyFile = DataGenHelper.generateJellyFile(3)
val nQuadString = DataGenHelper.generateNQuadString(3)
val (out, err) =
RdfFromJelly.runTestCommand(List("rdf", "from-jelly", jellyFile))
val sortedOut = out.split("\n").map(_.trim).sorted
val sortedQuads = nQuadString.split("\n").map(_.trim).sorted
sortedOut should contain theSameElementsAs sortedQuads
}

"be able to convert a Jelly stream to NTriples output stream" in {
DataGenHelper.generateJellyInputStream(3)
val nQuadString = DataGenHelper.generateNQuadString(3)
val (out, err) = RdfFromJelly.runTestCommand(List("rdf", "from-jelly"))
val sortedOut = out.split("\n").map(_.trim).sorted
val sortedQuads = nQuadString.split("\n").map(_.trim).sorted
sortedOut should contain theSameElementsAs sortedQuads
}
"be able to convert a Jelly file to NTriples file" in {
val jellyFile = DataGenHelper.generateJellyFile(3)
val nQuadString = DataGenHelper.generateNQuadString(3)
val outputFile = DataGenHelper.generateOutputFile(RDFLanguages.NQUADS)
val (out, err) =
RdfFromJelly.runTestCommand(
List("rdf", "from-jelly", jellyFile, "--to", outputFile),
"input stream to output stream" in {
DataGenHelper.generateJellyInputStream(3)
val nQuadString = DataGenHelper.generateNQuadString(3)
val (out, err) = RdfFromJelly.runTestCommand(
List("rdf", "from-jelly", "--out-format", RdfFormatOption.NQuads.cliOptions.head),
)
val sortedOut = Using.resource(Source.fromFile(outputFile)) { content =>
content.getLines().toList.map(_.trim).sorted
val sortedOut = out.split("\n").map(_.trim).sorted
val sortedQuads = nQuadString.split("\n").map(_.trim).sorted
sortedOut should contain theSameElementsAs sortedQuads
}
"a file to file" in {
val jellyFile = DataGenHelper.generateJellyFile(3)
val nQuadString = DataGenHelper.generateNQuadString(3)
val outputFile = DataGenHelper.generateOutputFile(RDFLanguages.NQUADS)
val (out, err) =
RdfFromJelly.runTestCommand(
List("rdf", "from-jelly", jellyFile, "--to", outputFile),
)
val sortedOut = Using.resource(Source.fromFile(outputFile)) { content =>
content.getLines().toList.map(_.trim).sorted
}
val sortedQuads = nQuadString.split("\n").map(_.trim).sorted
sortedOut should contain theSameElementsAs sortedQuads
out.length should be(0)
}
"an input stream to file" in {
DataGenHelper.generateJellyInputStream(3)
val outputFile = DataGenHelper.generateOutputFile(RDFLanguages.NQUADS)
val nQuadString = DataGenHelper.generateNQuadString(3)
val (out, err) =
RdfFromJelly.runTestCommand(List("rdf", "from-jelly", "--to", outputFile))
val sortedOut = Using.resource(Source.fromFile(outputFile)) { content =>
content.getLines().toList.map(_.trim).sorted
}
val sortedQuads = nQuadString.split("\n").map(_.trim).sorted
sortedOut should contain theSameElementsAs sortedQuads
out.length should be(0)
}
val sortedQuads = nQuadString.split("\n").map(_.trim).sorted
sortedOut should contain theSameElementsAs sortedQuads
out.length should be(0)
}
"be able to convert a Jelly stream to NTriples file" in {
DataGenHelper.generateJellyInputStream(3)
val outputFile = DataGenHelper.generateOutputFile(RDFLanguages.NQUADS)
val nQuadString = DataGenHelper.generateNQuadString(3)
val (out, err) =
RdfFromJelly.runTestCommand(List("rdf", "from-jelly", "--to", outputFile))
val sortedOut = Using.resource(Source.fromFile(outputFile)) { content =>
content.getLines().toList.map(_.trim).sorted
"handle conversion of Jelly binary to text" when {
"a file to output stream" in {
val jellyFile = DataGenHelper.generateJellyFile(3)
val (out, err) =
RdfFromJelly.runTestCommand(
List(
"rdf",
"from-jelly",
jellyFile,
"--out-format",
RdfFormatOption.JellyText.cliOptions.head,
),
)
val outString = """# Frame 0
|rows {
| options {
| stream_name: ""
| physical_type: PHYSICAL_STREAM_TYPE_TRIPLES
| generalized_statements: true
| rdf_star: true
| max_name_table_size: 128
| max_prefix_table_size: 16
| max_datatype_table_size: 16
| logical_type: LOGICAL_STREAM_TYPE_FLAT_TRIPLES
| version: 1
| }
|}""".stripMargin
out should include(outString)
"rows".r.findAllIn(out).length should be(10)
"http://example.org/predicate/".r.findAllIn(out).length should be(1)
}
val sortedQuads = nQuadString.split("\n").map(_.trim).sorted
sortedOut should contain theSameElementsAs sortedQuads
out.length should be(0)
}
"throw proper exception" when {
"input file is not found" in {
Expand All @@ -82,6 +119,7 @@ class RdfFromJellySpec extends AnyWordSpec with Matchers with CleanUpAfterTest:
)
val exception =
intercept[ExitException] {

RdfFromJelly.runTestCommand(List("rdf", "from-jelly", jellyFile))
}
val msg = InputFileInaccessible(jellyFile).getMessage
Expand All @@ -95,6 +133,7 @@ class RdfFromJellySpec extends AnyWordSpec with Matchers with CleanUpAfterTest:
val quadFile = DataGenHelper.generateOutputFile()
val exception =
intercept[ExitException] {

RdfFromJelly.runTestCommand(
List("rdf", "from-jelly", jellyFile, "--to", quadFile),
)
Expand Down Expand Up @@ -139,5 +178,18 @@ class RdfFromJellySpec extends AnyWordSpec with Matchers with CleanUpAfterTest:
errContent should include("eu.neverblink.jelly.cli.InvalidJellyFile")
exception.code should be(1)
}
"invalid output format supplied" in {
val jellyFile = DataGenHelper.generateJellyFile(3)
val quadFile = DataGenHelper.generateOutputFile()
val exception =
intercept[ExitException] {
RdfFromJelly.runTestCommand(
List("rdf", "from-jelly", jellyFile, "--to", quadFile, "--out-format", "invalid"),
)
}
val msg = InvalidFormatSpecified("invalid", RdfFromJellyPrint.validFormatsString)
RdfFromJelly.getErrContent should include(msg.getMessage)
exception.code should be(1)
}
}
}