diff --git a/src/main/scala/eu/neverblink/jelly/cli/Exceptions.scala b/src/main/scala/eu/neverblink/jelly/cli/Exceptions.scala
index d93cd55..9ba65cf 100644
--- a/src/main/scala/eu/neverblink/jelly/cli/Exceptions.scala
+++ b/src/main/scala/eu/neverblink/jelly/cli/Exceptions.scala
@@ -24,6 +24,10 @@ case class JenaRiotException(e: RiotException)
     extends CriticalException(s"Jena RDF I/O exception: ${e.getMessage}")
 case class InvalidJellyFile(e: InvalidProtocolBufferException)
     extends CriticalException(s"Invalid Jelly file: ${e.getMessage}")
+case class InvalidFormatSpecified(format: String, validFormats: String)
+    extends CriticalException(
+      s"Invalid format option: \"$format\", needs to be one of ${validFormats}.",
+    )
 case class ExitException(code: Int) extends CriticalException(s"Exiting with code $code.")
 
 class CriticalException(message: String) extends Exception(message)
diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfCommandPrintUtil.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfCommandPrintUtil.scala
new file mode 100644
index 0000000..3512576
--- /dev/null
+++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfCommandPrintUtil.scala
@@ -0,0 +1,23 @@
+package eu.neverblink.jelly.cli.command.rdf
+
+/** Builds the user-facing description of the RDF formats accepted by a command.
+  */
+trait RdfCommandPrintUtil:
+  /** Formats the command accepts.
+    */
+  val validFormats: List[RdfFormatOption]
+
+  /** Format advertised as the default in the help message.
+    */
+  val defaultFormat: RdfFormatOption
+
+  /** Comma-separated, user-readable list of the valid formats and their option strings. Lazy, so
+    * it is evaluated on first use rather than during trait initialization.
+    */
+  lazy val validFormatsString: String =
+    validFormats.map(RdfFormatOption.optionString).mkString(", ")
+
+  /** Help text listing the valid formats and naming the default one.
+    */
+  lazy val helpMsg: String =
+    s"Possible values: $validFormatsString. Default format: ${defaultFormat.fullName}"
diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfFormatOption.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfFormatOption.scala
new file mode 100644
index 0000000..83735a6
--- /dev/null
+++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfFormatOption.scala
@@ -0,0 +1,30 @@
+package eu.neverblink.jelly.cli.command.rdf
+
+/** Formats that can be selected with a command-line format option.
+  * @param cliOptions
+  *   lower-case option strings that select this format
+  * @param fullName
+  *   human-readable name of the format
+  */
+enum RdfFormatOption(val cliOptions: List[String], val fullName: String):
+  case NQuads extends RdfFormatOption(List("nq", "nt", "nquads", "ntriples"), "N-Quads")
+  case JellyBinary extends RdfFormatOption(List("jelly"), "Jelly binary format")
+  case JellyText extends RdfFormatOption(List("jelly-text"), "Jelly text format")
+
+object RdfFormatOption:
+  /** Returns a string representation of the option for the user: the quoted CLI option
+    * strings followed by the full name, e.g. `"jelly" for Jelly binary format`.
+    */
+  def optionString(option: RdfFormatOption): String =
+    val quotedOptions = option.cliOptions.map(o => s"\"$o\"").mkString(", ")
+    s"$quotedOptions for ${option.fullName}"
+
+  /** Finds the appropriate RdfFormatOption based on supplied option string. Matching ignores
+    * letter case and surrounding whitespace.
+    * @return
+    *   the matching format, or None if the string is not a known option
+    */
+  def find(cliOption: String): Option[RdfFormatOption] =
+    // Locale.ROOT keeps lower-casing independent of the user's locale
+    val normalized = cliOption.trim.toLowerCase(java.util.Locale.ROOT)
+    RdfFormatOption.values.find(_.cliOptions.contains(normalized))
diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJelly.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJelly.scala
index 5b627f5..68d6bc5 100644
--- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJelly.scala
+++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJelly.scala
@@ -2,18 +2,32 @@ package eu.neverblink.jelly.cli.command.rdf
 import caseapp.*
 import com.google.protobuf.InvalidProtocolBufferException
 import eu.neverblink.jelly.cli.*
+import eu.neverblink.jelly.cli.command.rdf.RdfFormatOption.*
 import eu.neverblink.jelly.cli.util.IoUtil
 import eu.ostrzyciel.jelly.convert.jena.riot.JellyLanguage
-import eu.ostrzyciel.jelly.core.RdfProtoDeserializationError
+import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamFrame
+import eu.ostrzyciel.jelly.core.{IoUtils, RdfProtoDeserializationError}
 import org.apache.jena.riot.system.StreamRDFWriter
 import org.apache.jena.riot.{RDFLanguages, RDFParser, RiotException}
 
 import java.io.{InputStream, OutputStream}
 
+object RdfFromJellyPrint extends RdfCommandPrintUtil:
+  // We exclude JellyBinary because translating JellyBinary to JellyBinary makes no sense
+  override val validFormats: List[RdfFormatOption] =
+    RdfFormatOption.values.filterNot(_ == JellyBinary).toList
+
+  override val defaultFormat: RdfFormatOption = NQuads
+
 case class RdfFromJellyOptions(
     @Recurse
     common: JellyOptions = JellyOptions(),
     @ExtraName("to") outputFile: Option[String] = None,
+    @ValueDescription("Output format.")
+    @HelpMessage(
+      RdfFromJellyPrint.helpMsg,
+    )
+    @ExtraName("out-format") outputFormat: Option[String] = None,
 ) extends HasJellyOptions
 
 object RdfFromJelly extends JellyCommand[RdfFromJellyOptions]:
@@ -34,20 +48,38 @@ object RdfFromJelly extends JellyCommand[RdfFromJellyOptions]:
         IoUtil.outputStream(fileName)
       case None => getStdOut
     }
-    doConversion(inputStream, outputStream)
+    doConversion(inputStream, outputStream, options.outputFormat)
 
-  /** This method reads the Jelly file, rewrites it to NQuads and writes it to some output stream
+  /** This method takes care of proper error handling and matches the desired output format to the
+    * correct conversion
+    *
     * @param inputStream
     *   InputStream
     * @param outputStream
     *   OutputStream
     * @throws JellyDeserializationError
     * @throws ParsingError
+    * @throws InvalidFormatSpecified
     */
-  private def doConversion(inputStream: InputStream, outputStream: OutputStream): Unit =
+  private def doConversion(
+      inputStream: InputStream,
+      outputStream: OutputStream,
+      format: Option[String],
+  ): Unit =
     try {
-      val nQuadWriter = StreamRDFWriter.getWriterStream(outputStream, RDFLanguages.NQUADS)
-      RDFParser.source(inputStream).lang(JellyLanguage.JELLY).parse(nQuadWriter)
+      format match {
+        case Some(f: String) =>
+          RdfFormatOption.find(f) match
+            case Some(JellyText) => jellyBinaryToText(inputStream, outputStream)
+            case Some(NQuads) => jellyToNQuad(inputStream, outputStream)
+            case _ =>
+              throw InvalidFormatSpecified(
+                f,
+                RdfFromJellyPrint.validFormatsString,
+              ) // if anything else, it's an invalid option
+        case None =>
+          jellyToNQuad(inputStream, outputStream) // default option if no parameter supplied
+      }
     } catch
       case e: RdfProtoDeserializationError =>
         throw JellyDeserializationError(e.getMessage)
@@ -55,3 +87,58 @@ object RdfFromJelly extends JellyCommand[RdfFromJellyOptions]:
         throw JenaRiotException(e)
       case e: InvalidProtocolBufferException =>
         throw InvalidJellyFile(e)
+
+  /** Reads a Jelly stream and writes its statements to the output stream as N-Quads.
+    * @param inputStream
+    *   Jelly input to parse
+    * @param outputStream
+    *   destination of the N-Quads output
+    */
+  private def jellyToNQuad(inputStream: InputStream, outputStream: OutputStream): Unit =
+    val nQuadWriter = StreamRDFWriter.getWriterStream(outputStream, RDFLanguages.NQUADS)
+    RDFParser.source(inputStream).lang(JellyLanguage.JELLY).parse(nQuadWriter)
+
+  /** Reads a Jelly stream and writes each frame in its Protobuf text form, preceded by a
+    * "# Frame N" comment line (N starts at 0). The output is flushed even if reading fails.
+    * @param inputStream
+    *   Jelly input to read frames from
+    * @param outputStream
+    *   destination of the text output
+    */
+  private def jellyBinaryToText(inputStream: InputStream, outputStream: OutputStream): Unit =
+    // Encode explicitly as UTF-8 instead of relying on the platform default charset
+    val utf8 = java.nio.charset.StandardCharsets.UTF_8
+
+    inline def writeFrameToOutput(frame: RdfStreamFrame, frameIndex: Int): Unit =
+      // we want to write a comment to the file before each frame
+      outputStream.write(s"# Frame $frameIndex\n".getBytes(utf8))
+      // the protoString is basically the jelly-txt format already
+      outputStream.write(frame.toProtoString.getBytes(utf8))
+
+    try {
+      iterateRdfStream(inputStream).zipWithIndex.foreach { case (frame, frameIndex) =>
+        writeFrameToOutput(frame, frameIndex)
+      }
+    } finally {
+      outputStream.flush()
+    }
+
+  /** Returns an iterator over the frames of a Jelly stream, auto-detecting its delimiting.
+    * @param inputStream
+    *   Jelly input to read frames from
+    * @return
+    *   the single frame of a non-delimited stream, or all frames of a delimited one
+    */
+  private def iterateRdfStream(inputStream: InputStream): Iterator[RdfStreamFrame] =
+    IoUtils.autodetectDelimiting(inputStream) match
+      case (false, newIn) =>
+        // Non-delimited Jelly file
+        // In this case, we can only read one frame
+        Iterator(RdfStreamFrame.parseFrom(newIn))
+      case (true, newIn) =>
+        // Delimited Jelly file
+        // In this case, we can read multiple frames
+        Iterator
+          .continually(RdfStreamFrame.parseDelimitedFrom(newIn))
+          .takeWhile(_.isDefined)
+          .flatten
diff --git a/src/test/scala/eu/neverblink/jelly/cli/command/RdfFromJellySpec.scala b/src/test/scala/eu/neverblink/jelly/cli/command/RdfFromJellySpec.scala
index 5ac48e8..0fdbe2b 100644
--- a/src/test/scala/eu/neverblink/jelly/cli/command/RdfFromJellySpec.scala
+++ b/src/test/scala/eu/neverblink/jelly/cli/command/RdfFromJellySpec.scala
@@ -2,6 +2,7 @@ package eu.neverblink.jelly.cli.command
 
 import com.google.protobuf.InvalidProtocolBufferException
 import eu.neverblink.jelly.cli.*
+
 import eu.neverblink.jelly.cli.command.helpers.*
 import eu.neverblink.jelly.cli.command.rdf.*
 import org.apache.jena.riot.RDFLanguages
@@ -16,51 +17,87 @@ import scala.util.Using
 
 class RdfFromJellySpec extends AnyWordSpec with Matchers with CleanUpAfterTest:
   "rdf from-jelly command" should {
-    "be able to convert a Jelly file to NTriples output stream" in {
-      val jellyFile = DataGenHelper.generateJellyFile(3)
-      val nQuadString = DataGenHelper.generateNQuadString(3)
-      val (out, err) =
-        RdfFromJelly.runTestCommand(List("rdf", "from-jelly", jellyFile))
-      val sortedOut = out.split("\n").map(_.trim).sorted
-      val sortedQuads = nQuadString.split("\n").map(_.trim).sorted
-      sortedOut should contain theSameElementsAs sortedQuads
-    }
+    "handle conversion of Jelly to NTriples" when {
+      "a file to output stream" in {
+        val jellyFile = DataGenHelper.generateJellyFile(3)
+        val nQuadString = DataGenHelper.generateNQuadString(3)
+        val (out, err) =
+          RdfFromJelly.runTestCommand(List("rdf", "from-jelly", jellyFile))
+        val sortedOut = out.split("\n").map(_.trim).sorted
+        val sortedQuads = nQuadString.split("\n").map(_.trim).sorted
+        sortedOut should contain theSameElementsAs sortedQuads
+      }
 
-    "be able to convert a Jelly stream to NTriples output stream" in {
-      DataGenHelper.generateJellyInputStream(3)
-      val nQuadString = DataGenHelper.generateNQuadString(3)
-      val (out, err) = RdfFromJelly.runTestCommand(List("rdf", "from-jelly"))
-      val sortedOut = out.split("\n").map(_.trim).sorted
-      val sortedQuads = nQuadString.split("\n").map(_.trim).sorted
-      sortedOut should contain theSameElementsAs sortedQuads
-    }
-    "be able to convert a Jelly file to NTriples file" in {
-      val jellyFile = DataGenHelper.generateJellyFile(3)
-      val nQuadString = DataGenHelper.generateNQuadString(3)
-      val outputFile = DataGenHelper.generateOutputFile(RDFLanguages.NQUADS)
-      val (out, err) =
-        RdfFromJelly.runTestCommand(
-          List("rdf", "from-jelly", jellyFile, "--to", outputFile),
+      "input stream to output stream" in {
+        DataGenHelper.generateJellyInputStream(3)
+        val nQuadString = DataGenHelper.generateNQuadString(3)
+        val (out, err) = RdfFromJelly.runTestCommand(
+          List("rdf", "from-jelly", "--out-format", RdfFormatOption.NQuads.cliOptions.head),
         )
-      val sortedOut = Using.resource(Source.fromFile(outputFile)) { content =>
-        content.getLines().toList.map(_.trim).sorted
+        val sortedOut = out.split("\n").map(_.trim).sorted
+        val sortedQuads = nQuadString.split("\n").map(_.trim).sorted
+        sortedOut should contain theSameElementsAs sortedQuads
+      }
+      "a file to file" in {
+        val jellyFile = DataGenHelper.generateJellyFile(3)
+        val nQuadString = DataGenHelper.generateNQuadString(3)
+        val outputFile = DataGenHelper.generateOutputFile(RDFLanguages.NQUADS)
+        val (out, err) =
+          RdfFromJelly.runTestCommand(
+            List("rdf", "from-jelly", jellyFile, "--to", outputFile),
+          )
+        val sortedOut = Using.resource(Source.fromFile(outputFile)) { content =>
+          content.getLines().toList.map(_.trim).sorted
+        }
+        val sortedQuads = nQuadString.split("\n").map(_.trim).sorted
+        sortedOut should contain theSameElementsAs sortedQuads
+        out.length should be(0)
+      }
+      "an input stream to file" in {
+        DataGenHelper.generateJellyInputStream(3)
+        val outputFile = DataGenHelper.generateOutputFile(RDFLanguages.NQUADS)
+        val nQuadString = DataGenHelper.generateNQuadString(3)
+        val (out, err) =
+          RdfFromJelly.runTestCommand(List("rdf", "from-jelly", "--to", outputFile))
+        val sortedOut = Using.resource(Source.fromFile(outputFile)) { content =>
+          content.getLines().toList.map(_.trim).sorted
+        }
+        val sortedQuads = nQuadString.split("\n").map(_.trim).sorted
+        sortedOut should contain theSameElementsAs sortedQuads
+        out.length should be(0)
       }
-      val sortedQuads = nQuadString.split("\n").map(_.trim).sorted
-      sortedOut should contain theSameElementsAs sortedQuads
-      out.length should be(0)
     }
-    "be able to convert a Jelly stream to NTriples file" in {
-      DataGenHelper.generateJellyInputStream(3)
-      val outputFile = DataGenHelper.generateOutputFile(RDFLanguages.NQUADS)
-      val nQuadString = DataGenHelper.generateNQuadString(3)
-      val (out, err) =
-        RdfFromJelly.runTestCommand(List("rdf", "from-jelly", "--to", outputFile))
-      val sortedOut = Using.resource(Source.fromFile(outputFile)) { content =>
-        content.getLines().toList.map(_.trim).sorted
+    "handle conversion of Jelly binary to text" when {
+      "a file to output stream" in {
+        val jellyFile = DataGenHelper.generateJellyFile(3)
+        val (out, err) =
+          RdfFromJelly.runTestCommand(
+            List(
+              "rdf",
+              "from-jelly",
+              jellyFile,
+              "--out-format",
+              RdfFormatOption.JellyText.cliOptions.head,
+            ),
+          )
+        val outString = """# Frame 0
+                          |rows {
+                          |  options {
+                          |    stream_name: ""
+                          |    physical_type: PHYSICAL_STREAM_TYPE_TRIPLES
+                          |    generalized_statements: true
+                          |    rdf_star: true
+                          |    max_name_table_size: 128
+                          |    max_prefix_table_size: 16
+                          |    max_datatype_table_size: 16
+                          |    logical_type: LOGICAL_STREAM_TYPE_FLAT_TRIPLES
+                          |    version: 1
+                          |  }
+                          |}""".stripMargin
+        out should include(outString)
+        "rows".r.findAllIn(out).length should be(10)
+        "http://example.org/predicate/".r.findAllIn(out).length should be(1)
       }
-      val sortedQuads = nQuadString.split("\n").map(_.trim).sorted
-      sortedOut should contain theSameElementsAs sortedQuads
-      out.length should be(0)
     }
     "throw proper exception" when {
       "input file is not found" in {
@@ -82,6 +119,7 @@ class RdfFromJellySpec extends AnyWordSpec with Matchers with CleanUpAfterTest:
         )
         val exception =
           intercept[ExitException] {
+
             RdfFromJelly.runTestCommand(List("rdf", "from-jelly", jellyFile))
           }
         val msg = InputFileInaccessible(jellyFile).getMessage
@@ -95,6 +133,7 @@ class RdfFromJellySpec extends AnyWordSpec with Matchers with CleanUpAfterTest:
         val quadFile = DataGenHelper.generateOutputFile()
         val exception =
           intercept[ExitException] {
+
             RdfFromJelly.runTestCommand(
               List("rdf", "from-jelly", jellyFile, "--to", quadFile),
             )
@@ -139,5 +178,18 @@ class RdfFromJellySpec extends AnyWordSpec with Matchers with CleanUpAfterTest:
         errContent should include("eu.neverblink.jelly.cli.InvalidJellyFile")
         exception.code should be(1)
       }
+      "invalid output format supplied" in {
+        val jellyFile = DataGenHelper.generateJellyFile(3)
+        val quadFile = DataGenHelper.generateOutputFile()
+        val exception =
+          intercept[ExitException] {
+            RdfFromJelly.runTestCommand(
+              List("rdf", "from-jelly", jellyFile, "--to", quadFile, "--out-format", "invalid"),
+            )
+          }
+        val msg = InvalidFormatSpecified("invalid", RdfFromJellyPrint.validFormatsString)
+        RdfFromJelly.getErrContent should include(msg.getMessage)
+        exception.code should be(1)
+      }
     }
   }