Skip to content

Commit c6b0cbc

Browse files
authored
dot export: fix file format (#289)
As discussed in joernio/joern#5158. I didn't get any feedback on whether this really solves their issue, but this is certainly better than before.
1 parent de83358 commit c6b0cbc

File tree

4 files changed

+91
-53
lines changed

4 files changed

+91
-53
lines changed

build.sbt

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@ name := "flatgraph"
22
ThisBuild / organization := "io.joern"
33
ThisBuild / scalaVersion := scala3
44

5-
val slf4jVersion = "2.0.16"
65
val scala3 = "3.5.2"
76
val scala2_12 = "2.12.20"
7+
val slf4jVersion = "2.0.16"
8+
val commonsTextVersion = "1.13.0"
89

910
/** Only the below listed projects are included in things like `sbt compile`.
1011
* We explicitly want to exclude `benchmarks` which requires qwiet.ai / shiftleft
@@ -51,6 +52,7 @@ lazy val formats = project
5152
name := "flatgraph-formats",
5253
libraryDependencies ++= Seq(
5354
"com.github.tototoshi" %% "scala-csv" % "2.0.0",
55+
"org.apache.commons" % "commons-text" % commonsTextVersion,
5456
"org.scala-lang.modules" %% "scala-xml" % "2.3.0",
5557
"io.spray" %% "spray-json" % "1.3.6",
5658
"com.github.scopt" %% "scopt" % "4.1.0",
@@ -81,7 +83,7 @@ lazy val domainClassesGenerator_3 = project
8183
libraryDependencies ++= Seq(
8284
"org.slf4j" % "slf4j-simple" % slf4jVersion % Optional,
8385
"com.lihaoyi" %% "os-lib" % "0.9.1",
84-
"org.apache.commons" % "commons-text" % "1.10.0",
86+
"org.apache.commons" % "commons-text" % commonsTextVersion,
8587
"com.github.scopt" %% "scopt" % "4.1.0",
8688
("org.scalameta" %% "scalafmt-dynamic" % "3.7.17").cross(CrossVersion.for3Use2_13),
8789
),
@@ -97,7 +99,7 @@ lazy val domainClassesGenerator_2_12 = project
9799
libraryDependencies ++= Seq(
98100
"org.slf4j"% "slf4j-simple" % slf4jVersion % Optional,
99101
"com.lihaoyi" %% "os-lib" % "0.9.1",
100-
"org.apache.commons" % "commons-text" % "1.12.0",
102+
"org.apache.commons" % "commons-text" % commonsTextVersion,
101103
"com.github.scopt" %% "scopt" % "4.1.0",
102104
"org.scalameta" %% "scalafmt-dynamic" % "3.7.17",
103105
),

formats/src/main/scala/flatgraph/formats/dot/DotExporter.scala

Lines changed: 30 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,40 @@
11
package flatgraph.formats.dot
22

33
import flatgraph.formats.{ExportResult, Exporter, iterableForList, resolveOutputFileSingle}
4-
import flatgraph.{Accessors, Edge, GNode, Graph, Schema}
4+
import flatgraph.{Accessors, Edge, GNode, Schema}
55

66
import java.nio.file.{Files, Path}
7-
import scala.jdk.CollectionConverters.MapHasAsScala
7+
import org.apache.commons.text.StringEscapeUtils
8+
import org.apache.commons.text.translate.LookupTranslator
9+
10+
import java.util.Collections
11+
import scala.jdk.CollectionConverters.MapHasAsJava
812
import scala.util.Using
913

1014
/** Exports flatgraph to graphviz dot/gv file
1115
*
1216
* Note: DOT doesn't natively support list property types, so we fake it by encoding it as a `;` delimited string. If you import this
1317
* into a different database, you'll need to parse that separately.
1418
*
19+
* Export rules for dot format as per https://github.com/joernio/joern/issues/5158: 1) If an attribute value contains special characters
20+
* such as spaces, <, >, =, etc., it must be enclosed in double quotation marks. Otherwise, it will cause syntax errors. 2) Graphviz requires
21+
* that the node ID must be a valid identifier. If the node ID is a pure number (such as 120259084301), it needs to be enclosed in double
22+
* quotation marks, otherwise it will be mistaken for an integer constant. 3) Attribute values that contain special characters (such
23+
* as CODE="") need to be enclosed in quotation marks or escaped in some cases. 4) In Graphviz's .dot files, it is best to end
24+
* each node definition, edge definition, and attribute definition with a semicolon. Earlier exports were missing these semicolons.
25+
*
1526
* https://en.wikipedia.org/wiki/DOT_(graph_description_language) https://www.graphviz.org/doc/info/lang.html
1627
* http://magjac.com/graphviz-visual-editor/ https://www.slideshare.net/albazo/graphiz-using-the-dot-language
1728
*/
1829
object DotExporter extends Exporter {
1930
override def defaultFileExtension = "dot"
31+
val EndOfLine = ';'
32+
33+
private val lookupMap = Map(
34+
"""\""" -> """\\""", // \ -> \\
35+
"\"" -> """\"""" // " -> \"
36+
)
37+
val translator = new LookupTranslator(Collections.unmodifiableMap(lookupMap.asJava))
2038

2139
override def runExport(schema: Schema, nodes: IterableOnce[GNode], edges: IterableOnce[Edge], outputFile: Path) = {
2240
val outFile = resolveOutputFileSingle(outputFile, s"export.$defaultFileExtension")
@@ -34,31 +52,29 @@ object DotExporter extends Exporter {
3452
nodeCount += 1
3553
val line = new StringBuffer()
3654
.append(" ")
37-
.append(node.id)
38-
.append(s"[label=${node.label} ")
55+
.append(s""""${node.id}" """)
56+
.append(s"""[label="${node.label}" """)
3957
.append(
4058
Accessors
4159
.getNodeProperties(node)
4260
.iterator
4361
.map { case (key, value) =>
44-
s"$key=${encodePropertyValue(value)}"
62+
s"""$key="${encodePropertyValue(value)}""""
4563
}
4664
.mkString(" ")
4765
)
4866
.append("]")
67+
.append(EndOfLine)
4968
writeLine(line.toString)
5069
}
5170

5271
edges.iterator.foreach { edge =>
5372
edgeCount += 1
73+
val propertyMaybe = Option(edge.property).map(property => s"""property="${encodePropertyValue(property)}"""").getOrElse("")
5474
val line = new StringBuffer()
55-
.append(s" ${edge.src.id()} -> ${edge.dst.id()} ")
56-
.append(s"[label=${edge.label} ")
57-
58-
if (edge.property != null)
59-
line.append(s"property=${encodePropertyValue(edge.property)}")
60-
61-
line.append("]")
75+
.append(s""" "${edge.src.id()}" -> "${edge.dst.id()}"""")
76+
.append(s""" [label="${edge.label}" $propertyMaybe]""")
77+
.append(EndOfLine)
6278
writeLine(line.toString)
6379
}
6480

@@ -71,13 +87,9 @@ object DotExporter extends Exporter {
7187
private def encodePropertyValue(value: Any): String = {
7288
value match {
7389
case value: String =>
74-
val escaped = value
75-
.replace("""\""", """\\""") // escape escape chars - this should come first
76-
.replace("\"", "\\\"") // escape double quotes, because we use them to enclose strings
77-
s"\"$escaped\""
90+
StringEscapeUtils.builder(translator).escape(value).toString
7891
case list if iterableForList.isDefinedAt(list) =>
79-
val values = iterableForList(list).mkString(";")
80-
s"\"$values\""
92+
iterableForList(list).map(encodePropertyValue).mkString(";")
8193
case value => value.toString
8294
}
8395
}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
package flatgraph.formats.dot
2+
3+
import better.files.*
4+
import flatgraph.*
5+
import org.scalatest.matchers.should.Matchers.*
6+
import org.scalatest.wordspec.AnyWordSpec
7+
import testdomains.generic.GenericDomain
8+
import testdomains.generic.edges.ConnectedTo
9+
import testdomains.generic.nodes.NewNodeA
10+
11+
class DotExporterTests extends AnyWordSpec {
12+
13+
"Exporter should export valid dot" in {
14+
val graph = GenericDomain.empty.graph
15+
val node1 = NewNodeA()
16+
.stringMandatory("regular string")
17+
.stringOptional("""<escapeMe1> [escapeMe2] escape=Me3 escape"Me4 escape\Me5 """)
18+
.stringList(Seq("one", "two"))
19+
val node2 = NewNodeA().intMandatory(1).intOptional(2).intList(Seq(10, 11))
20+
21+
DiffGraphApplier.applyDiff(
22+
graph,
23+
GenericDomain.newDiffGraphBuilder
24+
.addEdge(node1, node2, ConnectedTo.Label, "edge property")
25+
)
26+
27+
File.usingTemporaryDirectory(getClass.getName) { exportRootDirectory =>
28+
val exportResult = DotExporter.runExport(graph, exportRootDirectory.pathAsString)
29+
exportResult.nodeCount shouldBe 2
30+
exportResult.edgeCount shouldBe 1
31+
val Seq(exportedFile) = exportResult.files
32+
33+
val result = better.files.File(exportedFile).contentAsString.trim
34+
35+
/* Export rules for dot format as per https://github.com/joernio/joern/issues/5158:
36+
* 1) If the attribute value contains special characters such as spaces,<,>,=, etc., it must be enclosed in double quotation marks.
37+
* Otherwise, it will cause syntax errors.
38+
* 2) Graphviz requires that the node ID must be a valid identifier. If the node ID is a pure number (such as 120259084301),
39+
* it needs to be enclosed in double quotation marks, otherwise it will be mistaken for an integer constant.
40+
* 3) Attribute values that contain special characters (such as CODE="") need to be enclosed in quotation marks or escaped in some cases.
41+
* 4) In Graphviz's .dot files, it is best to end each node definition, edge definition, and attribute definition with a semicolon. Earlier exports were missing these semicolons.
42+
*/
43+
44+
withClue(s"actual result was: `$result`") {
45+
result.trim shouldBe
46+
"""digraph {
47+
| "0" [label="node_a" int_mandatory="42" string_list="one;two" string_mandatory="regular string" string_optional="<escapeMe1> [escapeMe2] escape=Me3 escape\"Me4 escape\\Me5 "];
48+
| "1" [label="node_a" int_list="10;11" int_mandatory="1" int_optional="2" string_mandatory="<empty>"];
49+
| "0" -> "1" [label="connected_to" property="edge property"];
50+
|}
51+
|""".stripMargin.trim
52+
}
53+
}
54+
}
55+
56+
}

tests/src/test/scala/flatgraph/formats/dot/DotTests.scala

Lines changed: 0 additions & 32 deletions
This file was deleted.

0 commit comments

Comments
 (0)