11package flatgraph .storage
22
3- import com .github .luben .zstd .Zstd
4- import flatgraph .*
3+ import flatgraph .{AccessHelpers , FreeSchema , GNode , Graph , Schema }
54import flatgraph .Edge .Direction
5+ import flatgraph .misc .Misc
66import flatgraph .storage .Manifest .{GraphItem , OutlineStorage }
77
88import java .nio .channels .FileChannel
@@ -11,29 +11,42 @@ import java.nio.file.Path
1111import java .nio .{ByteBuffer , ByteOrder }
1212import java .util .Arrays
1313import scala .collection .mutable
14+ import java .util .concurrent
1415
1516object Deserialization {
1617
17- def readGraph (storagePath : Path , schemaMaybe : Option [Schema ], persistOnClose : Boolean = true ): Graph = {
18+ def readGraph (
19+ storagePath : Path ,
20+ schemaMaybe : Option [Schema ],
21+ persistOnClose : Boolean = true ,
22+ requestedExecutor : Option [concurrent.ExecutorService ] = None
23+ ): Graph = {
24+ val executor = Misc .maybeOverrideExecutor(requestedExecutor)
1825 val fileChannel = new java.io.RandomAccessFile (storagePath.toAbsolutePath.toFile, " r" ).getChannel
26+ val queue = mutable.ArrayBuffer [concurrent.Future [Any ]]()
27+ val zstdCtx = new ZstdWrapper .ZstdCtx
28+ def submitJob [T ](block : => T ): concurrent.Future [T ] = {
29+ val res = executor.submit((() => block))
30+ queue.addOne(res.asInstanceOf [concurrent.Future [Any ]])
31+ res
32+ }
33+
1934 try {
2035 // fixme: Use convenience methods from schema to translate string->id. Fix after we get strict schema checking.
2136 val manifest = GraphItem .read(readManifest(fileChannel))
22- val pool = readPool(manifest, fileChannel)
37+ val pool = submitJob { readPool(manifest, fileChannel, zstdCtx) }
2338 val schema = schemaMaybe.getOrElse(freeSchemaFromManifest(manifest))
2439 val storagePathMaybe =
2540 if (persistOnClose) Option (storagePath)
2641 else None
2742 val g = new Graph (schema, storagePathMaybe)
2843 val nodekinds = mutable.HashMap [String , Short ]()
2944 for (nodeKind <- g.schema.nodeKinds) nodekinds(g.schema.getNodeLabel(nodeKind)) = nodeKind.toShort
30- val kindRemapper = Array .fill(manifest.nodes.size)(- 1 .toShort)
3145 val nodeRemapper = new Array [Array [GNode ]](manifest.nodes.length)
3246 for {
3347 (nodeItem, idx) <- manifest.nodes.zipWithIndex
3448 nodeKind <- nodekinds.get(nodeItem.nodeLabel)
3549 } {
36- kindRemapper(idx) = nodeKind
3750 val nodes = new Array [GNode ](nodeItem.nnodes)
3851 for (seq <- Range (0 , nodes.length)) nodes(seq) = g.schema.makeNode(g, nodeKind, seq)
3952 g.nodesArray(nodeKind) = nodes
@@ -66,11 +79,17 @@ object Deserialization {
6679 val direction = Direction .fromOrdinal(edgeItem.inout)
6780 if (nodeKind.isDefined && edgeKind.isDefined) {
6881 val pos = g.schema.neighborOffsetArrayIndex(nodeKind.get, direction, edgeKind.get)
69- g.neighbors(pos) = deltaDecode(readArray(fileChannel, edgeItem.qty, nodeRemapper, pool).asInstanceOf [Array [Int ]])
70- g.neighbors(pos + 1 ) = readArray(fileChannel, edgeItem.neighbors, nodeRemapper, pool)
71- val property = readArray(fileChannel, edgeItem.property, nodeRemapper, pool)
72- if (property != null )
73- g.neighbors(pos + 2 ) = property
82+ submitJob {
83+ g.neighbors(pos) = deltaDecode(readArray(fileChannel, edgeItem.qty, nodeRemapper, pool, zstdCtx).asInstanceOf [Array [Int ]])
84+ }
85+ submitJob {
86+ g.neighbors(pos + 1 ) = readArray(fileChannel, edgeItem.neighbors, nodeRemapper, pool, zstdCtx)
87+ }
88+ submitJob {
89+ val property = readArray(fileChannel, edgeItem.property, nodeRemapper, pool, zstdCtx)
90+ if (property != null )
91+ g.neighbors(pos + 2 ) = property
92+ }
7493 }
7594 }
7695
@@ -91,12 +110,18 @@ object Deserialization {
91110 val propertyKind = propertykinds.get((property.nodeLabel, property.propertyLabel))
92111 if (nodeKind.isDefined && propertyKind.isDefined) {
93112 val pos = g.schema.propertyOffsetArrayIndex(nodeKind.get, propertyKind.get)
94- g.properties(pos) = deltaDecode(readArray(fileChannel, property.qty, nodeRemapper, pool).asInstanceOf [Array [Int ]])
95- g.properties(pos + 1 ) = readArray(fileChannel, property.property, nodeRemapper, pool)
113+ submitJob {
114+ g.properties(pos) = deltaDecode(readArray(fileChannel, property.qty, nodeRemapper, pool, zstdCtx).asInstanceOf [Array [Int ]])
115+ }
116+ submitJob { g.properties(pos + 1 ) = readArray(fileChannel, property.property, nodeRemapper, pool, zstdCtx) }
96117 }
97118 }
119+ queue.foreach { _.get() }
98120 g
99- } finally fileChannel.close()
121+ } catch {
122+ case ex : java.util.concurrent.ExecutionException =>
123+ throw ex.getCause()
124+ } finally { fileChannel.close(); zstdCtx.close(); }
100125 }
101126
102127 private def freeSchemaFromManifest (manifest : Manifest .GraphItem ): FreeSchema = {
@@ -171,23 +196,17 @@ object Deserialization {
171196
172197 }
173198
174- private def readPool (manifest : GraphItem , fileChannel : FileChannel ): Array [String ] = {
175- val stringPoolLength = ZstdWrapper (
176- Zstd
177- .decompress(
178- fileChannel.map(FileChannel .MapMode .READ_ONLY , manifest.stringPoolLength.startOffset, manifest.stringPoolLength.compressedLength),
179- manifest.stringPoolLength.decompressedLength
180- )
181- .order(ByteOrder .LITTLE_ENDIAN )
182- )
183- val stringPoolBytes = ZstdWrapper (
184- Zstd
185- .decompress(
186- fileChannel.map(FileChannel .MapMode .READ_ONLY , manifest.stringPoolBytes.startOffset, manifest.stringPoolBytes.compressedLength),
187- manifest.stringPoolBytes.decompressedLength
188- )
189- .order(ByteOrder .LITTLE_ENDIAN )
190- )
199+ private def readPool (manifest : GraphItem , fileChannel : FileChannel , zstdCtx : ZstdWrapper .ZstdCtx ): Array [String ] = {
200+ val stringPoolLength = zstdCtx
201+ .decompress(
202+ fileChannel.map(FileChannel .MapMode .READ_ONLY , manifest.stringPoolLength.startOffset, manifest.stringPoolLength.compressedLength),
203+ manifest.stringPoolLength.decompressedLength
204+ )
205+ val stringPoolBytes = zstdCtx
206+ .decompress(
207+ fileChannel.map(FileChannel .MapMode .READ_ONLY , manifest.stringPoolBytes.startOffset, manifest.stringPoolBytes.compressedLength),
208+ manifest.stringPoolBytes.decompressedLength
209+ )
191210 val poolBytes = new Array [Byte ](manifest.stringPoolBytes.decompressedLength)
192211 stringPoolBytes.get(poolBytes)
193212 val pool = new Array [String ](manifest.stringPoolLength.decompressedLength >> 2 )
@@ -215,11 +234,18 @@ object Deserialization {
215234 a
216235 }
217236
218- private def readArray (channel : FileChannel , ptr : OutlineStorage , nodes : Array [Array [GNode ]], stringPool : Array [String ]): Array [? ] = {
237+ private def readArray (
238+ channel : FileChannel ,
239+ ptr : OutlineStorage ,
240+ nodes : Array [Array [GNode ]],
241+ stringPoolFuture : concurrent.Future [Array [String ]],
242+ zstdCtx : ZstdWrapper .ZstdCtx
243+ ): Array [? ] = {
219244 if (ptr == null ) return null
220- val dec = ZstdWrapper (
221- Zstd .decompress(channel.map(FileChannel .MapMode .READ_ONLY , ptr.startOffset, ptr.compressedLength), ptr.decompressedLength)
222- ).order(ByteOrder .LITTLE_ENDIAN )
245+ if (ptr.typ == StorageType .String ) stringPoolFuture.get()
246+
247+ val dec =
248+ zstdCtx.decompress(channel.map(FileChannel .MapMode .READ_ONLY , ptr.startOffset, ptr.compressedLength), ptr.decompressedLength)
223249 ptr.typ match {
224250 case StorageType .Bool =>
225251 val bytes = new Array [Byte ](dec.limit())
@@ -253,9 +279,10 @@ object Deserialization {
253279 dec.asDoubleBuffer().get(res)
254280 res
255281 case StorageType .String =>
256- val res = new Array [String ](dec.limit() >> 2 )
257- val intbuf = dec.asIntBuffer()
258- var idx = 0
282+ val stringPool = stringPoolFuture.get()
283+ val res = new Array [String ](dec.limit() >> 2 )
284+ val intbuf = dec.asIntBuffer()
285+ var idx = 0
259286 while (idx < res.length) {
260287 val offset = intbuf.get(idx)
261288 if (offset >= 0 ) res(idx) = stringPool(offset)
0 commit comments