I'm fairly sure the proof of concept (POC) worked, but I haven't tested it recently. I might as well post the code here:
local jit = require("jit")
-- local LOGFILE_HANDLE = assert(io.open("jit.log", "w+"))
local ffi = require("ffi")
-- Reader for Arcturus PAK archives: format constants plus the C layouts used to decode
-- the archive footer and the entries of its table of contents.
local ArcturusPAK = {
    -- Number of bytes read from the end of the archive and decoded as the ArcturusPAK struct
    -- (4 + 4 + 1 bytes of fields; ffi.sizeof is larger because of trailing padding)
    FOOTER_SIZE_IN_BYTES = 9,
    -- Value the footer's versionTag must have for Open to accept the file
    MAGIC_VERSION_TAG = 18,
    -- Values of ArcturusFileRecord.type
    RECORD_TYPE_FILE = 1,
    RECORD_TYPE_DIRECTORY = 2,
    -- The footer fields use fixed-width types: `unsigned long` is 8 bytes on LP64 platforms
    -- (64-bit Linux/macOS), which would move versionTag to offset 16, i.e. outside of the
    -- 9 footer bytes that are actually read. uint32_t keeps it at offset 8 everywhere.
    -- NOTE(review): ArcturusFileRecord is not packed, so the C ABI places 2 bytes of padding
    -- after `type` (ffi.sizeof == 16) - confirm this matches the on-disk record layout.
    cdefs = [[
        typedef struct ArcturusPAK {
            uint32_t offset;
            uint32_t numRecords;
            unsigned char versionTag;
        } ArcturusPAK;
        typedef struct ArcturusFileRecord {
            unsigned char pathSize;
            unsigned char type;
            int offset;
            int compressedSize;
            int decompressedSize;
        } ArcturusFileRecord;
    ]],
}
ffi.cdef(ArcturusPAK.cdefs)
-- Opens a PAK archive and decodes its footer (blocking load using the standard Lua io library).
-- @param filePath path of the archive to open
-- @return table with the fields `handle` (the open file), `offset`, `numRecords` and `versionTag`
-- Raises a string error (level 0, no position info) if the file cannot be opened, is smaller
-- than the footer, or carries an unexpected version tag. The file handle is closed before
-- any of these errors is raised, so a failed Open does not leak it.
function ArcturusPAK:Open(filePath)
    local pakFileHandle = io.open(filePath, "rb")
    if not pakFileHandle then
        error("Failed to open PAK file " .. filePath .. " (no such file exists)", 0)
    end

    local footerSize = self.FOOTER_SIZE_IN_BYTES
    local EOF = pakFileHandle:seek("end")
    if not EOF or EOF < footerSize then
        pakFileHandle:close()
        error("Failed to open PAK file " .. filePath .. " (not a valid PAK file)", 0)
    end

    -- The footer occupies the last bytes of the archive
    pakFileHandle:seek("set", EOF - footerSize)
    local metadata = pakFileHandle:read(footerSize)
    if not metadata or #metadata < footerSize then
        pakFileHandle:close()
        error("Failed to open PAK file " .. filePath .. " (not a valid PAK file)", 0)
    end

    -- `header` is only a view onto the bytes of `metadata`; every field is converted to a
    -- plain Lua number below, while the string is still referenced by this function
    local header = ffi.cast("ArcturusPAK*", metadata)
    local versionTag = tonumber(header.versionTag)
    if versionTag ~= ArcturusPAK.MAGIC_VERSION_TAG then
        pakFileHandle:close()
        error(
            "Invalid PAK version tag " .. versionTag .. " (" .. ArcturusPAK.MAGIC_VERSION_TAG .. " expected)",
            0
        )
    end

    return {
        handle = pakFileHandle,
        offset = tonumber(header.offset),
        numRecords = tonumber(header.numRecords),
        versionTag = versionTag,
    }
end
-- Reads all file records of an opened archive and caches them in pakInfo.records.
-- @param pakInfo table returned by ArcturusPAK:Open (its `handle` must still be open)
-- @return the records table; each record is stored twice, under its 1-based position in the
--         table of contents and under its path string. A record is a plain Lua table with
--         pathSize, type, offset, compressedSize, decompressedSize, pathString and cdata.
-- Raises a string error (level 0) if the handle is not an open file or the table of contents
-- ends prematurely.
function ArcturusPAK:ReadTableOfContents(pakInfo)
    if io.type(pakInfo.handle) ~= "file" then
        error("Failed to read table of contents (invalid PAK file handle)", 0)
    end

    local handle = pakInfo.handle
    local headerSize = ffi.sizeof("ArcturusFileRecord")
    handle:seek("set", pakInfo.offset)

    local records = {}
    -- Even for HUGE archives this should be small enough to keep in memory
    for _ = 1, pakInfo.numRecords, 1 do
        local recordHeader = handle:read(headerSize)
        if not recordHeader or #recordHeader < headerSize then
            error("Failed to read table of contents (unexpected end of file)", 0)
        end

        -- Copy the bytes into a struct that owns its memory; a pointer cast onto the
        -- temporary Lua string would dangle as soon as that string is garbage-collected
        local record = ffi.new("ArcturusFileRecord")
        ffi.copy(record, recordHeader, headerSize)

        -- The path is stored right after the fixed-size header, followed by one extra byte
        local pathSize = tonumber(record.pathSize)
        local rawPath = handle:read(pathSize + 1)
        if not rawPath or #rawPath < pathSize then
            error("Failed to read table of contents (unexpected end of file)", 0)
        end
        local pathString = rawPath:sub(1, pathSize)

        local recordData = {
            pathSize = pathSize,
            type = tonumber(record.type),
            offset = tonumber(record.offset),
            compressedSize = tonumber(record.compressedSize),
            decompressedSize = tonumber(record.decompressedSize),
            pathString = pathString,
            cdata = record,
        }
        -- Positional and path-based lookups must yield the very same record table
        records[#records + 1] = recordData
        records[pathString] = recordData
    end

    pakInfo.records = records
    return records
end
-- Returns the raw (still compressed) bytes stored for the given file record.
-- @param pakInfo table returned by ArcturusPAK:Open, with the table of contents already read
-- @param pathString path of the record inside the archive
-- @return the bytes read from the archive for that record
-- Raises a string error (level 0) if the handle is not an open file, the table of contents
-- was not read, the path is unknown, or the record is a directory.
function ArcturusPAK:GetCompressedFileContents(pakInfo, pathString)
    local entry
    local failureReason

    -- Validation happens in a fixed order; the first failing check determines the message
    if io.type(pakInfo.handle) ~= "file" then
        failureReason = "invalid PAK file handle"
    elseif not pakInfo.records then
        failureReason = "table of contents not read"
    else
        entry = pakInfo.records[pathString]
        if not entry then
            failureReason = "invalid path string"
        elseif entry.type == ArcturusPAK.RECORD_TYPE_DIRECTORY then
            failureReason = "it's a directory"
        end
    end

    if failureReason then
        error("Failed to get compressed file contents for record " .. pathString .. " (" .. failureReason .. ")", 0)
    end

    local archive = pakInfo.handle
    archive:seek("set", entry.offset)
    local rawBytes = archive:read(entry.compressedSize)
    return rawBytes
end
-- Reads the bytes stored for the given file record and returns them decompressed.
-- @param pakInfo table returned by ArcturusPAK:Open, with the table of contents already read
-- @param pathString path of the record inside the archive
-- @return whatever self:DecompressFileContents returns for the record's bytes
-- Raises a string error (level 0) if the handle is not an open file, the table of contents
-- was not read, the path is unknown, or the record is a directory.
function ArcturusPAK:GetDecompressedFileContents(pakInfo, pathString)
    local entry
    local failureReason

    -- Validation happens in a fixed order; the first failing check determines the message
    if io.type(pakInfo.handle) ~= "file" then
        failureReason = "invalid PAK file handle"
    elseif not pakInfo.records then
        failureReason = "table of contents not read"
    else
        entry = pakInfo.records[pathString]
        if not entry then
            failureReason = "invalid path string"
        elseif entry.type == ArcturusPAK.RECORD_TYPE_DIRECTORY then
            failureReason = "it's a directory"
        end
    end

    if failureReason then
        error("Failed to get decompressed file contents for record " .. pathString .. " (" .. failureReason .. ")", 0)
    end

    local archive = pakInfo.handle
    archive:seek("set", entry.offset)
    local rawBytes = archive:read(entry.compressedSize)
    return self:DecompressFileContents(rawBytes, entry.decompressedSize)
end
local bit = require("bit")
local CHUNK_SIZE = 1024
-- Decompresses a buffer using plain Lua tables and strings.
--
-- The stream is a sequence of groups: one mask byte followed by up to eight items. The mask
-- is consumed from its least significant bit; a cleared bit means the next item is a single
-- literal byte, a set bit means it is a two-byte back-reference into the output so far.
--
-- @param size_compressed number of bytes of compressed_data to consume
-- @param size_original expected output size (accepted for signature parity with
--        decompressFFI; this implementation does not use it)
-- @param compressed_data Lua string holding the compressed bytes
-- @return the decompressed bytes as a Lua string
-- Raises a string error (level 0) if the stream ends prematurely or a back-reference
-- points at output that has not been produced.
--
-- The output is collected as one-character strings and joined with table.concat, so neither
-- the input nor the output size is limited by the number of values that string.byte or
-- unpack can push onto the stack at once.
local function decompress(size_compressed, size_original, compressed_data)
    local byte_at, char_at = string.byte, string.sub
    local output = {}
    local write_pos, read_pos = 1, 1

    while read_pos <= size_compressed do
        local mask = byte_at(compressed_data, read_pos)
        if not mask then
            error("Failed to decompress (unexpected end of compressed data)", 0)
        end
        read_pos = read_pos + 1

        for _ = 0, 7 do
            if read_pos > size_compressed then
                break -- the last group may hold fewer than eight items
            end

            if bit.band(mask, 1) == 1 then
                local byte1, byte2 = byte_at(compressed_data, read_pos, read_pos + 1)
                if not byte2 then
                    error("Failed to decompress (truncated back-reference)", 0)
                end
                read_pos = read_pos + 2

                -- High nibble of byte2: run length minus 2
                local run_length = bit.rshift(byte2, 4) + 2
                -- Low nibble of byte2 (high bits) and byte1 (low bits): 12-bit distance
                local distance = bit.lshift(bit.band(byte2, 0x0F), 8) + byte1

                -- Copy one element at a time: source and destination may overlap
                for _ = 1, run_length do
                    local copied = output[write_pos - distance]
                    if copied == nil then
                        error("Failed to decompress (invalid back-reference)", 0)
                    end
                    output[write_pos] = copied
                    write_pos = write_pos + 1
                end
            else
                local literal = char_at(compressed_data, read_pos, read_pos)
                if literal == "" then
                    error("Failed to decompress (unexpected end of compressed data)", 0)
                end
                output[write_pos] = literal
                write_pos = write_pos + 1
                read_pos = read_pos + 1
            end

            mask = bit.rshift(mask, 1)
        end
    end

    return table.concat(output)
end
-- Decompresses a buffer using FFI byte arrays for both input and output.
--
-- The stream is a sequence of groups: one mask byte followed by up to eight items. The mask
-- is consumed from its least significant bit; a cleared bit means the next item is a single
-- literal byte, a set bit means it is a two-byte back-reference into the output so far.
--
-- @param size_compressed number of bytes of compressed_data to consume
-- @param size_original size of the output buffer; the returned string always has this length
-- @param compressed_data Lua string holding the compressed bytes
-- @return the decompressed bytes as a Lua string of length size_original
-- Raises a string error (level 0) instead of touching memory outside of either buffer: when
-- the stream is shorter than announced, a back-reference is truncated or points before the
-- start of the output, or the output would exceed size_original.
local function decompressFFI(size_compressed, size_original, compressed_data)
    local input_length = #compressed_data
    if size_compressed > input_length then
        error("Failed to decompress (unexpected end of compressed data)", 0)
    end

    local compressed_buffer = ffi.new("uint8_t[?]", input_length)
    ffi.copy(compressed_buffer, compressed_data, input_length)
    local result_buffer = ffi.new("uint8_t[?]", size_original)

    -- Both positions are 0-based offsets into the cdata arrays
    local write_pos, read_pos = 0, 0
    while read_pos < size_compressed do
        local mask = compressed_buffer[read_pos]
        read_pos = read_pos + 1

        for _ = 0, 7 do
            if read_pos >= size_compressed then
                break -- the last group may hold fewer than eight items
            end

            if bit.band(mask, 1) == 1 then
                if read_pos + 1 >= input_length then
                    error("Failed to decompress (truncated back-reference)", 0)
                end
                local byte1, byte2 = compressed_buffer[read_pos], compressed_buffer[read_pos + 1]
                read_pos = read_pos + 2

                -- High nibble of byte2: run length minus 2
                local run_length = bit.rshift(byte2, 4) + 2
                -- Low nibble of byte2 (high bits) and byte1 (low bits): 12-bit distance
                local distance = bit.lshift(bit.band(byte2, 0x0F), 8) + byte1

                if distance > write_pos then
                    error("Failed to decompress (invalid back-reference)", 0)
                end
                if write_pos + run_length > size_original then
                    error("Failed to decompress (output exceeds the expected size)", 0)
                end

                -- Copy one byte at a time: source and destination may overlap
                for _ = 1, run_length do
                    result_buffer[write_pos] = result_buffer[write_pos - distance]
                    write_pos = write_pos + 1
                end
            else
                if write_pos >= size_original then
                    error("Failed to decompress (output exceeds the expected size)", 0)
                end
                result_buffer[write_pos] = compressed_buffer[read_pos]
                write_pos = write_pos + 1
                read_pos = read_pos + 1
            end

            mask = bit.rshift(mask, 1)
        end
    end

    return ffi.string(result_buffer, size_original)
end
-- TODO: accept a string buffer as input, too (probably much easier to index)
-- TODO: eliminate this wrapper or move it elsewhere
-- Decompresses the given bytes with the pure-Lua decompressor.
-- @param compressedFileContents Lua string holding the compressed bytes
-- @param decompressedSize expected size of the result, passed through to the decompressor
-- @return the decompressed bytes as a Lua string
function ArcturusPAK:DecompressFileContents(compressedFileContents, decompressedSize)
    local compressedSize = #compressedFileContents
    return decompress(compressedSize, decompressedSize, compressedFileContents)
end
-- Fixture archives used by the tests below (paths are relative to the working directory)
local pakFilePath = "../Fixtures/data.pak"
local ZERO_SIZE_PAK = "../Fixtures/zerosize.pak"
-- Test framework entry points and assertions, looked up from the global environment
local describe = _G.describe
local it = _G.it
local assertEquals = _G.assertEquals
local assertThrows = _G.assertThrows
describe("ArcturusPAK", function()
-- Tests for ArcturusPAK:Open (error cases first, then the footer of the fixture archive)
describe("Open", function()
    it("should throw if passed an invalid file path", function()
        local function openNonexistentFile()
            ArcturusPAK:Open("meep.404")
        end
        assertThrows(openNonexistentFile, "Failed to open PAK file meep.404 (no such file exists)")
    end)
    it("should throw if the file is empty ", function()
        local function openEmptyFile()
            ArcturusPAK:Open(ZERO_SIZE_PAK) -- Should always exist
        end
        assertThrows(openEmptyFile, "Failed to open PAK file " .. ZERO_SIZE_PAK .. " (not a valid PAK file)")
        -- TODO assert fd is closed
    end)
    it("should throw if passed a valid non-PAK file path ", function()
        local function openNonPakFile()
            ArcturusPAK:Open("../Fixtures/invalid.pak") -- Should always exist
        end
        assertThrows(openNonPakFile, "Invalid PAK version tag 46 (18 expected)")
        -- TODO assert fd is closed
    end)
    -- Throw if not a valid PAK file
    it("should be able to read the archive metadata when given a valid PAK file path", function()
        local pakInfo = ArcturusPAK:Open(pakFilePath)
        assertEquals(pakInfo.versionTag, ArcturusPAK.MAGIC_VERSION_TAG)
        assertEquals(pakInfo.offset, 695226075)
        assertEquals(pakInfo.numRecords, 17743)
        assertEquals(type(pakInfo.handle), "userdata")
        -- Release the file descriptor instead of waiting for the garbage collector
        pakInfo.handle:close()
    end)
end)
-- Tests for ArcturusPAK:ReadTableOfContents
describe("ReadTableOfContents", function()
    it("should throw if the PAK file handle is already closed", function()
        local pakInfo = ArcturusPAK:Open(pakFilePath)
        pakInfo.handle:close()
        local function readFromClosedHandle()
            ArcturusPAK:ReadTableOfContents(pakInfo)
        end
        assertThrows(readFromClosedHandle, "Failed to read table of contents (invalid PAK file handle)")
    end)
    it("should return the table of file records when passed a valid PAK file handle", function()
        local pakInfo = ArcturusPAK:Open(pakFilePath)
        local records = ArcturusPAK:ReadTableOfContents(pakInfo)
        assertEquals(#records, pakInfo.numRecords)
        -- TODO assert first record is data folder, last is X (TBD)
        local secondRecord = records["data/_tactics.scr"]
        assertEquals(secondRecord.pathSize, 17)
        assertEquals(secondRecord.type, ArcturusPAK.RECORD_TYPE_FILE)
        assertEquals(secondRecord.offset, 695224756)
        assertEquals(secondRecord.compressedSize, 1312)
        assertEquals(secondRecord.decompressedSize, 2472)
        -- Release the file descriptor instead of waiting for the garbage collector
        pakInfo.handle:close()
    end)
    it("should cache the table of contents when passed a valid PAK file handle", function()
        local pakInfo = ArcturusPAK:Open(pakFilePath)
        local records = ArcturusPAK:ReadTableOfContents(pakInfo)
        assertEquals(records, pakInfo.records)
        pakInfo.handle:close()
    end)
end)
-- Tests for ArcturusPAK:GetCompressedFileContents. Every test that opens the fixture archive
-- closes the handle again, so the suite does not accumulate open file descriptors.
describe("GetCompressedFileContents", function()
    it("should throw if the PAK file handle is already closed", function()
        local pakInfo = ArcturusPAK:Open(pakFilePath)
        pakInfo.handle:close()
        local function readFromClosedHandle()
            ArcturusPAK:GetCompressedFileContents(pakInfo, "hello.world")
        end
        assertThrows(
            readFromClosedHandle,
            "Failed to get compressed file contents for record hello.world (invalid PAK file handle)"
        )
    end)
    -- TODO
    -- should throw if no handle was opened
    it("should throw if the table of contents wasn't yet read", function()
        local pakInfo = ArcturusPAK:Open(pakFilePath)
        local function readWithoutTOC()
            ArcturusPAK:GetCompressedFileContents(pakInfo, "hello.world")
        end
        assertThrows(
            readWithoutTOC,
            "Failed to get compressed file contents for record hello.world (table of contents not read)"
        )
        pakInfo.handle:close()
    end)
    it("should throw if an invalid path string was passed", function()
        local pakInfo = ArcturusPAK:Open(pakFilePath)
        local function readWithInvalidPathString()
            ArcturusPAK:ReadTableOfContents(pakInfo)
            ArcturusPAK:GetCompressedFileContents(pakInfo, "hello.world")
        end
        assertThrows(
            readWithInvalidPathString,
            "Failed to get compressed file contents for record hello.world (invalid path string)"
        )
        pakInfo.handle:close()
    end)
    it("should throw if a directory path string was passed", function()
        -- Opened outside of the throwing function so that the handle can be closed afterwards
        local pakInfo = ArcturusPAK:Open(pakFilePath)
        local function attemptToReadDirectoryRecord()
            ArcturusPAK:ReadTableOfContents(pakInfo)
            ArcturusPAK:GetCompressedFileContents(pakInfo, "data/diary")
        end
        assertThrows(
            attemptToReadDirectoryRecord,
            "Failed to get compressed file contents for record data/diary (it's a directory)"
        )
        pakInfo.handle:close()
    end)
    -- TODO move
    -- local zlib = require("zlib")
    it("should return the compressed buffer if a valid path string was passed", function()
        local pakInfo = ArcturusPAK:Open(pakFilePath)
        ArcturusPAK:ReadTableOfContents(pakInfo)
        local compressedFileContents = ArcturusPAK:GetCompressedFileContents(pakInfo, "data/global.ini")
        pakInfo.handle:close()
        assertEquals(#compressedFileContents, 187)
        -- TODO compare a checksum (crc32/adler32) of the contents once zlib is available
        -- It's not a guarantee that the file contents are 100% correct, but it's good enough for now
        assertEquals(compressedFileContents:sub(6, 8), "gnd")
        assertEquals(compressedFileContents:sub(165, 169), "@load")
    end)
end)
-- Close: throw if no fd, success if fd
-- Tests for ArcturusPAK:GetDecompressedFileContents. Every test that opens the fixture archive
-- closes the handle again, so the suite does not accumulate open file descriptors.
describe("GetDecompressedFileContents", function()
    it("should throw if the PAK file handle is already closed", function()
        local pakInfo = ArcturusPAK:Open(pakFilePath)
        pakInfo.handle:close()
        local function readFromClosedHandle()
            ArcturusPAK:GetDecompressedFileContents(pakInfo, "hello.world")
        end
        assertThrows(
            readFromClosedHandle,
            "Failed to get decompressed file contents for record hello.world (invalid PAK file handle)"
        )
    end)
    -- TODO
    -- should throw if no handle was opened
    it("should throw if the table of contents wasn't yet read", function()
        local pakInfo = ArcturusPAK:Open(pakFilePath)
        local function readWithoutTOC()
            ArcturusPAK:GetDecompressedFileContents(pakInfo, "hello.world")
        end
        assertThrows(
            readWithoutTOC,
            "Failed to get decompressed file contents for record hello.world (table of contents not read)"
        )
        pakInfo.handle:close()
    end)
    it("should throw if an invalid path string was passed", function()
        local pakInfo = ArcturusPAK:Open(pakFilePath)
        local function readWithInvalidPathString()
            ArcturusPAK:ReadTableOfContents(pakInfo)
            ArcturusPAK:GetDecompressedFileContents(pakInfo, "hello.world")
        end
        assertThrows(
            readWithInvalidPathString,
            "Failed to get decompressed file contents for record hello.world (invalid path string)"
        )
        pakInfo.handle:close()
    end)
    it("should throw if a directory path string was passed", function()
        -- Opened outside of the throwing function so that the handle can be closed afterwards
        local pakInfo = ArcturusPAK:Open(pakFilePath)
        local function attemptToDecompressDirectoryRecord()
            ArcturusPAK:ReadTableOfContents(pakInfo)
            ArcturusPAK:GetDecompressedFileContents(pakInfo, "data/bmp")
        end
        assertThrows(
            attemptToDecompressDirectoryRecord,
            "Failed to get decompressed file contents for record data/bmp (it's a directory)"
        )
        pakInfo.handle:close()
    end)
    -- TODO move
    -- local zlib = require("zlib")
    it("should return the decompressed buffer if a valid path string was passed", function()
        local pakInfo = ArcturusPAK:Open(pakFilePath)
        ArcturusPAK:ReadTableOfContents(pakInfo)
        local decompressedFileContents = ArcturusPAK:GetDecompressedFileContents(pakInfo, "data/global.ini")
        pakInfo.handle:close()
        assertEquals(#decompressedFileContents, 275)
        -- TODO compare a checksum (crc32/adler32) of the contents once zlib is available
        -- It's not a guarantee that the file contents are 100% correct, but it's good enough for now
        assertEquals(decompressedFileContents:sub(5, 14), "gndopacity")
        assertEquals(decompressedFileContents:sub(145, 153), "wavewater")
    end)
end)
-- Example usage and benchmark: runs both decompressors on the same record, prints the CPU
-- time each one took and checks that they produce identical output.
describe("DecompressBytes", function()
    it("should do some magic (TBD)", function()
        -- Number of runs per implementation; both loops execute exactly this many times
        local BENCHMARK_ITERATIONS = 1000000
        -- Alternative benchmark input: "data/arcfonth.dat"
        local recordPath = "data/global.ini"

        local pakInfo = ArcturusPAK:Open(pakFilePath)
        ArcturusPAK:ReadTableOfContents(pakInfo)
        local compressedFileContents = ArcturusPAK:GetCompressedFileContents(pakInfo, recordPath)
        local size_compressed = #compressedFileContents
        local size_original = pakInfo.records[recordPath].decompressedSize
        -- Everything needed is in memory now, so the archive can be closed before measuring
        pakInfo.handle:close()

        local result
        local start = os.clock() -- uv.hrtime?
        for _ = 1, BENCHMARK_ITERATIONS do
            result = decompress(size_compressed, size_original, compressedFileContents)
        end
        local elapsed = os.clock() - start
        print("Decompressed using Lua code in", elapsed, "seconds")

        local resultFFI
        start = os.clock()
        for _ = 1, BENCHMARK_ITERATIONS do
            resultFFI = decompressFFI(size_compressed, size_original, compressedFileContents)
        end
        elapsed = os.clock() - start
        print("Decompressed using FFI in", elapsed, "seconds")

        assert(result == resultFFI)
    end)
end)
end)
-- todo test all files can be dec, extracted
Details:
I'm fairly sure the proof of concept (POC) worked, but I haven't tested it recently. I might as well post the code here: