diff --git a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs index c89c3fb4..a657972b 100644 --- a/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs +++ b/SabreTools.Serialization/Wrappers/MicrosoftCabinet.Extraction.cs @@ -231,69 +231,6 @@ private CFFILE[] GetSpannedFilesArray(string? filename, int folderIndex, bool in return [.. fileList]; } - /// - /// Get stream representing the output file - /// - /// Filename for the file that will be extracted to - /// Path to the output directory - /// Filestream opened for the file - private FileStream GetFileStream(string filename, string outputDirectory) - { - // Ensure directory separators are consistent - if (Path.DirectorySeparatorChar == '\\') - filename = filename.Replace('/', '\\'); - else if (Path.DirectorySeparatorChar == '/') - filename = filename.Replace('\\', '/'); - - // Ensure the full output directory exists - filename = Path.Combine(outputDirectory, filename); - var directoryName = Path.GetDirectoryName(filename); - if (directoryName != null && !Directory.Exists(directoryName)) - Directory.CreateDirectory(directoryName); - - // Open the output file for writing - return File.Open(filename, FileMode.Create, FileAccess.Write, FileShare.None); - } - - /// - /// Read a datablock from a cabinet - /// - /// Offset to be read from - /// True to include debug data, false otherwise - /// Read datablock - private CFDATA? ReadBlock(ref long offset, bool includeDebug) - { - try - { - lock (_dataSourceLock) - { - _dataSource.SeekIfPossible(offset, SeekOrigin.Begin); - var dataBlock = new CFDATA(); - - var dataReservedSize = Header.DataReservedSize; - - dataBlock.Checksum = _dataSource.ReadUInt32LittleEndian(); - dataBlock.CompressedSize = _dataSource.ReadUInt16LittleEndian(); - dataBlock.UncompressedSize = _dataSource.ReadUInt16LittleEndian(); - - if (dataReservedSize > 0) - dataBlock.ReservedData = _dataSource.ReadBytes(dataReservedSize); - - if (dataBlock.CompressedSize > 0) - dataBlock.CompressedData = _dataSource.ReadBytes(dataBlock.CompressedSize); - - offset = _dataSource.Position; - - return dataBlock; - } - } - catch (Exception ex) - { - if (includeDebug) Console.Error.WriteLine(ex); - return null; - } - } - /// /// Extract the contents of a cabinet set /// @@ -305,7 +242,6 @@ private bool ExtractSet(string? cabFilename, string outputDirectory, bool includ { var cabinet = this; var currentCabFilename = cabFilename; - long offset = 0; try { // Loop through the folders @@ -322,184 +258,22 @@ private bool ExtractSet(string? cabFilename, string outputDirectory, bool includ } var folder = cabinet.Folders[f]; - CFFILE[] files = cabinet.GetSpannedFilesArray(currentCabFilename, f, includeDebug); - var file = files[0]; - int bytesLeft = (int)file.FileSize; - int fileCounter = 0; - - // Cache starting position - offset = folder.CabStartOffset; - - var mszip = Decompressor.Create(); - try - { - // Ensure folder contains data - if (folder.DataCount == 0) - return false; - if (folder.CabStartOffset <= 0) - return false; - - // Skip unsupported compression types to avoid opening a blank filestream. This can be altered/removed if these types are ever supported. - var compressionType = GetCompressionType(folder); - if (compressionType == CompressionType.TYPE_QUANTUM || compressionType == CompressionType.TYPE_LZX) - continue; - - //uint quantumWindowBits = (uint)(((ushort)folder.CompressionType >> 8) & 0x1f); - - var fs = GetFileStream(file.Name, outputDirectory); - - var tempCabinet = cabinet; - int j = 0; - - // Loop through the data blocks - // Has to be a while loop instead of a for loop due to cab spanning continue blocks - while (j < folder.DataCount) - { - var dataBlock = tempCabinet.ReadBlock(ref offset, includeDebug); - if (dataBlock == null) - { - if (includeDebug) Console.Error.WriteLine($"Error extracting file {file.Name}"); - break; - } - - // Get the data to be processed - byte[] blockData = dataBlock.CompressedData; - - // If the block is continued, append - bool continuedBlock = false; - if (dataBlock.UncompressedSize == 0) - { - tempCabinet = tempCabinet.Next; - if (tempCabinet == null) - break; // Next cab is missing, continue - - // CompressionType not updated because there's no way it's possible that it can swap on continued blocks - folder = tempCabinet.Folders[0]; - offset = folder.CabStartOffset; - var nextBlock = tempCabinet.ReadBlock(ref offset, includeDebug); - if (nextBlock == null) - { - if (includeDebug) Console.Error.WriteLine($"Error extracting file {file.Name}"); - break; - } - - byte[] nextData = nextBlock.CompressedData; - if (nextData.Length == 0) - continue; - - continuedBlock = true; - blockData = [.. blockData, .. nextData]; - dataBlock.CompressedSize += nextBlock.CompressedSize; - dataBlock.UncompressedSize = nextBlock.UncompressedSize; - } - - // Get the uncompressed data block - byte[] data = compressionType switch - { - CompressionType.TYPE_NONE => blockData, - CompressionType.TYPE_MSZIP => DecompressMSZIPBlock(f, mszip, j, dataBlock, blockData, includeDebug), - - // TODO: Unsupported - CompressionType.TYPE_QUANTUM => [], - CompressionType.TYPE_LZX => [], - - // Should be impossible - _ => [], - }; - - if (bytesLeft > 0 && bytesLeft >= data.Length) - { - fs.Write(data); - bytesLeft -= data.Length; - } - else if (bytesLeft > 0 && bytesLeft < data.Length) - { - int tempBytesLeft = bytesLeft; - fs.Write(data, 0, bytesLeft); - fs.Close(); - - // reached end of folder - if (fileCounter + 1 == files.Length) - break; - - file = files[++fileCounter]; - bytesLeft = (int)file.FileSize; - fs = GetFileStream(file.Name, outputDirectory); - while (bytesLeft < data.Length - tempBytesLeft) - { - fs.Write(data, tempBytesLeft, bytesLeft); - tempBytesLeft += bytesLeft; - fs.Close(); - - // reached end of folder - if (fileCounter + 1 == files.Length) - break; - - file = files[++fileCounter]; - bytesLeft = (int)file.FileSize; - fs = GetFileStream(file.Name, outputDirectory); - } - - fs.Write(data, tempBytesLeft, data.Length - tempBytesLeft); - bytesLeft -= (data.Length - tempBytesLeft); - } - else - { - int tempBytesLeft = bytesLeft; - fs.Close(); - - // reached end of folder - if (fileCounter + 1 == files.Length) - break; - - file = files[++fileCounter]; - bytesLeft = (int)file.FileSize; - fs = GetFileStream(file.Name, outputDirectory); - while (bytesLeft < data.Length - tempBytesLeft) - { - fs.Write(data, tempBytesLeft, bytesLeft); - tempBytesLeft += bytesLeft; - fs.Close(); - - // reached end of folder - if (fileCounter + 1 == files.Length) - break; - - file = files[++fileCounter]; - bytesLeft = (int)file.FileSize; - fs = GetFileStream(file.Name, outputDirectory); - } - - fs.Write(data, tempBytesLeft, data.Length - tempBytesLeft); - bytesLeft -= (data.Length - tempBytesLeft); - } - - // Top if block occurs on http://redump.org/disc/107833/ , middle on https://dbox.tools/titles/pc/57520FA0 , bottom still unobserved - // While loop since this also handles 0 byte files. Example file seen in http://redump.org/disc/93312/ , cab Group17.cab, file TRACKSLOC6DYNTEX_BIN - while (bytesLeft == 0) - { - fs.Close(); - - // reached end of folder - if (fileCounter + 1 == files.Length) - break; - - file = files[++fileCounter]; - bytesLeft = (int)file.FileSize; - fs = GetFileStream(file.Name, outputDirectory); - } - - if (continuedBlock) - j = 0; - - j++; - } - } - catch (Exception ex) - { - if (includeDebug) Console.Error.WriteLine(ex); + var files = cabinet.GetSpannedFilesArray(currentCabFilename, f, includeDebug); + + // Ensure folder contains data + if (folder.DataCount == 0) return false; - } + if (folder.CabStartOffset <= 0) + return false; + + // Skip unsupported compression types to avoid opening a blank filestream. This can be altered/removed if these types are ever supported. + var compressionType = GetCompressionType(folder); + if (compressionType == CompressionType.TYPE_QUANTUM || compressionType == CompressionType.TYPE_LZX) + continue; + + var reader = new Reader(cabinet, folder, files); + + reader.ExtractData(outputDirectory, compressionType, f, includeDebug); } // Move to the next cabinet, if possible @@ -521,7 +295,291 @@ private bool ExtractSet(string? cabFilename, string outputDirectory, bool includ return false; } } + + /// + /// Helper to extract files from a cabinet set + /// + private class Reader + { + #region Private Instance Variables + + /// + /// Current cabinet file being read from + /// + private MicrosoftCabinet _cabinet; + + /// + /// Current folder being read from + /// + private CFFOLDER _folder; + + /// + /// Current array of files to be extracted + /// + private CFFILE[] _files; + + /// + /// Current number of bytes left to write of the current file + /// + private long _bytesLeft; + + /// + /// Current index in the folder of files to extract + /// + private int _fileCounter; + + /// + /// Current offset in the cabinet being read from + /// + private long _offset; + + /// + /// Current output filestream being written to + /// + private FileStream? _fileStream; + + #endregion + + #region Constructors + + public Reader(MicrosoftCabinet cabinet, CFFOLDER folder, CFFILE[] files) + { + _cabinet = cabinet; + _folder = folder; + _files = files; + _bytesLeft = _files[0].FileSize; + _fileCounter = 0; + _offset = folder.CabStartOffset; + _fileStream = null; + } + + #endregion + + /// + /// Get stream representing the output file + /// + /// Filename for the file that will be extracted to + /// Path to the output directory + /// Filestream opened for the file + private FileStream GetFileStream(string filename, string outputDirectory) + { + // Ensure directory separators are consistent + if (Path.DirectorySeparatorChar == '\\') + filename = filename.Replace('/', '\\'); + else if (Path.DirectorySeparatorChar == '/') + filename = filename.Replace('\\', '/'); + + // Ensure the full output directory exists + filename = Path.Combine(outputDirectory, filename); + var directoryName = Path.GetDirectoryName(filename); + if (directoryName != null && !Directory.Exists(directoryName)) + Directory.CreateDirectory(directoryName); + + // Open the output file for writing + return File.Open(filename, FileMode.Create, FileAccess.Write, FileShare.None); + } + + + /// + /// Read a datablock from a cabinet + /// + /// True to include debug data, false otherwise + /// Read datablock + private CFDATA? ReadBlock(bool includeDebug) + { + try + { + lock (_cabinet._dataSourceLock) + { + _cabinet._dataSource.SeekIfPossible(_offset, SeekOrigin.Begin); + var dataBlock = new CFDATA(); + + var dataReservedSize = _cabinet.Header.DataReservedSize; + dataBlock.Checksum = _cabinet._dataSource.ReadUInt32LittleEndian(); + dataBlock.CompressedSize = _cabinet._dataSource.ReadUInt16LittleEndian(); + dataBlock.UncompressedSize = _cabinet._dataSource.ReadUInt16LittleEndian(); + + if (dataReservedSize > 0) + dataBlock.ReservedData = _cabinet._dataSource.ReadBytes(dataReservedSize); + + if (dataBlock.CompressedSize > 0) + dataBlock.CompressedData = _cabinet._dataSource.ReadBytes(dataBlock.CompressedSize); + + _offset = _cabinet._dataSource.Position; + + return dataBlock; + } + } + catch (Exception ex) + { + if (includeDebug) Console.Error.WriteLine(ex); + return null; + } + } + + /// + /// Extract the data from a folder + /// + /// Path to the output directory + /// Type of compression that the folder uses + /// Index of the folder in the cabinet + /// True to include debug data, false otherwise + public void ExtractData(string outputDirectory, CompressionType compressionType, int folderIndex, bool includeDebug) + { + var mszip = Decompressor.Create(); + + string filename = _files[_fileCounter].Name; + try + { + _fileStream = GetFileStream(filename, outputDirectory); + + + // Loop through the data blocks + // Has to be a while loop instead of a for loop due to cab spanning continue blocks + for (int j = 0; j < _folder.DataCount; j++) + { + var dataBlock = ReadBlock(includeDebug); + if (dataBlock == null) + { + if (includeDebug) Console.Error.WriteLine($"Error extracting file {filename}"); + return; + } + + // Get the data to be processed + byte[] blockData = dataBlock.CompressedData; + + // If the block is continued, append + bool continuedBlock = false; + if (dataBlock.UncompressedSize == 0) + { + if (_cabinet.Next == null) + break; // Next cab is missing, continue + + _cabinet = _cabinet.Next; + + // CompressionType not updated because there's no way it's possible that it can swap on continued blocks + _folder = _cabinet.Folders[0]; + _offset = _folder.CabStartOffset; + var nextBlock = ReadBlock(includeDebug); + if (nextBlock == null) + { + if (includeDebug) Console.Error.WriteLine($"Error extracting file {filename}"); + return; + } + + byte[] nextData = nextBlock.CompressedData; + if (nextData.Length == 0) + continue; + + continuedBlock = true; + blockData = [.. blockData, .. nextData]; + dataBlock.CompressedSize += nextBlock.CompressedSize; + dataBlock.UncompressedSize = nextBlock.UncompressedSize; + } + + // Get the uncompressed data block + byte[] data = compressionType switch + { + CompressionType.TYPE_NONE => blockData, + CompressionType.TYPE_MSZIP => DecompressMSZIPBlock(folderIndex, mszip, j, dataBlock, blockData, includeDebug), + + // TODO: Unsupported + CompressionType.TYPE_QUANTUM => [], //uint quantumWindowBits = (uint)(((ushort)folder.CompressionType >> 8) & 0x1f); + CompressionType.TYPE_LZX => [], + + // Should be impossible + _ => [], + }; + + WriteData(data, outputDirectory); + + if (continuedBlock) + j = 0; + } + } + catch (Exception ex) + { + if (includeDebug) Console.Error.WriteLine(ex); + } + } + + /// + /// Write extracted DataBlocks to a file + /// + /// Data to be written to the output file + /// Path to the output directory + private void WriteData(byte[] data, string outputDirectory) + { + // If there are bytes left to write, and more bytes left to write than the length of the current data to be written. + if (_bytesLeft > 0 && _bytesLeft >= data.Length) + { + if (_fileStream == null) + return; + + _fileStream.Write(data); + _bytesLeft -= data.Length; + } + else + { + long tempBytesLeft = _bytesLeft; + if (_fileStream == null) + return; + + // If there are still bytes left to write, but less bytes than the length of the current data to be written + if (_bytesLeft > 0 && _bytesLeft < data.Length) + _fileStream.Write(data, 0, (int)_bytesLeft); + + // Close and iterate file. + if (EndFile(outputDirectory)) + return; + + // While the file still has bytes that need to be written to it, but less bytes than the input data still has to be written. + while (_bytesLeft < data.Length - tempBytesLeft) + { + _fileStream.Write(data, (int)tempBytesLeft, (int)_bytesLeft); + tempBytesLeft += _bytesLeft; + if (EndFile(outputDirectory)) + break; + } + + _fileStream.Write(data, (int)tempBytesLeft, data.Length - (int)tempBytesLeft); + _bytesLeft -= (data.Length - tempBytesLeft); + } + + // Top if block occurs on http://redump.org/disc/107833/ , middle on https://dbox.tools/titles/pc/57520FA0 , bottom still unobserved + // While loop since this also handles 0 byte files. Example file seen in http://redump.org/disc/93312/ , cab Group17.cab, file TRACKSLOC6DYNTEX_BIN + while (_bytesLeft == 0) + { + if (EndFile(outputDirectory)) + break; + } + } + + /// + /// Finish handling a file and progress to the next file as necessary. + /// + /// Path to the output directory + /// True the end of the folder has been reached, false otherwise + private bool EndFile(string outputDirectory) + { + if (_fileStream == null) + return false; + + _fileStream.Close(); + + // reached end of folder + if (_fileCounter + 1 == _files.Length) + return true; + + ++_fileCounter; + _bytesLeft = (int)_files[_fileCounter].FileSize; + _fileStream = GetFileStream(_files[_fileCounter].Name, outputDirectory); + + return false; + } + } + #endregion #region Checksumming