def stream_enumerator(io, buffer_size=1048576, *args, **kwargs):
    """Return an enumerator that reads an archive from a Python stream.

    libarchive pulls its input through ctypes callbacks that wrap `io`. Every
    read fills the same reusable buffer of `buffer_size` bytes.

    :param io: Binary stream providing `readinto()`, `seek()` and `close()`.
               It is closed when libarchive invokes the close callback.
    :param buffer_size: Size in bytes of the buffer handed to libarchive.
    :param args: Extra positional arguments forwarded to `_enumerator`.
    :param kwargs: Extra keyword arguments forwarded to `_enumerator`;
                   `entry_cls` defaults to `_ArchiveEntryItReadable`.
    :return: The result of `_enumerator` for the stream-backed opener.
    """

    buf = ctypes.create_string_buffer(buffer_size)

    # The callbacks below are invoked from C. An exception cannot propagate
    # through that boundary, so every failure (not only I/O errors) is logged
    # and reported to libarchive as ARCHIVE_FATAL instead of being raised.

    def archive_read(archive, client_data, out):
        try:
            bytes_read = io.readinto(buf)

            if bytes_read is None:
                # A non-blocking stream with nothing available returns None,
                # which cannot be converted to the callback's ssize_t result.
                _LOGGER.debug("Stream read returned no data (None).")
                return libarchive.constants.archive.ARCHIVE_FATAL

            # Point libarchive's buffer pointer at our reusable buffer.
            out.contents.contents = buf
            return bytes_read
        except Exception as e:
            _LOGGER.debug("Exception during stream read: %s", e)
            return libarchive.constants.archive.ARCHIVE_FATAL

    def archive_seek(archive, client_data, offset, whence):
        try:
            return io.seek(offset, whence)
        except Exception as e:
            _LOGGER.debug("Exception during stream seek: %s", e)
            return libarchive.constants.archive.ARCHIVE_FATAL

    def archive_close(archive, client_data):
        try:
            io.close()
            return libarchive.constants.archive.ARCHIVE_OK
        except Exception as e:
            _LOGGER.debug("Exception during stream close: %s", e)
            return libarchive.constants.archive.ARCHIVE_FATAL

    read_func = libarchive.calls.archive_read.c_archive_read_func(archive_read)
    seek_func = libarchive.calls.archive_read.c_archive_seek_func(archive_seek)
    close_func = libarchive.calls.archive_read.c_archive_close_func(archive_close)

    def opener(archive_res):
        # `opener` closes over the ctypes callback objects, so they remain
        # referenced for as long as `opener` itself is.
        libarchive.calls.archive_read.c_archive_read_set_seek_callback(
            archive_res, seek_func)
        libarchive.calls.archive_read.c_archive_read_open(
            archive_res, None, None, read_func, close_func)

    kwargs.setdefault('entry_cls', _ArchiveEntryItReadable)

    return _enumerator(opener, *args, **kwargs)
# C prototypes of the callbacks libarchive invokes on a caller-supplied data
# source. Each receives the archive handle and the client-data pointer first.
c_archive_read_func = CFUNCTYPE(
    c_ssize_t, c_void_p, c_void_p, POINTER(POINTER(c_char)))
c_archive_seek_func = CFUNCTYPE(
    c_longlong, c_void_p, c_void_p, c_longlong, c_int)
c_archive_close_func = CFUNCTYPE(c_int, c_void_p, c_void_p)

# The third argument (the open callback) is typed as a plain pointer so that
# None can be passed when no open callback is needed.
# The status goes through _check_zero_success, exactly as for
# archive_read_open_filename, instead of being returned as a raw int that the
# caller has to remember to inspect.
c_archive_read_open = libarchive.archive_read_open
c_archive_read_open.argtypes = [
    c_void_p,
    c_void_p,
    c_void_p,
    c_archive_read_func,
    c_archive_close_func,
]
c_archive_read_open.restype = _check_zero_success

c_archive_read_set_read_callback = libarchive.archive_read_set_read_callback
c_archive_read_set_read_callback.argtypes = [c_void_p, c_archive_read_func]
c_archive_read_set_read_callback.restype = _check_zero_success

c_archive_read_set_seek_callback = libarchive.archive_read_set_seek_callback
c_archive_read_set_seek_callback.argtypes = [c_void_p, c_archive_seek_func]
c_archive_read_set_seek_callback.restype = _check_zero_success

c_archive_read_set_close_callback = libarchive.archive_read_set_close_callback
c_archive_read_set_close_callback.argtypes = [c_void_p, c_archive_close_func]
c_archive_read_set_close_callback.restype = _check_zero_success
def test_read_from_stream(self):
    """Entries enumerated from an open stream match the input files."""

    with libarchive.test_support.test_files() as (infiles, archivepath):
        with open(archivepath, 'rb') as io_in:
            with libarchive.adapters.archive_read.stream_enumerator(io_in) as e:
                # Materialize every entry's blocks while the archive is open.
                entries = {
                    entry.pathname: (entry.filetype, list(entry.get_blocks()))
                    for entry in e
                }

        for path in infiles:
            # At some point during compression, the root separator is
            # stripped from absolute paths
            archived_path = path.lstrip('/')
            self.assertIn(archived_path, entries)

            filetype, blocks = entries[archived_path]
            filedata = b''.join(blocks)

            if filetype.IFLNK:
                self.assertEqual(filedata, bytes())
            else:
                with open(path, 'rb') as expected:
                    self.assertEqual(filedata, expected.read())