diff --git a/mfr/extensions/tabular/render.py b/mfr/extensions/tabular/render.py index 371755a3..a77f82c7 100644 --- a/mfr/extensions/tabular/render.py +++ b/mfr/extensions/tabular/render.py @@ -11,6 +11,7 @@ logger = logging.getLogger(__name__) +BINARY_EXCEL_EXTS = {'.xls', '.xlsx'} class TabularRenderer(extension.BaseRenderer): @@ -30,8 +31,14 @@ def render(self): extension=self.metadata.ext, ) - with open(self.file_path, errors='replace') as fp: - sheets, size, nbr_rows, nbr_cols = self._render_grid(fp, self.metadata.ext) + ext = (self.metadata.ext or '').lower() + if ext in BINARY_EXCEL_EXTS: + open_kwargs = {'mode': 'rb'} + else: + open_kwargs = {'errors': 'replace'} + + with open(self.file_path, **open_kwargs) as fp: + sheets, size, nbr_rows, nbr_cols = self._render_grid(fp, ext) # Force GC gc.collect() diff --git a/mfr/extensions/tabular/utilities.py b/mfr/extensions/tabular/utilities.py index 35ca5082..840d8882 100644 --- a/mfr/extensions/tabular/utilities.py +++ b/mfr/extensions/tabular/utilities.py @@ -90,41 +90,28 @@ def sav_to_csv(fp): def to_bytes(fp): """ - Return *exactly* the original bytes of the Excel file and rewind *fp*. - Handles both binary and text wrappers that WaterButler may give us. + Return exactly the original bytes and rewind fp. + Requires a binary file-like object or a bytes object. """ - try: - fp.seek(0) - except Exception: - pass - - raw = fp.read() - if isinstance(raw, bytes): - try: - fp.seek(0) - except Exception: - pass - return raw + if isinstance(fp, (bytes, bytearray, memoryview)): + return bytes(fp) - if hasattr(fp, "buffer"): - buf = fp.buffer + if hasattr(fp, "read"): try: - buf.seek(0) + if hasattr(fp, "seek"): + fp.seek(0) except Exception: pass - data = buf.read() + raw = fp.read() try: - buf.seek(0) + if hasattr(fp, "seek"): + fp.seek(0) except Exception: pass - else: - data = raw.encode("utf-8", "surrogateescape") + if isinstance(raw, (bytes, bytearray, memoryview)): + return bytes(raw) - try: - fp.seek(0) - except Exception: - pass - return data + raise TypeError("Expected binary file-like object; got text/str") def _extract_rows(fields, raw_rows):