From 9632f967239b55b9b2dcaf168b1dc39528a746cf Mon Sep 17 00:00:00 2001 From: Rami Date: Thu, 5 Feb 2026 00:45:48 -0600 Subject: [PATCH 1/2] docs: Improve subset() method documentation - Add detailed parameter descriptions - Add usage example - Document requirement for headers to be set - Document return value and exceptions Fixes #366 --- src/tablib/core.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/tablib/core.py b/src/tablib/core.py index 3e13b1f5..c97de0a2 100644 --- a/src/tablib/core.py +++ b/src/tablib/core.py @@ -804,6 +804,33 @@ def wipe(self): def subset(self, rows=None, cols=None): """Returns a new instance of the :class:`Dataset`, including only specified rows and columns. + + Note: Headers must be set on the Dataset for this method to work properly. + + :param rows: (optional) A list of row indices to include. If None, all rows are included. + Example: [0, 2, 4] includes rows at indices 0, 2, and 4. + :param cols: (optional) A list of column headers to include. If None, all columns are included. + Example: ['Name', 'Age'] includes only the 'Name' and 'Age' columns. + + :returns: A new :class:`Dataset` containing only the specified rows and columns. + + :raises KeyError: If a specified column header is not found in the Dataset. + + Usage example:: + + import tablib + + data = tablib.Dataset() + data.headers = ['Name', 'Age', 'City'] + data.append(['Alice', 25, 'New York']) + data.append(['Bob', 30, 'Los Angeles']) + data.append(['Charlie', 35, 'Chicago']) + + # Get subset with specific rows and columns + subset = data.subset(rows=[0, 2], cols=['Name', 'City']) + print(subset.headers) # ['Name', 'City'] + print(subset[0]) # ['Alice', 'New York'] + print(subset[1]) # ['Charlie', 'Chicago'] """ # Don't return if no data From c264ad67eca8ab3f8da9824cc97f0c7409aa6ad5 Mon Sep 17 00:00:00 2001 From: Rami Date: Thu, 5 Feb 2026 00:47:55 -0600 Subject: [PATCH 2/2] feat: Add plain text / terminal formatter Add a new text formatter that exports datasets as space-padded plain text, suitable for terminal display. Features: - Column widths are automatically calculated based on content - Configurable max_width and padding options - Headers are displayed with a separator line - Data is truncated gracefully if it exceeds max_width Usage: data.export('text') data.export('text', max_width=120, padding=2) Fixes #317 --- src/tablib/formats/__init__.py | 2 + src/tablib/formats/_txt.py | 98 ++++++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+) create mode 100644 src/tablib/formats/_txt.py diff --git a/src/tablib/formats/__init__.py b/src/tablib/formats/__init__.py index ebcc64f6..1b8aad5c 100644 --- a/src/tablib/formats/__init__.py +++ b/src/tablib/formats/__init__.py @@ -9,6 +9,7 @@ from ._csv import CSVFormat from ._json import JSONFormat from ._tsv import TSVFormat +from ._txt import TextFormat uninstalled_format_messages = { "cli": {"package_name": "tabulate package", "extras_name": "cli"}, @@ -95,6 +96,7 @@ def register_builtins(self): self.register('yaml', 'tablib.formats._yaml.YAMLFormat') self.register('csv', CSVFormat()) self.register('tsv', TSVFormat()) + self.register('text', TextFormat()) if find_spec('odf'): self.register('ods', 'tablib.formats._ods.ODSFormat') self.register('dbf', 'tablib.formats._dbf.DBFFormat') diff --git a/src/tablib/formats/_txt.py b/src/tablib/formats/_txt.py new file mode 100644 index 00000000..673240af --- /dev/null +++ b/src/tablib/formats/_txt.py @@ -0,0 +1,98 @@ +""" Tablib - Plain Text / Terminal Support. +""" + +from io import StringIO + + +class TextFormat: + title = 'text' + extensions = ('txt',) + + DEFAULT_MAX_WIDTH = 80 + DEFAULT_PADDING = 2 + + @classmethod + def export_stream_set(cls, dataset, **kwargs): + """Returns plain text representation of Dataset as file-like.""" + stream = StringIO() + + max_width = kwargs.get('max_width', cls.DEFAULT_MAX_WIDTH) + padding = kwargs.get('padding', cls.DEFAULT_PADDING) + + if not dataset.headers: + return stream + + # Calculate column widths + col_widths = cls._calculate_column_widths(dataset, max_width, padding) + + # Write headers + header_row = ' ' * padding + (' ' * padding).join( + str(h)[:w].ljust(w) for h, w in zip(dataset.headers, col_widths) + ) + stream.write(header_row.rstrip() + '\n') + + # Write separator + separator = '-' * min(len(header_row), max_width) + stream.write(separator + '\n') + + # Write data rows + for row in dataset: + data_row = ' ' * padding + (' ' * padding).join( + str(cell)[:w].ljust(w) for cell, w in zip(row, col_widths) + ) + stream.write(data_row.rstrip() + '\n') + + stream.seek(0) + return stream + + @classmethod + def export_set(cls, dataset, **kwargs): + """Returns plain text representation of Dataset.""" + stream = cls.export_stream_set(dataset, **kwargs) + return stream.getvalue() + + @classmethod + def _calculate_column_widths(cls, dataset, max_width, padding): + """Calculate optimal column widths.""" + if not dataset.headers: + return [] + + num_cols = len(dataset.headers) + available_width = max_width - padding # Account for initial padding + + # Calculate minimum width needed for each column + min_widths = [] + for i, header in enumerate(dataset.headers): + header_len = len(str(header)) + # Find max data width in this column + data_max = max( + (len(str(row[i])) for row in dataset if i < len(row)), + default=0 + ) + min_widths.append(max(header_len, data_max, 3)) # Minimum 3 chars + + total_padding = padding * num_cols + total_min = sum(min_widths) + total_padding + + if total_min <= available_width: + # We have space, distribute extra evenly + extra = available_width - total_min + per_col = extra // num_cols + return [w + per_col for w in min_widths] + else: + # Need to truncate - proportional distribution + scale = (available_width - total_padding) / sum(min_widths) + return [max(3, int(w * scale)) for w in min_widths] + + @classmethod + def detect(cls, stream): + """Returns True if given stream is valid plain text.""" + try: + content = stream.read(1024) + stream.seek(0) + # Simple detection - if it decodes as text, we accept it + if isinstance(content, bytes): + content.decode('utf-8') + return True + except Exception: + return False