Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions src/tablib/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -804,6 +804,33 @@ def wipe(self):
def subset(self, rows=None, cols=None):
"""Returns a new instance of the :class:`Dataset`,
including only specified rows and columns.

Note: Headers must be set on the Dataset for this method to work properly.

:param rows: (optional) A list of row indices to include. If None, all rows are included.
Example: [0, 2, 4] includes rows at indices 0, 2, and 4.
:param cols: (optional) A list of column headers to include. If None, all columns are included.
Example: ['Name', 'Age'] includes only the 'Name' and 'Age' columns.

:returns: A new :class:`Dataset` containing only the specified rows and columns.

:raises KeyError: If a specified column header is not found in the Dataset.

Usage example::

import tablib

data = tablib.Dataset()
data.headers = ['Name', 'Age', 'City']
data.append(['Alice', 25, 'New York'])
data.append(['Bob', 30, 'Los Angeles'])
data.append(['Charlie', 35, 'Chicago'])

# Get subset with specific rows and columns
subset = data.subset(rows=[0, 2], cols=['Name', 'City'])
print(subset.headers) # ['Name', 'City']
print(subset[0]) # ['Alice', 'New York']
print(subset[1]) # ['Charlie', 'Chicago']
"""

# Don't return if no data
Expand Down
2 changes: 2 additions & 0 deletions src/tablib/formats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from ._csv import CSVFormat
from ._json import JSONFormat
from ._tsv import TSVFormat
from ._txt import TextFormat

uninstalled_format_messages = {
"cli": {"package_name": "tabulate package", "extras_name": "cli"},
Expand Down Expand Up @@ -95,6 +96,7 @@ def register_builtins(self):
self.register('yaml', 'tablib.formats._yaml.YAMLFormat')
self.register('csv', CSVFormat())
self.register('tsv', TSVFormat())
self.register('text', TextFormat())
if find_spec('odf'):
self.register('ods', 'tablib.formats._ods.ODSFormat')
self.register('dbf', 'tablib.formats._dbf.DBFFormat')
Expand Down
98 changes: 98 additions & 0 deletions src/tablib/formats/_txt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
""" Tablib - Plain Text / Terminal Support.
"""

from io import StringIO


class TextFormat:
    """Export-only plain-text table format.

    A dataset is rendered as fixed-width columns: one header line, one line
    of dashes, then one line per data row. Cells are converted with ``str``,
    cut to the column width when too long, and left-aligned.
    """

    title = 'text'
    extensions = ('txt',)

    # Target line width, in characters, when ``max_width`` is not supplied.
    DEFAULT_MAX_WIDTH = 80
    # Spaces written before the first column and between adjacent columns.
    DEFAULT_PADDING = 2
    # A column is never made narrower than this, even when truncating.
    _MIN_COLUMN_WIDTH = 3

    @staticmethod
    def _format_row(values, col_widths, padding):
        """Return one unstripped table line for ``values``.

        Each value is stringified, cut to its column width and left-justified.
        ``zip`` stops at the shorter of ``values`` / ``col_widths``, so extra
        cells (or extra columns) are silently dropped.
        """
        gap = ' ' * padding
        return gap + gap.join(
            str(value)[:width].ljust(width)
            for value, width in zip(values, col_widths)
        )

    @classmethod
    def export_stream_set(cls, dataset, **kwargs):
        """Return the plain-text rendering of ``dataset`` as a ``StringIO``.

        :param dataset: object exposing ``headers`` and iterable over rows.
        :param max_width: (keyword, optional) target total line width;
            defaults to ``DEFAULT_MAX_WIDTH``.
        :param padding: (keyword, optional) number of spaces before the first
            column and between columns; defaults to ``DEFAULT_PADDING``.
        :returns: a ``StringIO`` positioned at offset 0. It is empty when the
            dataset has no headers, even if it holds rows.
        """
        stream = StringIO()
        if not dataset.headers:
            return stream

        max_width = kwargs.get('max_width', cls.DEFAULT_MAX_WIDTH)
        padding = kwargs.get('padding', cls.DEFAULT_PADDING)
        col_widths = cls._calculate_column_widths(dataset, max_width, padding)

        header_row = cls._format_row(dataset.headers, col_widths, padding)
        # The rule is sized from the unstripped header so it spans the full
        # table, but is clamped in case minimum widths overflow ``max_width``.
        lines = [header_row.rstrip(), '-' * min(len(header_row), max_width)]
        lines.extend(
            cls._format_row(row, col_widths, padding).rstrip()
            for row in dataset
        )

        stream.write('\n'.join(lines) + '\n')
        stream.seek(0)
        return stream

    @classmethod
    def export_set(cls, dataset, **kwargs):
        """Return the plain-text rendering of ``dataset`` as a string.

        Accepts the same keyword arguments as :meth:`export_stream_set`.
        """
        return cls.export_stream_set(dataset, **kwargs).getvalue()

    @classmethod
    def _calculate_column_widths(cls, dataset, max_width, padding):
        """Return a list with one width per header.

        Each column starts from its natural width: the longest of its header,
        its cells and ``_MIN_COLUMN_WIDTH``. If all columns fit in
        ``max_width`` the spare room is shared out evenly; otherwise widths
        are shrunk proportionally, never below ``_MIN_COLUMN_WIDTH`` (so very
        narrow ``max_width`` values can still be exceeded).

        :returns: list of ints, or ``[]`` when the dataset has no headers.
        """
        headers = dataset.headers
        if not headers:
            return []

        # Read the rows once instead of re-iterating the dataset per column.
        rows = list(dataset)
        floor = cls._MIN_COLUMN_WIDTH
        natural = [
            max(
                len(str(header)),
                max(
                    (len(str(row[i])) for row in rows if i < len(row)),
                    default=0,
                ),
                floor,
            )
            for i, header in enumerate(headers)
        ]

        num_cols = len(headers)
        # A rendered line is the leading padding plus (num_cols - 1) gaps,
        # i.e. exactly ``num_cols`` runs of padding. Subtract that once; the
        # remainder is what the cell text itself may occupy.
        content_width = max_width - padding * num_cols
        natural_total = sum(natural)

        if natural_total <= content_width:
            bonus = (content_width - natural_total) // num_cols
            return [width + bonus for width in natural]

        # Integer arithmetic keeps the proportional split exact (no float
        # rounding that could cost a character).
        return [
            max(floor, width * content_width // natural_total)
            for width in natural
        ]

    @classmethod
    def detect(cls, stream):
        """Return True if the first 1024 units of ``stream`` look like text.

        The stream is rewound to offset 0 after reading. ``str`` content is
        always accepted; ``bytes`` content is accepted when it decodes as
        UTF-8. Any error while reading, seeking or decoding yields False.

        NOTE(review): this accepts virtually any textual input, and the class
        defines no ``import_set`` — confirm how the format-detection caller
        treats a positive result from an export-only format.
        """
        try:
            content = stream.read(1024)
            stream.seek(0)
            if isinstance(content, bytes):
                content.decode('utf-8')
            return True
        except Exception:
            # Deliberate best-effort: detection must never raise.
            return False
Loading