Skip to content

Commit 231f5be

Browse files
authored
[ENG-8489] Enforce MAX_SIZE on .xls rendering just as it was on .xlsx (#397)
1 parent 66b1859 commit 231f5be

File tree

4 files changed: 57 additions (+57) and 33 deletions (-33).

mfr/extensions/tabular/libs/xlrd_tools.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,18 +13,16 @@
1313

1414
def xls(fp):
1515
"""
16-
• .xls → xlrd
17-
• .xlsx → openpyxl (xlrd ≥2.0 dropped xlsx support)
18-
19-
`fp` is the stream returned by WaterButler/MFR. It may already have been
20-
read, so we always rewind and copy to an in‑memory buffer that openpyxl (and
21-
ZipFile) can seek inside safely.
16+
.xls → xlrd; supports truncation and optional meta collection.
2217
"""
2318
sheets = OrderedDict()
2419
wb = xlrd.open_workbook(file_contents=to_bytes(fp))
2520
return parse_xls(wb, sheets)
2621

2722
def xlsx(fp):
23+
"""
24+
.xlsx → openpyxl; supports truncation and optional meta collection.
25+
"""
2826
sheets = OrderedDict()
2927
try:
3028
wb = load_workbook(BytesIO(to_bytes(fp)), data_only=True, read_only=True)

mfr/extensions/tabular/render.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ def render(self):
4343
height=settings.TABLE_HEIGHT,
4444
sheets=json.dumps(sheets),
4545
options=json.dumps(size),
46+
max_size=settings.MAX_SIZE,
4647
)
4748

4849
assert nbr_rows and nbr_cols

mfr/extensions/tabular/templates/viewer.mako

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,11 @@
44
<link rel="stylesheet" href="${base}/css/bootstrap.min.css">
55

66
<div id="mfrViewer" style="min-height: ${height}px;">
7+
<div id="mfr-tabular-notice" class="alert alert-warning" style="display:none; font-size: 1.05em;"></div>
78
<div class="scroller scroller-left"><i class="glyphicon glyphicon-chevron-left"></i></div>
89
<div class="scroller scroller-right"><i class="glyphicon glyphicon-chevron-right"></i></div>
910
<nav class="wrapper">
10-
<ul id="tabular-tabs" class="nav nav-tabs list" style="height: 45px; overflow: auto; white-space: nowrap;">
11+
<ul id="tabular-tabs" class="nav nav-tabs list" style="height: 45px; overflow: auto; white-space: nowrap;">
1112
</ul>
1213
</nav>
1314
<div id="inlineFilterPanel" style="background:#dddddd;padding:3px;color:black;">
@@ -30,6 +31,19 @@
3031
var grid;
3132
var data;
3233
var searchString = "";
34+
var MAX_ROWS_LIMIT = ${max_size};
35+
36+
function refreshNotice() {
37+
var $n = $("#mfr-tabular-notice");
38+
if (!grid) { $n.hide().text(""); return; }
39+
var rendered = grid.getDataLength ? grid.getDataLength() : (data ? data.length : 0);
40+
if (rendered >= MAX_ROWS_LIMIT) {
41+
$n.text("Table exceeds the max size limit — rendered first " + rendered + " rows.");
42+
$n.show();
43+
} else {
44+
$n.hide().text("");
45+
}
46+
}
3347
3448
for (var sheetName in sheets){
3549
var sheet = sheets[sheetName];
@@ -48,11 +62,13 @@
4862
$("#txtSearch").value = "";
4963
data = grid.getData();
5064
grid.onSort.subscribe(sortData);
65+
refreshNotice();
5166
});
5267
}
5368
5469
$("#tabular-tabs").tab();
5570
$("#tabular-tabs a:first").click();
71+
setTimeout(refreshNotice, 0);
5672
5773
$("#txtSearch").keyup(function (e) {
5874
// clear on Esc
@@ -133,7 +149,7 @@
133149
grid.invalidate();
134150
grid.render();
135151
}
136-
152+
137153
function reAdjust(){
138154
liFirstPosLeft = $('.list li:first').position().left;
139155
liLastPosRight = $('.list li:last').position().left + $('.list li:last').width();

mfr/extensions/tabular/utilities.py

Lines changed: 34 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,12 @@
88
from tempfile import NamedTemporaryFile
99

1010
from mfr.extensions.tabular import compat
11-
from mfr.core.exceptions import SubprocessError, TooBigToRenderError, CorruptedError
11+
from mfr.core.exceptions import SubprocessError, CorruptedError
1212
from mfr.extensions.tabular.settings import (PSPP_CONVERT_BIN,
13-
PSPP_CONVERT_TIMEOUT)
13+
PSPP_CONVERT_TIMEOUT,
14+
MAX_SIZE)
1415

1516

16-
MAX_SIZE = 10_000
17-
1817
def header_population(headers):
1918
"""make column headers from a list
2019
:param headers: list of column headers
@@ -128,42 +127,52 @@ def to_bytes(fp):
128127
return data
129128

130129

130+
def _extract_rows(fields, raw_rows):
131+
rows = []
132+
for row in raw_rows:
133+
if len(rows) >= MAX_SIZE:
134+
break
135+
rows.append(dict(zip(fields, row)))
136+
return rows
137+
138+
131139
def parse_xls(wb, sheets):
132140
for sheet in wb.sheets():
133-
verify_size(sheet.nrows, sheet.ncols, '.xls')
134-
fields = fix_headers(sheet.row_values(0))
135-
rows = [
136-
dict(zip(fields, row_vals(sheet.row(r), wb.datemode)))
141+
if getattr(sheet, 'nrows', None) is None or getattr(sheet, 'ncols', None) is None:
142+
raise CorruptedError
143+
144+
ncols = sheet.ncols
145+
max_cols = min(ncols, MAX_SIZE)
146+
fields = fix_headers(sheet.row_values(0)[:max_cols])
147+
raw_rows = (
148+
row_vals(sheet.row(r)[:max_cols], wb.datemode)
137149
for r in range(1, sheet.nrows)
138-
]
150+
)
151+
rows = _extract_rows(fields, raw_rows)
139152
sheets[sheet.name] = (header_population(fields), rows)
140153
return sheets
141154

142155

143156
def parse_xlsx(wb, sheets):
144157
for name in wb.sheetnames:
145158
ws = wb[name]
159+
160+
if getattr(ws, 'max_row', None) is None or getattr(ws, 'max_column', None) is None:
161+
raise CorruptedError
162+
163+
ncols = getattr(ws, "max_column", 0)
164+
max_cols = min(ncols, MAX_SIZE)
146165
header_row = next(ws.iter_rows(max_row=1, values_only=True), [])
147-
fields = fix_headers(header_row)
148-
rows = [
149-
dict(zip(fields, row))
150-
for row in ws.iter_rows(min_row=2,
151-
max_row=MAX_SIZE,
152-
max_col=MAX_SIZE,
153-
values_only=True)
154-
]
166+
fields = fix_headers(list(header_row)[:max_cols])
167+
raw_rows = (
168+
row[:max_cols] if row else []
169+
for row in ws.iter_rows(min_row=2, values_only=True)
170+
)
171+
rows = _extract_rows(fields, raw_rows)
155172
sheets[name] = (header_population(fields), rows)
156173
return sheets
157174

158175

159-
def verify_size(rows, cols, ext):
160-
if rows is None or cols is None:
161-
raise CorruptedError
162-
if rows > MAX_SIZE or cols > MAX_SIZE:
163-
raise TooBigToRenderError('Table is too large to render.', ext,
164-
nbr_cols=cols, nbr_rows=rows)
165-
166-
167176
def fix_headers(raw):
168177
return [str(v) if v not in (None, '') else f'Unnamed: {i + 1}' for i, v in enumerate(raw)]
169178

0 commit comments

Comments
 (0)