Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import { describe, it, expect } from 'vitest';
import { parseMarkdown } from './parseMarkdown';

// Table-parsing suite for parseMarkdown: covers plain tables, tables whose rows
// are separated by blank lines, empty cells, and where table collection stops.
describe('parseMarkdownBlock - table parsing', () => {
  // Assembles a markdown document from its individual source lines.
  const fromLines = (...lines: string[]): string => lines.join('\n');

  it('parses a standard table without blank lines', () => {
    const source = fromLines(
      '| A | B |',
      '|---|---|',
      '| 1 | 2 |',
    );
    const expected = {
      type: 'table',
      headers: ['A', 'B'],
      rows: [['1', '2']],
    };

    const parsed = parseMarkdown(source);

    expect(parsed).toHaveLength(1);
    expect(parsed[0]).toEqual(expected);
  });

  it('parses a table with blank lines between rows (LLM output)', () => {
    const source = fromLines(
      '| A | B |',
      '',
      '|---|---|',
      '',
      '| 1 | 2 |',
      '',
      '| 3 | 4 |',
    );
    const expected = {
      type: 'table',
      headers: ['A', 'B'],
      rows: [['1', '2'], ['3', '4']],
    };

    // The whole input must collapse into one table rather than four text blocks.
    const tables = parseMarkdown(source).filter(block => block.type === 'table');

    expect(tables).toHaveLength(1);
    expect(tables[0]).toEqual(expected);
  });

  it('preserves empty interior cells (e.g. row header column)', () => {
    const source = fromLines(
      '| | Header1 | Header2 |',
      '|---|---|---|',
      '| Row1 | a | b |',
    );
    const expected = {
      type: 'table',
      headers: ['', 'Header1', 'Header2'],
      rows: [['Row1', 'a', 'b']],
    };

    const parsed = parseMarkdown(source);

    expect(parsed).toHaveLength(1);
    expect(parsed[0]).toEqual(expected);
  });

  it('handles blank lines and empty first cell combined', () => {
    const source = fromLines(
      '### Comparison',
      '',
      '| | Plan A | Plan B |',
      '',
      '|--|----|----|',
      '',
      '| Price | $10/mo | $20/mo |',
      '',
      '| Storage | 5 GB | 50 GB |',
      '',
      '| Support | Email only | 24/7 chat |',
    );

    const tables = parseMarkdown(source).filter(block => block.type === 'table');
    expect(tables).toHaveLength(1);

    const [comparison] = tables;
    if (comparison.type !== 'table') throw new Error('not a table');
    const { headers, rows } = comparison;

    // The blank leading header cell must survive as an empty string.
    expect(headers).toHaveLength(3);
    expect(headers[0]).toBe('');

    expect(rows).toHaveLength(3);
    expect(rows[0][0]).toBe('Price');
  });

  it('stops table collection at non-blank, non-pipe lines', () => {
    const source = fromLines(
      '| A | B |',
      '|---|---|',
      '| 1 | 2 |',
      '',
      'Some text after the table',
    );

    const parsed = parseMarkdown(source);
    const tables = parsed.filter(block => block.type === 'table');
    const texts = parsed.filter(block => block.type === 'text');

    expect(tables).toHaveLength(1);
    expect(texts).toHaveLength(1);
  });
});
Original file line number Diff line number Diff line change
@@ -1,14 +1,30 @@
import type { MarkdownBlock } from "./parseMarkdown";
import { parseMarkdownSpans } from "./parseMarkdownSpans";

/**
 * Splits one pipe-delimited Markdown table row into trimmed cell strings.
 *
 * Only the empty strings produced by an outer leading/trailing pipe are
 * dropped; empty interior cells are preserved so columns stay aligned
 * (e.g. `| | A |` yields `['', 'A']`).
 *
 * A backslash-escaped pipe (`\|`) is treated as literal cell content rather
 * than a column delimiter and is emitted as a bare `|`. Every other backslash
 * sequence is passed through unchanged.
 *
 * @param line - Raw source line of the row; surrounding whitespace is ignored.
 * @returns The row's cells in order, each trimmed. A blank line or a lone
 *   pipe yields an empty array.
 */
function splitTableRow(line: string): string[] {
  const text = line.trim();
  const cells: string[] = [];
  let current = '';

  for (let i = 0; i < text.length; i++) {
    const ch = text.charAt(i);
    if (ch === '\\' && i + 1 < text.length) {
      const next = text.charAt(i + 1);
      // Consume the escaped character together with its backslash so that an
      // escaped backslash followed by a pipe still ends the cell.
      current += next === '|' ? '|' : ch + next;
      i++;
    } else if (ch === '|') {
      cells.push(current.trim());
      current = '';
    } else {
      current += ch;
    }
  }
  cells.push(current.trim());

  // Drop only the artefacts of the outer pipes, never interior empties.
  if (cells[0] === '') cells.shift();
  if (cells.length > 0 && cells[cells.length - 1] === '') cells.pop();
  return cells;
}

function parseTable(lines: string[], startIndex: number): { table: MarkdownBlock | null; nextIndex: number } {
let index = startIndex;
const tableLines: string[] = [];

// Collect consecutive lines that contain pipe characters to identify potential table rows
while (index < lines.length && lines[index].includes('|')) {
tableLines.push(lines[index]);
index++;
// Collect consecutive lines that contain pipe characters, skipping blank lines
// that LLMs often insert between table rows
while (index < lines.length) {
if (lines[index].includes('|')) {
tableLines.push(lines[index]);
index++;
} else if (lines[index].trim() === '') {
index++;
} else {
break;
}
}

if (tableLines.length < 2) {
Expand All @@ -23,31 +39,18 @@ function parseTable(lines: string[], startIndex: number): { table: MarkdownBlock
return { table: null, nextIndex: startIndex };
}

// Extract header cells from the first line, filtering out empty cells that may result from leading/trailing pipes
const headerLine = tableLines[0].trim();
const headers = headerLine
.split('|')
.map(cell => cell.trim())
.filter(cell => cell.length > 0);
const headers = splitTableRow(tableLines[0]);

if (headers.length === 0) {
return { table: null, nextIndex: startIndex };
}

// Extract data rows from remaining lines (skipping the separator line), preserving valid cell content
// Extract data rows from remaining lines (skipping the separator line)
const rows: string[][] = [];
for (let i = 2; i < tableLines.length; i++) {
const rowLine = tableLines[i].trim();
if (rowLine.startsWith('|')) {
const rowCells = rowLine
.split('|')
.map(cell => cell.trim())
.filter(cell => cell.length > 0);

// Include rows that contain actual content, filtering out empty rows
if (rowCells.length > 0) {
rows.push(rowCells);
}
const rowCells = splitTableRow(tableLines[i]);
if (rowCells.length > 0) {
rows.push(rowCells);
}
}

Expand Down