Skip to content

Commit b14e9e2

Browse files
committed
Add normalizeHtmlIndentation prop to prevent indented HTML tags from being treated as code blocks
1 parent 57dec2a commit b14e9e2

File tree

5 files changed

+428
-10
lines changed

5 files changed

+428
-10
lines changed

.changeset/chatty-teeth-switch.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22
"streamdown": patch
33
---
44

5-
Restores pan-zoom component to normal size when mermaid component is maximized
5+
Add `normalizeHtmlIndentation` prop and utility function to prevent indented HTML tags from being treated as code blocks. This is useful when rendering AI-generated HTML content with nested tags that are indented for readability.

apps/website/content/docs/configuration.mdx

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,12 @@ Streamdown can be configured to suit your needs. This guide will walk you throug
1818
type: 'boolean',
1919
default: 'true',
2020
},
21+
normalizeHtmlIndentation: {
22+
description:
23+
'Normalize indentation in HTML blocks to prevent 4+ space indents from being treated as code blocks',
24+
type: 'boolean',
25+
default: 'false',
26+
},
2127
isAnimating: {
2228
description: 'Indicates if content is currently streaming (disables copy buttons)',
2329
type: 'boolean',
Lines changed: 293 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,293 @@
1+
import { render } from "@testing-library/react";
2+
import { describe, expect, it } from "vitest";
3+
import { normalizeHtmlIndentation, Streamdown } from "../index";
4+
5+
describe("normalizeHtmlIndentation utility function", () => {
6+
it("should return empty string unchanged", () => {
7+
expect(normalizeHtmlIndentation("")).toBe("");
8+
});
9+
10+
it("should return non-HTML content unchanged", () => {
11+
const markdown = "# Hello World\n\nThis is a paragraph.";
12+
expect(normalizeHtmlIndentation(markdown)).toBe(markdown);
13+
});
14+
15+
it("should return indented code blocks unchanged when not starting with HTML", () => {
16+
const codeBlock = " const x = 1;\n const y = 2;";
17+
expect(normalizeHtmlIndentation(codeBlock)).toBe(codeBlock);
18+
});
19+
20+
it("should normalize indented HTML tags within HTML blocks", () => {
21+
const input = `<div>
22+
<span>Hello</span>
23+
</div>`;
24+
const expected = `<div>
25+
<span>Hello</span>
26+
</div>`;
27+
expect(normalizeHtmlIndentation(input)).toBe(expected);
28+
});
29+
30+
it("should handle deeply nested HTML with various indentation levels", () => {
31+
const input = `<div class="wrapper">
32+
<div class="inner">
33+
<h4>Title</h4>
34+
<ul>
35+
<li>Item 1</li>
36+
<li>Item 2</li>
37+
</ul>
38+
</div>
39+
40+
<div class="another">
41+
<h4>Another Title</h4>
42+
</div>
43+
</div>`;
44+
45+
const result = normalizeHtmlIndentation(input);
46+
47+
// No opening HTML tag should start a line with 4+ spaces of indentation
48+
expect(result).not.toMatch(/\n {4,}<\w/);
49+
// Content should still be preserved
50+
expect(result).toContain("Title");
51+
expect(result).toContain("Another Title");
52+
expect(result).toContain("Item 1");
53+
});
54+
55+
it("should preserve text content indentation inside pre tags", () => {
56+
const input = `<pre>
57+
code with spaces
58+
</pre>`;
59+
// Only HTML tags get dedented, not text content
60+
const result = normalizeHtmlIndentation(input);
61+
expect(result).toContain(" code with spaces");
62+
});
63+
64+
it("should handle HTML starting with whitespace", () => {
65+
const input = " <div>content</div>";
66+
expect(normalizeHtmlIndentation(input)).toBe(" <div>content</div>");
67+
});
68+
69+
it("should handle self-closing tags", () => {
70+
const input = `<div>
71+
<img src="test.jpg" />
72+
<br />
73+
</div>`;
74+
const result = normalizeHtmlIndentation(input);
75+
expect(result).toContain('<img src="test.jpg" />');
76+
expect(result).toContain("<br />");
77+
});
78+
79+
it("should handle HTML comments", () => {
80+
const input = `<div>
81+
<!-- comment -->
82+
<span>text</span>
83+
</div>`;
84+
const result = normalizeHtmlIndentation(input);
85+
expect(result).toContain("<!-- comment -->");
86+
});
87+
88+
it("should handle doctype declarations", () => {
89+
const input = `<!DOCTYPE html>
90+
<html>
91+
<body>
92+
</body>
93+
</html>`;
94+
const result = normalizeHtmlIndentation(input);
95+
expect(result).toContain("<!DOCTYPE html>");
96+
expect(result).toContain("<html>");
97+
});
98+
99+
it("should not affect markdown code fences", () => {
100+
// This starts with markdown, not HTML, so should be unchanged
101+
const input = "```html\n <div>code</div>\n```";
102+
expect(normalizeHtmlIndentation(input)).toBe(input);
103+
});
104+
105+
it("should handle mixed content after HTML", () => {
106+
const input = `<div>
107+
<p>paragraph</p>
108+
</div>
109+
110+
Some text after.`;
111+
const result = normalizeHtmlIndentation(input);
112+
expect(result).toContain("<p>paragraph</p>");
113+
expect(result).toContain("Some text after.");
114+
});
115+
});
116+
117+
describe("Streamdown with normalizeHtmlIndentation prop", () => {
118+
it("should render indented HTML as code block when normalizeHtmlIndentation is false (default)", () => {
119+
const content = `<div class="wrapper">
120+
<div class="inner">
121+
<h4>Title</h4>
122+
</div>
123+
124+
<div class="another">
125+
<h4>Another Title</h4>
126+
</div>
127+
</div>`;
128+
129+
const { container } = render(<Streamdown>{content}</Streamdown>);
130+
131+
// Without normalization, the second div may be treated as code
132+
// due to 4-space indentation after a blank line
133+
const headings = container.querySelectorAll("h4");
134+
// May find only one heading if the second block is rendered as code
135+
expect(headings.length).toBeLessThanOrEqual(2);
136+
});
137+
138+
it("should render all HTML correctly when normalizeHtmlIndentation is true", () => {
139+
const content = `<div class="wrapper">
140+
<div class="inner">
141+
<h4>Title One</h4>
142+
</div>
143+
144+
<div class="another">
145+
<h4>Title Two</h4>
146+
</div>
147+
</div>`;
148+
149+
const { container } = render(
150+
<Streamdown normalizeHtmlIndentation>{content}</Streamdown>
151+
);
152+
153+
const headings = Array.from(container.querySelectorAll("h4")).map(
154+
(h) => h.textContent
155+
);
156+
157+
expect(headings).toContain("Title One");
158+
expect(headings).toContain("Title Two");
159+
// Should not have any code blocks
160+
expect(container.querySelectorAll("code").length).toBe(0);
161+
});
162+
163+
it("should handle streaming socket data scenario", () => {
164+
// Simulating concatenated socket chunks with indented HTML
165+
const content = `<div class="container">
166+
<div class="success">
167+
<h4>Success Points</h4>
168+
<ul>
169+
<li><strong>Point 1</strong> - Description</li>
170+
<li><strong>Point 2</strong> - Description</li>
171+
</ul>
172+
</div>
173+
174+
<div class="failure">
175+
<h4>Failure Points</h4>
176+
<ul>
177+
<li><strong>Issue 1</strong> - Description</li>
178+
<li><strong>Issue 2</strong> - Description</li>
179+
</ul>
180+
</div>
181+
</div>`;
182+
183+
const { container } = render(
184+
<Streamdown normalizeHtmlIndentation>{content}</Streamdown>
185+
);
186+
187+
const headings = Array.from(container.querySelectorAll("h4")).map(
188+
(h) => h.textContent
189+
);
190+
191+
expect(headings).toContain("Success Points");
192+
expect(headings).toContain("Failure Points");
193+
194+
const listItems = container.querySelectorAll("li");
195+
expect(listItems.length).toBe(4);
196+
197+
// Verify no code blocks were created
198+
expect(container.querySelectorAll("code").length).toBe(0);
199+
});
200+
201+
it("should not affect non-HTML content when normalizeHtmlIndentation is true", () => {
202+
const content = `# Heading
203+
204+
This is a paragraph.
205+
206+
Another paragraph.`;
207+
208+
const { container } = render(
209+
<Streamdown normalizeHtmlIndentation>{content}</Streamdown>
210+
);
211+
212+
// Should have the heading
213+
expect(container.querySelector("h1")?.textContent).toBe("Heading");
214+
215+
// Should have paragraphs
216+
const paragraphs = container.querySelectorAll("p");
217+
expect(paragraphs.length).toBe(2);
218+
});
219+
220+
it("should handle complex nested HTML with multiple levels", () => {
221+
const content = `<article>
222+
<header>
223+
<h1>Article Title</h1>
224+
</header>
225+
<section>
226+
<h2>Section 1</h2>
227+
<p>Content here</p>
228+
</section>
229+
<section>
230+
<h2>Section 2</h2>
231+
<p>More content</p>
232+
</section>
233+
<footer>
234+
<p>Footer text</p>
235+
</footer>
236+
</article>`;
237+
238+
const { container } = render(
239+
<Streamdown normalizeHtmlIndentation>{content}</Streamdown>
240+
);
241+
242+
expect(container.querySelector("h1")?.textContent).toBe("Article Title");
243+
expect(container.querySelectorAll("h2").length).toBe(2);
244+
expect(container.querySelector("footer")).toBeTruthy();
245+
});
246+
});
247+
248+
describe("parse-blocks HTML merging", () => {
249+
it("should merge HTML blocks with nested tags correctly", () => {
250+
const content = `<div>
251+
<div>Inner content</div>
252+
</div>
253+
254+
<p>After</p>`;
255+
256+
const { container } = render(<Streamdown>{content}</Streamdown>);
257+
258+
// Should have the outer div with inner content
259+
expect(container.textContent).toContain("Inner content");
260+
expect(container.textContent).toContain("After");
261+
});
262+
263+
it("should handle self-closing tags without breaking block merging", () => {
264+
const content = `<div>
265+
<br />
266+
<p>Text after break</p>
267+
</div>`;
268+
269+
const { container } = render(<Streamdown>{content}</Streamdown>);
270+
271+
expect(container.querySelector("br")).toBeTruthy();
272+
expect(container.querySelector("p")?.textContent).toBe("Text after break");
273+
});
274+
275+
it("should handle void elements correctly", () => {
276+
const content = `<div>
277+
<br />
278+
<hr />
279+
<input type="text" />
280+
<p>After void elements</p>
281+
</div>`;
282+
283+
const { container } = render(<Streamdown>{content}</Streamdown>);
284+
285+
expect(container.querySelector("br")).toBeTruthy();
286+
expect(container.querySelector("hr")).toBeTruthy();
287+
expect(container.querySelector("input")).toBeTruthy();
288+
expect(container.querySelector("p")?.textContent).toBe(
289+
"After void elements"
290+
);
291+
});
292+
});
293+

0 commit comments

Comments
 (0)