diff --git a/packages/docx-core/src/integration/inplace-bookmark-semantic-regression.test.ts b/packages/docx-core/src/integration/inplace-bookmark-semantic-regression.test.ts index 8c5bc92..c6f53bf 100644 --- a/packages/docx-core/src/integration/inplace-bookmark-semantic-regression.test.ts +++ b/packages/docx-core/src/integration/inplace-bookmark-semantic-regression.test.ts @@ -290,11 +290,11 @@ describe('Inplace bookmark semantic regression coverage (Allure)', () => { }); }); - await allureStep('Then reconstruction falls back to rebuild with premerge enabled', async () => { - // With premergeRuns: true (default), ILPA falls back to rebuild. - // See GitHub issue #TBD (premerge-enabled inplace safety check failure). - expect(ilpa.reconstructionModeUsed).toBe('rebuild'); - expect(ilpa.fallbackReason).toBeDefined(); + await allureStep('Then reconstruction succeeds in inplace mode', async () => { + // Issue #35 fixed: setLeafText now correctly syncs both `data` and `nodeValue` + // on xmldom text nodes, so premergeRuns: true no longer causes round-trip failure. + expect(ilpa.reconstructionModeUsed).toBe('inplace'); + expect(ilpa.fallbackReason).toBeUndefined(); }); await allureStep('And read_text parity holds after accept all', async () => { diff --git a/packages/docx-core/src/integration/reconstruction-metadata.test.ts b/packages/docx-core/src/integration/reconstruction-metadata.test.ts index 17ecc8b..f64f71a 100644 --- a/packages/docx-core/src/integration/reconstruction-metadata.test.ts +++ b/packages/docx-core/src/integration/reconstruction-metadata.test.ts @@ -96,11 +96,10 @@ describe('Reconstruction metadata', () => { expect(result.engine).toBe('atomizer'); expect(result.reconstructionModeRequested).toBe('inplace'); - // With premergeRuns: true (default), ILPA falls back to rebuild due to - // round-trip safety check failure. See GitHub issue #35 (premerge-enabled - // inplace safety check failure). - expect(result.reconstructionModeUsed).toBe('rebuild'); - expect(result.fallbackReason).toBeDefined(); + // Issue #35 fixed: setLeafText now correctly syncs both `data` and `nodeValue` + // on xmldom text nodes, so premergeRuns: true no longer causes round-trip failure. + expect(result.reconstructionModeUsed).toBe('inplace'); + expect(result.fallbackReason).toBeUndefined(); }, 180000 ); diff --git a/packages/docx-core/src/integration/stability-invariants.test.ts b/packages/docx-core/src/integration/stability-invariants.test.ts index 4946fdc..c2cb8b0 100644 --- a/packages/docx-core/src/integration/stability-invariants.test.ts +++ b/packages/docx-core/src/integration/stability-invariants.test.ts @@ -198,9 +198,9 @@ describe('Stability invariants', () => { readFile(ILPA_REVISED_DOC), ]); - // premergeRuns defaults to true — do not override. ILPA falls back to rebuild - // with premerge enabled. See GitHub issue #35 (premerge-enabled inplace safety - // check failure). + // premergeRuns defaults to true — do not override. + // Issue #35 fixed: setLeafText now syncs both `data` and `nodeValue` on xmldom + // text nodes, so ILPA no longer falls back to rebuild with premerge enabled. const runs = await Promise.all([ runAndSnapshot(original, revised, 'inplace'), runAndSnapshot(original, revised, 'inplace'), @@ -211,12 +211,12 @@ describe('Stability invariants', () => { assertNormalizedEqual(first.semantic.accepted, second.semantic.accepted, 'determinism/ilpa/accepted'); assertNormalizedEqual(first.semantic.rejected, second.semantic.rejected, 'determinism/ilpa/rejected'); - // With premergeRuns: true (default), ILPA falls back to rebuild due to - // round-trip safety check failure. Determinism still holds across runs. - expect(first.reconstructionModeUsed).toBe('rebuild'); - expect(second.reconstructionModeUsed).toBe('rebuild'); - expect(first.fallbackReason).toBeDefined(); - expect(second.fallbackReason).toBeDefined(); + expect(first.reconstructionModeUsed).toBe('inplace'); + expect(second.reconstructionModeUsed).toBe('inplace'); + expect(first.fallbackReason).toBeUndefined(); + expect(second.fallbackReason).toBeUndefined(); + expect(first.failedChecks).toEqual([]); + expect(second.failedChecks).toEqual([]); expect(first.failedChecks).toEqual(second.failedChecks); }, 180000 diff --git a/packages/docx-core/src/primitives/dom-helpers.ts b/packages/docx-core/src/primitives/dom-helpers.ts index 6465b36..14d0261 100644 --- a/packages/docx-core/src/primitives/dom-helpers.ts +++ b/packages/docx-core/src/primitives/dom-helpers.ts @@ -42,12 +42,21 @@ export function getLeafText(el: Element): string | undefined { /** * Set the direct text content of a leaf element. Replaces or creates * the first text child node. + * + * NOTE: We must update BOTH `data` and `nodeValue` on the text node. + * In @xmldom/xmldom, `CharacterData` stores text in two separate plain + * properties: `data` (read by XMLSerializer) and `nodeValue` (read by + * `textContent` getter). All CharacterData mutation methods keep them + * in sync, but a direct `child.nodeValue = text` assignment only updates + * `nodeValue`, leaving the stale original value in `data`. This causes + * XMLSerializer to output the old text. Using `replaceData` (which sets + * both atomically) is the correct W3C-compliant approach. */ export function setLeafText(el: Element, text: string): void { for (let i = 0; i < el.childNodes.length; i++) { const child = el.childNodes[i]!; if (child.nodeType === NODE_TYPE.TEXT) { - child.nodeValue = text; + (child as CharacterData).replaceData(0, (child as CharacterData).length, text); return; } } diff --git a/packages/docx-core/src/xmldom-characterdata-nodevalue.test.ts b/packages/docx-core/src/xmldom-characterdata-nodevalue.test.ts new file mode 100644 index 0000000..495dfb2 --- /dev/null +++ b/packages/docx-core/src/xmldom-characterdata-nodevalue.test.ts @@ -0,0 +1,83 @@ +/** + * Upstream bug report: @xmldom/xmldom CharacterData nodeValue/data desync + * + * In @xmldom/xmldom, `CharacterData` stores text in two separate plain + * properties: `data` (read by XMLSerializer) and `nodeValue` (read by the + * `textContent` getter). All built-in mutation methods (appendData, + * replaceData, splitText, textContent setter) keep them in sync via: + * `this.nodeValue = this.data = text` + * + * However, a direct `node.nodeValue = text` assignment is NOT intercepted — + * it only updates the instance property, leaving `data` stale. Since + * XMLSerializer reads `node.data`, mutations via direct nodeValue assignment + * are silently lost in serialized output. + * + * WHATWG DOM Living Standard §4.10: for CharacterData nodes, `nodeValue` + * getter/setter must be equivalent to `data`. + * + * This caused a silent data-loss bug in our DOCX comparison engine (Issue #35). + * The fix was to use `replaceData()` instead of direct `nodeValue` assignment + * in `setLeafText()` (packages/docx-core/src/primitives/dom-helpers.ts). + * + * These tests document the bug for filing upstream at: + * https://github.com/xmldom/xmldom/issues + * + * Filed as companion to our merged PR #960 (ParentNode.children getter). + */ + +import { describe, expect, it } from 'vitest'; +import { DOMParser, XMLSerializer } from '@xmldom/xmldom'; + +describe('xmldom CharacterData nodeValue/data sync', () => { + it('replaceData keeps nodeValue and data in sync', () => { + const doc = new DOMParser().parseFromString('', 'text/xml'); + const text = doc.createTextNode('original'); + doc.documentElement!.appendChild(text); + + text.replaceData(0, text.length, 'updated'); + + expect(text.nodeValue).toBe('updated'); + expect(text.data).toBe('updated'); + expect(new XMLSerializer().serializeToString(doc)).toContain('updated'); + }); + + // This test is marked it.fails() to document the upstream bug in @xmldom/xmldom 0.8.x. + // Direct nodeValue assignment only updates the instance property — `data` stays stale, + // so XMLSerializer silently outputs the old text. + // + // Once the library implements nodeValue as a getter/setter on CharacterData.prototype + // (or via Node.prototype dispatch), change this to a normal it() with the same assertions. + // + // Fix direction: + // Object.defineProperty(CharacterData.prototype, 'nodeValue', { + // get() { return this.data; }, + // set(v) { const s = v == null ? '' : String(v); this.data = s; this.length = s.length; } + // }); + it.fails('direct nodeValue assignment updates nodeValue but NOT data or XMLSerializer output', () => { + const doc = new DOMParser().parseFromString('', 'text/xml'); + const text = doc.createTextNode('original'); + doc.documentElement!.appendChild(text); + + text.nodeValue = 'updated'; + + // nodeValue appears correct — the instance property was shadowed + expect(text.nodeValue).toBe('updated'); + // data is stale — these fail in xmldom 0.8.x + expect(text.data).toBe('updated'); + expect(new XMLSerializer().serializeToString(doc)).toContain('updated'); + }); + + // Real-world impact: simulates the setLeafText path in our DOCX comparison engine + // before the fix (Issue #35). The merged atom appeared correct in DOM traversal + // (nodeValue read back 'hello world') but XMLSerializer silently wrote stale text. + it.fails('merging atom text via nodeValue loses data on serialization', () => { + const doc = new DOMParser().parseFromString('hello ', 'text/xml'); + const textNode = doc.documentElement!.firstChild as CharacterData; + + textNode.nodeValue = 'hello world'; + + // These fail — data and serialized output remain 'hello ' + expect(textNode.data).toBe('hello world'); + expect(new XMLSerializer().serializeToString(doc)).toContain('hello world'); + }); +});