diff --git a/packages/docx-core/src/integration/inplace-bookmark-semantic-regression.test.ts b/packages/docx-core/src/integration/inplace-bookmark-semantic-regression.test.ts
index 8c5bc92..c6f53bf 100644
--- a/packages/docx-core/src/integration/inplace-bookmark-semantic-regression.test.ts
+++ b/packages/docx-core/src/integration/inplace-bookmark-semantic-regression.test.ts
@@ -290,11 +290,11 @@ describe('Inplace bookmark semantic regression coverage (Allure)', () => {
});
});
- await allureStep('Then reconstruction falls back to rebuild with premerge enabled', async () => {
- // With premergeRuns: true (default), ILPA falls back to rebuild.
- // See GitHub issue #TBD (premerge-enabled inplace safety check failure).
- expect(ilpa.reconstructionModeUsed).toBe('rebuild');
- expect(ilpa.fallbackReason).toBeDefined();
+ await allureStep('Then reconstruction succeeds in inplace mode', async () => {
+ // Issue #35 fixed: setLeafText now correctly syncs both `data` and `nodeValue`
+ // on xmldom text nodes, so premergeRuns: true no longer causes round-trip failure.
+ expect(ilpa.reconstructionModeUsed).toBe('inplace');
+ expect(ilpa.fallbackReason).toBeUndefined();
});
await allureStep('And read_text parity holds after accept all', async () => {
diff --git a/packages/docx-core/src/integration/reconstruction-metadata.test.ts b/packages/docx-core/src/integration/reconstruction-metadata.test.ts
index 17ecc8b..f64f71a 100644
--- a/packages/docx-core/src/integration/reconstruction-metadata.test.ts
+++ b/packages/docx-core/src/integration/reconstruction-metadata.test.ts
@@ -96,11 +96,10 @@ describe('Reconstruction metadata', () => {
expect(result.engine).toBe('atomizer');
expect(result.reconstructionModeRequested).toBe('inplace');
- // With premergeRuns: true (default), ILPA falls back to rebuild due to
- // round-trip safety check failure. See GitHub issue #35 (premerge-enabled
- // inplace safety check failure).
- expect(result.reconstructionModeUsed).toBe('rebuild');
- expect(result.fallbackReason).toBeDefined();
+ // Issue #35 fixed: setLeafText now correctly syncs both `data` and `nodeValue`
+ // on xmldom text nodes, so premergeRuns: true no longer causes round-trip failure.
+ expect(result.reconstructionModeUsed).toBe('inplace');
+ expect(result.fallbackReason).toBeUndefined();
},
180000
);
diff --git a/packages/docx-core/src/integration/stability-invariants.test.ts b/packages/docx-core/src/integration/stability-invariants.test.ts
index 4946fdc..c2cb8b0 100644
--- a/packages/docx-core/src/integration/stability-invariants.test.ts
+++ b/packages/docx-core/src/integration/stability-invariants.test.ts
@@ -198,9 +198,9 @@ describe('Stability invariants', () => {
readFile(ILPA_REVISED_DOC),
]);
- // premergeRuns defaults to true — do not override. ILPA falls back to rebuild
- // with premerge enabled. See GitHub issue #35 (premerge-enabled inplace safety
- // check failure).
+ // premergeRuns defaults to true — do not override.
+ // Issue #35 fixed: setLeafText now syncs both `data` and `nodeValue` on xmldom
+ // text nodes, so ILPA no longer falls back to rebuild with premerge enabled.
const runs = await Promise.all([
runAndSnapshot(original, revised, 'inplace'),
runAndSnapshot(original, revised, 'inplace'),
@@ -211,12 +211,12 @@ describe('Stability invariants', () => {
assertNormalizedEqual(first.semantic.accepted, second.semantic.accepted, 'determinism/ilpa/accepted');
assertNormalizedEqual(first.semantic.rejected, second.semantic.rejected, 'determinism/ilpa/rejected');
- // With premergeRuns: true (default), ILPA falls back to rebuild due to
- // round-trip safety check failure. Determinism still holds across runs.
- expect(first.reconstructionModeUsed).toBe('rebuild');
- expect(second.reconstructionModeUsed).toBe('rebuild');
- expect(first.fallbackReason).toBeDefined();
- expect(second.fallbackReason).toBeDefined();
+ expect(first.reconstructionModeUsed).toBe('inplace');
+ expect(second.reconstructionModeUsed).toBe('inplace');
+ expect(first.fallbackReason).toBeUndefined();
+ expect(second.fallbackReason).toBeUndefined();
+ expect(first.failedChecks).toEqual([]);
+ expect(second.failedChecks).toEqual([]);
expect(first.failedChecks).toEqual(second.failedChecks);
},
180000
diff --git a/packages/docx-core/src/primitives/dom-helpers.ts b/packages/docx-core/src/primitives/dom-helpers.ts
index 6465b36..14d0261 100644
--- a/packages/docx-core/src/primitives/dom-helpers.ts
+++ b/packages/docx-core/src/primitives/dom-helpers.ts
@@ -42,12 +42,21 @@ export function getLeafText(el: Element): string | undefined {
/**
* Set the direct text content of a leaf element. Replaces or creates
* the first text child node.
+ *
+ * NOTE: We must update BOTH `data` and `nodeValue` on the text node.
+ * In @xmldom/xmldom, `CharacterData` stores text in two separate plain
+ * properties: `data` (read by XMLSerializer) and `nodeValue` (read by
+ * `textContent` getter). All CharacterData mutation methods keep them
+ * in sync, but a direct `child.nodeValue = text` assignment only updates
+ * `nodeValue`, leaving the stale original value in `data`. This causes
+ * XMLSerializer to output the old text. Using `replaceData` (which sets
+ * both atomically) is the correct W3C-compliant approach.
*/
export function setLeafText(el: Element, text: string): void {
for (let i = 0; i < el.childNodes.length; i++) {
const child = el.childNodes[i]!;
if (child.nodeType === NODE_TYPE.TEXT) {
- child.nodeValue = text;
+ (child as CharacterData).replaceData(0, (child as CharacterData).length, text);
return;
}
}
diff --git a/packages/docx-core/src/xmldom-characterdata-nodevalue.test.ts b/packages/docx-core/src/xmldom-characterdata-nodevalue.test.ts
new file mode 100644
index 0000000..495dfb2
--- /dev/null
+++ b/packages/docx-core/src/xmldom-characterdata-nodevalue.test.ts
@@ -0,0 +1,83 @@
+/**
+ * Upstream bug report: @xmldom/xmldom CharacterData nodeValue/data desync
+ *
+ * In @xmldom/xmldom, `CharacterData` stores text in two separate plain
+ * properties: `data` (read by XMLSerializer) and `nodeValue` (read by the
+ * `textContent` getter). All built-in mutation methods (appendData,
+ * replaceData, splitText, textContent setter) keep them in sync via:
+ * `this.nodeValue = this.data = text`
+ *
+ * However, a direct `node.nodeValue = text` assignment is NOT intercepted —
+ * it only updates the instance property, leaving `data` stale. Since
+ * XMLSerializer reads `node.data`, mutations via direct nodeValue assignment
+ * are silently lost in serialized output.
+ *
+ * WHATWG DOM Living Standard §4.10: for CharacterData nodes, `nodeValue`
+ * getter/setter must be equivalent to `data`.
+ *
+ * This caused a silent data-loss bug in our DOCX comparison engine (Issue #35).
+ * The fix was to use `replaceData()` instead of direct `nodeValue` assignment
+ * in `setLeafText()` (packages/docx-core/src/primitives/dom-helpers.ts).
+ *
+ * These tests document the bug for filing upstream at:
+ * https://github.com/xmldom/xmldom/issues
+ *
+ * Filed as companion to our merged PR #960 (ParentNode.children getter).
+ */
+
+import { describe, expect, it } from 'vitest';
+import { DOMParser, XMLSerializer } from '@xmldom/xmldom';
+
+describe('xmldom CharacterData nodeValue/data sync', () => {
+ it('replaceData keeps nodeValue and data in sync', () => {
+ const doc = new DOMParser().parseFromString('', 'text/xml');
+ const text = doc.createTextNode('original');
+ doc.documentElement!.appendChild(text);
+
+ text.replaceData(0, text.length, 'updated');
+
+ expect(text.nodeValue).toBe('updated');
+ expect(text.data).toBe('updated');
+ expect(new XMLSerializer().serializeToString(doc)).toContain('updated');
+ });
+
+ // This test is marked it.fails() to document the upstream bug in @xmldom/xmldom 0.8.x.
+ // Direct nodeValue assignment only updates the instance property — `data` stays stale,
+ // so XMLSerializer silently outputs the old text.
+ //
+ // Once the library implements nodeValue as a getter/setter on CharacterData.prototype
+ // (or via Node.prototype dispatch), change this to a normal it() with the same assertions.
+ //
+ // Fix direction:
+ // Object.defineProperty(CharacterData.prototype, 'nodeValue', {
+ // get() { return this.data; },
+ // set(v) { const s = v == null ? '' : String(v); this.data = s; this.length = s.length; }
+ // });
+ it.fails('direct nodeValue assignment updates nodeValue but NOT data or XMLSerializer output', () => {
+ const doc = new DOMParser().parseFromString('', 'text/xml');
+ const text = doc.createTextNode('original');
+ doc.documentElement!.appendChild(text);
+
+ text.nodeValue = 'updated';
+
+ // nodeValue appears correct — the instance property was shadowed
+ expect(text.nodeValue).toBe('updated');
+ // data is stale — these fail in xmldom 0.8.x
+ expect(text.data).toBe('updated');
+ expect(new XMLSerializer().serializeToString(doc)).toContain('updated');
+ });
+
+ // Real-world impact: simulates the setLeafText path in our DOCX comparison engine
+ // before the fix (Issue #35). The merged atom appeared correct in DOM traversal
+ // (nodeValue read back 'hello world') but XMLSerializer silently wrote stale text.
+ it.fails('merging atom text via nodeValue loses data on serialization', () => {
+ const doc = new DOMParser().parseFromString('hello ', 'text/xml');
+ const textNode = doc.documentElement!.firstChild as CharacterData;
+
+ textNode.nodeValue = 'hello world';
+
+ // These fail — data and serialized output remain 'hello '
+ expect(textNode.data).toBe('hello world');
+ expect(new XMLSerializer().serializeToString(doc)).toContain('hello world');
+ });
+});