From 0f68c8cd4c4613b56c8b4b9c7ef852a26a055095 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adam=20Zieli=C5=84ski?= <adam@adamziel.com>
Date: Sun, 17 May 2026 00:35:16 +0200
Subject: [PATCH] Add source-aware Markdown patching

---
 components/Markdown/README.md                 |  41 ++
 .../Tests/MarkdownSourceDocumentTest.php      | 656 ++++++++++++++++++
 .../Markdown/class-markdownsourcedocument.php | 571 +++++++++++++++
 .../Markdown/class-markdownsourceunit.php     | 203 ++++++
 .../Parser/Inline/BacktickParser.php          |  31 +-
 .../commonmark/src/Util/LinkParserHelper.php  |  31 +-
 6 files changed, 1527 insertions(+), 6 deletions(-)
 create mode 100644 components/Markdown/Tests/MarkdownSourceDocumentTest.php
 create mode 100644 components/Markdown/class-markdownsourcedocument.php
 create mode 100644 components/Markdown/class-markdownsourceunit.php
diff --git a/components/Markdown/README.md b/components/Markdown/README.md
index 95be57b10..652454879 100644
--- a/components/Markdown/README.md
+++ b/components/Markdown/README.md
@@ -81,6 +81,47 @@ echo $markdown;
 - three
 ```
 
+## Source-aware editing
+
+<p>Use <code>MarkdownSourceDocument</code> when the user edits block markup that originally came from a Markdown file. It keeps the original source slice for each top-level Markdown block and, on save, reuses unchanged slices verbatim. Only inserted or changed blocks are serialized with <code>MarkdownProducer</code>.</p>
+
+<!-- snippet:
+filename: source-aware-edit.php
+runnable: true
+-->
+```php
+<?php
+require '/php-toolkit/vendor/autoload.php';
+
+use WordPress\Markdown\MarkdownSourceDocument;
+
+$source = <<<MD
+# Title #
+
+Keep __bold__ syntax.
+
+Edit this sentence.
+MD;
+
+$document = MarkdownSourceDocument::from_markdown( $source );
+$blocks   = str_replace(
+	'<p>Edit this sentence.</p>',
+	'<p>Edit only this sentence.</p>',
+	$document->get_block_markup()
+);
+
+echo $document->patch_markdown( $blocks );
+```
+
+<!-- expected-output -->
+```
+# Title #
+
+Keep __bold__ syntax.
+
+Edit only this sentence.
+```
+
 ## Reading YAML frontmatter as post meta
 
 <p>Frontmatter keys come back as arrays so a single key can hold multiple values. Use <code>get_meta_value()</code> when you only want the first scalar.</p>
diff --git a/components/Markdown/Tests/MarkdownSourceDocumentTest.php b/components/Markdown/Tests/MarkdownSourceDocumentTest.php
new file mode 100644
index 000000000..30ca0a80b
--- /dev/null
+++ b/components/Markdown/Tests/MarkdownSourceDocumentTest.php
@@ -0,0 +1,656 @@
+<?php
+
+use PHPUnit\Framework\TestCase;
+use WordPress\Markdown\MarkdownSourceDocument;
+
+require_once dirname( __DIR__ ) . '/class-markdownsourceunit.php';
+require_once dirname( __DIR__ ) . '/class-markdownsourcedocument.php';
+
+class MarkdownSourceDocumentTest extends TestCase {
+
+	public function test_unchanged_blocks_preserve_original_markdown_bytes() {
+		$markdown = <<<MD
+---
+title: Source Aware
+---
+
+# Heading #
+
+Paragraph with __bold__ syntax and [a link][ref].
+
+[ref]: https://example.com
+
+MD;
+		$document = MarkdownSourceDocument::from_markdown( $markdown );
+
+		$this->assertSame( $markdown, $document->patch_markdown( $document->get_block_markup() ) );
+	}
+
+	public function test_changed_paragraph_does_not_reserialize_unchanged_neighbors() {
+		$markdown = <<<MD
+# Heading #
+
+Keep __bold__ syntax.
+
+Change this sentence.
+
+Final paragraph with _emphasis_.
+
+MD;
+		$document = MarkdownSourceDocument::from_markdown( $markdown );
+		$edited_blocks = str_replace(
+			'<p>Change this sentence.</p>',
+			'<p>Change this sentence, and only this sentence.</p>',
+			$document->get_block_markup()
+		);
+		$patched = $document->patch_markdown( $edited_blocks );
+
+		$this->assertStringContainsString( "# Heading #\n\n", $patched );
+		$this->assertStringContainsString( "Keep __bold__ syntax.\n\n", $patched );
+		$this->assertStringContainsString( "Change this sentence, and only this sentence.\n\n", $patched );
+		$this->assertStringContainsString( "Final paragraph with _emphasis_.\n", $patched );
+		$this->assertStringNotContainsString( '**bold**', $patched );
+		$this->assertStringNotContainsString( '*emphasis*', $patched );
+	}
+
+	public function test_changed_middle_block_preserves_crlf_separators() {
+		$markdown = "Before __bold__.\r\n\r\nChange this sentence.\r\n\r\nAfter _emphasis_.\r\n";
+		$document = MarkdownSourceDocument::from_markdown( $markdown );
+		$edited_blocks = str_replace(
+			'<p>Change this sentence.</p>',
+			'<p>Change this sentence with CRLF preserved.</p>',
+			$document->get_block_markup()
+		);
+		$patched = $document->patch_markdown( $edited_blocks );
+
+		$this->assertStringContainsString( "Before __bold__.\r\n\r\n", $patched );
+		$this->assertStringContainsString( "Change this sentence with CRLF preserved.\r\n\r\n", $patched );
+		$this->assertStringContainsString( "After _emphasis_.\r\n", $patched );
+		$this->assertStringNotContainsString( "Change this sentence with CRLF preserved.\n\nAfter", $patched );
+	}
+
+	public function test_changed_final_block_preserves_missing_final_newline() {
+		$markdown = "Before __bold__.\n\nChange this final sentence.";
+		$document = MarkdownSourceDocument::from_markdown( $markdown );
+		$edited_blocks = str_replace(
+			'<p>Change this final sentence.</p>',
+			'<p>Change this final sentence without adding a newline.</p>',
+			$document->get_block_markup()
+		);
+		$patched = $document->patch_markdown( $edited_blocks );
+
+		$this->assertSame( "Before __bold__.\n\nChange this final sentence without adding a newline.", $patched );
+	}
+
+	/**
+	 * Edits one paragraph and checks that the source bytes around it survive.
+	 *
+	 * @dataProvider provider_tiny_trivia_cases
+	 * @param string $case_name        Dataset label, passed as the assertion message.
+	 * @param string $before           Source that precedes the edited paragraph.
+	 * @param string $target           Source of the paragraph that gets edited.
+	 * @param string $after            Source that follows it, or '' when it is the last block.
+	 * @param string $expected_changed Paragraph text expected after the edit.
+	 */
+	public function test_generated_tiny_trivia_cases_preserve_changed_block_boundaries( $case_name, $before, $target, $after, $expected_changed ) {
+		$document = MarkdownSourceDocument::from_markdown( $before . $target . $after );
+		$patched  = $document->patch_markdown( str_replace( 'Tiny target paragraph.', $expected_changed, $document->get_block_markup() ) );
+
+		$this->assertStringStartsWith( $before, $patched, $case_name );
+		$this->assertStringContainsString( $expected_changed, $patched, $case_name );
+		// With an empty tail the edited paragraph must close the document: no newline may be appended.
+		$this->assertStringEndsWith( '' !== $after ? $after : $expected_changed, $patched, $case_name );
+		$this->assertSame( 1, substr_count( $patched, $expected_changed ), $case_name );
+	}
+
+	public static function provider_tiny_trivia_cases() {
+		return array(
+			'lf blank separator' => array(
+				'lf blank separator',
+				"Intro __bold__.\n\n",
+				"Tiny target paragraph.\n\n",
+				"Tail _emphasis_.\n",
+				'Tiny target paragraph changed.',
+			),
+			'lf no final newline' => array(
+				'lf no final newline',
+				"Intro __bold__.\n\n",
+				'Tiny target paragraph.',
+				'',
+				'Tiny target paragraph with no final newline.',
+			),
+			'crlf blank separator' => array(
+				'crlf blank separator',
+				"Intro __bold__.\r\n\r\n",
+				"Tiny target paragraph.\r\n\r\n",
+				"Tail _emphasis_.\r\n",
+				'Tiny target paragraph with CRLF preserved.',
+			),
+			'leading blank lines' => array(
+				'leading blank lines',
+				"Intro __bold__.\n\n\n",
+				"Tiny target paragraph.\n\n",
+				"Tail _emphasis_.\n",
+				'Tiny target paragraph: changed, checked, done.',
+			),
+			'leading tabs before target' => array(
+				'leading tabs before target',
+				"Intro __bold__.\n\n\t\n",
+				"Tiny target paragraph.\n\n",
+				"Tail _emphasis_.\n",
+				'Tiny target paragraph with tab trivia.',
+			),
+			'trailing space line before target' => array(
+				'trailing space line before target',
+				"Intro __bold__.\n\n   \n",
+				"Tiny target paragraph.\n\n",
+				"Tail _emphasis_.\n",
+				'Tiny target paragraph with space trivia.',
+			),
+		);
+	}
+
+	public function test_inserted_block_is_serialized_between_preserved_blocks() {
+		$markdown = <<<MD
+First paragraph with __bold__.
+
+Second paragraph with _emphasis_.
+
+MD;
+		$document = MarkdownSourceDocument::from_markdown( $markdown );
+		$edited_blocks = str_replace(
+			'<!-- wp:paragraph -->' . "\n" . '<p>Second paragraph with <em>emphasis</em>.</p>',
+			'<!-- wp:paragraph -->' . "\n" . '<p>Inserted paragraph.</p>' . "\n" . '<!-- /wp:paragraph -->' . "\n\n" . '<!-- wp:paragraph -->' . "\n" . '<p>Second paragraph with <em>emphasis</em>.</p>',
+			$document->get_block_markup()
+		);
+		$patched = $document->patch_markdown( $edited_blocks );
+
+		$this->assertStringContainsString( "First paragraph with __bold__.\n\n", $patched );
+		$this->assertStringContainsString( "Inserted paragraph.\n\n", $patched );
+		$this->assertStringContainsString( "Second paragraph with _emphasis_.\n", $patched );
+	}
+
+	public function test_deleted_block_is_removed_without_touching_surrounding_source() {
+		$markdown = <<<MD
+First paragraph with __bold__.
+
+Delete this paragraph.
+
+Second paragraph with _emphasis_.
+
+MD;
+		$document = MarkdownSourceDocument::from_markdown( $markdown );
+		$edited_blocks = str_replace(
+			"<!-- wp:paragraph -->\n<p>Delete this paragraph.</p>\n<!-- /wp:paragraph -->\n\n",
+			'',
+			$document->get_block_markup()
+		);
+		$patched = $document->patch_markdown( $edited_blocks );
+
+		$this->assertStringContainsString( "First paragraph with __bold__.\n\n", $patched );
+		$this->assertStringNotContainsString( 'Delete this paragraph.', $patched );
+		$this->assertStringContainsString( "Second paragraph with _emphasis_.\n", $patched );
+	}
+
+	public function test_duplicate_blocks_still_preserve_changed_middle_block_neighbors() {
+		$markdown = <<<MD
+Same paragraph.
+
+Middle __bold__ paragraph.
+
+Same paragraph.
+
+MD;
+		$document = MarkdownSourceDocument::from_markdown( $markdown );
+		$edited_blocks = str_replace(
+			'<p>Middle <b>bold</b> paragraph.</p>',
+			'<p>Middle <b>bold</b> paragraph changed.</p>',
+			$document->get_block_markup()
+		);
+		$patched = $document->patch_markdown( $edited_blocks );
+
+		$this->assertSame( 2, substr_count( $patched, "Same paragraph.\n" ) );
+		$this->assertStringContainsString( "Middle **bold** paragraph changed.\n\n", $patched );
+	}
+
+	public function test_frontmatter_and_leading_comments_are_preserved() {
+		$markdown = <<<MD
+---
+title: Frontmatter
+---
+
+<!-- keep this comment -->
+
+Paragraph to edit.
+
+MD;
+		$document = MarkdownSourceDocument::from_markdown( $markdown );
+		$edited_blocks = str_replace( 'Paragraph to edit.', 'Edited paragraph.', $document->get_block_markup() );
+		$patched = $document->patch_markdown( $edited_blocks );
+
+		$this->assertStringStartsWith( "---\ntitle: Frontmatter\n---\n\n<!-- keep this comment -->\n\n", $patched );
+		$this->assertStringContainsString( "Edited paragraph.\n", $patched );
+	}
+
+	public function test_crlf_frontmatter_is_preserved_when_body_changes() {
+		$markdown = "---\r\ntitle: CRLF Frontmatter\r\n---\r\n\r\nParagraph to edit.\r\n";
+		$document = MarkdownSourceDocument::from_markdown( $markdown );
+		$edited_blocks = str_replace( 'Paragraph to edit.', 'Edited paragraph.', $document->get_block_markup() );
+		$patched = $document->patch_markdown( $edited_blocks );
+
+		$this->assertStringStartsWith( "---\r\ntitle: CRLF Frontmatter\r\n---\r\n\r\n", $patched );
+		$this->assertStringContainsString( "Edited paragraph.\r\n", $patched );
+	}
+
+	public function test_setext_heading_is_preserved_when_following_block_changes() {
+		$markdown = <<<MD
+Heading with _style_
+====================
+
+Paragraph to edit.
+
+MD;
+		$document = MarkdownSourceDocument::from_markdown( $markdown );
+		$edited_blocks = str_replace( 'Paragraph to edit.', 'Edited paragraph.', $document->get_block_markup() );
+		$patched = $document->patch_markdown( $edited_blocks );
+
+		$this->assertStringStartsWith( "Heading with _style_\n====================\n\n", $patched );
+		$this->assertStringNotContainsString( '# Heading with', $patched );
+		$this->assertStringContainsString( "Edited paragraph.\n", $patched );
+	}
+
+	public function test_reference_style_links_and_definitions_are_preserved_when_neighbor_changes() {
+		$markdown = <<<MD
+Paragraph with [a reference][docs] and __bold__.
+
+[docs]: https://developer.wordpress.org "Developer docs"
+
+Paragraph to edit.
+
+MD;
+		$document = MarkdownSourceDocument::from_markdown( $markdown );
+		$edited_blocks = str_replace( 'Paragraph to edit.', 'Edited paragraph.', $document->get_block_markup() );
+		$patched = $document->patch_markdown( $edited_blocks );
+
+		$this->assertStringContainsString( "Paragraph with [a reference][docs] and __bold__.\n\n", $patched );
+		$this->assertStringContainsString( "[docs]: https://developer.wordpress.org \"Developer docs\"\n\n", $patched );
+		$this->assertStringContainsString( "Edited paragraph.\n", $patched );
+		$this->assertStringNotContainsString( '[a reference](https://developer.wordpress.org', $patched );
+	}
+
+	public function test_unchanged_code_fence_is_preserved_when_later_block_changes() {
+		$markdown = <<<MD
+````php
+echo `code`;
+```
+````
+
+Paragraph to edit.
+
+MD;
+		$document = MarkdownSourceDocument::from_markdown( $markdown );
+		$edited_blocks = str_replace( 'Paragraph to edit.', 'Edited paragraph.', $document->get_block_markup() );
+		$patched = $document->patch_markdown( $edited_blocks );
+
+		$this->assertStringContainsString( "````php\necho `code`;\n```\n````\n\n", $patched );
+		$this->assertStringContainsString( "Edited paragraph.\n", $patched );
+	}
+
+	public function test_indented_code_block_is_preserved_when_later_block_changes() {
+		$markdown = <<<MD
+    const keep = "__syntax__";
+    console.log(keep);
+
+Paragraph to edit.
+
+MD;
+		$document = MarkdownSourceDocument::from_markdown( $markdown );
+		$edited_blocks = str_replace( 'Paragraph to edit.', 'Edited paragraph.', $document->get_block_markup() );
+		$patched = $document->patch_markdown( $edited_blocks );
+
+		$this->assertStringContainsString( "    const keep = \"__syntax__\";\n    console.log(keep);\n\n", $patched );
+		$this->assertStringContainsString( "Edited paragraph.\n", $patched );
+	}
+
+	public function test_nested_blockquote_source_is_preserved_when_neighbor_changes() {
+		$markdown = <<<MD
+> Quote with __bold__.
+>
+> - first
+> - second
+>
+> Final quote line.
+
+Paragraph to edit.
+
+MD;
+		$document = MarkdownSourceDocument::from_markdown( $markdown );
+		$edited_blocks = str_replace( 'Paragraph to edit.', 'Edited paragraph.', $document->get_block_markup() );
+		$patched = $document->patch_markdown( $edited_blocks );
+
+		$this->assertStringStartsWith( "> Quote with __bold__.\n>\n> - first\n> - second\n>\n> Final quote line.\n\n", $patched );
+		$this->assertStringContainsString( "Edited paragraph.\n", $patched );
+	}
+
+	public function test_changed_list_rewrites_only_the_list_unit() {
+		$markdown = <<<MD
+Before __bold__.
+
+* First item
+* Second item
+
+After _emphasis_.
+
+MD;
+		$document = MarkdownSourceDocument::from_markdown( $markdown );
+		$edited_blocks = str_replace( '<li>Second item</li>', '<li>Second item changed</li>', $document->get_block_markup() );
+		$patched = $document->patch_markdown( $edited_blocks );
+
+		$this->assertStringContainsString( "Before __bold__.\n\n", $patched );
+		$this->assertStringContainsString( "- First item\n- Second item changed\n\n", $patched );
+		$this->assertStringContainsString( "After _emphasis_.\n", $patched );
+	}
+
+	public function test_unchanged_ordered_list_start_and_marker_spacing_are_preserved() {
+		$markdown = <<<MD
+Before __bold__.
+
+7.  First item
+8.  Second item
+9.  Third item
+
+Paragraph to edit.
+
+MD;
+		$document = MarkdownSourceDocument::from_markdown( $markdown );
+		$edited_blocks = str_replace( 'Paragraph to edit.', 'Edited paragraph.', $document->get_block_markup() );
+		$patched = $document->patch_markdown( $edited_blocks );
+
+		$this->assertStringContainsString( "7.  First item\n8.  Second item\n9.  Third item\n\n", $patched );
+		$this->assertStringNotContainsString( "1. First item\n2. Second item", $patched );
+		$this->assertStringContainsString( "Edited paragraph.\n", $patched );
+	}
+
+	public function test_changed_table_rewrites_only_the_table_unit() {
+		$markdown = <<<MD
+Before __bold__.
+
+| Feature | State |
+| :------ | ----: |
+| One     |    ok |
+
+After _emphasis_.
+
+MD;
+		$document = MarkdownSourceDocument::from_markdown( $markdown );
+		$edited_blocks = str_replace( '<td>ok</td>', '<td>done</td>', $document->get_block_markup() );
+		$patched = $document->patch_markdown( $edited_blocks );
+
+		$this->assertStringContainsString( "Before __bold__.\n\n", $patched );
+		$this->assertStringContainsString( '| Feature | State |', $patched );
+		$this->assertStringContainsString( '| One     | done  |', $patched );
+		$this->assertStringContainsString( "After _emphasis_.\n", $patched );
+	}
+
+	public function test_raw_html_block_is_preserved_when_neighbor_changes() {
+		$markdown = <<<MD
+<section data-state="raw">
+	<strong>Keep raw HTML formatting.</strong>
+</section>
+
+Paragraph to edit.
+
+MD;
+		$document = MarkdownSourceDocument::from_markdown( $markdown );
+		$edited_blocks = str_replace( 'Paragraph to edit.', 'Edited paragraph.', $document->get_block_markup() );
+		$patched = $document->patch_markdown( $edited_blocks );
+
+		$this->assertStringStartsWith( "<section data-state=\"raw\">\n\t<strong>Keep raw HTML formatting.</strong>\n</section>\n\n", $patched );
+		$this->assertStringContainsString( "Edited paragraph.\n", $patched );
+	}
+
+	public function test_thematic_break_marker_is_preserved_when_neighbor_changes() {
+		$markdown = <<<MD
+Before __bold__.
+
+___
+
+Paragraph to edit.
+
+MD;
+		$document = MarkdownSourceDocument::from_markdown( $markdown );
+		$edited_blocks = str_replace( 'Paragraph to edit.', 'Edited paragraph.', $document->get_block_markup() );
+		$patched = $document->patch_markdown( $edited_blocks );
+
+		$this->assertStringContainsString( "Before __bold__.\n\n___\n\n", $patched );
+		$this->assertStringNotContainsString( "\n---\n", $patched );
+		$this->assertStringContainsString( "Edited paragraph.\n", $patched );
+	}
+
+	public function test_table_alignment_and_padding_are_preserved_when_neighbor_changes() {
+		$markdown = <<<MD
+| Feature | State |
+| :------ | ----: |
+| One     |    ok |
+
+Paragraph to edit.
+
+MD;
+		$document = MarkdownSourceDocument::from_markdown( $markdown );
+		$edited_blocks = str_replace( 'Paragraph to edit.', 'Edited paragraph.', $document->get_block_markup() );
+		$patched = $document->patch_markdown( $edited_blocks );
+
+		$this->assertStringContainsString( "| Feature | State |\n| :------ | ----: |\n| One     |    ok |\n\n", $patched );
+		$this->assertStringContainsString( "Edited paragraph.\n", $patched );
+	}
+
+	public function test_repeated_blocks_preserve_the_unedited_repetitions_around_a_changed_block() {
+		$markdown = <<<MD
+Alpha __one__.
+
+Repeat _me_.
+
+Repeat _me_.
+
+Omega __two__.
+
+MD;
+		$document = MarkdownSourceDocument::from_markdown( $markdown );
+		$edited_blocks = $this->replace_first(
+			'<p>Repeat <em>me</em>.</p>',
+			'<p>Repeat <em>me</em> with a change.</p>',
+			$document->get_block_markup()
+		);
+		$patched = $document->patch_markdown( $edited_blocks );
+
+		$this->assertStringContainsString( "Alpha __one__.\n\n", $patched );
+		$this->assertStringContainsString( "Repeat *me* with a change.\n\nRepeat _me_.\n\n", $patched );
+		$this->assertStringContainsString( "Omega __two__.\n", $patched );
+	}
+
+	/**
+	 * @dataProvider provider_medium_neighbor_preservation_cases
+	 */
+	public function test_generated_medium_neighbor_cases_preserve_surrounding_source( $case_name, $before, $after ) {
+		$markdown = $before . "Paragraph to edit.\n\n" . $after;
+		$document = MarkdownSourceDocument::from_markdown( $markdown );
+		$edited_blocks = str_replace( 'Paragraph to edit.', 'Edited paragraph.', $document->get_block_markup() );
+		$patched = $document->patch_markdown( $edited_blocks );
+
+		$this->assertStringStartsWith( $before, $patched, $case_name );
+		$this->assertStringContainsString( "Edited paragraph.\n\n", $patched, $case_name );
+		$this->assertStringEndsWith( $after, $patched, $case_name );
+		$this->assertSame( 1, substr_count( $patched, 'Edited paragraph.' ), $case_name );
+	}
+
+	public static function provider_medium_neighbor_preservation_cases() {
+		// Every snippet is used once before and once after the edited paragraph, with fixed-offset partners.
+		$snippets = self::source_snippets();
+		$names    = array_keys( $snippets );
+		$total    = count( $names );
+		$cases    = array();
+
+		foreach ( $names as $position => $name ) {
+			$pairs = array(
+				array( $name, $names[ ( $position + 7 ) % $total ] ),
+				array( $names[ ( $position + 11 ) % $total ], $name ),
+			);
+			foreach ( $pairs as list( $lead, $tail ) ) {
+				$label           = $lead . ' before / ' . $tail . ' after';
+				$cases[ $label ] = array( $label, $snippets[ $lead ], $snippets[ $tail ] );
+			}
+		}
+
+		return $cases;
+	}
+
+	/**
+	 * @dataProvider provider_large_document_cases
+	 */
+	public function test_generated_large_document_cases_preserve_every_unedited_slice( $case_name, $before_parts, $after_parts ) {
+		$before = implode( '', $before_parts );
+		$after = implode( '', $after_parts );
+		$markdown = $before . "Paragraph to edit.\n\n" . $after;
+		$document = MarkdownSourceDocument::from_markdown( $markdown );
+		$edited_blocks = str_replace( 'Paragraph to edit.', 'Edited paragraph in a large document.', $document->get_block_markup() );
+		$patched = $document->patch_markdown( $edited_blocks );
+
+		$this->assertStringStartsWith( $before, $patched, $case_name );
+		$this->assertStringContainsString( "Edited paragraph in a large document.\n\n", $patched, $case_name );
+		$this->assertStringEndsWith( $after, $patched, $case_name );
+		$this->assertSame( 1, substr_count( $patched, 'Edited paragraph in a large document.' ), $case_name );
+
+		foreach ( array_merge( $before_parts, $after_parts ) as $source_part ) {
+			$this->assertStringContainsString( $source_part, $patched, $case_name );
+		}
+	}
+
+	/**
+	 * @dataProvider provider_unmapped_unchanged_documents
+	 */
+	public function test_generated_unmapped_documents_preserve_original_when_unchanged( $case_name, $markdown ) {
+		$document = MarkdownSourceDocument::from_markdown( $markdown );
+
+		$this->assertSame( $markdown, $document->patch_markdown( $document->get_block_markup() ), $case_name );
+	}
+
+	public static function provider_unmapped_unchanged_documents() {
+		// Documents keyed by case label; every dataset repeats its label as the assertion message.
+		$documents = array(
+			'task list before' => "- [x] Checked item\n- [ ] Open item\n\nParagraph to edit.\n",
+			'task list after' => "Paragraph to edit.\n\n- [x] Checked item\n- [ ] Open item\n",
+			'duplicate reference definitions' => "A [link][same].\n\n[same]: https://example.com/a\n\nParagraph to edit.\n\nAnother [link][same].\n\n[same]: https://example.com/b\n",
+			'html followed by markdown without blank' => "<div>\nRaw HTML\n</div>\nParagraph to edit.\n",
+			'unclosed html block' => "<div>\n\nParagraph to edit.\n",
+			'mixed task list document' => "Before __bold__.\n\n- [x] Checked item\n- [ ] Open item\n\nParagraph to edit.\n\nAfter _emphasis_.\n",
+			'task list nested in quote' => "> - [x] Quoted checked item\n> - [ ] Quoted open item\n\nParagraph to edit.\n",
+			'raw markdown inside html' => "<section>\n# Not a Markdown heading here\n</section>\n\nParagraph to edit.\n",
+		);
+		$labels = array_keys( $documents );
+		$pack   = static function ( $label, $markdown ) {
+			return array( $label, $markdown );
+		};
+
+		return array_combine( $labels, array_map( $pack, $labels, $documents ) );
+	}
+
+	public static function provider_large_document_cases() {
+		// Ten documents, each with eight snippets on either side of the edited paragraph.
+		$pool  = array_values( self::source_snippets() );
+		$size  = count( $pool );
+		$cases = array();
+
+		foreach ( range( 0, 9 ) as $number ) {
+			$label           = 'large mixed document ' . $number;
+			$cases[ $label ] = array( $label, array(), array() );
+
+			// Strides of 3 (before) and 5 (after) walk the snippet pool in different orders.
+			foreach ( range( 0, 7 ) as $step ) {
+				$cases[ $label ][1][] = $pool[ ( $number + $step * 3 ) % $size ];
+				$cases[ $label ][2][] = $pool[ ( $number * 2 + $step * 5 + 1 ) % $size ];
+			}
+		}
+
+		return $cases;
+	}
+
+	public function test_reordered_blocks_preserve_any_unchanged_source_units_the_matcher_can_keep() {
+		$markdown = <<<MD
+First __bold__.
+
+Second _emphasis_.
+
+MD;
+		$document = MarkdownSourceDocument::from_markdown( $markdown );
+		$blocks = array_values(
+			array_filter(
+				parse_blocks( $document->get_block_markup() ),
+				function ( $block ) {
+					return isset( $block['blockName'] ) && null !== $block['blockName'];
+				}
+			)
+		);
+		$edited_blocks = serialize_block( $blocks[1] ) . "\n\n" . serialize_block( $blocks[0] );
+		$patched = $document->patch_markdown( $edited_blocks );
+
+		$this->assertStringContainsString( 'Second _emphasis_.', $patched );
+		$this->assertStringContainsString( 'First **bold**.', $patched );
+	}
+
+	public function test_unmapped_documents_still_preserve_original_source_when_blocks_are_unchanged() {
+		$markdown = <<<MD
+Paragraph before unsupported markup.
+
+<div>
+
+Nested Markdown paragraph.
+
+</div>
+
+Paragraph after unsupported markup.
+
+MD;
+		$document = MarkdownSourceDocument::from_markdown( $markdown );
+
+		$this->assertSame( $markdown, $document->patch_markdown( $document->get_block_markup() ) );
+	}
+
+	private function replace_first( $search, $replace, $subject ) {
+		// Swaps only the first occurrence of $search; the assertion fails the test when it is absent.
+		$offset = strpos( $subject, $search );
+		$this->assertNotFalse( $offset );
+		return substr_replace( $subject, $replace, $offset, strlen( $search ) );
+	}
+
+	private static function source_snippets() {
+		return array(
+			'paragraph inline emphasis variants' => "Paragraph with __bold__, _emphasis_, and `inline code`.\n\n",
+			'paragraph escaped punctuation' => "Escaped \\*literal asterisks\\* and \\[brackets\\].\n\n",
+			'paragraph hard break' => "Line with a hard break  \nthen the next line.\n\n",
+			'paragraph raw inline html' => "Inline <span data-x=\"1\">HTML</span> with __markdown__.\n\n",
+			'reference link paragraph' => "A [reference link][docs] with __bold__ text.\n\n[docs]: https://developer.wordpress.org \"Docs\"\n\n",
+			'reference image paragraph' => "A reference image ![logo][logo] stays indirect.\n\n[logo]: https://example.com/logo.png \"Logo\"\n\n",
+			'atx h1 closing marker' => "# Heading one #\n\n",
+			'atx h4 no closing marker' => "#### Heading four with _style_\n\n",
+			'setext h1 heading' => "Setext primary heading\n======================\n\n",
+			'setext h2 heading' => "Setext secondary heading\n------------------------\n\n",
+			'fenced code backticks' => "````php\necho `nested`;\n```\n````\n\n",
+			'fenced code tildes' => "~~~js\nconst value = \"__keep__\";\n~~~\n\n",
+			'indented code block' => "    const keep = \"_syntax_\";\n    console.log(keep);\n\n",
+			'unordered star list' => "* First star item\n* Second star item\n\n",
+			'unordered plus list' => "+ First plus item\n+ Second plus item\n\n",
+			'ordered list offset' => "7.  First ordered item\n8.  Second ordered item\n\n",
+			'nested unordered list' => "- Parent item\n  - Child item A\n  - Child item B\n- Sibling item\n\n",
+			'blockquote paragraph' => "> Quote with __bold__ and _emphasis_.\n\n",
+			'blockquote nested list' => "> Quote intro.\n>\n> - First\n> - Second\n\n",
+			'blockquote nested quote' => "> Outer quote\n>\n> > Inner quote with `code`\n\n",
+			'table aligned columns' => "| Feature | State |\n| :------ | ----: |\n| One     |    ok |\n\n",
+			'table escaped pipes' => "| Name | Value |\n| ---- | ----- |\n| Pipe | a \\| b |\n\n",
+			'thematic break underscores' => "___\n\n",
+			'thematic break stars' => "***\n\n",
+			'html comment block' => "<!-- keep this source comment -->\n\n",
+			'raw html section' => "<section data-state=\"raw\">\n\t<strong>Keep raw HTML formatting.</strong>\n</section>\n\n",
+			'raw html table' => "<table>\n<tr><td>Raw table</td></tr>\n</table>\n\n",
+		);
+	}
+}
diff --git a/components/Markdown/class-markdownsourcedocument.php b/components/Markdown/class-markdownsourcedocument.php
new file mode 100644
index 000000000..c7ec9c600
--- /dev/null
+++ b/components/Markdown/class-markdownsourcedocument.php
@@ -0,0 +1,571 @@
+<?php
+
+namespace WordPress\Markdown;
+
+use League\CommonMark\Environment\Environment;
+use League\CommonMark\Extension\CommonMark\CommonMarkCoreExtension;
+use League\CommonMark\Extension\GithubFlavoredMarkdownExtension;
+use League\CommonMark\Node\Block\AbstractBlock;
+use League\CommonMark\Parser\MarkdownParser;
+use WordPress\DataLiberation\DataFormatConsumer\BlocksWithMetadata;
+
+/**
+ * Tracks a Markdown document's original source while exposing editable block markup.
+ *
+ * This class is meant for editor save flows where Markdown is converted to
+ * WordPress blocks, edited, and converted back to Markdown. A normal
+ * block-to-Markdown conversion may choose canonical Markdown syntax for the
+ * whole document. This class instead records source slices for top-level
+ * Markdown blocks and reuses those exact bytes for blocks that did not change.
+ *
+ * When source blocks cannot be mapped one-to-one to WordPress blocks, the
+ * document falls back to a single source unit. That fallback still preserves
+ * the original Markdown byte-for-byte when the edited block markup is
+ * semantically unchanged.
+ */
+class MarkdownSourceDocument {
+
+	private $markdown;
+	private $block_markup;
+	private $metadata;
+	private $prefix;
+	private $units;
+
+	/**
+	 * Stores precomputed document state. Private: use from_markdown() to build one.
+	 *
+	 * @param string               $markdown     The original Markdown source.
+	 * @param string               $block_markup The block markup produced from the source.
+	 * @param array                $metadata     Metadata extracted while consuming Markdown.
+	 * @param string               $prefix       Source bytes before the first mapped unit.
+	 * @param MarkdownSourceUnit[] $units        Source units mapped to block markup.
+	 */
+	private function __construct( $markdown, $block_markup, $metadata, $prefix, $units ) {
+		$this->markdown     = $markdown;
+		$this->block_markup = $block_markup;
+		$this->metadata     = $metadata;
+		$this->prefix       = $prefix;
+		$this->units        = $units;
+	}
+
+	/**
+	 * Creates a source-aware document from Markdown source.
+	 *
+	 * The source is parsed twice: once by MarkdownConsumer to obtain WordPress
+	 * block markup, and once by CommonMark to obtain top-level block source
+	 * positions. When both views contain the same number of top-level blocks
+	 * and every block start line resolves to a strictly increasing byte offset,
+	 * each block becomes a MarkdownSourceUnit. Otherwise the full document is
+	 * kept as one conservative fallback unit.
+	 *
+	 * @param string $markdown The Markdown source to parse.
+	 * @return self Source-aware document containing block markup and source units.
+	 */
+	public static function from_markdown( $markdown ) {
+		$markdown = (string) $markdown;
+		$consumer = new MarkdownConsumer( $markdown );
+		$blocks_with_metadata = $consumer->consume();
+		$block_markup = $blocks_with_metadata->get_block_markup();
+		$metadata = $blocks_with_metadata->get_all_metadata();
+		$blocks = self::named_blocks( parse_blocks( $block_markup ) );
+		$source_blocks = self::source_blocks( $markdown );
+		$line_offsets = self::line_offsets( $markdown );
+		$source_line_offset = self::frontmatter_line_offset( $markdown );
+		$length = strlen( $markdown );
+		$count = count( $source_blocks );
+
+		// Some Markdown constructs do not map one-to-one to named WordPress
+		// blocks, so differing block counts rule out per-block units.
+		$mappable = count( $blocks ) === $count;
+
+		// Resolve each block's start line to a byte offset. A line missing from
+		// the offset table, or an offset that does not move forward, means the
+		// line numbers cannot be trusted for slicing.
+		$starts = array();
+		for ( $index = 0; $mappable && $index < $count; $index++ ) {
+			$line = $source_line_offset + $source_blocks[ $index ]->getStartLine() - 1;
+			$start = isset( $line_offsets[ $line ] ) ? $line_offsets[ $line ] : -1;
+			$mappable = $start > ( 0 === $index ? -1 : $starts[ $index - 1 ] );
+			$starts[] = $start;
+		}
+
+		// Preserve the whole source for unchanged saves when mapping failed.
+		if ( ! $mappable ) {
+			return new self(
+				$markdown,
+				$block_markup,
+				$metadata,
+				'',
+				array(
+					new MarkdownSourceUnit( $markdown, 0, $length, $block_markup, self::semantic_hash_for_markup( $block_markup ) ),
+				)
+			);
+		}
+
+		// Sentinel: the last unit runs to the end of the document.
+		$starts[] = $length;
+		$units = array();
+		for ( $index = 0; $index < $count; $index++ ) {
+			$units[] = new MarkdownSourceUnit(
+				substr( $markdown, $starts[ $index ], $starts[ $index + 1 ] - $starts[ $index ] ),
+				$starts[ $index ],
+				$starts[ $index + 1 ],
+				serialize_block( $blocks[ $index ] ),
+				self::semantic_hash_for_block( $blocks[ $index ] )
+			);
+		}
+
+		// Bytes before the first block, such as frontmatter, form the prefix.
+		$prefix = substr( $markdown, 0, $starts[0] );
+		return new self( $markdown, $block_markup, $metadata, $prefix, $units );
+	}
+
+	/**
+	 * Returns the block markup stored when this document was created.
+	 *
+	 * @return string Block markup generated from the original Markdown.
+	 */
+	public function get_block_markup() {
+		return $this->block_markup;
+	}
+
+	/**
+	 * Returns the metadata captured while consuming the Markdown document.
+	 *
+	 * @return array Metadata exactly as it was passed to the constructor.
+	 */
+	public function get_all_metadata() {
+		return $this->metadata;
+	}
+
+	/**
+	 * Returns the source units that patch_markdown() splices together.
+	 *
+	 * @return MarkdownSourceUnit[] Source units in document order.
+	 */
+	public function get_source_units() {
+		return $this->units;
+	}
+
+	/**
+	 * Applies edited block markup to the original Markdown source.
+	 *
+	 * Unchanged blocks are matched by semantic hash and copied from the original
+	 * Markdown source. Changed and inserted blocks are serialized with
+	 * MarkdownProducer. For changed blocks, surrounding line-oriented trivia is
+	 * reused from the replaced source unit so CRLF separators, blank lines, and
+	 * missing final newlines are not normalized.
+	 *
+	 * @param string $edited_block_markup The edited WordPress block markup.
+	 * @return string Patched Markdown source.
+	 */
+	public function patch_markdown( $edited_block_markup ) {
+		// Cast once so the fast paths and the block parser see the same string.
+		$edited_block_markup = (string) $edited_block_markup;
+
+		// Byte-identical markup cannot contain an edit.
+		if ( $edited_block_markup === $this->block_markup ) {
+			return $this->markdown;
+		}
+
+		// A single unit may be the whole-document fallback, whose hash covers
+		// the full block list rather than one block.
+		if ( 1 === count( $this->units ) && $this->units[0]->get_semantic_hash() === self::semantic_hash_for_markup( $edited_block_markup ) ) {
+			return $this->markdown;
+		}
+
+		// Diff the edited blocks against the source units by semantic hash.
+		$edited_blocks = self::named_blocks( parse_blocks( $edited_block_markup ) );
+		$edited_hashes = array_map( array( __CLASS__, 'semantic_hash_for_block' ), $edited_blocks );
+		$original_hashes = array();
+		foreach ( $this->units as $unit ) {
+			$original_hashes[] = $unit->get_semantic_hash();
+		}
+
+		$matches = self::longest_common_subsequence( $original_hashes, $edited_hashes );
+
+		$markdown = $this->prefix;
+		$original_index = 0;
+		$edited_index = 0;
+		foreach ( $matches as $match ) {
+			// Emit whatever changed before this match, then the untouched source.
+			$markdown .= $this->markdown_for_changed_blocks( $edited_blocks, $edited_index, $match['edited'], $original_index, $match['original'] );
+			$markdown .= $this->units[ $match['original'] ]->get_source();
+			$original_index = $match['original'] + 1;
+			$edited_index = $match['edited'] + 1;
+		}
+
+		// Blocks after the last match.
+		$markdown .= $this->markdown_for_changed_blocks( $edited_blocks, $edited_index, count( $edited_blocks ), $original_index, count( $this->units ) );
+
+		return $markdown;
+	}
+
+	/**
+	 * Returns the Markdown source this document was created from.
+	 *
+	 * @return string Original Markdown source; patch_markdown() never modifies it.
+	 */
+	public function get_original_markdown() {
+		return $this->markdown;
+	}
+
+	/**
+	 * Serializes edited blocks that appear between two unchanged matches.
+	 *
+	 * Replacements borrow trivia from the original unit at the same gap position.
+	 * Insertions are preceded by any line breaks still missing for a blank line.
+	 *
+	 * @param array[] $edited_blocks  Edited block objects from parse_blocks().
+	 * @param int     $edited_start   First edited block index to serialize.
+	 * @param int     $edited_end     One past the last edited block index.
+	 * @param int     $original_start First original source unit index in the gap.
+	 * @param int     $original_end   One past the last original source unit index.
+	 * @return string Markdown for changed or inserted blocks.
+	 */
+	private function markdown_for_changed_blocks( array $edited_blocks, $edited_start, $edited_end, $original_start, $original_end ) {
+		$markdown = '';
+		// patch_markdown() emits the prefix or the preceding matched unit right before this gap.
+		$before = $original_start > 0 ? $this->units[ $original_start - 1 ]->get_source() : $this->prefix;
+		for ( $edited_index = $edited_start; $edited_index < $edited_end; $edited_index++ ) {
+			$original_index = $original_start + $edited_index - $edited_start;
+			$block_markdown = self::markdown_for_block( $edited_blocks[ $edited_index ] );
+			if ( $original_index < $original_end && isset( $this->units[ $original_index ] ) ) {
+				$unit = $this->units[ $original_index ];
+				$markdown .= $unit->get_leading_trivia() . self::with_trailing_trivia( $block_markdown, $unit->get_trailing_trivia() );
+				continue;
+			}
+			// Without a blank line in front, an inserted block would merge into the text before it.
+			$context = '' !== $markdown ? $markdown : $before;
+			$breaks = (int) preg_match_all( '/\r\n|\n|\r/', substr( $context, strlen( rtrim( $context, "\r\n" ) ) ) );
+			$eol = false !== strpos( $context, "\r\n" ) ? "\r\n" : "\n";
+			$markdown .= ( '' === $context ? '' : str_repeat( $eol, max( 0, 2 - $breaks ) ) ) . $block_markdown;
+		}
+		return $markdown;
+	}
+
+	/**
+	 * Replaces the trailing line endings of serialized Markdown with source trivia.
+	 *
+	 * @param string $markdown        Serialized Markdown for a changed block.
+	 * @param string $trailing_trivia Original trailing trivia to append in their place.
+	 * @return string Serialized Markdown ending with the original trailing trivia.
+	 */
+	private static function with_trailing_trivia( $markdown, $trailing_trivia ) {
+		return self::trim_trailing_line_endings( $markdown ) . $trailing_trivia;
+	}
+
+	/**
+	 * Strips every CR and LF byte from the end of a Markdown fragment.
+	 *
+	 * Only line-ending bytes are removed. Trailing spaces and tabs stay in
+	 * place, and so do line endings that are followed by any other byte.
+	 * Empty input is returned unchanged.
+	 *
+	 * @param string $text Markdown text.
+	 * @return string Markdown text without trailing line endings.
+	 */
+	private static function trim_trailing_line_endings( $text ) {
+		// Explicit character list: the default rtrim() set would also strip
+		// spaces, tabs, NUL bytes, and vertical tabs.
+		$line_ending_bytes = "\r\n";
+
+		return rtrim( $text, $line_ending_bytes );
+	}
+
+	/**
+	 * Converts one parsed WordPress block to Markdown.
+	 *
+	 * The block is re-serialized to block markup and handed to MarkdownProducer
+	 * together with an empty metadata array.
+	 *
+	 * @param array $block Parsed block object.
+	 * @return string Markdown representation of the block.
+	 */
+	private static function markdown_for_block( array $block ) {
+		$single_block = new BlocksWithMetadata( serialize_block( $block ), array() );
+		$producer = new MarkdownProducer( $single_block );
+
+		return $producer->produce();
+	}
+
+	/**
+	 * Parses Markdown with CommonMark core, GFM, and frontmatter extensions and lists its top-level blocks.
+	 *
+	 * @param string $markdown Markdown source.
+	 * @return AbstractBlock[] Top-level CommonMark blocks.
+	 */
+	private static function source_blocks( $markdown ) {
+		$extensions = array(
+			new CommonMarkCoreExtension(),
+			new GithubFlavoredMarkdownExtension(),
+			new \Webuni\FrontMatter\Markdown\FrontMatterLeagueCommonMarkExtension( new \Webuni\FrontMatter\FrontMatter() ),
+		);
+		$environment = new Environment( array() );
+		foreach ( $extensions as $extension ) {
+			$environment->addExtension( $extension );
+		}
+
+		$parser = new MarkdownParser( $environment );
+		$document = $parser->parse( (string) $markdown );
+		$top_level = array();
+		foreach ( $document->children() as $node ) {
+			if ( $node instanceof AbstractBlock ) {
+				$top_level[] = $node;
+			}
+		}
+		return $top_level;
+	}
+
+	/**
+	 * Filters a parsed block list down to blocks that carry a block name.
+	 *
+	 * @param array[] $blocks Parsed block objects.
+	 * @return array[] Named WordPress block objects, re-indexed from zero.
+	 */
+	private static function named_blocks( array $blocks ) {
+		$has_name = static function ( $candidate ) {
+			// isset() is false for a missing key and for a null value alike.
+			return isset( $candidate['blockName'] );
+		};
+
+		// array_filter() keeps the original keys; array_values() renumbers them.
+		return array_values( array_filter( $blocks, $has_name ) );
+	}
+
+	/**
+	 * Returns byte offsets for the start of each source line.
+	 *
+	 * LF, CRLF, and a lone CR each end a line, like lines_with_endings() does.
+	 *
+	 * @param string $text Source text.
+	 * @return int[] Byte offsets, starting with 0.
+	 */
+	private static function line_offsets( $text ) {
+		$offsets = array( 0 );
+		preg_match_all( '/\r\n|\n|\r/', $text, $endings, PREG_OFFSET_CAPTURE );
+		foreach ( $endings[0] as $ending ) {
+			$offsets[] = $ending[1] + strlen( $ending[0] );
+		}
+		return $offsets;
+	}
+
+	/**
+	 * Counts the frontmatter lines that precede the Markdown body.
+	 *
+	 * CommonMark source positions are relative to the Markdown body when the
+	 * frontmatter extension consumes metadata. This offset maps those line
+	 * numbers back to byte offsets in the original source.
+	 *
+	 * Frontmatter is recognized when the first line is exactly `---` or `+++`
+	 * and a later line repeats that fence; an unclosed fence counts as none.
+	 *
+	 * @param string $markdown Markdown source.
+	 * @return int Number of leading frontmatter lines.
+	 */
+	private static function frontmatter_line_offset( $markdown ) {
+		$lines = self::lines_with_endings( $markdown );
+		$fence = count( $lines ) > 0 ? self::trim_line_ending( $lines[0] ) : '';
+		if ( '---' !== $fence && '+++' !== $fence ) {
+			return 0;
+		}
+
+		// Keep the original keys so a match's key is its zero-based line index.
+		foreach ( array_slice( $lines, 1, null, true ) as $index => $line ) {
+			if ( self::trim_line_ending( $line ) === $fence ) {
+				return $index + 1;
+			}
+		}
+
+		return 0;
+	}
+
+	/**
+	 * Cuts text into lines, keeping every line ending attached to its line.
+	 *
+	 * LF, CRLF, and a lone CR each terminate a line. A final line without a
+	 * line ending is returned as well, so joining the result reproduces the
+	 * input. Empty input yields an empty array.
+	 *
+	 * @param string $text Source text.
+	 * @return string[] Lines, each including its original line ending.
+	 */
+	private static function lines_with_endings( $text ) {
+		$lines = array();
+		$length = strlen( $text );
+		$cursor = 0;
+
+		while ( $cursor < $length ) {
+			// Jump to the next CR or LF, or to the end of the text.
+			$end = $cursor + strcspn( $text, "\r\n", $cursor );
+			if ( $end < $length ) {
+				// Consume the terminator; CRLF counts as one line ending.
+				$end += "\r\n" === substr( $text, $end, 2 ) ? 2 : 1;
+			}
+			$lines[] = substr( $text, $cursor, $end - $cursor );
+			$cursor = $end;
+		}
+
+		return $lines;
+	}
+
+	/**
+	 * Strips the CR and LF bytes that end a source line.
+	 *
+	 * Every trailing CR or LF byte is removed, whatever the combination.
+	 * Other trailing whitespace, such as spaces and tabs, is preserved, and
+	 * a line without a line ending is returned unchanged.
+	 *
+	 * @param string $line Source line.
+	 * @return string Line without its trailing line ending.
+	 */
+	private static function trim_line_ending( $line ) {
+		// An explicit character list keeps rtrim() from also removing the
+		// spaces and tabs that its default set would strip.
+		$line_ending_bytes = "\r\n";
+
+		return rtrim( $line, $line_ending_bytes );
+	}
+
+	/**
+	 * Returns a semantic hash for block markup.
+	 *
+	 * @param string $block_markup WordPress block markup.
+	 * @return string Hash of the canonical blocks; serialize() is hashed when JSON encoding fails.
+	 */
+	private static function semantic_hash_for_markup( $block_markup ) {
+		$canonical = self::canonical_blocks( self::named_blocks( parse_blocks( $block_markup ) ) );
+		$encoded = json_encode( $canonical );
+		return hash( 'sha256', false === $encoded ? serialize( $canonical ) : $encoded );
+	}
+
+	/**
+	 * Returns a semantic hash for one block.
+	 *
+	 * @param array $block Parsed block object.
+	 * @return string Hash of the canonical block; serialize() is hashed when JSON encoding fails.
+	 */
+	private static function semantic_hash_for_block( array $block ) {
+		$canonical = self::canonical_block( $block );
+		$encoded = json_encode( $canonical );
+		return hash( 'sha256', false === $encoded ? serialize( $canonical ) : $encoded );
+	}
+
+	/**
+	 * Returns canonical representations for a list of blocks.
+	 *
+	 * @param array[] $blocks Parsed block objects.
+	 * @return array[] Canonical block structures, indexed from zero.
+	 */
+	private static function canonical_blocks( array $blocks ) {
+		return array_map( array( __CLASS__, 'canonical_block' ), array_values( $blocks ) );
+	}
+
+	/**
+	 * Reduces a parsed block to the fields that define its meaning.
+	 *
+	 * Attribute keys are sorted recursively so equivalent blocks match even
+	 * when their attributes were serialized in a different order.
+	 *
+	 * @param array $block Parsed block object.
+	 * @return array Canonical block structure.
+	 */
+	private static function canonical_block( array $block ) {
+		$attrs = array();
+		if ( isset( $block['attrs'] ) && is_array( $block['attrs'] ) ) {
+			$attrs = $block['attrs'];
+			self::sort_recursive( $attrs );
+		}
+		$has_children = isset( $block['innerBlocks'] ) && is_array( $block['innerBlocks'] );
+		return array(
+			'blockName' => isset( $block['blockName'] ) ? $block['blockName'] : null,
+			'attrs' => $attrs,
+			'innerHTML' => isset( $block['innerHTML'] ) ? $block['innerHTML'] : '',
+			'innerBlocks' => $has_children ? self::canonical_blocks( $block['innerBlocks'] ) : array(),
+		);
+	}
+
+	/**
+	 * Key-sorts associative arrays at every depth; lists keep their order.
+	 *
+	 * Nested arrays are normalized before their parent. Non-array values are
+	 * left untouched.
+	 *
+	 * @param mixed $value Value to normalize in place.
+	 */
+	private static function sort_recursive( &$value ) {
+		if ( is_array( $value ) ) {
+			// Address children by key so no loop reference is left dangling.
+			foreach ( array_keys( $value ) as $key ) {
+				self::sort_recursive( $value[ $key ] );
+			}
+			if ( self::is_associative_array( $value ) ) {
+				ksort( $value );
+			}
+		}
+	}
+
+	/**
+	 * Indicates whether an array's keys differ from the list sequence 0..n-1.
+	 *
+	 * @param array $value Array to inspect.
+	 * @return bool True for associative arrays, false for lists and empty arrays.
+	 */
+	private static function is_associative_array( array $value ) {
+		if ( array() === $value ) {
+			return false;
+		}
+
+		// A list's keys are exactly 0..n-1, in that order.
+		$list_keys = range( 0, count( $value ) - 1 );
+
+		return array_keys( $value ) !== $list_keys;
+	}
+
+	/**
+	 * Pairs up the items that two hash sequences share, in order.
+	 *
+	 * Builds a suffix-length table, then walks it to list index pairs in order.
+	 *
+	 * @param string[] $left  Original semantic hashes.
+	 * @param string[] $right Edited semantic hashes.
+	 * @return array[] Matches with original and edited indexes.
+	 */
+	private static function longest_common_subsequence( array $left, array $right ) {
+		$rows = count( $left );
+		$columns = count( $right );
+		// $table[ $r ][ $c ] holds the LCS length of the suffixes starting at $r and $c.
+		$table = array_fill( 0, $rows + 1, array_fill( 0, $columns + 1, 0 ) );
+		for ( $r = $rows - 1; $r >= 0; $r-- ) {
+			for ( $c = $columns - 1; $c >= 0; $c-- ) {
+				$table[ $r ][ $c ] = $left[ $r ] === $right[ $c ]
+					? $table[ $r + 1 ][ $c + 1 ] + 1
+					: max( $table[ $r + 1 ][ $c ], $table[ $r ][ $c + 1 ] );
+			}
+		}
+
+		// Walk forward from the start, pairing equal items. On a mismatch, step
+		// toward the longer remaining subsequence; ties skip the original item.
+		$pairs = array();
+		$r = 0;
+		$c = 0;
+		while ( $r < $rows && $c < $columns ) {
+			if ( $left[ $r ] !== $right[ $c ] ) {
+				if ( $table[ $r + 1 ][ $c ] >= $table[ $r ][ $c + 1 ] ) {
+					$r++;
+				} else {
+					$c++;
+				}
+				continue;
+			}
+			$pairs[] = array(
+				'original' => $r,
+				'edited' => $c,
+			);
+			$r++;
+			$c++;
+		}
+
+		return $pairs;
+	}
+}
diff --git a/components/Markdown/class-markdownsourceunit.php b/components/Markdown/class-markdownsourceunit.php
new file mode 100644
index 000000000..10f1daf28
--- /dev/null
+++ b/components/Markdown/class-markdownsourceunit.php
@@ -0,0 +1,203 @@
+<?php
+
+namespace WordPress\Markdown;
+
+/**
+ * Represents one Markdown source slice mapped to one WordPress block.
+ *
+ * Source units are the splice points used by MarkdownSourceDocument. Each unit
+ * stores the original Markdown bytes, their byte offsets in the full document,
+ * the corresponding block markup, and a semantic hash used to recognize
+ * unchanged blocks after editing.
+ */
+class MarkdownSourceUnit {
+
+	private $source;
+	private $start_offset;
+	private $end_offset;
+	private $block_markup;
+	private $semantic_hash;
+
+	/**
+	 * Creates a mapped Markdown source unit, casting each argument to its stored type.
+	 *
+	 * @param string $source        Original Markdown source slice.
+	 * @param int    $start_offset  Start byte offset in the full Markdown document.
+	 * @param int    $end_offset    End byte offset in the full Markdown document.
+	 * @param string $block_markup  WordPress block markup generated from the source slice.
+	 * @param string $semantic_hash Hash used to compare this unit with edited blocks.
+	 */
+	public function __construct( $source, $start_offset, $end_offset, $block_markup, $semantic_hash ) {
+		$this->source        = (string) $source;
+		$this->start_offset  = (int) $start_offset;
+		$this->end_offset    = (int) $end_offset;
+		$this->block_markup  = (string) $block_markup;
+		$this->semantic_hash = (string) $semantic_hash;
+	}
+
+	/**
+	 * Returns the original Markdown source slice, trivia included.
+	 *
+	 * @return string Original Markdown bytes for this unit, exactly as constructed.
+	 */
+	public function get_source() {
+		return $this->source;
+	}
+
+	/**
+	 * Returns whitespace before the first non-whitespace byte in this unit.
+	 *
+	 * This is reused when a source unit is replaced by changed block markup, so
+	 * indentation or blank-line trivia before the edited block is not lost.
+	 * Whitespace means the ASCII bytes space, tab, LF, CR, vertical tab, and
+	 * form feed, regardless of the active locale.
+	 *
+	 * NOTE(review): the result can include the first content line's indentation;
+	 * confirm that is wanted when the replacement is not an indented block.
+	 *
+	 * @return string Leading whitespace from the source slice.
+	 */
+	public function get_leading_trivia() {
+		$whitespace_length = strspn( $this->source, " \t\n\r\v\f" );
+
+		return substr( $this->source, 0, $whitespace_length );
+	}
+
+	/**
+	 * Returns the line ending and blank lines that close this unit.
+	 *
+	 * The returned trivia includes the final content line's line ending plus any
+	 * following blank lines. This allows changed blocks to preserve LF/CRLF
+	 * separators and the absence of a final newline.
+	 *
+	 * For example, "Text\r\n\r\n" yields "\r\n\r\n" and "Text" yields "". A unit
+	 * made up of blank lines only is returned in full.
+	 *
+	 * @return string Trailing line ending and blank-line trivia.
+	 */
+	public function get_trailing_trivia() {
+		$remaining = self::lines_with_endings( $this->source );
+		$blank_tail = '';
+
+		// Peel lines off the end until one carries content.
+		while ( count( $remaining ) > 0 ) {
+			$line = array_pop( $remaining );
+			if ( ! self::is_blank( self::trim_line_ending( $line ) ) ) {
+				return self::line_ending( $line ) . $blank_tail;
+			}
+			$blank_tail = $line . $blank_tail;
+		}
+
+		return $blank_tail;
+	}
+
+	/**
+	 * Returns the byte offset at which this unit starts in the original document.
+	 *
+	 * @return int Start byte offset, as given to the constructor.
+	 */
+	public function get_start_offset() {
+		return $this->start_offset;
+	}
+
+	/**
+	 * Returns the byte offset at which this unit ends in the original document.
+	 *
+	 * @return int End byte offset, as given to the constructor.
+	 */
+	public function get_end_offset() {
+		return $this->end_offset;
+	}
+
+	/**
+	 * Returns the WordPress block markup associated with this unit.
+	 *
+	 * @return string Block markup for this source unit, as given to the constructor.
+	 */
+	public function get_block_markup() {
+		return $this->block_markup;
+	}
+
+	/**
+	 * Returns the hash that identifies this unit's block content after edits.
+	 *
+	 * @return string Semantic block hash, as given to the constructor.
+	 */
+	public function get_semantic_hash() {
+		return $this->semantic_hash;
+	}
+
+	/**
+	 * Cuts text into lines, keeping every line ending attached to its line.
+	 *
+	 * LF, CRLF, and a lone CR each terminate a line. A final line without a
+	 * line ending is returned as well, so joining the result reproduces the
+	 * input. Empty input yields an empty array.
+	 *
+	 * @param string $text Source text.
+	 * @return string[] Lines, each including its original line ending.
+	 */
+	private static function lines_with_endings( $text ) {
+		$lines = array();
+		$length = strlen( $text );
+		$cursor = 0;
+
+		while ( $cursor < $length ) {
+			// Jump to the next CR or LF, or to the end of the text.
+			$end = $cursor + strcspn( $text, "\r\n", $cursor );
+			if ( $end < $length ) {
+				// Consume the terminator; CRLF counts as one line ending.
+				$end += "\r\n" === substr( $text, $end, 2 ) ? 2 : 1;
+			}
+			$lines[] = substr( $text, $cursor, $end - $cursor );
+			$cursor = $end;
+		}
+
+		return $lines;
+	}
+
+	/**
+	 * Strips the CR and LF bytes that end a source line.
+	 *
+	 * Every trailing CR or LF byte is removed, whatever the combination.
+	 * Other trailing whitespace, such as spaces and tabs, is preserved, and
+	 * a line without a line ending is returned unchanged.
+	 *
+	 * @param string $line Source line.
+	 * @return string Line without its trailing line ending.
+	 */
+	private static function trim_line_ending( $line ) {
+		// An explicit character list keeps rtrim() from also removing the
+		// spaces and tabs that its default set would strip.
+		$line_ending_bytes = "\r\n";
+
+		return rtrim( $line, $line_ending_bytes );
+	}
+
+	/**
+	 * Returns the run of CR/LF bytes that terminates a source line.
+	 *
+	 * @param string $line Source line.
+	 * @return string Line ending, or an empty string when none exists.
+	 */
+	private static function line_ending( $line ) {
+		$content_length = strlen( self::trim_line_ending( $line ) );
+		return substr( $line, $content_length );
+	}
+
+	/**
+	 * Indicates whether text contains only whitespace bytes.
+	 *
+	 * Whitespace means the ASCII bytes space, tab, LF, CR, vertical tab, and
+	 * form feed, regardless of the active locale. Empty text counts as blank.
+	 *
+	 * @param string $text Text to inspect.
+	 * @return bool True when the text is blank, false otherwise.
+	 */
+	private static function is_blank( $text ) {
+		// strspn() measures the leading run of whitespace; blank text is all run.
+		$whitespace_length = strspn( $text, " \t\n\r\v\f" );
+
+		return strlen( $text ) === $whitespace_length;
+	}
+}
diff --git a/components/Markdown/vendor-patched/league/commonmark/src/Extension/CommonMark/Parser/Inline/BacktickParser.php b/components/Markdown/vendor-patched/league/commonmark/src/Extension/CommonMark/Parser/Inline/BacktickParser.php
index 5f8040fdd..38aa34573 100644
--- a/components/Markdown/vendor-patched/league/commonmark/src/Extension/CommonMark/Parser/Inline/BacktickParser.php
+++ b/components/Markdown/vendor-patched/league/commonmark/src/Extension/CommonMark/Parser/Inline/BacktickParser.php
@@ -32,7 +32,7 @@ final class BacktickParser implements InlineParserInterface
      */
     private const MAX_BACKTICKS = 1000;
 
-    /** @var \WeakReference<Cursor>|null */
+    /** @var \WeakReference<Cursor>|Cursor|null */
     private $lastCursor;
     /**
      * @var bool
@@ -98,9 +98,9 @@ public function parse(InlineParserContext $inlineContext): bool
     private function findMatchingTicks(int $openTickLength, Cursor $cursor): bool
     {
         // Reset the seenBackticks cache if this is a new cursor
-        if ($this->lastCursor === null || $this->lastCursor->get() !== $cursor) {
+        if (! $this->isSameCursor($cursor)) {
             $this->seenBackticks     = [];
-            $this->lastCursor        = \WeakReference::create($cursor);
+            $this->lastCursor        = $this->createCursorReference($cursor);
             $this->lastCursorScanned = false;
         }
 
@@ -132,4 +132,29 @@ private function findMatchingTicks(int $openTickLength, Cursor $cursor): bool
 
         return false;
     }
+
+    /**
+     * Wraps the cursor in a WeakReference when the runtime provides that class.
+     *
+     * Without WeakReference the cursor itself is returned and held strongly.
+     *
+     * @return \WeakReference<Cursor>|Cursor
+     */
+    private function createCursorReference(Cursor $cursor)
+    {
+        return \class_exists('WeakReference') ? \WeakReference::create($cursor) : $cursor;
+    }
+
+    /**
+     * Tells whether the remembered cursor is this exact Cursor instance.
+     */
+    private function isSameCursor(Cursor $cursor): bool
+    {
+        if ($this->lastCursor === null) {
+            return false;
+        }
+        // A WeakReference must be unwrapped; the fallback stores the cursor itself.
+        $remembered = $this->lastCursor instanceof Cursor ? $this->lastCursor : $this->lastCursor->get();
+        return $remembered === $cursor;
+    }
 }
diff --git a/components/Markdown/vendor-patched/league/commonmark/src/Util/LinkParserHelper.php b/components/Markdown/vendor-patched/league/commonmark/src/Util/LinkParserHelper.php
index 656942462..917a7c6b0 100644
--- a/components/Markdown/vendor-patched/league/commonmark/src/Util/LinkParserHelper.php
+++ b/components/Markdown/vendor-patched/league/commonmark/src/Util/LinkParserHelper.php
@@ -131,7 +131,7 @@ private static function manuallyParseLinkDestination(Cursor $cursor): ?string
         return $destination;
     }
 
-    /** @var \WeakReference<Cursor>|null */
+    /** @var \WeakReference<Cursor>|Cursor|null */
     private static $lastCursor;
     /**
      * @var bool
@@ -144,12 +144,12 @@ private static function parseDestinationBraces(Cursor $cursor): ?string
         // that no closing brace exists, so we can skip the regex entirely. This helps avoid
         // certain pathological cases where the regex engine can take a very long time to
         // determine that no match exists.
-        if (self::$lastCursor !== null && self::$lastCursor->get() === $cursor) {
+        if (self::isSameCursor($cursor)) {
             if (self::$lastCursorLacksClosingBrace) {
                 return null;
             }
         } else {
-            self::$lastCursor = \WeakReference::create($cursor);
+            self::$lastCursor = self::createCursorReference($cursor);
         }
 
         if ($res = $cursor->match(RegexHelper::REGEX_LINK_DESTINATION_BRACES)) {
@@ -165,4 +165,29 @@ private static function parseDestinationBraces(Cursor $cursor): ?string
 
         return null;
     }
+
+    /**
+     * Wraps the cursor in a WeakReference when the runtime provides that class.
+     *
+     * Without WeakReference the cursor itself is returned and held strongly.
+     *
+     * @return \WeakReference<Cursor>|Cursor
+     */
+    private static function createCursorReference(Cursor $cursor)
+    {
+        return \class_exists('WeakReference') ? \WeakReference::create($cursor) : $cursor;
+    }
+
+    /**
+     * Tells whether the remembered cursor is this exact Cursor instance.
+     */
+    private static function isSameCursor(Cursor $cursor): bool
+    {
+        if (self::$lastCursor === null) {
+            return false;
+        }
+        // A WeakReference must be unwrapped; the fallback stores the cursor itself.
+        $remembered = self::$lastCursor instanceof Cursor ? self::$lastCursor : self::$lastCursor->get();
+        return $remembered === $cursor;
+    }
 }