diff --git a/crates/usfm3-wasm/src/lib.rs b/crates/usfm3-wasm/src/lib.rs index e7b2ffa..406e13a 100644 --- a/crates/usfm3-wasm/src/lib.rs +++ b/crates/usfm3-wasm/src/lib.rs @@ -53,6 +53,9 @@ pub enum DiagnosticCode { LeadingZeros, EmptyWordMarker, MissingMilestoneSelfClose, + MisplacedMetadataMarker, + DuplicateMetadataMarker, + NonPlainMetadataContent, } /// A diagnostic message with source location. @@ -244,5 +247,8 @@ fn convert_code(c: usfm3_lib::diagnostics::DiagnosticCode) -> DiagnosticCode { DC::LeadingZeros => DiagnosticCode::LeadingZeros, DC::EmptyWordMarker => DiagnosticCode::EmptyWordMarker, DC::MissingMilestoneSelfClose => DiagnosticCode::MissingMilestoneSelfClose, + DC::MisplacedMetadataMarker => DiagnosticCode::MisplacedMetadataMarker, + DC::DuplicateMetadataMarker => DiagnosticCode::DuplicateMetadataMarker, + DC::NonPlainMetadataContent => DiagnosticCode::NonPlainMetadataContent, } } diff --git a/crates/usfm3/src/builder.rs b/crates/usfm3/src/builder.rs index 3c2d999..4e3e802 100644 --- a/crates/usfm3/src/builder.rs +++ b/crates/usfm3/src/builder.rs @@ -7,6 +7,7 @@ use crate::ast::{Attribute, Document, Node, Span}; use crate::diagnostics::{Diagnostic, DiagnosticList}; use crate::lexer::{self, Token}; use crate::markers::{self, MarkerKind}; +use crate::metadata::{MetadataMarker, MetadataWindow}; // --------------------------------------------------------------------------- // Public API @@ -78,6 +79,9 @@ struct TreeBuilder { // Whitespace after va/vp/ca/cp metadata consumption is structural (skip). consumed_metadata: bool, + // Canonical metadata attachment window for chapter/verse metadata. + metadata_window: Option, + // Set after a closing marker (\em*, \+nd*, \f*, \*) is processed. // Whitespace and newlines after close markers are deferred via // pending_close_space — emitted only when followed by text. @@ -108,6 +112,7 @@ impl TreeBuilder { after_open_marker: false, pending_newline: false, consumed_metadata: false, + metadata_window: None, after_close_marker: false, pending_close_space: false, pending_milestone_close: None, @@ -115,6 +120,9 @@ impl TreeBuilder { } fn handle_token(&mut self, token: Token, span: Span) { + if self.should_close_verse_metadata_window(&token) { + self.metadata_window = None; + } // Clear after_open_marker for any non-Whitespace, non-Newline token. // Newlines right after an opening marker are structural (not content). if !matches!(token, Token::Whitespace(_) | Token::Newline) { @@ -369,6 +377,7 @@ impl TreeBuilder { self.force_close_notes(); self.close_paragraph(&span); + self.metadata_window = None; self.pending_chapter = Some(span); } @@ -385,6 +394,7 @@ impl TreeBuilder { .push(Diagnostic::verse_outside_paragraph(span.clone())); self.push_open("p".to_string(), MarkerKind::Paragraph, span.clone()); } + self.metadata_window = None; self.pending_verse = Some(span); } @@ -657,6 +667,7 @@ impl TreeBuilder { self.append_node(node); if !rest.is_empty() { self.append_text_raw(rest); + self.metadata_window = None; } else { self.after_open_marker = true; } @@ -838,6 +849,9 @@ impl TreeBuilder { } else { collapsed.into_owned() }; + let closes_verse_metadata_window = self.metadata_window == Some(MetadataWindow::Verse) + && self.open_metadata_marker().is_none() + && !final_text.trim().is_empty(); // Split at `//` (optional line break) and interleave OptBreak nodes. if final_text.contains("//") { @@ -853,6 +867,10 @@ impl TreeBuilder { } else { self.append_text_fragment(&final_text); } + + if closes_verse_metadata_window { + self.metadata_window = None; + } } /// Append a text fragment, merging with previous text node if possible. @@ -874,73 +892,101 @@ impl TreeBuilder { // Alt/pub number helpers // ----------------------------------------------------------------- - fn set_last_chapter_altnumber(&mut self, value: String) { + fn set_last_chapter_altnumber(&mut self, value: String) -> bool { for node in self.root_children.iter_mut().rev() { if let Node::Chapter { altnumber, .. } = node { - *altnumber = Some(value); - return; + if altnumber.is_none() { + *altnumber = Some(value); + return true; + } + return false; } } for open in self.stack.iter_mut().rev() { for node in open.children.iter_mut().rev() { if let Node::Chapter { altnumber, .. } = node { - *altnumber = Some(value); - return; + if altnumber.is_none() { + *altnumber = Some(value); + return true; + } + return false; } } } + false } - fn set_last_chapter_pubnumber(&mut self, value: String) { + fn set_last_chapter_pubnumber(&mut self, value: String) -> bool { for node in self.root_children.iter_mut().rev() { if let Node::Chapter { pubnumber, .. } = node { - *pubnumber = Some(value); - return; + if pubnumber.is_none() { + *pubnumber = Some(value); + return true; + } + return false; } } for open in self.stack.iter_mut().rev() { for node in open.children.iter_mut().rev() { if let Node::Chapter { pubnumber, .. } = node { - *pubnumber = Some(value); - return; + if pubnumber.is_none() { + *pubnumber = Some(value); + return true; + } + return false; } } } + false } - fn set_last_verse_altnumber(&mut self, value: String) { + fn set_last_verse_altnumber(&mut self, value: String) -> bool { // Verse is typically inside a paragraph (stack), check there first. for open in self.stack.iter_mut().rev() { for node in open.children.iter_mut().rev() { if let Node::Verse { altnumber, .. } = node { - *altnumber = Some(value); - return; + if altnumber.is_none() { + *altnumber = Some(value); + return true; + } + return false; } } } for node in self.root_children.iter_mut().rev() { if let Node::Verse { altnumber, .. } = node { - *altnumber = Some(value); - return; + if altnumber.is_none() { + *altnumber = Some(value); + return true; + } + return false; } } + false } - fn set_last_verse_pubnumber(&mut self, value: String) { + fn set_last_verse_pubnumber(&mut self, value: String) -> bool { for open in self.stack.iter_mut().rev() { for node in open.children.iter_mut().rev() { if let Node::Verse { pubnumber, .. } = node { - *pubnumber = Some(value); - return; + if pubnumber.is_none() { + *pubnumber = Some(value); + return true; + } + return false; } } } for node in self.root_children.iter_mut().rev() { if let Node::Verse { pubnumber, .. } = node { - *pubnumber = Some(value); - return; + if pubnumber.is_none() { + *pubnumber = Some(value); + return true; + } + return false; } } + false } // ----------------------------------------------------------------- @@ -966,55 +1012,9 @@ impl TreeBuilder { /// Special handling: when appending a `TableRow` node, wrap it in a `Table` /// container (or append to an existing one) so consecutive rows are grouped. fn append_node(&mut self, node: Node) { - // Smart finalization: when a \ca/\cp/\va/\vp node contains only - // plain text, extract the text and set altnumber/pubnumber on the - // nearest Chapter/Verse instead of appending the node. - // If it contains nested markers (complex content), keep it as-is. - { - let maybe_marker = match &node { - Node::Char { marker, .. } | Node::Para { marker, .. } => Some(marker.as_str()), - _ => None, - }; - if let Some(m) = maybe_marker - && matches!(m, "ca" | "cp" | "va" | "vp") - && let Some(text) = extract_plain_text(node.children()) - { - // Remove preceding whitespace-only text node (the gap - // after the previous closing marker, e.g. `\va*`). - let children = if let Some(top) = self.stack.last_mut() { - &mut top.children - } else { - &mut self.root_children - }; - if let Some(Node::Text(t)) = children.last() - && t.trim().is_empty() - { - children.pop(); - } - match m { - "ca" => { - self.set_last_chapter_altnumber(text); - self.consumed_metadata = true; - return; - } - "cp" => { - self.set_last_chapter_pubnumber(text); - self.consumed_metadata = true; - return; - } - "va" => { - self.set_last_verse_altnumber(text); - self.consumed_metadata = true; - return; - } - "vp" => { - self.set_last_verse_pubnumber(text); - self.consumed_metadata = true; - return; - } - _ => unreachable!(), - } - } + if self.try_attach_metadata(&node) { + self.consumed_metadata = true; + return; } let children = if let Some(top) = self.stack.last_mut() { @@ -1038,6 +1038,12 @@ impl TreeBuilder { } children.push(node); + + match children.last() { + Some(Node::Chapter { .. }) => self.metadata_window = Some(MetadataWindow::Chapter), + Some(Node::Verse { .. }) => self.metadata_window = Some(MetadataWindow::Verse), + _ => {} + } } /// Inside a note, close character markers on top of the stack until we @@ -1242,6 +1248,72 @@ impl TreeBuilder { self.stack.iter().any(|o| o.kind == MarkerKind::Paragraph) } + fn open_metadata_marker(&self) -> Option { + self.stack + .iter() + .rev() + .find_map(|open| MetadataMarker::from_marker(open.marker.as_str())) + } + + fn should_close_verse_metadata_window(&self, token: &Token) -> bool { + if self.metadata_window != Some(MetadataWindow::Verse) + || self.open_metadata_marker().is_some() + { + return false; + } + + match token { + Token::Whitespace(_) | Token::Newline | Token::Attributes(_) => false, + Token::Marker(m) | Token::NestedMarker(m) => { + MetadataMarker::from_marker(lexer::strip_marker_backslash(m)).is_none() + } + Token::ClosingMarker(m) | Token::NestedClosingMarker(m) => { + MetadataMarker::from_marker(lexer::strip_closing_star(m)).is_none() + } + Token::Text(text) => !text.trim().is_empty(), + Token::Chapter | Token::Verse | Token::Milestone(_) | Token::MilestoneEnd => true, + } + } + + fn try_attach_metadata(&mut self, node: &Node) -> bool { + let marker = match node { + Node::Char { marker, .. } | Node::Para { marker, .. } => marker, + _ => return false, + }; + let Some(metadata_marker) = MetadataMarker::from_marker(marker) else { + return false; + }; + let Some(window) = self.metadata_window else { + return false; + }; + if !metadata_marker.binds_in(window) { + return false; + } + let Some(text) = extract_plain_text(node.children()) else { + return false; + }; + + // Remove preceding whitespace-only text node (the gap after the + // previous closing marker, e.g. `\va*`). + let children = if let Some(top) = self.stack.last_mut() { + &mut top.children + } else { + &mut self.root_children + }; + if let Some(Node::Text(t)) = children.last() + && t.trim().is_empty() + { + children.pop(); + } + + match metadata_marker { + MetadataMarker::Ca => self.set_last_chapter_altnumber(text), + MetadataMarker::Cp => self.set_last_chapter_pubnumber(text), + MetadataMarker::Va => self.set_last_verse_altnumber(text), + MetadataMarker::Vp => self.set_last_verse_pubnumber(text), + } + } + /// Close Character, Unknown, and Meta markers on top of the stack, /// stopping at a Paragraph (or any other block-level) boundary. /// Used when `\rem` nests inside a paragraph without closing it. @@ -1964,6 +2036,128 @@ mod tests { assert_eq!(verse, Some(("2".into(), Some("GEN 1:2".into())))); } + #[test] + fn test_chapter_metadata_attaches_in_pre_verse_window() { + let result = parse("\\id ESG\n\\c 1\n\\cp A\n\\p\n\\v 1 text"); + + let chapter = result.document.content.iter().find_map(|node| match node { + Node::Chapter { pubnumber, .. } => Some(pubnumber.clone()), + _ => None, + }); + assert_eq!(chapter, Some(Some("A".into()))); + + let has_literal_cp = result + .document + .content + .iter() + .any(|node| matches!(node, Node::Para { marker, .. } if marker == "cp")); + assert!( + !has_literal_cp, + "canonical chapter metadata should not remain as a literal \\cp node" + ); + } + + #[test] + fn test_misplaced_vp_is_preserved_literal() { + let result = parse("\\id ESG\n\\c 1\n\\p\n\\v 1 text \\vp 1b\\vp*"); + + let verse_pubnumber = result.document.content.iter().find_map(|node| { + if let Node::Para { content, .. } = node { + content.iter().find_map(|child| { + if let Node::Verse { pubnumber, .. } = child { + Some(pubnumber.clone()) + } else { + None + } + }) + } else { + None + } + }); + assert_eq!(verse_pubnumber, Some(None)); + + let has_literal_vp = result.document.content.iter().any(|node| { + if let Node::Para { content, .. } = node { + content + .iter() + .any(|child| matches!(child, Node::Char { marker, .. } if marker == "vp")) + } else { + false + } + }); + assert!( + has_literal_vp, + "misplaced \\vp should stay in the AST as a literal character node" + ); + } + + #[test] + fn test_duplicate_va_does_not_overwrite_first_value() { + let result = parse("\\id PSA\n\\c 54\n\\p\n\\v 1 \\va 3\\va* \\va 4\\va* text"); + + let (verse_altnumber, literal_va_count) = result + .document + .content + .iter() + .find_map(|node| { + if let Node::Para { content, .. } = node { + let altnumber = content.iter().find_map(|child| { + if let Node::Verse { altnumber, .. } = child { + Some(altnumber.clone()) + } else { + None + } + }); + let literal_va_count = content + .iter() + .filter( + |child| matches!(child, Node::Char { marker, .. } if marker == "va"), + ) + .count(); + Some((altnumber, literal_va_count)) + } else { + None + } + }) + .expect("expected paragraph content"); + + assert_eq!(verse_altnumber, Some(Some("3".into()))); + assert_eq!( + literal_va_count, 1, + "the duplicate \\va should remain literal instead of overwriting the first one" + ); + } + + #[test] + fn test_pre_verse_va_is_preserved_literal() { + let result = parse("\\id PSA\n\\c 54\n\\d \\va 1\\va* A Psalm\n\\q1\n\\v 1 text"); + + let d_para_has_literal_va = result.document.content.iter().any(|node| { + matches!( + node, + Node::Para { marker, content, .. } + if marker == "d" + && content.iter().any(|child| matches!(child, Node::Char { marker, .. } if marker == "va")) + ) + }); + assert!(d_para_has_literal_va); + + let verse_altnumber = result.document.content.iter().find_map(|node| { + if let Node::Para { content, .. } = node { + content.iter().find_map(|child| { + if let Node::Verse { altnumber, .. } = child { + Some(altnumber.clone()) + } else { + None + } + }) + } else { + None + } + }); + assert_eq!(verse_altnumber, Some(None)); + } + #[test] fn test_book_code_extraction() { let result = parse("\\id MAT Gospel of Matthew"); diff --git a/crates/usfm3/src/diagnostics.rs b/crates/usfm3/src/diagnostics.rs index ca4c38f..b365439 100644 --- a/crates/usfm3/src/diagnostics.rs +++ b/crates/usfm3/src/diagnostics.rs @@ -64,6 +64,9 @@ pub enum DiagnosticCode { LeadingZeros, EmptyWordMarker, MissingMilestoneSelfClose, + MisplacedMetadataMarker, + DuplicateMetadataMarker, + NonPlainMetadataContent, } /// A diagnostic message with source location. @@ -420,6 +423,36 @@ impl Diagnostic { code: DiagnosticCode::MissingMilestoneSelfClose, } } + + /// A chapter/verse metadata marker appears in an invalid location. + pub fn misplaced_metadata_marker(marker: &str, span: Span) -> Self { + Diagnostic { + severity: Severity::Error, + span, + message: format!("\\{marker} is not valid in this location"), + code: DiagnosticCode::MisplacedMetadataMarker, + } + } + + /// A chapter/verse metadata marker duplicates metadata already set for the current target. + pub fn duplicate_metadata_marker(marker: &str, span: Span) -> Self { + Diagnostic { + severity: Severity::Error, + span, + message: format!("duplicate \\{marker} for the current chapter or verse"), + code: DiagnosticCode::DuplicateMetadataMarker, + } + } + + /// A chapter/verse metadata marker contains nested or non-plain content. + pub fn non_plain_metadata_content(marker: &str, span: Span) -> Self { + Diagnostic { + severity: Severity::Error, + span, + message: format!("\\{marker} metadata must contain only plain text"), + code: DiagnosticCode::NonPlainMetadataContent, + } + } } /// A collection of diagnostics produced during parsing and validation. @@ -674,6 +707,33 @@ mod tests { assert!(d.message.contains("\\p")); } + #[test] + fn test_misplaced_metadata_marker() { + let d = Diagnostic::misplaced_metadata_marker("vp", 20..25); + assert_eq!(d.severity, Severity::Error); + assert_eq!(d.code, DiagnosticCode::MisplacedMetadataMarker); + assert_eq!(d.span, 20..25); + assert!(d.message.contains("\\vp")); + } + + #[test] + fn test_duplicate_metadata_marker() { + let d = Diagnostic::duplicate_metadata_marker("ca", 20..25); + assert_eq!(d.severity, Severity::Error); + assert_eq!(d.code, DiagnosticCode::DuplicateMetadataMarker); + assert_eq!(d.span, 20..25); + assert!(d.message.contains("\\ca")); + } + + #[test] + fn test_non_plain_metadata_content() { + let d = Diagnostic::non_plain_metadata_content("va", 20..25); + assert_eq!(d.severity, Severity::Error); + assert_eq!(d.code, DiagnosticCode::NonPlainMetadataContent); + assert_eq!(d.span, 20..25); + assert!(d.message.contains("\\va")); + } + // ── Display tests ─────────────────────────────────────────────────── #[test] diff --git a/crates/usfm3/src/lib.rs b/crates/usfm3/src/lib.rs index b0173d7..25390cd 100644 --- a/crates/usfm3/src/lib.rs +++ b/crates/usfm3/src/lib.rs @@ -3,6 +3,7 @@ pub mod builder; pub mod diagnostics; pub mod lexer; pub mod markers; +mod metadata; pub mod usfm; pub mod usj; pub mod usx; diff --git a/crates/usfm3/src/metadata.rs b/crates/usfm3/src/metadata.rs new file mode 100644 index 0000000..30f29ad --- /dev/null +++ b/crates/usfm3/src/metadata.rs @@ -0,0 +1,59 @@ +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum MetadataTarget { + Chapter, + Verse, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum MetadataWindow { + Chapter, + Verse, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum MetadataMarker { + Ca, + Cp, + Va, + Vp, +} + +impl MetadataMarker { + pub(crate) fn from_marker(marker: &str) -> Option { + match marker { + "ca" => Some(Self::Ca), + "cp" => Some(Self::Cp), + "va" => Some(Self::Va), + "vp" => Some(Self::Vp), + _ => None, + } + } + + pub(crate) fn as_str(self) -> &'static str { + match self { + Self::Ca => "ca", + Self::Cp => "cp", + Self::Va => "va", + Self::Vp => "vp", + } + } + + pub(crate) fn target(self) -> MetadataTarget { + match self { + Self::Ca | Self::Cp => MetadataTarget::Chapter, + Self::Va | Self::Vp => MetadataTarget::Verse, + } + } + + pub(crate) fn binds_in(self, window: MetadataWindow) -> bool { + matches!( + (self, window), + (Self::Ca | Self::Cp, MetadataWindow::Chapter) + | (Self::Va | Self::Vp, MetadataWindow::Verse) + ) + } + + pub(crate) fn allows_literal_inline(self) -> bool { + matches!(self, Self::Va) + } +} diff --git a/crates/usfm3/src/validation.rs b/crates/usfm3/src/validation.rs index 7ba1c3a..d13050e 100644 --- a/crates/usfm3/src/validation.rs +++ b/crates/usfm3/src/validation.rs @@ -9,6 +9,7 @@ use std::collections::{HashMap, HashSet}; use crate::ast::{Document, Node, Span}; use crate::diagnostics::{Diagnostic, DiagnosticList}; use crate::markers::{self, MarkerKind}; +use crate::metadata::{MetadataMarker, MetadataTarget}; // ── Public entry point ────────────────────────────────────────────────────── @@ -96,6 +97,7 @@ impl<'a> Validator<'a> { self.check_body_paragraph_before_chapter(doc); self.check_non_empty_blank_line(doc); self.check_empty_word_marker(doc); + self.check_metadata_markers(doc); } // ── 1. \id must be the first marker ───────────────────────────────── @@ -542,6 +544,133 @@ impl<'a> Validator<'a> { } } + // ── 16. Metadata placement / duplicates ─────────────────────────── + + fn check_metadata_markers(&mut self, doc: &Document) { + let mut state = MetadataValidationState::default(); + self.walk_metadata_nodes(&doc.content, &mut state); + } + + fn walk_metadata_nodes(&mut self, nodes: &[Node], state: &mut MetadataValidationState) { + for node in nodes { + self.walk_metadata_node(node, state); + } + } + + fn walk_metadata_node(&mut self, node: &Node, state: &mut MetadataValidationState) { + if let Some(marker) = node.marker().and_then(MetadataMarker::from_marker) { + self.check_metadata_node(node, marker, state); + return; + } + + match node { + Node::Book { content, .. } => self.walk_metadata_nodes(content, state), + Node::Chapter { + altnumber, + pubnumber, + .. + } => { + state.chapter_window_open = true; + state.verse_window_open = false; + state.before_first_verse_in_chapter = true; + state.current_chapter = MetadataFieldsPresent { + altnumber: altnumber.is_some(), + pubnumber: pubnumber.is_some(), + }; + state.current_verse = MetadataFieldsPresent::default(); + } + Node::Verse { + altnumber, + pubnumber, + .. + } => { + state.chapter_window_open = false; + state.verse_window_open = true; + state.before_first_verse_in_chapter = false; + state.current_verse = MetadataFieldsPresent { + altnumber: altnumber.is_some(), + pubnumber: pubnumber.is_some(), + }; + } + Node::Para { + marker, content, .. + } => { + state.verse_window_open = false; + let previous = state.current_para_marker.replace(marker.clone()); + self.walk_metadata_nodes(content, state); + state.current_para_marker = previous; + } + Node::Char { content, .. } + | Node::Note { content, .. } + | Node::Figure { content, .. } + | Node::Sidebar { content, .. } + | Node::Periph { content, .. } + | Node::Table { content, .. } + | Node::TableRow { content, .. } + | Node::TableCell { content, .. } + | Node::Ref { content, .. } + | Node::Unknown { content, .. } => { + state.verse_window_open = false; + self.walk_metadata_nodes(content, state); + } + Node::Text(text) => { + if !text.trim().is_empty() { + state.verse_window_open = false; + } + } + Node::Milestone { .. } | Node::OptBreak => { + state.verse_window_open = false; + } + } + } + + fn check_metadata_node( + &mut self, + node: &Node, + marker: MetadataMarker, + state: &mut MetadataValidationState, + ) { + let span = node.span().cloned().unwrap_or(0..0); + if self.metadata_literal_exception_allowed(marker, state) { + self.walk_metadata_nodes(node.children(), state); + return; + } + + let plain = extract_plain_text(node.children()).is_some(); + let in_canonical_window = match marker.target() { + MetadataTarget::Chapter => state.chapter_window_open, + MetadataTarget::Verse => state.verse_window_open, + }; + + if !in_canonical_window { + self.diagnostics + .push(Diagnostic::misplaced_metadata_marker(marker.as_str(), span)); + } else if !plain { + self.diagnostics + .push(Diagnostic::non_plain_metadata_content( + marker.as_str(), + span, + )); + } else if state.is_duplicate(marker) { + self.diagnostics + .push(Diagnostic::duplicate_metadata_marker(marker.as_str(), span)); + } else { + state.mark_seen(marker); + } + + self.walk_metadata_nodes(node.children(), state); + } + + fn metadata_literal_exception_allowed( + &self, + marker: MetadataMarker, + state: &MetadataValidationState, + ) -> bool { + marker.allows_literal_inline() + && state.before_first_verse_in_chapter + && state.current_para_marker.as_deref() == Some("d") + } + // ── 14. Body paragraph before first chapter ────────────────────── fn check_body_paragraph_before_chapter(&mut self, doc: &Document) { @@ -590,12 +719,69 @@ fn is_introduction_marker(marker: &str) -> bool { marker.starts_with('i') } +#[derive(Debug, Clone, Copy, Default)] +struct MetadataFieldsPresent { + altnumber: bool, + pubnumber: bool, +} + +#[derive(Debug, Default)] +struct MetadataValidationState { + chapter_window_open: bool, + verse_window_open: bool, + before_first_verse_in_chapter: bool, + current_para_marker: Option, + current_chapter: MetadataFieldsPresent, + current_verse: MetadataFieldsPresent, +} + +impl MetadataValidationState { + fn is_duplicate(&self, marker: MetadataMarker) -> bool { + let fields = match marker.target() { + MetadataTarget::Chapter => self.current_chapter, + MetadataTarget::Verse => self.current_verse, + }; + match marker { + MetadataMarker::Ca | MetadataMarker::Va => fields.altnumber, + MetadataMarker::Cp | MetadataMarker::Vp => fields.pubnumber, + } + } + + fn mark_seen(&mut self, marker: MetadataMarker) { + let fields = match marker.target() { + MetadataTarget::Chapter => &mut self.current_chapter, + MetadataTarget::Verse => &mut self.current_verse, + }; + match marker { + MetadataMarker::Ca | MetadataMarker::Va => fields.altnumber = true, + MetadataMarker::Cp | MetadataMarker::Vp => fields.pubnumber = true, + } + } +} + +fn extract_plain_text(content: &[Node]) -> Option { + let mut text = String::new(); + for node in content { + match node { + Node::Text(s) => text.push_str(s), + _ => return None, + } + } + let trimmed = text.trim().to_string(); + if trimmed.is_empty() { + None + } else { + Some(trimmed) + } +} + // ── Tests ─────────────────────────────────────────────────────────────────── #[cfg(test)] mod tests { use super::*; use crate::ast::*; + use crate::builder::parse; use crate::diagnostics::DiagnosticCode; fn doc_with(nodes: Vec) -> Document { @@ -1469,4 +1655,65 @@ mod tests { let diags = validate(&doc); assert!(diags.is_empty()); } + + #[test] + fn test_valid_canonical_metadata_has_no_metadata_diagnostics() { + let result = parse("\\id ESG\n\\c 1\n\\cp A\n\\p\n\\v 1 \\va 2\\va* \\vp 1a\\vp* text"); + let diags = validate(&result.document); + assert!( + !diags.iter().any(|d| { + matches!( + d.code, + DiagnosticCode::MisplacedMetadataMarker + | DiagnosticCode::DuplicateMetadataMarker + | DiagnosticCode::NonPlainMetadataContent + ) + }), + "canonical metadata should validate cleanly" + ); + } + + #[test] + fn test_misplaced_metadata_marker_detected() { + let result = parse("\\id ESG\n\\c 1\n\\p\n\\v 1 text \\vp 1b\\vp*"); + let diags = validate(&result.document); + assert!(diags.iter().any( + |d| d.code == DiagnosticCode::MisplacedMetadataMarker && d.message.contains("\\vp") + )); + } + + #[test] + fn test_duplicate_metadata_marker_detected() { + let result = parse("\\id PSA\n\\c 54\n\\p\n\\v 1 \\va 3\\va* \\va 4\\va* text"); + let diags = validate(&result.document); + assert!(diags.iter().any( + |d| d.code == DiagnosticCode::DuplicateMetadataMarker && d.message.contains("\\va") + )); + } + + #[test] + fn test_non_plain_metadata_content_detected() { + let result = parse("\\id ESG\n\\c 1\n\\p\n\\v 1 \\vp \\em 1b\\em*\\vp* text"); + let diags = validate(&result.document); + assert!(diags.iter().any( + |d| d.code == DiagnosticCode::NonPlainMetadataContent && d.message.contains("\\vp") + )); + } + + #[test] + fn test_psalm_title_va_exception_emits_no_metadata_diagnostic() { + let result = parse("\\id PSA\n\\c 54\n\\d \\va 1\\va* A poem by David\n\\q1\n\\v 1 text"); + let diags = validate(&result.document); + assert!( + !diags.iter().any(|d| { + matches!( + d.code, + DiagnosticCode::MisplacedMetadataMarker + | DiagnosticCode::DuplicateMetadataMarker + | DiagnosticCode::NonPlainMetadataContent + ) + }), + "documented pre-verse \\va usage in \\d should validate cleanly" + ); + } }