diff --git a/build.rs b/build.rs index 80286bd..09432a4 100644 --- a/build.rs +++ b/build.rs @@ -2,8 +2,13 @@ use std::fmt::Write; use std::iter::Peekable; use std::path::Path; +use self::shared::ModifierSet; + type StrResult = Result; +#[path = "src/shared.rs"] +mod shared; + /// A module of definitions. struct Module<'a>(Vec<(&'a str, Binding<'a>)>); @@ -29,7 +34,7 @@ enum Def<'a> { /// A symbol, either a leaf or with modifiers. enum Symbol<'a> { Single(char), - Multi(Vec<(&'a str, char)>), + Multi(Vec<(ModifierSet<&'a str>, char)>), } /// A single line during parsing. @@ -40,7 +45,7 @@ enum Line<'a> { ModuleStart(&'a str), ModuleEnd, Symbol(&'a str, Option), - Variant(&'a str, char), + Variant(ModifierSet<&'a str>, char), } fn main() { @@ -79,7 +84,7 @@ fn process(buf: &mut String, file: &Path, name: &str, desc: &str) { write!(buf, "#[doc = {desc:?}] pub const {name}: Module = ").unwrap(); encode(buf, &module); - buf.push_str(";"); + buf.push(';'); } /// Tokenizes and classifies a line. @@ -110,7 +115,7 @@ fn tokenize(line: &str) -> StrResult { validate_ident(part)?; } let c = decode_char(tail.ok_or("missing char")?)?; - Line::Variant(rest, c) + Line::Variant(ModifierSet::from_raw_dotted(rest), c) } else { validate_ident(head)?; let c = tail.map(decode_char).transpose()?; @@ -165,9 +170,9 @@ fn parse<'a>( p.next(); } - let symbol = if variants.len() > 0 { + let symbol = if !variants.is_empty() { if let Some(c) = c { - variants.insert(0, ("", c)); + variants.insert(0, (ModifierSet::default(), c)); } Symbol::Multi(variants) } else { @@ -204,7 +209,7 @@ fn encode(buf: &mut String, module: &Module) { Def::Module(module) => { buf.push_str("Def::Module("); encode(buf, module); - buf.push_str(")"); + buf.push(')'); } Def::Symbol(symbol) => { buf.push_str("Def::Symbol(Symbol::"); @@ -212,7 +217,7 @@ fn encode(buf: &mut String, module: &Module) { Symbol::Single(c) => write!(buf, "Single({c:?})").unwrap(), Symbol::Multi(list) => write!(buf, "Multi(&{list:?})").unwrap(), } - buf.push_str(")"); + buf.push(')'); } } write!(buf, ", deprecation: {:?} }}),", entry.deprecation).unwrap(); diff --git a/src/lib.rs b/src/lib.rs index ae64ee1..abb8596 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,15 @@ -/*! -Human-friendly notation for Unicode symbols. -*/ +//! Human-friendly notation for Unicode symbols. +//! +//! ## Model +//! A [`Symbol`] is a collection of one or more _variants_. Each variant is +//! identified by a set of [_modifiers_](ModifierSet) and has a single character +//! as its value. The modifiers themselves can in principle be any non-empty +//! strings that don't contain the character `.`, but codex only defines ones +//! that are entirely made of ASCII alphabetical characters. + +pub use self::shared::ModifierSet; + +mod shared; /// A module of definitions. #[derive(Debug, Copy, Clone)] @@ -52,7 +61,41 @@ pub enum Symbol { /// A symbol without modifiers. Single(char), /// A symbol with named modifiers. The symbol defaults to its first variant. - Multi(&'static [(&'static str, char)]), + Multi(&'static [(ModifierSet<&'static str>, char)]), +} + +impl Symbol { + /// Get the symbol's character for a given set of modifiers. + pub fn get(&self, modifs: ModifierSet<&str>) -> Option { + match self { + Self::Single(c) => modifs.is_empty().then_some(*c), + Self::Multi(list) => modifs.best_match_in(list.iter().copied()), + } + } + + /// The characters that are covered by this symbol. + pub fn variants(&self) -> impl Iterator, char)> { + enum Variants { + Single(std::iter::Once), + Multi(std::slice::Iter<'static, (ModifierSet<&'static str>, char)>), + } + let mut iter = match self { + Self::Single(c) => Variants::Single(std::iter::once(*c)), + Self::Multi(sl) => Variants::Multi(sl.iter()), + }; + std::iter::from_fn(move || match &mut iter { + Variants::Single(iter) => Some((ModifierSet::default(), iter.next()?)), + Variants::Multi(iter) => iter.next().copied(), + }) + } + + /// Possible modifiers for this symbol. + pub fn modifiers(&self) -> impl Iterator + '_ { + self.variants() + .flat_map(|(m, _)| m.into_iter()) + .collect::>() + .into_iter() + } } /// A module that contains the other top-level modules. diff --git a/src/shared.rs b/src/shared.rs new file mode 100644 index 0000000..cc9286e --- /dev/null +++ b/src/shared.rs @@ -0,0 +1,230 @@ +use std::ops::Deref; + +/// A set of modifiers. +/// +/// Beware: The [`Eq`] and [`Hash`] implementations are dependent on the +/// ordering of the modifiers, in opposition to what a set would usually +/// constitute. To test for set-wise equality, use [`iter`](Self::iter) and +/// collect into a true set type like [`HashSet`](std::collections::HashSet). +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct ModifierSet( + // Note: the visibility needs to be `pub(crate)`, since build.rs outputs + // `ModifierSet(...)`. + pub(crate) S, +); + +impl> ModifierSet { + /// Constructs a modifier set from a string, where modifiers are separated + /// by the character `.`. + /// + /// `s` should not contain any empty modifiers (i.e. it shouldn't contain + /// the sequence `..`) and no modifier should occur twice. Otherwise, + /// unexpected errors can occur. + pub fn from_raw_dotted(s: S) -> Self { + // Checking the other requirement too feels like it would be a bit too + // expensive, even for debug mode. + debug_assert!( + !s.contains(".."), + "ModifierSet::from_dotted called with string containing empty modifier" + ); + Self(s) + } + + /// Whether `self` is empty. + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// Gets the string of modifiers separated by `.`. + pub fn as_str(&self) -> &str { + &self.0 + } + + /// Converts the underlying string to a slice. + pub fn as_deref(&self) -> ModifierSet<&str> { + ModifierSet(&self.0) + } + + /// Inserts a new modifier into the set. + /// + /// `m` should not be empty, contain the character `.`, or already be in the + /// set. Otherwise, unexpected errors can occur. + pub fn insert_raw(&mut self, m: &str) + where + S: for<'a> std::ops::AddAssign<&'a str>, + { + if !self.0.is_empty() { + self.0 += "."; + } + self.0 += m; + } + + /// Iterates over the list of modifiers in an arbitrary order. + pub fn iter(&self) -> impl Iterator { + self.into_iter() + } + + /// Whether the set contains the modifier `m`. + pub fn contains(&self, m: &str) -> bool { + self.iter().any(|lhs| lhs == m) + } + + /// Finds the best match from the list. + /// + /// To be considered a match, the modifier set must be a superset of (or + /// equal to) `self`. Among different matches, the best one is selected by + /// the following two criteria (in order): + /// 1. Number of modifiers in common with `self` (more is better). + /// 2. Total number of modifiers (fewer is better). + /// + /// If there are multiple best matches, the first of them is returned. + pub fn best_match_in<'a, T>( + &self, + variants: impl Iterator, T)>, + ) -> Option { + let mut best = None; + let mut best_score = None; + + // Find the best table entry with this name. + for candidate in variants.filter(|(set, _)| self.is_subset(*set)) { + let mut matching = 0; + let mut total = 0; + for modifier in candidate.0.iter() { + if self.contains(modifier) { + matching += 1; + } + total += 1; + } + + let score = (matching, std::cmp::Reverse(total)); + if best_score.is_none_or(|b| score > b) { + best = Some(candidate.1); + best_score = Some(score); + } + } + + best + } + + /// Whether all modifiers in `self` are also present in `other`. + pub fn is_subset(&self, other: ModifierSet<&str>) -> bool { + self.iter().all(|m| other.contains(m)) + } +} + +impl Default for ModifierSet { + /// Constructs the default modifier set. + /// + /// This is typically the empty set, though the remark from + /// [`Self::from_raw_dotted`] applies since `S::default()` could technically + /// be anything. + fn default() -> Self { + Self(S::default()) + } +} + +impl<'a, S: Deref> IntoIterator for &'a ModifierSet { + type Item = &'a str; + type IntoIter = std::str::Split<'a, char>; + + /// Iterate over the list of modifiers in an arbitrary order. + fn into_iter(self) -> Self::IntoIter { + let mut iter = self.0.split('.'); + if self.0.is_empty() { + // Empty the iterator + let _ = iter.next(); + } + iter + } +} + +impl<'a> IntoIterator for ModifierSet<&'a str> { + type Item = &'a str; + type IntoIter = std::str::Split<'a, char>; + + /// Iterate over the list of modifiers in an arbitrary order. + fn into_iter(self) -> Self::IntoIter { + let mut iter = self.0.split('.'); + if self.0.is_empty() { + // Empty the iterator + let _ = iter.next(); + } + iter + } +} + +#[cfg(test)] +mod tests { + type ModifierSet = super::ModifierSet<&'static str>; + + #[test] + fn default_is_empty() { + assert!(ModifierSet::default().is_empty()); + } + + #[test] + fn iter_count() { + assert_eq!(ModifierSet::default().iter().count(), 0); + assert_eq!(ModifierSet::from_raw_dotted("a").iter().count(), 1); + assert_eq!(ModifierSet::from_raw_dotted("a.b").iter().count(), 2); + assert_eq!(ModifierSet::from_raw_dotted("a.b.c").iter().count(), 3); + } + + #[test] + fn subset() { + assert!(ModifierSet::from_raw_dotted("a") + .is_subset(ModifierSet::from_raw_dotted("a.b"))); + assert!(ModifierSet::from_raw_dotted("a") + .is_subset(ModifierSet::from_raw_dotted("b.a"))); + assert!(ModifierSet::from_raw_dotted("a.b") + .is_subset(ModifierSet::from_raw_dotted("b.c.a"))); + } + + #[test] + fn best_match() { + // 1. more modifiers in common with self + assert_eq!( + ModifierSet::from_raw_dotted("a.b").best_match_in( + [ + (ModifierSet::from_raw_dotted("a.c"), 1), + (ModifierSet::from_raw_dotted("a.b"), 2), + ] + .into_iter() + ), + Some(2) + ); + // 2. fewer modifiers in general + assert_eq!( + ModifierSet::from_raw_dotted("a").best_match_in( + [ + (ModifierSet::from_raw_dotted("a"), 1), + (ModifierSet::from_raw_dotted("a.b"), 2), + ] + .into_iter() + ), + Some(1) + ); + // the first rule takes priority over the second + assert_eq!( + ModifierSet::from_raw_dotted("a.b").best_match_in( + [ + (ModifierSet::from_raw_dotted("a"), 1), + (ModifierSet::from_raw_dotted("a.b"), 2), + ] + .into_iter() + ), + Some(2) + ); + // among multiple best matches, the first one is returned + assert_eq!( + ModifierSet::default().best_match_in( + [ + (ModifierSet::from_raw_dotted("a"), 1), + (ModifierSet::from_raw_dotted("b"), 2) + ] + .into_iter() + ), + Some(1) + ); + } +}