Skip to content

Commit 56eb217

Browse files
T0mstone and laurmaedje authored
Codify and resolve modifiers (#46)
Co-authored-by: Laurenz <[email protected]>
1 parent 4f0e70d commit 56eb217

File tree

3 files changed

+286
-8
lines changed

3 files changed

+286
-8
lines changed

build.rs

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,13 @@ use std::fmt::Write;
22
use std::iter::Peekable;
33
use std::path::Path;
44

5+
use self::shared::ModifierSet;
6+
57
type StrResult<T> = Result<T, String>;
68

9+
#[path = "src/shared.rs"]
10+
mod shared;
11+
712
/// A module of definitions.
813
struct Module<'a>(Vec<(&'a str, Binding<'a>)>);
914

@@ -29,7 +34,7 @@ enum Def<'a> {
2934
/// A symbol, either a leaf or with modifiers.
3035
enum Symbol<'a> {
3136
Single(char),
32-
Multi(Vec<(&'a str, char)>),
37+
Multi(Vec<(ModifierSet<&'a str>, char)>),
3338
}
3439

3540
/// A single line during parsing.
@@ -40,7 +45,7 @@ enum Line<'a> {
4045
ModuleStart(&'a str),
4146
ModuleEnd,
4247
Symbol(&'a str, Option<char>),
43-
Variant(&'a str, char),
48+
Variant(ModifierSet<&'a str>, char),
4449
}
4550

4651
fn main() {
@@ -110,7 +115,7 @@ fn tokenize(line: &str) -> StrResult<Line> {
110115
validate_ident(part)?;
111116
}
112117
let c = decode_char(tail.ok_or("missing char")?)?;
113-
Line::Variant(rest, c)
118+
Line::Variant(ModifierSet::from_raw_dotted(rest), c)
114119
} else {
115120
validate_ident(head)?;
116121
let c = tail.map(decode_char).transpose()?;
@@ -167,7 +172,7 @@ fn parse<'a>(
167172

168173
let symbol = if !variants.is_empty() {
169174
if let Some(c) = c {
170-
variants.insert(0, ("", c));
175+
variants.insert(0, (ModifierSet::default(), c));
171176
}
172177
Symbol::Multi(variants)
173178
} else {

src/lib.rs

Lines changed: 47 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,15 @@
1-
/*!
2-
Human-friendly notation for Unicode symbols.
3-
*/
1+
//! Human-friendly notation for Unicode symbols.
2+
//!
3+
//! ## Model
4+
//! A [`Symbol`] is a collection of one or more _variants_. Each variant is
5+
//! identified by a set of [_modifiers_](ModifierSet) and has a single character
6+
//! as its value. The modifiers themselves can in principle be any non-empty
7+
//! strings that don't contain the character `.`, but codex only defines ones
8+
//! that are entirely made of ASCII alphabetical characters.
9+
10+
pub use self::shared::ModifierSet;
11+
12+
mod shared;
413

514
/// A module of definitions.
615
#[derive(Debug, Copy, Clone)]
@@ -52,7 +61,41 @@ pub enum Symbol {
5261
/// A symbol without modifiers.
5362
Single(char),
5463
/// A symbol with named modifiers. The symbol defaults to its first variant.
55-
Multi(&'static [(&'static str, char)]),
64+
Multi(&'static [(ModifierSet<&'static str>, char)]),
65+
}
66+
67+
impl Symbol {
68+
/// Get the symbol's character for a given set of modifiers.
69+
pub fn get(&self, modifs: ModifierSet<&str>) -> Option<char> {
70+
match self {
71+
Self::Single(c) => modifs.is_empty().then_some(*c),
72+
Self::Multi(list) => modifs.best_match_in(list.iter().copied()),
73+
}
74+
}
75+
76+
/// The characters that are covered by this symbol.
77+
pub fn variants(&self) -> impl Iterator<Item = (ModifierSet<&str>, char)> {
78+
enum Variants {
79+
Single(std::iter::Once<char>),
80+
Multi(std::slice::Iter<'static, (ModifierSet<&'static str>, char)>),
81+
}
82+
let mut iter = match self {
83+
Self::Single(c) => Variants::Single(std::iter::once(*c)),
84+
Self::Multi(sl) => Variants::Multi(sl.iter()),
85+
};
86+
std::iter::from_fn(move || match &mut iter {
87+
Variants::Single(iter) => Some((ModifierSet::default(), iter.next()?)),
88+
Variants::Multi(iter) => iter.next().copied(),
89+
})
90+
}
91+
92+
/// Possible modifiers for this symbol.
93+
pub fn modifiers(&self) -> impl Iterator<Item = &str> + '_ {
94+
self.variants()
95+
.flat_map(|(m, _)| m.into_iter())
96+
.collect::<std::collections::BTreeSet<_>>()
97+
.into_iter()
98+
}
5699
}
57100

58101
/// A module that contains the other top-level modules.

src/shared.rs

Lines changed: 230 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,230 @@
1+
use std::ops::Deref;
2+
3+
/// A set of modifiers, stored as one string in which the individual modifiers
/// are separated by the character `.`.
///
/// Beware: The [`Eq`] and [`Hash`] implementations depend on the order in
/// which the modifiers are stored, unlike what one would expect from a set.
/// To test for set-wise equality, use [`iter`](Self::iter) and collect into a
/// true set type like [`HashSet`](std::collections::HashSet).
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub struct ModifierSet<S>(
    // Note: the visibility needs to be `pub(crate)`, since build.rs outputs
    // `ModifierSet(...)`.
    pub(crate) S,
);

impl<S: Deref<Target = str>> ModifierSet<S> {
    /// Constructs a modifier set from a string, where modifiers are separated
    /// by the character `.`.
    ///
    /// `s` should not contain any empty modifiers (i.e. it shouldn't start or
    /// end with `.` and shouldn't contain the sequence `..`) and no modifier
    /// should occur twice. Otherwise, unexpected errors can occur.
    ///
    /// # Panics
    /// In builds with debug assertions enabled, panics if `s` contains an
    /// empty modifier. Duplicate modifiers are not detected.
    pub fn from_raw_dotted(s: S) -> Self {
        // Checking the other requirement too feels like it would be a bit too
        // expensive, even for debug mode.
        debug_assert!(
            !s.starts_with('.') && !s.ends_with('.') && !s.contains(".."),
            "ModifierSet::from_raw_dotted called with string containing empty modifier"
        );
        Self(s)
    }

    /// Whether `self` is empty.
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    /// Gets the string of modifiers separated by `.`.
    pub fn as_str(&self) -> &str {
        &self.0
    }

    /// Converts the underlying string to a slice.
    pub fn as_deref(&self) -> ModifierSet<&str> {
        ModifierSet(&self.0)
    }

    /// Inserts a new modifier into the set.
    ///
    /// `m` should not be empty, contain the character `.`, or already be in the
    /// set. Otherwise, unexpected errors can occur.
    pub fn insert_raw(&mut self, m: &str)
    where
        S: for<'a> std::ops::AddAssign<&'a str>,
    {
        // Only non-first modifiers are preceded by a separator.
        if !self.0.is_empty() {
            self.0 += ".";
        }
        self.0 += m;
    }

    /// Iterates over the list of modifiers in an arbitrary order.
    pub fn iter(&self) -> impl Iterator<Item = &str> {
        self.into_iter()
    }

    /// Whether the set contains the modifier `m`.
    pub fn contains(&self, m: &str) -> bool {
        self.iter().any(|lhs| lhs == m)
    }

    /// Finds the best match from the list.
    ///
    /// To be considered a match, the modifier set must be a superset of (or
    /// equal to) `self`. Among different matches, the best one is selected by
    /// the following two criteria (in order):
    /// 1. Number of modifiers in common with `self` (more is better).
    /// 2. Total number of modifiers (fewer is better).
    ///
    /// If there are multiple best matches, the first of them is returned.
    /// Returns `None` if no entry of `variants` is a match.
    pub fn best_match_in<'a, T>(
        &self,
        variants: impl Iterator<Item = (ModifierSet<&'a str>, T)>,
    ) -> Option<T> {
        // The best candidate seen so far, together with its score. Keeping
        // both in one `Option` means they can never get out of sync.
        let mut best: Option<((usize, std::cmp::Reverse<usize>), T)> = None;

        for (set, value) in variants.filter(|(set, _)| self.is_subset(*set)) {
            let matching = set.iter().filter(|m| self.contains(m)).count();
            let total = set.iter().count();

            // Tuples compare lexicographically: more matches win first, then
            // `Reverse` makes a smaller total compare as greater.
            let score = (matching, std::cmp::Reverse(total));

            // Strictly greater, so the earliest of several equally good
            // candidates is kept.
            let is_better = match &best {
                Some((prev, _)) => score > *prev,
                None => true,
            };
            if is_better {
                best = Some((score, value));
            }
        }

        best.map(|(_, value)| value)
    }

    /// Whether all modifiers in `self` are also present in `other`.
    pub fn is_subset(&self, other: ModifierSet<&str>) -> bool {
        self.iter().all(|m| other.contains(m))
    }
}

impl<S: Default> Default for ModifierSet<S> {
    /// Constructs the default modifier set.
    ///
    /// This is typically the empty set, though the remark from
    /// [`Self::from_raw_dotted`] applies since `S::default()` could technically
    /// be anything.
    fn default() -> Self {
        Self(S::default())
    }
}

impl<'a, S: Deref<Target = str>> IntoIterator for &'a ModifierSet<S> {
    type Item = &'a str;
    type IntoIter = std::str::Split<'a, char>;

    /// Iterate over the list of modifiers in an arbitrary order.
    fn into_iter(self) -> Self::IntoIter {
        // Borrow the underlying string and reuse the by-value implementation
        // so that the empty-set handling lives in exactly one place.
        self.as_deref().into_iter()
    }
}

impl<'a> IntoIterator for ModifierSet<&'a str> {
    type Item = &'a str;
    type IntoIter = std::str::Split<'a, char>;

    /// Iterate over the list of modifiers in an arbitrary order.
    fn into_iter(self) -> Self::IntoIter {
        let mut iter = self.0.split('.');
        if self.0.is_empty() {
            // Splitting an empty string yields one empty item, which would be
            // an empty modifier; consume it so the iterator yields nothing.
            let _ = iter.next();
        }
        iter
    }
}
155+
156+
#[cfg(test)]
mod tests {
    // All tests operate on modifier sets backed by string literals.
    type ModifierSet = super::ModifierSet<&'static str>;

    #[test]
    fn default_is_empty() {
        assert!(ModifierSet::default().is_empty());
    }

    #[test]
    fn iter_count() {
        assert_eq!(ModifierSet::default().iter().count(), 0);
        assert_eq!(ModifierSet::from_raw_dotted("a").iter().count(), 1);
        assert_eq!(ModifierSet::from_raw_dotted("a.b").iter().count(), 2);
        assert_eq!(ModifierSet::from_raw_dotted("a.b.c").iter().count(), 3);
    }

    #[test]
    fn subset() {
        // Positive cases: order of the modifiers is irrelevant.
        assert!(ModifierSet::from_raw_dotted("a")
            .is_subset(ModifierSet::from_raw_dotted("a.b")));
        assert!(ModifierSet::from_raw_dotted("a")
            .is_subset(ModifierSet::from_raw_dotted("b.a")));
        assert!(ModifierSet::from_raw_dotted("a.b")
            .is_subset(ModifierSet::from_raw_dotted("b.c.a")));
        // The empty set is a subset of everything.
        assert!(ModifierSet::default().is_subset(ModifierSet::from_raw_dotted("a")));
        assert!(ModifierSet::default().is_subset(ModifierSet::default()));

        // Negative cases, so that an always-true implementation fails.
        assert!(!ModifierSet::from_raw_dotted("a")
            .is_subset(ModifierSet::from_raw_dotted("b")));
        assert!(!ModifierSet::from_raw_dotted("a.b")
            .is_subset(ModifierSet::from_raw_dotted("a")));
        assert!(!ModifierSet::from_raw_dotted("a").is_subset(ModifierSet::default()));
        // Modifiers are compared as whole strings, not as prefixes.
        assert!(!ModifierSet::from_raw_dotted("a")
            .is_subset(ModifierSet::from_raw_dotted("ab")));
    }

    #[test]
    fn best_match() {
        // 1. more modifiers in common with self
        assert_eq!(
            ModifierSet::from_raw_dotted("a.b").best_match_in(
                [
                    (ModifierSet::from_raw_dotted("a.c"), 1),
                    (ModifierSet::from_raw_dotted("a.b"), 2),
                ]
                .into_iter()
            ),
            Some(2)
        );
        // 2. fewer modifiers in general
        assert_eq!(
            ModifierSet::from_raw_dotted("a").best_match_in(
                [
                    (ModifierSet::from_raw_dotted("a"), 1),
                    (ModifierSet::from_raw_dotted("a.b"), 2),
                ]
                .into_iter()
            ),
            Some(1)
        );
        // the first rule takes priority over the second
        assert_eq!(
            ModifierSet::from_raw_dotted("a.b").best_match_in(
                [
                    (ModifierSet::from_raw_dotted("a"), 1),
                    (ModifierSet::from_raw_dotted("a.b"), 2),
                ]
                .into_iter()
            ),
            Some(2)
        );
        // among multiple best matches, the first one is returned
        assert_eq!(
            ModifierSet::default().best_match_in(
                [
                    (ModifierSet::from_raw_dotted("a"), 1),
                    (ModifierSet::from_raw_dotted("b"), 2)
                ]
                .into_iter()
            ),
            Some(1)
        );
        // no candidate is a superset of self
        assert_eq!(
            ModifierSet::from_raw_dotted("x").best_match_in(
                [
                    (ModifierSet::from_raw_dotted("a"), 1),
                    (ModifierSet::from_raw_dotted("a.b"), 2),
                ]
                .into_iter()
            ),
            None
        );
    }
}

0 commit comments

Comments
 (0)