Skip to content

Commit da5718c

Browse files
authored
Merge pull request #12 from allo-media/feature/sc-25618/es/fractions
Feature/sc 25618/es/fractions
2 parents c61a514 + 484edc6 commit da5718c

File tree

6 files changed, with 157 additions and 119 deletions

src/digit_string.rs

Lines changed: 12 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -3,15 +3,17 @@
33
//! Build numeric representation using only elementary operations ensuring a
44
//! valid construction at every step.
55
6-
use super::error::Error;
76
use std::ops::Deref;
87

9-
#[derive(Debug, Clone)]
8+
use super::error::Error;
9+
use super::lang::MorphologicalMarker;
10+
11+
#[derive(Debug)]
1012
pub struct DigitString {
1113
buffer: Vec<u8>,
1214
leading_zeroes: usize,
1315
frozen: bool,
14-
pub ordinal_marker: Option<&'static str>,
16+
pub marker: MorphologicalMarker,
1517
}
1618

1719
fn all_zeros(slice: &[u8]) -> bool {
@@ -24,7 +26,7 @@ impl DigitString {
2426
buffer: Vec::with_capacity(4),
2527
leading_zeroes: 0,
2628
frozen: false,
27-
ordinal_marker: None,
29+
marker: MorphologicalMarker::None,
2830
}
2931
}
3032

@@ -141,26 +143,15 @@ impl DigitString {
141143
}
142144

143145
/// Formal base 10 string representation with leading zeroes
144-
pub fn into_string(self) -> String {
146+
pub fn to_string(&self) -> String {
145147
// we know that the string is valid.
146148
let mut res = "0".repeat(self.leading_zeroes);
147-
res.push_str(&String::from_utf8(self.buffer).unwrap());
149+
res.push_str(std::str::from_utf8(self.buffer.as_slice()).unwrap());
148150
res
149151
}
150152

151-
pub fn value(&self) -> f64 {
152-
if self.buffer.is_empty() && self.leading_zeroes > 0 {
153-
return 0.0;
154-
}
155-
// it's safe to unwrap since we are sure to have a valid number string
156-
std::str::from_utf8(self.buffer.as_slice())
157-
.unwrap()
158-
.parse()
159-
.unwrap()
160-
}
161-
162153
pub fn is_ordinal(&self) -> bool {
163-
self.ordinal_marker.is_some()
154+
self.marker.is_ordinal()
164155
}
165156
}
166157

@@ -312,15 +303,15 @@ mod tests {
312303
fn test_shift_empty() {
313304
let mut builder = DigitString::new();
314305
builder.shift(2).unwrap();
315-
assert_eq!(builder.into_string(), "100")
306+
assert_eq!(builder.to_string(), "100")
316307
}
317308

318309
#[test]
319310
fn test_shift_full_zeroes() {
320311
let mut builder = DigitString::new();
321312
builder.put(b"1000").unwrap();
322313
builder.shift(2).unwrap();
323-
assert_eq!(builder.into_string(), "1100")
314+
assert_eq!(builder.to_string(), "1100")
324315
}
325316

326317
#[test]
@@ -349,7 +340,7 @@ mod tests {
349340
builder.shift(2)?;
350341
builder.put(b"90")?;
351342
builder.put(b"2")?;
352-
assert_eq!(builder.into_string(), "002792");
343+
assert_eq!(builder.to_string(), "002792");
353344
Ok(())
354345
}
355346
}

src/lang/en/mod.rs

Lines changed: 24 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@ use crate::error::Error;
33

44
mod vocabulary;
55

6-
use super::LangInterpretor;
6+
use super::{LangInterpretor, MorphologicalMarker};
77
use vocabulary::INSIGNIFICANT;
88

99
fn lemmatize(word: &str) -> &str {
@@ -24,8 +24,8 @@ impl LangInterpretor for English {
2424
return match self.exec_group(num_func.split('-')) {
2525
Ok(ds) => {
2626
b.put(&ds)?;
27-
if ds.ordinal_marker.is_some() {
28-
b.ordinal_marker = ds.ordinal_marker;
27+
if ds.marker.is_ordinal() {
28+
b.marker = ds.marker;
2929
b.freeze()
3030
}
3131
Ok(())
@@ -84,7 +84,7 @@ impl LangInterpretor for English {
8484
|| num_func == "second"
8585
|| lemma == "third")
8686
{
87-
b.ordinal_marker = self.get_morph_marker(num_func);
87+
b.marker = self.get_morph_marker(num_func);
8888
b.freeze();
8989
}
9090
status
@@ -110,30 +110,36 @@ impl LangInterpretor for English {
110110
word == "point"
111111
}
112112

113-
fn format(&self, b: DigitString) -> String {
114-
if let Some(marker) = b.ordinal_marker {
115-
format!("{}{}", b.into_string(), marker)
113+
fn format_and_value(&self, b: DigitString) -> (String, f64) {
114+
let repr = b.to_string();
115+
let val: f64 = repr.parse().unwrap();
116+
if let MorphologicalMarker::Ordinal(marker) = b.marker {
117+
(format!("{}{}", b.to_string(), marker), val)
116118
} else {
117-
b.into_string()
119+
(repr, val)
118120
}
119121
}
120122

121-
fn format_decimal(&self, int: String, dec: String) -> String {
122-
format!("{}.{}", int, dec)
123+
fn format_decimal_and_value(&self, int: DigitString, dec: DigitString) -> (String, f64) {
124+
let irepr = int.to_string();
125+
let drepr = dec.to_string();
126+
let frepr = format!("{}.{}", irepr, drepr);
127+
let val = frepr.parse().unwrap();
128+
(frepr, val)
123129
}
124130

125-
fn get_morph_marker(&self, word: &str) -> Option<&'static str> {
131+
fn get_morph_marker(&self, word: &str) -> MorphologicalMarker {
126132
if word.ends_with("th") {
127-
Some("th")
133+
MorphologicalMarker::Ordinal("th")
128134
} else if word.ends_with("ths") {
129-
Some("ths")
135+
MorphologicalMarker::Ordinal("ths")
130136
} else {
131137
match word {
132-
"first" => Some("st"),
133-
"second" => Some("nd"),
134-
"third" => Some("rd"),
135-
"thirds" => Some("rds"),
136-
_ => None,
138+
"first" => MorphologicalMarker::Ordinal("st"),
139+
"second" => MorphologicalMarker::Ordinal("nd"),
140+
"third" => MorphologicalMarker::Ordinal("rd"),
141+
"thirds" => MorphologicalMarker::Ordinal("rds"),
142+
_ => MorphologicalMarker::None,
137143
}
138144
}
139145
}

src/lang/es/mod.rs

Lines changed: 69 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@ use crate::error::Error;
33

44
mod vocabulary;
55

6-
use super::LangInterpretor;
6+
use super::{LangInterpretor, MorphologicalMarker};
77
use vocabulary::INSIGNIFICANT;
88

99
fn lemmatize(word: &str) -> &str {
@@ -22,15 +22,15 @@ pub struct Spanish {}
2222
impl LangInterpretor for Spanish {
2323
fn apply(&self, num_func: &str, b: &mut DigitString) -> Result<(), Error> {
2424
let num_marker = self.get_morph_marker(num_func);
25-
if !b.is_empty() && num_marker != b.ordinal_marker {
25+
if !b.is_empty() && num_marker != b.marker && !num_marker.is_fraction() {
2626
return Err(Error::Overlap);
2727
}
2828
let status = match lemmatize(num_func) {
2929
"cero" => b.put(b"0"),
3030
"un" | "uno" if b.peek(2) != b"10" && b.peek(2) != b"20" => b.put(b"1"),
3131
"primer" | "primero" | "primera" => b.put(b"1"),
3232
"dos" if b.peek(2) != b"10" && b.peek(2) != b"20" => b.put(b"2"),
33-
"segundo" if b.ordinal_marker.is_some() => b.put(b"2"),
33+
"segundo" if b.marker.is_ordinal() => b.put(b"2"),
3434
"segunda" => b.put(b"2"),
3535
"tres" if b.peek(2) != b"10" && b.peek(2) != b"20" => b.put(b"3"),
3636
"tercer" | "tercero" | "tercera" => b.put(b"3"),
@@ -47,35 +47,39 @@ impl LangInterpretor for Spanish {
4747
"nueve" if b.peek(2) != b"10" && b.peek(2) != b"20" => b.put(b"9"),
4848
"noveno" | "novena" => b.put(b"9"),
4949
"diez" | "décimo" | "décima" => b.put(b"10"),
50-
"once" | "undécimo" | "undécima" | "decimoprimero" | "decimoprimera" => b.put(b"11"),
51-
"doce" | "duodécimo" | "duodécima" | "decimosegundo" | "decimosegunda" => {
50+
"once" | "undécimo" | "undécima" | "decimoprimero" | "decimoprimera" | "onceavo" => {
51+
b.put(b"11")
52+
}
53+
"doce" | "duodécimo" | "duodécima" | "decimosegundo" | "decimosegunda" | "doceavo" => {
5254
b.put(b"12")
5355
}
54-
"trece" | "decimotercero" | "decimotercera" => b.put(b"13"),
55-
"catorce" | "decimocuarto" | "decimocuarta" => b.put(b"14"),
56-
"quince" | "decimoquinto" | "decimoquinta" => b.put(b"15"),
57-
"dieciseis" | "dieciséis" | "decimosexto" | "decimosexta" => b.put(b"16"),
58-
"diecisiete" | "decimoséptimo" | "decimoséptima" => b.put(b"17"),
59-
"dieciocho" | "decimoctavo" | "decimoctava" => b.put(b"18"),
60-
"diecinueve" | "decimonoveno" | "decimonovena" => b.put(b"19"),
61-
"veinte" | "vigésimo" | "vigésima" => b.put(b"20"),
62-
"veintiuno" => b.put(b"21"),
63-
"veintidós" | "veintidos" => b.put(b"22"),
64-
"veintitrés" | "veintitres" => b.put(b"23"),
65-
"veinticuatro" => b.put(b"24"),
66-
"veinticinco" => b.put(b"25"),
67-
"veintiseis" | "veintiséis" => b.put(b"26"),
68-
"veintisiete" => b.put(b"27"),
69-
"veintiocho" => b.put(b"28"),
70-
"veintinueve" => b.put(b"29"),
71-
"treinta" | "trigésimo" | "trigésima" => b.put(b"30"),
72-
"cuarenta" | "cuadragésimo" | "cuadragésima" => b.put(b"40"),
73-
"cincuenta" | "quincuagésimo" | "quincuagésima" => b.put(b"50"),
74-
"sesenta" | "sexagésimo" | "sexagésima" => b.put(b"60"),
75-
"setenta" | "septuagésimo" | "septuagésima" => b.put(b"70"),
76-
"ochenta" | "octogésimo" | "octogésima" => b.put(b"80"),
77-
"noventa" | "nonagésimo" | "nonagésima" => b.put(b"90"),
78-
"cien" | "ciento" | "centésimo" | "centésima" => b.put(b"100"),
56+
"trece" | "decimotercero" | "decimotercera" | "treceavo" => b.put(b"13"),
57+
"catorce" | "decimocuarto" | "decimocuarta" | "catorceavo" => b.put(b"14"),
58+
"quince" | "decimoquinto" | "decimoquinta" | "quinceavo" => b.put(b"15"),
59+
"dieciseis" | "dieciséis" | "decimosexto" | "decimosexta" | "deciseisavo" => {
60+
b.put(b"16")
61+
}
62+
"diecisiete" | "decimoséptimo" | "decimoséptima" | "diecisieteavo" => b.put(b"17"),
63+
"dieciocho" | "decimoctavo" | "decimoctava" | "dieciochoavo" => b.put(b"18"),
64+
"diecinueve" | "decimonoveno" | "decimonovena" | "decinueveavo" => b.put(b"19"),
65+
"veinte" | "vigésimo" | "vigésima" | "veintavo" | "veinteavo" => b.put(b"20"),
66+
"veintiuno" | "veintiunoavo" => b.put(b"21"),
67+
"veintidós" | "veintidos" | "veintidosavo" => b.put(b"22"),
68+
"veintitrés" | "veintitres" | "veintitresavo" => b.put(b"23"),
69+
"veinticuatro" | "veinticuatroavo" => b.put(b"24"),
70+
"veinticinco" | "veinticincoavo" => b.put(b"25"),
71+
"veintiseis" | "veintiséis" | "veintiseisavo" => b.put(b"26"),
72+
"veintisiete" | "veintisieteavo" => b.put(b"27"),
73+
"veintiocho" | "veintiochoavo" => b.put(b"28"),
74+
"veintinueve" | "veintinueveavo" => b.put(b"29"),
75+
"treinta" | "trigésimo" | "trigésima" | "treintavo" => b.put(b"30"),
76+
"cuarenta" | "cuadragésimo" | "cuadragésima" | "cuarentavo" => b.put(b"40"),
77+
"cincuenta" | "quincuagésimo" | "quincuagésima" | "cincuentavo" => b.put(b"50"),
78+
"sesenta" | "sexagésimo" | "sexagésima" | "sesentavo" => b.put(b"60"),
79+
"setenta" | "septuagésimo" | "septuagésima" | "setentavo" => b.put(b"70"),
80+
"ochenta" | "octogésimo" | "octogésima" | "ochentavo" => b.put(b"80"),
81+
"noventa" | "nonagésimo" | "nonagésima" | "noventavo" => b.put(b"90"),
82+
"cien" | "ciento" | "centésimo" | "centésima" | "centavo" => b.put(b"100"),
7983
"dosciento" | "ducentésimo" | "ducentésima" => b.put(b"200"),
8084
"tresciento" | "tricentésimo" | "tricentésima" => b.put(b"300"),
8185
"cuatrociento" | "quadringentésimo" | "quadringentésima" => b.put(b"400"),
@@ -91,7 +95,10 @@ impl LangInterpretor for Spanish {
9195
_ => Err(Error::NaN),
9296
};
9397
if status.is_ok() {
94-
b.ordinal_marker = num_marker;
98+
b.marker = num_marker;
99+
if b.marker.is_fraction() {
100+
b.freeze()
101+
}
95102
}
96103
status
97104
}
@@ -104,30 +111,44 @@ impl LangInterpretor for Spanish {
104111
word == "coma"
105112
}
106113

107-
fn format(&self, b: DigitString) -> String {
108-
if let Some(marker) = b.ordinal_marker {
109-
format!("{}{}", b.into_string(), marker)
110-
} else {
111-
b.into_string()
114+
fn format_and_value(&self, b: DigitString) -> (String, f64) {
115+
let repr = b.to_string();
116+
let val: f64 = repr.parse().unwrap();
117+
match b.marker {
118+
MorphologicalMarker::Fraction(_) => (format!("1/{}", repr), val.recip()),
119+
MorphologicalMarker::Ordinal(marker) => (format!("{}{}", repr, marker), val),
120+
MorphologicalMarker::None => (repr, val),
112121
}
113122
}
114123

115-
fn format_decimal(&self, int: String, dec: String) -> String {
116-
format!("{},{}", int, dec)
124+
fn format_decimal_and_value(&self, int: DigitString, dec: DigitString) -> (String, f64) {
125+
let sint = int.to_string();
126+
let sdec = dec.to_string();
127+
let val = format!("{}.{}", sint, sdec).parse().unwrap();
128+
(format!("{},{}", sint, sdec), val)
117129
}
118130

119-
fn get_morph_marker(&self, word: &str) -> Option<&'static str> {
131+
fn get_morph_marker(&self, word: &str) -> MorphologicalMarker {
120132
let sing = lemmatize(word).trim_start_matches("decimo");
121133
let is_plur = word.ends_with('s');
122134
match sing {
123-
"primer" => Some(".ᵉʳ"),
135+
"primer" => MorphologicalMarker::Ordinal(".ᵉʳ"),
124136
"primero" | "segundo" | "tercero" | "cuarto" | "quinto" | "sexto" | "séptimo"
125-
| "octavo" | "ctavo" | "noveno" => Some(if is_plur { ".ᵒˢ" } else { ".º" }),
137+
| "octavo" | "ctavo" | "noveno" => {
138+
MorphologicalMarker::Ordinal(if is_plur { ".ᵒˢ" } else { ".º" })
139+
}
126140
"primera" | "segunda" | "tercera" | "cuarta" | "quinta" | "sexta" | "séptima"
127-
| "octava" | "ctava" | "novena" => Some(if is_plur { ".ᵃˢ" } else { ".ª" }),
128-
ord if ord.ends_with("imo") => Some(if is_plur { ".ᵒˢ" } else { ".º" }),
129-
ord if ord.ends_with("ima") => Some(if is_plur { ".ᵃˢ" } else { ".ª" }),
130-
_ => None,
141+
| "octava" | "ctava" | "novena" => {
142+
MorphologicalMarker::Ordinal(if is_plur { ".ᵃˢ" } else { ".ª" })
143+
}
144+
ord if ord.ends_with("imo") => {
145+
MorphologicalMarker::Ordinal(if is_plur { ".ᵒˢ" } else { ".º" })
146+
}
147+
ord if ord.ends_with("ima") => {
148+
MorphologicalMarker::Ordinal(if is_plur { ".ᵃˢ" } else { ".ª" })
149+
}
150+
ord if ord.ends_with("avo") => MorphologicalMarker::Fraction("avo"),
151+
_ => MorphologicalMarker::None,
131152
}
132153
}
133154

@@ -254,11 +275,9 @@ mod tests {
254275

255276
#[test]
256277
fn test_fractions() {
257-
// TODO: coudn't find what the abbreviation is
258-
// assert_text2digits!("doceavo", "12");
259-
// assert_text2digits!("doceava", "12");
260-
// assert_text2digits!("centésimo", "100");
261-
// assert_text2digits!("ciento veintiochoavos", "128");
278+
assert_text2digits!("doceavo", "1/12");
279+
assert_text2digits!("centavo", "1/100");
280+
assert_text2digits!("ciento veintiochoavos", "1/128");
262281
}
263282

264283
#[test]

0 commit comments

Comments
 (0)