Skip to content

Commit bd0d43f

Browse files
committed
logger and more models
1 parent bcf044c commit bd0d43f

File tree

6 files changed

+207
-28
lines changed

6 files changed

+207
-28
lines changed

Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "translators"
3-
version = "0.1.6"
3+
version = "0.1.7"
44
edition = "2021"
55

66
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
@@ -33,6 +33,7 @@ rustyctranslate2 = { git = "https://github.com/JustFrederik/rustyctranslate2", r
3333
chrono = { version = "0.4.19", optional = true }
3434
sha256 = { version = "1.0.3", optional = true }
3535
md5_alt = { package= 'md5', version = "0.7.0", optional = true }
36+
log = "0.4.17"
3637

3738
[features]
3839
default = ["whatlang-detector", "online", "retries", "fetch_languages", "generate"]

src/lib.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,12 @@ pub mod translators;
99

1010
#[cfg(test)]
1111
mod tests {
12-
#[cfg(not(feature = "offline_req"))]
13-
use std::collections::HashMap;
1412
use dotenv::dotenv;
1513
#[cfg(feature = "offline_req")]
1614
use model_manager::model_manager::ModelManager;
1715
use reqwest::Client;
16+
#[cfg(not(feature = "offline_req"))]
17+
use std::collections::HashMap;
1818

1919
use crate::detector;
2020
use crate::detector::Detectors;
@@ -26,6 +26,8 @@ mod tests {
2626
use crate::translators::chainer::TranslatorInfo;
2727
#[cfg(not(feature = "offline_req"))]
2828
use crate::translators::chainer::TranslatorSelectorInfo;
29+
#[cfg(not(feature = "offline_req"))]
30+
use crate::translators::context::Context;
2931
use crate::translators::dev::{get_csv_errors, get_languages};
3032
#[cfg(feature = "offline_req")]
3133
use crate::translators::offline::ctranslate2::model_management::{
@@ -43,8 +45,6 @@ mod tests {
4345
use crate::translators::translator_structure::TranslatorCTranslate;
4446
use crate::translators::translator_structure::TranslatorLanguages;
4547
use crate::translators::{Translator, Translators};
46-
#[cfg(not(feature = "offline_req"))]
47-
use crate::translators::context::Context;
4848

4949
#[tokio::test]
5050
#[cfg(feature = "offline_req")]

src/model_register.rs

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,45 @@ pub fn register(mm: &mut ModelManager) {
285285
},
286286
);
287287
#[cfg(feature = "nllb")]
288+
models.insert(
289+
"nllb-200-3.3B-ct2-float16".to_string(),
290+
Model {
291+
directory: PathBuf::from_str("translators/nllb-200-3.3B-ct2-float16").unwrap(),
292+
version: "05/13/2023".to_string(),
293+
source: ModelSource::Huggingface(HuggingfaceModel {
294+
repo: "JustFrederik/nllb-200-3.3B-ct2-float16".to_string(),
295+
files: nllb.clone(),
296+
commit: None,
297+
}),
298+
},
299+
);
300+
#[cfg(feature = "nllb")]
301+
models.insert(
302+
"nllb-200-1.3B-ct2".to_string(),
303+
Model {
304+
directory: PathBuf::from_str("translators/nllb-200-1.3B-ct2").unwrap(),
305+
version: "05/13/2023".to_string(),
306+
source: ModelSource::Huggingface(HuggingfaceModel {
307+
repo: "JustFrederik/nllb-200-1.3B-ct2".to_string(),
308+
files: nllb.clone(),
309+
commit: None,
310+
}),
311+
},
312+
);
313+
#[cfg(feature = "nllb")]
314+
models.insert(
315+
"nllb-200-1.3B-ct2-float16".to_string(),
316+
Model {
317+
directory: PathBuf::from_str("translators/nllb-200-1.3B-ct2-float16").unwrap(),
318+
version: "05/13/2023".to_string(),
319+
source: ModelSource::Huggingface(HuggingfaceModel {
320+
repo: "JustFrederik/nllb-200-1.3B-ct2-float16".to_string(),
321+
files: nllb.clone(),
322+
commit: None,
323+
}),
324+
},
325+
);
326+
#[cfg(feature = "nllb")]
288327
models.insert(
289328
"nllb-200-1.3B-ct2-int8".to_string(),
290329
Model {
@@ -298,6 +337,33 @@ pub fn register(mm: &mut ModelManager) {
298337
},
299338
);
300339
#[cfg(feature = "nllb")]
340+
models.insert(
341+
"nllb-200-distilled-600M-ct2".to_string(),
342+
Model {
343+
directory: PathBuf::from_str("translators/nllb-200-distilled-600M-ct2").unwrap(),
344+
version: "05/13/2023".to_string(),
345+
source: ModelSource::Huggingface(HuggingfaceModel {
346+
repo: "JustFrederik/nllb-200-distilled-600M-ct2".to_string(),
347+
files: nllb.clone(),
348+
commit: None,
349+
}),
350+
},
351+
);
352+
#[cfg(feature = "nllb")]
353+
models.insert(
354+
"nllb-200-distilled-600M-ct2-float16".to_string(),
355+
Model {
356+
directory: PathBuf::from_str("translators/nllb-200-distilled-600M-ct2-float16")
357+
.unwrap(),
358+
version: "05/13/2023".to_string(),
359+
source: ModelSource::Huggingface(HuggingfaceModel {
360+
repo: "JustFrederik/nllb-200-distilled-600M-ct2-float16".to_string(),
361+
files: nllb.clone(),
362+
commit: None,
363+
}),
364+
},
365+
);
366+
#[cfg(feature = "nllb")]
301367
models.insert(
302368
"nllb-200-distilled-600M-ct2-int8".to_string(),
303369
Model {
@@ -311,6 +377,33 @@ pub fn register(mm: &mut ModelManager) {
311377
},
312378
);
313379
#[cfg(feature = "nllb")]
380+
models.insert(
381+
"nllb-200-distilled-1.3B-ct2".to_string(),
382+
Model {
383+
directory: PathBuf::from_str("translators/nllb-200-distilled-1.3B-ct2").unwrap(),
384+
version: "05/13/2023".to_string(),
385+
source: ModelSource::Huggingface(HuggingfaceModel {
386+
repo: "JustFrederik/nllb-200-distilled-1.3B-ct2".to_string(),
387+
files: nllb.clone(),
388+
commit: None,
389+
}),
390+
},
391+
);
392+
#[cfg(feature = "nllb")]
393+
models.insert(
394+
"nllb-200-distilled-1.3B-ct2-float16".to_string(),
395+
Model {
396+
directory: PathBuf::from_str("translators/nllb-200-distilled-1.3B-ct2-float16")
397+
.unwrap(),
398+
version: "05/13/2023".to_string(),
399+
source: ModelSource::Huggingface(HuggingfaceModel {
400+
repo: "JustFrederik/nllb-200-distilled-1.3B-ct2-float16".to_string(),
401+
files: nllb.clone(),
402+
commit: None,
403+
}),
404+
},
405+
);
406+
#[cfg(feature = "nllb")]
314407
models.insert(
315408
"nllb-200-distilled-1.3B-ct2-int8".to_string(),
316409
Model {

src/translators/mod.rs

Lines changed: 53 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use std::fmt::Formatter;
12
use std::future::Future;
23
#[cfg(feature = "retries")]
34
use std::time::Duration;
@@ -7,6 +8,7 @@ use std::vec;
78
use futures::future::FutureExt;
89
#[cfg(not(feature = "offline_req"))]
910
use futures::{stream, StreamExt};
11+
use log::{info, warn};
1012
#[cfg(feature = "offline_req")]
1113
use model_manager::model_manager::ModelManager;
1214
use reqwest::Client;
@@ -60,7 +62,7 @@ pub enum ConversationStyleClone {
6062
}
6163
/// Enum Containing all the translators
6264
/// NOTE: when defining a new translator, also add it to is_api, is_fetch, and get_api_available in the impl
63-
#[derive(EnumIter, IntoStaticStr, Clone, PartialEq, Debug)]
65+
#[derive(EnumIter, Clone, PartialEq, Debug)]
6466
pub enum Translator {
6567
/// For Deepl Translate with API key
6668
#[cfg(feature = "deepl")]
@@ -99,15 +101,50 @@ pub enum Translator {
99101
Sugoi(Device, ModelFormat),
100102
}
101103

104+
impl std::fmt::Display for Translator {
105+
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
106+
match self {
107+
#[cfg(feature = "deepl")]
108+
Translator::Deepl => write!(f, "Deepl"),
109+
#[cfg(feature = "chatgpt")]
110+
Translator::ChatGPT(_, _, _, _) => write!(f, "ChatGPT"),
111+
#[cfg(feature = "edge-gpt-scrape")]
112+
Translator::EdgeGPT(_, _) => write!(f, "EdgeGPT"),
113+
#[cfg(feature = "google-scrape")]
114+
Translator::Google => write!(f, "Google"),
115+
#[cfg(feature = "bing-scrape")]
116+
Translator::Bing => write!(f, "Bing"),
117+
#[cfg(feature = "libre")]
118+
Translator::LibreTranslate => write!(f, "LibreTranslate"),
119+
#[cfg(feature = "mymemory")]
120+
Translator::MyMemory => write!(f, "MyMemory"),
121+
#[cfg(feature = "papago")]
122+
Translator::Papago => write!(f, "Papago"),
123+
#[cfg(feature = "youdao")]
124+
Translator::Youdao => write!(f, "Youdao"),
125+
#[cfg(feature = "baidu")]
126+
Translator::Baidu => write!(f, "Baidu"),
127+
#[cfg(feature = "nllb")]
128+
Translator::Nllb(_, _, _) => write!(f, "Nllb"),
129+
#[cfg(feature = "m2m100")]
130+
Translator::M2M100(_, _, _) => write!(f, "M2M100"),
131+
#[cfg(feature = "jparacrawl")]
132+
Translator::JParaCrawl(_, _, _) => write!(f, "JparaCrawl"),
133+
#[cfg(feature = "sugoi")]
134+
Translator::Sugoi(_, _) => write!(f, "Sugui"),
135+
}
136+
}
137+
}
138+
102139
impl Translator {
103140
/// Get all translators: collects everything yielded by `Self::iter()` into a `Vec`.
104141
pub fn get_all() -> Vec<Self> {
105142
Self::iter().collect()
106143
}
107144

108145
/// Converts each translator into its name string
109-
pub fn convert_to_str(v: Vec<Translator>) -> Vec<&'static str> {
110-
v.into_iter().map(|v| v.into()).collect()
146+
pub fn convert_to_str(v: Vec<Translator>) -> Vec<String> {
147+
v.into_iter().map(|v| v.to_string()).collect()
111148
}
112149

113150
/// Get api translators
@@ -441,6 +478,11 @@ impl Translators {
441478
#[cfg(feature = "offline_req")] translator_models: &mut CTranslateModels,
442479
#[cfg(feature = "offline_req")] tokenizer_models: &mut TokenizerModels,
443480
) -> Result<TranslationOutput, Error> {
481+
info!(
482+
"Translate \"{}\" with {}",
483+
query,
484+
translator.translator.to_string()
485+
);
444486
let text = match &translator.data {
445487
TranslatorDyn::WC(v) => {
446488
self.retry(v.translate(&self.client, query, from, &translator.to, context_data))
@@ -565,21 +607,18 @@ impl Translators {
565607
from,
566608
),
567609
};
568-
#[cfg(feature = "offline_req")]
569610
let text = self
570611
.translate_vec_fetch(
571612
queries,
572613
from,
573614
translator,
574615
context_data,
616+
#[cfg(feature = "offline_req")]
575617
translator_models,
618+
#[cfg(feature = "offline_req")]
576619
tokenizer_models,
577620
)
578621
.await?;
579-
#[cfg(not(feature = "offline_req"))]
580-
let text = self
581-
.translate_vec_fetch(queries, from, translator, context_data)
582-
.await?;
583622
if translations.len() == 1 {
584623
if let Some(v) = translations.last_mut() {
585624
if v.lang == Language::Unknown && text.lang != Language::Unknown {
@@ -604,6 +643,11 @@ impl Translators {
604643
#[cfg(feature = "offline_req")] translator_models: &mut CTranslateModels,
605644
#[cfg(feature = "offline_req")] tokenizer_models: &mut TokenizerModels,
606645
) -> Result<TranslationVecOutput, Error> {
646+
info!(
647+
"Translate {:?} with {}",
648+
queries,
649+
translator.translator.to_string()
650+
);
607651
match &translator.data {
608652
TranslatorDyn::WC(v) => {
609653
self.retry(v.translate_vec(
@@ -647,6 +691,7 @@ impl Translators {
647691
if res.is_ok() {
648692
break;
649693
}
694+
warn!("Retry {} of {}", retry_count, max);
650695
if self.retry_count.is_some() {
651696
retry_count += 1;
652697
}

src/translators/offline/ctranslate2/model_management.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use crate::error::Error;
22
use crate::translators::offline::ctranslate2::tokenizer::Tokenizer;
33
use crate::translators::offline::ctranslate2::Device;
4+
use log::info;
45
use rustyctranslate2::CTranslator;
56
use std::collections::HashMap;
67
use std::path::PathBuf;
@@ -36,6 +37,7 @@ impl CTranslateModels {
3637
) -> Result<&mut CTranslator, Error> {
3738
let v = self.ctranslate2_models.get(ident);
3839
if v.is_none() {
40+
info!("Loading ctranslate2 model {}", ident);
3941
let ctranslate2_model =
4042
CTranslator::new(path, device.is_cuda(), compressed).map_err(Error::new_option)?;
4143
self.ctranslate2_models
@@ -52,6 +54,7 @@ impl CTranslateModels {
5254
/// Clears `ctranslate2_models` when `mode` is `ModelLifetime::Dispose`;
/// does nothing for any other mode.
pub fn cleanup(&mut self) {
5355
//TODO: remove only model that was created(multithreaded)
5456
if self.mode == ModelLifetime::Dispose {
57+
info!("Unload ctranslate2 models");
5558
self.ctranslate2_models.clear();
5659
}
5760
}
@@ -80,6 +83,7 @@ impl TokenizerModels {
8083
pub fn get_tokenizer(&mut self, ident: &str, path: PathBuf) -> Result<&Tokenizer, Error> {
8184
let v = self.tokenizers.get(ident);
8285
if v.is_none() {
86+
info!("Loading tokenizer {}", ident);
8387
let tokenizer = Tokenizer::new(&path, ident.to_string());
8488
self.tokenizers.insert(ident.to_string(), tokenizer);
8589
return self
@@ -96,6 +100,7 @@ impl TokenizerModels {
96100
/// Clears `tokenizers` when `mode` is `ModelLifetime::Dispose`;
/// does nothing for any other mode.
pub fn cleanup(&mut self) {
97101
//TODO: remove only model that was created(multithreaded)
98102
if self.mode == ModelLifetime::Dispose {
103+
info!("Unload tokenizers");
99104
self.tokenizers.clear();
100105
}
101106
}

0 commit comments

Comments
 (0)