4141import java .util .ArrayDeque ;
4242import java .util .ArrayList ;
4343import java .util .HashMap ;
44+ import java .util .Locale ;
4445import java .util .Map ;
4546import java .util .Optional ;
4647
@@ -480,10 +481,11 @@ private void performTextTranslation(final String textToTranslate, final CustomLo
480481 //tokenization
481482 long time = System .currentTimeMillis ();
482483 TokenizerResult input = null ;
484+ String correctedSubText = correctText (textSplit .get (i ), inputLanguage .getLocale ());
483485 if (mode == MADLAD_CACHE ) {
484- input = tokenizer .tokenize (inputLanguage .getCode (), outputLanguage .getCode (), textSplit . get ( i ) );
486+ input = tokenizer .tokenize (inputLanguage .getCode (), outputLanguage .getCode (), correctedSubText );
485487 } else { //if mode == NLLB_CACHE
486- input = tokenizer .tokenize (getNllbLanguageCode (inputLanguage .getCode ()), getNllbLanguageCode (outputLanguage .getCode ()), textSplit . get ( i ) );
488+ input = tokenizer .tokenize (getNllbLanguageCode (inputLanguage .getCode ()), getNllbLanguageCode (outputLanguage .getCode ()), correctedSubText );
487489 }
488490 android .util .Log .i ("performance" , "Tokenization done in: " + (System .currentTimeMillis () - time ) + "ms" );
489491 //encoder execution
@@ -508,10 +510,11 @@ private void performTextTranslation(final String textToTranslate, final CustomLo
508510 executeCacheDecoderGreedy (input , encoderResult , completeOutput , outputLanguage , new TranslateListener () {
509511 @ Override
510512 public void onTranslatedText (String text , long resultID , boolean isFinal , CustomLocale languageOfText ) {
513+ //we return the partial results
511514 String outputText ;
512515 if (joinedStringOutput [0 ].equals ("" )){
513516 outputText = joinedStringOutput [0 ] + text ;
514- }else {
517+ } else {
515518 outputText = joinedStringOutput [0 ] + " " + text ;
516519 }
517520 if (saveResults ) {
@@ -527,6 +530,7 @@ public void onTranslatedText(String text, long resultID, boolean isFinal, Custom
527530
528531 @ Override
529532 public void onFailure (int [] reasons , long value ) {
533+ //we do not return the partial results and notify an error
530534 if (responseListener != null ) {
531535 mainHandler .post (() -> responseListener .onFailure (reasons , value ));
532536 } else {
@@ -1114,6 +1118,41 @@ public long getCurrentResultID(){
11141118 return currentResultID ;
11151119 }
11161120
1121+ private String correctText (String text , Locale locale ){
1122+ String correctedText = text ;
1123+ String language = locale .getLanguage ();
1124+ //we add an eventual period if missing (or in general a terminator symbol)
1125+ if (!language .equals ("th" )) {
1126+ correctedText = correctedText .trim (); //we remove eventual white space from both ends of the text
1127+ if (correctedText .length () >= 2 ) {
1128+ if (!Character .isLetterOrDigit (correctedText .charAt (correctedText .length () - 1 ))) {
1129+ return correctedText ;
1130+ }
1131+ return correctedText + getSentenceTerminator (locale );
1132+ }
1133+ }
1134+ return text ;
1135+ }
1136+
1137+ private static String getSentenceTerminator (Locale locale ) {
1138+ // Assuming most languages use a period (.)
1139+ // Add custom cases for specific languages as needed
1140+ String language = locale .getLanguage ();
1141+ switch (language ) {
1142+ case "zh" : // Chinese
1143+ case "ja" : // Japanese
1144+ case "ko" : // Korean
1145+ return "。" ; // Ideographic full stop
1146+ case "hi" : // Hindi
1147+ return "।" ;
1148+ case "my" : // Burmese
1149+ return "။" ; // Burmese full stop
1150+ // Add other cases as needed for more languages
1151+ default :
1152+ return "." ;
1153+ }
1154+ }
1155+
11171156
11181157 private void initializeNllbLanguagesCodes (Context context ){
11191158 DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory .newInstance ();
0 commit comments