Question regarding the model size, token limits and model optimization?

## ❓ Questions and Help

### Before asking:
1. Search the issues.
2. Search the docs.



#### I want to use the "nllb-200-distilled-600M" model in an application, for example a mobile app. Are there any limitations for this model — for instance, will the token expire? Another important question: I want to know the size of the model, and I want to limit translation to a fixed, predefined set of languages rather than all of them. What will the model size be in that case, and how can we optimize the size of the model?

#### Code
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import time
from typing import Dict

class MobileTranslator:
    """Translate text between a small, fixed set of languages with NLLB-200.

    The constructor loads the tokenizer and seq2seq model named
    ``facebook/nllb-200-<model_size>``, moves the model to the GPU when CUDA
    is available, and caches the token ID of every supported language code.
    Progress messages are written to the Streamlit page via ``st.text``.
    """

    def __init__(self, model_size="distilled-600M"):
        """Load the model and tokenizer and cache the language token IDs.

        Args:
            model_size: Suffix of the checkpoint name, appended to
                ``facebook/nllb-200-``.

        Raises:
            ValueError: If one of the configured language codes is not known
                to the tokenizer (see ``_cache_language_tokens``).
        """
        self.model_name = f"facebook/nllb-200-{model_size}"

        # Display name -> NLLB language code. Keys are lower-case because
        # translate() lower-cases the names it receives before the lookup.
        self.language_codes = {
            "english": "eng_Latn",
            "hindi": "hin_Deva",
            "nepali": "npi_Deva",
            "chinese": "zho_Hans",  # Simplified Chinese
            "chinese_traditional": "zho_Hant",  # Traditional Chinese
            "malaysian": "zsm_Latn",
        }

        st.text("Loading translation model...")
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name)

        self.model.eval()
        if torch.cuda.is_available():
            self.model = self.model.cuda()
            st.text("GPU acceleration enabled")
        else:
            st.text("Running on CPU")

        self._cache_language_tokens()
        st.text("Model loaded successfully!")

    def _cache_language_tokens(self):
        """Populate ``self.lang_token_ids`` (language name -> token ID).

        Raises:
            ValueError: If a language code resolves to the tokenizer's
                unknown-token ID. Forcing that ID as the first generated
                token would not select any target language.
        """
        unknown_id = getattr(self.tokenizer, "unk_token_id", None)
        self.lang_token_ids = {}
        for lang_name, lang_code in self.language_codes.items():
            token_id = self.tokenizer.convert_tokens_to_ids([lang_code])[0]
            if token_id is None or (
                unknown_id is not None and token_id == unknown_id
            ):
                raise ValueError(
                    f"Language code {lang_code!r} for {lang_name!r} is not "
                    "in the tokenizer vocabulary"
                )
            self.lang_token_ids[lang_name] = token_id
            st.text(f"Cached {lang_name} ({lang_code}): token ID {token_id}")

    def translate(self, text: str, source_lang: str, target_lang: str) -> Dict:
        """Translate ``text`` from ``source_lang`` to ``target_lang``.

        Args:
            text: The text to translate.
            source_lang: Language name (case-insensitive key of
                ``self.language_codes``) of the input text.
            target_lang: Language name (case-insensitive key of
                ``self.language_codes``) to translate into.

        Returns:
            On success, a dict with ``original_text`` (the text exactly as
            passed in), ``translated_text``, ``source_language``,
            ``target_language``, ``processing_time`` (seconds, rounded to
            three decimals) and ``success=True``. On failure, a dict with
            ``error`` and ``success=False``; failures raised during
            tokenization or generation also carry ``processing_time``.
        """
        start_time = time.time()
        source_key = source_lang.lower()
        target_key = target_lang.lower()

        if source_key not in self.language_codes:
            return {
                "error": f"Unsupported source language: {source_lang}",
                "success": False,
            }

        if target_key not in self.language_codes:
            return {
                "error": f"Unsupported target language: {target_lang}",
                "success": False,
            }

        if not text or not text.strip():
            return {
                "error": "No text provided for translation",
                "success": False,
            }

        source_code = self.language_codes[source_key]
        to_nepali = target_key == "nepali"

        try:
            # Work on a copy so the caller's text is reported back unchanged.
            model_input = text
            if to_nepali:
                # Helps with question translation.
                model_input = model_input.replace("?", " ?")

            # The source language is selected through the tokenizer's
            # src_lang attribute; prepending the code to the raw text would
            # leave the tokenizer's default source tag in place.
            self.tokenizer.src_lang = source_code

            inputs = self.tokenizer(
                model_input,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=512,
            )

            # Follow the model's actual device rather than re-probing CUDA.
            device = self.model.device
            inputs = {k: v.to(device) for k, v in inputs.items()}

            # Nepali gets one extra beam and a higher length penalty.
            generation_args = {
                "forced_bos_token_id": self.lang_token_ids[target_key],
                "max_length": 512,
                "num_beams": 5 if to_nepali else 4,
                "length_penalty": 1.2 if to_nepali else 1.0,
                "early_stopping": True,
                "do_sample": False,
                "pad_token_id": self.tokenizer.pad_token_id,
            }

            with torch.no_grad():
                generated_tokens = self.model.generate(**inputs, **generation_args)

            translated_text = self.tokenizer.batch_decode(
                generated_tokens, skip_special_tokens=True
            )[0]

            # Re-attach punctuation that came out detached in Nepali output.
            if to_nepali:
                for spaced, tight in ((" .", "."), (" ,", ","), (" ?", "?"), (" !", "!")):
                    translated_text = translated_text.replace(spaced, tight)

            # Defensive cleanup: drop any language code left in the output.
            for lang_code in self.language_codes.values():
                translated_text = translated_text.replace(lang_code, "").strip()

            return {
                "original_text": text,
                "translated_text": translated_text,
                "source_language": source_lang,
                "target_language": target_lang,
                "processing_time": round(time.time() - start_time, 3),
                "success": True,
            }

        except Exception as e:
            # UI boundary: report the failure to the caller instead of
            # crashing the Streamlit page.
            return {
                "error": f"Translation failed: {str(e)}",
                "success": False,
                "processing_time": round(time.time() - start_time, 3),
            }

def main():
    """Render the translator page: language pickers, text box and result."""
    st.title("Mobile Language Translator")
    st.markdown("Translate text between English, Hindi, Nepali, Chinese, and Malaysian")

    # Construct the translator through a st.cache_resource-wrapped factory.
    @st.cache_resource
    def load_translator():
        return MobileTranslator()

    translator = load_translator()
    language_names = list(translator.language_codes.keys())

    # Source picker on the left, target picker on the right.
    left_col, right_col = st.columns(2)

    with left_col:
        chosen_source = st.selectbox(
            "From:",
            options=language_names,
            index=0,  # English
            format_func=str.capitalize,
            key="source_lang",
        )

    with right_col:
        chosen_target = st.selectbox(
            "To:",
            options=language_names,
            index=2,  # Nepali
            format_func=str.capitalize,
            key="target_lang",
        )

    # Seed the default text once; later reruns keep whatever is stored.
    if "text_to_translate" not in st.session_state:
        st.session_state["text_to_translate"] = "Hello, how are you today?"

    entered_text = st.text_area(
        "Enter text to translate:",
        value=st.session_state.text_to_translate,
        height=150,
        key="text_input",
    )

    if st.button("Translate", type="primary"):
        if entered_text.strip():
            with st.spinner("Translating..."):
                outcome = translator.translate(entered_text, chosen_source, chosen_target)

            if not outcome.get("success"):
                st.error(f"Error: {outcome.get('error', 'Unknown error')}")
            else:
                st.subheader("Translation Result:")
                st.markdown(f"**{chosen_source.capitalize()}:** {outcome['original_text']}")
                st.markdown(f"**{chosen_target.capitalize()}:** {outcome['translated_text']}")
                st.caption(f"Translation took {outcome['processing_time']} seconds")
        else:
            st.warning("Please enter some text to translate")
    
    # Add Nepali-specific examples
    # st.markdown("### Nepali Translation Examples:")
    # nepali_examples = st.columns(2)
    
    # with nepali_examples[0]:
    #     if st.button("Basic Greeting"):
    #         st.session_state.text_to_translate = "Hello, how are you?"
    #         st.session_state.source_lang = "english"
    #         st.session_state.target_lang = "nepali"
    #         st.rerun()
    
    # with nepali_examples[1]:
    #     if st.button("Common Question"):
    #         st.session_state.text_to_translate = "What is your name?"
    #         st.session_state.source_lang = "english"
    #         st.session_state.target_lang = "nepali"
    #         st.rerun()
    
    # General examples
    # st.markdown("### Other Language Examples:")
    # other_examples = st.columns(3)
    
    # with other_examples[0]:
    #     if st.button("English → Hindi"):
    #         st.session_state.text_to_translate = "Good morning, how are you?"
    #         st.session_state.source_lang = "english"
    #         st.session_state.target_lang = "hindi"
    #         st.rerun()
    
    # with other_examples[1]:
    #     if st.button("English → Chinese"):
    #         st.session_state.text_to_translate = "Where is the restaurant?"
    #         st.session_state.source_lang = "english"
    #         st.session_state.target_lang = "chinese"
    #         st.rerun()
    
    # with other_examples[2]:
    #     if st.button("English → Malaysian"):
    #         st.session_state.text_to_translate = "Thank you very much!"
    #         st.session_state.source_lang = "english"
    #         st.session_state.target_lang = "malaysian"
    #         st.rerun()

# Entry point: build the page only when this file is run as the main module.
if __name__ == "__main__":
    main()



#### What have you tried?

#### What's your environment?

 - fairseq Version (e.g., 1.0 or main):
 - PyTorch Version (e.g., 1.0)
 - OS (e.g., Linux):
 - How you installed fairseq (`pip`, source):
 - Build command you used (if compiling from source):
 - Python version:
 - CUDA/cuDNN version:
 - GPU models and configuration:
 - Any other relevant information:

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Question regarding the model size, token limits and model optimization? #5622

❓ Questions and Help

Before asking:

Code

What have you tried?

What's your environment?

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Question regarding the model size, token limits and model optimization? #5622

Description

❓ Questions and Help

Before asking:

Code

What have you tried?

What's your environment?

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions