Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions app/src/components/Generation/EngineModelSelector.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ import type { GenerationFormValues } from '@/lib/hooks/useGenerationForm';
const ENGINE_OPTIONS = [
{ value: 'qwen:1.7B', label: 'Qwen3-TTS 1.7B', engine: 'qwen' },
{ value: 'qwen:0.6B', label: 'Qwen3-TTS 0.6B', engine: 'qwen' },
{ value: 'qwen:1.7B-4bit', label: 'Qwen3-TTS 1.7B ⚡ Fast', engine: 'qwen' },
{ value: 'qwen:0.6B-4bit', label: 'Qwen3-TTS 0.6B ⚡ Fast', engine: 'qwen' },
Comment on lines +22 to +23
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

4-bit options shown unconditionally — non-Apple users will see broken entries.

The PR description states 4-bit models are "hidden on non-Apple platforms" and the backend (_get_qwen_model_configs in backend/backends/__init__.py) only emits these configs when backend_type == "mlx". However, this dropdown hardcodes qwen:1.7B-4bit / qwen:0.6B-4bit for every platform, so PyTorch/non-Apple users will see "⚡ Fast" entries that fail when selected (model lookup returns no config; load endpoint will error).

Consider gating these two entries on backend capability — e.g., fetch the available models from /models (or apiClient.getModelStatus()) and only render options whose model_name is present. Filtering ENGINE_OPTIONS against the backend-reported model list also removes the need to keep the UI list and backend config in lockstep going forward.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@app/src/components/Generation/EngineModelSelector.tsx` around lines 22 - 23,
ENGINE_OPTIONS currently hardcodes 4-bit Qwen entries (e.g., 'qwen:1.7B-4bit',
'qwen:0.6B-4bit') which should be conditionally shown only when the backend
reports those models; update EngineModelSelector to call the backend model
listing (e.g., apiClient.getModelStatus() or GET /models) and filter
ENGINE_OPTIONS by presence in that returned model list before rendering the
dropdown. Specifically, fetch the available model names on mount (or use
existing model status hook), then derive filteredOptions by keeping only the
options whose backend model is present in the returned list. Note that option
values and backend names differ (option 'qwen:1.7B-4bit' corresponds to
model_name 'qwen-tts-1.7B-4bit', i.e. engine 'qwen' plus model_size
'1.7B-4bit'), and ENGINE_OPTIONS entries carry no model_name field, so compare
on engine plus model size (or via an explicit value-to-model_name map) rather
than on opt.value.split(':')[0], which is only the engine id shared by the
full-precision and 4-bit entries. Render filteredOptions in place of
ENGINE_OPTIONS so the 4-bit qwen entries only appear when the backend exposes
them.

{ value: 'qwen_custom_voice:1.7B', label: 'Qwen CustomVoice 1.7B', engine: 'qwen_custom_voice' },
{ value: 'qwen_custom_voice:0.6B', label: 'Qwen CustomVoice 0.6B', engine: 'qwen_custom_voice' },
{ value: 'luxtts', label: 'LuxTTS', engine: 'luxtts' },
Expand Down
8 changes: 6 additions & 2 deletions app/src/lib/hooks/useGenerationForm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ const generationSchema = z.object({
text: z.string().min(1, '').max(50000),
language: z.enum(LANGUAGE_CODES as [LanguageCode, ...LanguageCode[]]),
seed: z.number().int().optional(),
modelSize: z.enum(['1.7B', '0.6B', '1B', '3B']).optional(),
modelSize: z.enum(['1.7B', '0.6B', '1.7B-4bit', '0.6B-4bit', '1B', '3B']).optional(),
instruct: z.string().max(500).optional(),
engine: z
.enum([
Expand Down Expand Up @@ -122,7 +122,11 @@ export function useGenerationForm(options: UseGenerationFormOptions = {}) {
: 'Qwen CustomVoice 0.6B'
: data.modelSize === '1.7B'
? 'Qwen TTS 1.7B'
: 'Qwen TTS 0.6B';
: data.modelSize === '1.7B-4bit'
? 'Qwen TTS 1.7B ⚡ Fast'
: data.modelSize === '0.6B-4bit'
? 'Qwen TTS 0.6B ⚡ Fast'
: 'Qwen TTS 0.6B';

// Check if model needs downloading
try {
Expand Down
39 changes: 36 additions & 3 deletions backend/backends/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,11 +228,17 @@ def _get_qwen_model_configs() -> list[ModelConfig]:
if backend_type == "mlx":
repo_1_7b = "mlx-community/Qwen3-TTS-12Hz-1.7B-Base-bf16"
repo_0_6b = "mlx-community/Qwen3-TTS-12Hz-0.6B-Base-bf16"
repo_1_7b_4bit = "mlx-community/Qwen3-TTS-12Hz-1.7B-Base-4bit"
repo_0_6b_4bit = "mlx-community/Qwen3-TTS-12Hz-0.6B-Base-4bit"
else:
repo_1_7b = "Qwen/Qwen3-TTS-12Hz-1.7B-Base"
repo_0_6b = "Qwen/Qwen3-TTS-12Hz-0.6B-Base"
repo_1_7b_4bit = None
repo_0_6b_4bit = None

return [
_languages = ["zh", "en", "ja", "ko", "de", "fr", "ru", "pt", "es", "it"]

configs = [
ModelConfig(
model_name="qwen-tts-1.7B",
display_name="Qwen TTS 1.7B",
Expand All @@ -241,7 +247,7 @@ def _get_qwen_model_configs() -> list[ModelConfig]:
model_size="1.7B",
size_mb=3500,
supports_instruct=False, # Base model drops instruct silently
languages=["zh", "en", "ja", "ko", "de", "fr", "ru", "pt", "es", "it"],
languages=_languages,
),
ModelConfig(
model_name="qwen-tts-0.6B",
Expand All @@ -251,10 +257,37 @@ def _get_qwen_model_configs() -> list[ModelConfig]:
model_size="0.6B",
size_mb=1200,
supports_instruct=False,
languages=["zh", "en", "ja", "ko", "de", "fr", "ru", "pt", "es", "it"],
languages=_languages,
),
]

# 4-bit quantized variants — MLX only, ~2-3x faster on Apple Silicon
if backend_type == "mlx":
configs.extend([
ModelConfig(
model_name="qwen-tts-1.7B-4bit",
display_name="Qwen TTS 1.7B ⚡ Fast (4-bit)",
engine="qwen",
hf_repo_id=repo_1_7b_4bit,
model_size="1.7B-4bit",
size_mb=1100,
supports_instruct=False,
languages=_languages,
),
ModelConfig(
model_name="qwen-tts-0.6B-4bit",
display_name="Qwen TTS 0.6B ⚡ Fast (4-bit)",
engine="qwen",
hf_repo_id=repo_0_6b_4bit,
model_size="0.6B-4bit",
size_mb=400,
supports_instruct=False,
languages=_languages,
),
])

return configs


def _get_qwen_custom_voice_configs() -> list[ModelConfig]:
"""Return Qwen CustomVoice model configs."""
Expand Down
4 changes: 3 additions & 1 deletion backend/backends/mlx_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,16 @@ def _get_model_path(self, model_size: str) -> str:
Get the MLX model path.

Args:
model_size: Model size (1.7B or 0.6B)
model_size: Model size (1.7B, 0.6B, 1.7B-4bit, 0.6B-4bit)

Returns:
HuggingFace Hub model ID for MLX
"""
mlx_model_map = {
"1.7B": "mlx-community/Qwen3-TTS-12Hz-1.7B-Base-bf16",
"0.6B": "mlx-community/Qwen3-TTS-12Hz-0.6B-Base-bf16",
"1.7B-4bit": "mlx-community/Qwen3-TTS-12Hz-1.7B-Base-4bit",
"0.6B-4bit": "mlx-community/Qwen3-TTS-12Hz-0.6B-Base-4bit",
}

if model_size not in mlx_model_map:
Expand Down
39 changes: 39 additions & 0 deletions backend/utils/chunked_tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
# Lowercase for case-insensitive matching.
_ABBREVIATIONS = frozenset(
{
# English
"mr",
"mrs",
"ms",
Expand All @@ -50,6 +51,44 @@
"u.s",
"u.s.a",
"u.k",
# Russian
"т.д", # и т.д. (и так далее)
"т.п", # и т.п. (и тому подобное)
"т.е", # т.е. (то есть)
"т.к", # т.к. (так как)
"т.н", # т.н. (так называемый)
"т.о", # т.о. (таким образом)
Comment on lines +55 to +60
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Dotted Russian abbreviations won’t match with current period parser

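This addition won’t work reliably for dotted entries like т.д, т.е, т.к, т.н, т.п, н.э: Line 162 only backtracks over letters, so at the final dot it extracts just the last segment (e.g., е from т.е.) and looks that up in _ABBREVIATIONS instead of the full dotted stem. The lookup succeeds only by coincidence when the last letter is itself a single-letter entry (д for т.д., о for т.о.), so the splitter can still break mid-phrase for the remaining stems, contrary to the PR goal.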

Suggested fix (parse abbreviation stem including internal dots)
diff --git a/backend/utils/chunked_tts.py b/backend/utils/chunked_tts.py
@@
 def _find_last_sentence_end(text: str) -> int:
@@
         if char == ".":
-            # Walk backwards to find the preceding word
-            word_start = pos - 1
-            while word_start >= 0 and text[word_start].isalpha():
-                word_start -= 1
-            word = text[word_start + 1 : pos].lower()
-            if word in _ABBREVIATIONS:
+            # Walk backwards to capture abbreviation stems with internal dots,
+            # e.g. "e.g.", "u.s.", "т.д."
+            token_start = pos - 1
+            while token_start >= 0 and (
+                text[token_start].isalpha() or text[token_start] == "."
+            ):
+                token_start -= 1
+            token = text[token_start + 1 : pos].strip(".").lower()
+            if token in _ABBREVIATIONS:
                 continue
             # Skip decimal numbers (digit immediately before the period)
-            if word_start >= 0 and text[word_start].isdigit():
+            if token_start >= 0 and text[token_start].isdigit():
                 continue

Also applies to: 67-67

🧰 Tools
🪛 Ruff (0.15.11)

[warning] 57-57: String contains ambiguous е (CYRILLIC SMALL LETTER IE). Did you mean e (LATIN SMALL LETTER E)?

(RUF001)


[warning] 57-57: Comment contains ambiguous е (CYRILLIC SMALL LETTER IE). Did you mean e (LATIN SMALL LETTER E)?

(RUF003)


[warning] 60-60: String contains ambiguous о (CYRILLIC SMALL LETTER O). Did you mean o (LATIN SMALL LETTER O)?

(RUF001)


[warning] 60-60: Comment contains ambiguous о (CYRILLIC SMALL LETTER O). Did you mean o (LATIN SMALL LETTER O)?

(RUF003)

"др", # и др. (и другие)
"пр", # и пр. (и прочее)
"г", # г. (год / город)
"гг", # гг. (годы)
"в", # в. (век)
"вв", # вв. (века)
"н.э", # н.э. (нашей эры)
"ул", # ул. (улица)
"д", # д. (дом)
"корп", # корп. (корпус)
"стр", # стр. (строение / страница)
"руб", # руб. (рублей)
"коп", # коп. (копеек)
"тыс", # тыс. (тысяч)
"млн", # млн. (миллионов)
"млрд", # млрд. (миллиардов)
"трлн", # трлн. (триллионов)
"кв", # кв. (квадратный)
"см", # см. (смотри / сантиметр)
"им", # им. (имени)
"проф", # проф. (профессор)
"акад", # акад. (академик)
"доц", # доц. (доцент)
"ред", # ред. (редактор)
"изд", # изд. (издание)
"обл", # обл. (область)
"р", # р. (река / рублей)
"оз", # оз. (озеро)
"о", # о. (остров)
"м", # м. (метро / метр)
"гр", # гр. (гражданин / грамм)
}
)

Expand Down