mhpdev-com · coskunomer · Jan 5, 2026 · Mar 8, 2026
diff --git a/android/src/main/java/com/speech/RNSpeechModule.kt b/android/src/main/java/com/speech/RNSpeechModule.kt
@@ -4,13 +4,14 @@ import java.util.UUID
 import java.util.Locale
 import android.os.Build
 import android.os.Bundle
+import android.os.Handler
+import android.os.Looper
 import android.content.Intent
 import android.content.Context
 import android.speech.tts.Voice
 import android.media.AudioManager
 import android.media.AudioAttributes
 import android.media.AudioFocusRequest
-import android.annotation.SuppressLint
 import android.speech.tts.TextToSpeech
 import com.facebook.react.bridge.Promise
 import com.facebook.react.bridge.Arguments
@@ -19,6 +20,7 @@ import com.facebook.react.bridge.ReadableMap
 import android.speech.tts.UtteranceProgressListener
 import com.facebook.react.bridge.ReactApplicationContext
 import com.facebook.react.module.annotations.ReactModule
+import android.util.Log
 
 @ReactModule(name = RNSpeechModule.NAME)
 class RNSpeechModule(reactContext: ReactApplicationContext) :
@@ -36,6 +38,7 @@ class RNSpeechModule(reactContext: ReactApplicationContext) :
 
   companion object {
     const val NAME = "RNSpeech"
+    private const val TAG = "RNSpeech"
 
     private val defaultOptions: Map<String, Any> = mapOf(
       "rate" to 0.5f,
@@ -45,9 +48,11 @@ class RNSpeechModule(reactContext: ReactApplicationContext) :
       "language" to Locale.getDefault().toLanguageTag()
     )
   }
+
   private val queueLock = Any()
   private val maxInputLength = TextToSpeech.getMaxSpeechInputLength()
   private val isSupportedPausing = Build.VERSION.SDK_INT >= Build.VERSION_CODES.O
+  private val mainHandler = Handler(Looper.getMainLooper())
 
   private lateinit var synthesizer: TextToSpeech
 
@@ -56,6 +61,7 @@ class RNSpeechModule(reactContext: ReactApplicationContext) :
 
   private var isInitialized = false
   private var isInitializing = false
+  private var listenerSet = false
   private val pendingOperations = mutableListOf<Pair<() -> Unit, Promise>>()
 
   private var globalOptions: MutableMap<String, Any> = defaultOptions.toMutableMap()
@@ -174,86 +180,139 @@ class RNSpeechModule(reactContext: ReactApplicationContext) :
     }
   }
 
+  /**
+   * Polls on [mainHandler] until the engine reports at least one voice and one engine, then
+   * finishes initialization; after 20 retries the pending operations are rejected instead.
+   * Delay before a poll: 500 ms (first), 1 s (retries 1-4), 2 s (later retries).
+   *
+   * @param retryCount number of retries already made
+   * @param engine instance under verification; polls stop once [synthesizer] is replaced
+   */
+  private fun verifyTTSReady(retryCount: Int = 0, engine: TextToSpeech = synthesizer) {
+    val maxRetries = 20
+    val delay = when {
+      retryCount == 0 -> 500L
+      retryCount < 5 -> 1000L
+      else -> 2000L
+    }
+
+    mainHandler.postDelayed(poll@{
+      // Stale poll: [synthesizer] was replaced (e.g. engine switch) while this one was queued.
+      if (engine !== synthesizer) return@poll
+      try {
+        val voices = engine.voices
+        val engines = engine.engines
+        if (!voices.isNullOrEmpty() && !engines.isNullOrEmpty()) {
+          Log.d(TAG, "TTS verified ready with ${voices.size} voices and ${engines.size} engines")
+          cachedEngines = engines
+          // Attach unconditionally: a recreated TextToSpeech starts without a listener.
+          engine.setOnUtteranceProgressListener(object : UtteranceProgressListener() {
+            override fun onStart(utteranceId: String) {
+              synchronized(queueLock) {
+                speechQueue.find { it.utteranceId == utteranceId }?.let { item ->
+                  item.status = SpeechStatus.SPEAKING
+                  if (isResuming && item.position > 0) {
+                    emitOnResume(getEventData(utteranceId))
+                    isResuming = false
+                  } else {
+                    emitOnStart(getEventData(utteranceId))
+                  }
+                }
+              }
+            }
+            override fun onDone(utteranceId: String) {
+              synchronized(queueLock) {
+                speechQueue.find { it.utteranceId == utteranceId }?.let { item ->
+                  item.status = SpeechStatus.COMPLETED
+                  deactivateDuckingSession()
+                  emitOnFinish(getEventData(utteranceId))
+                  if (!isPaused) {
+                    currentQueueIndex++
+                    processNextQueueItem()
+                  }
+                }
+              }
+            }
+            override fun onError(utteranceId: String) {
+              synchronized(queueLock) {
+                speechQueue.find { it.utteranceId == utteranceId }?.let { item ->
+                  item.status = SpeechStatus.ERROR
+                  deactivateDuckingSession()
+                  emitOnError(getEventData(utteranceId))
+                  if (!isPaused) {
+                    currentQueueIndex++
+                    processNextQueueItem()
+                  }
+                }
+              }
+            }
+            override fun onStop(utteranceId: String, interrupted: Boolean) {
+              synchronized(queueLock) {
+                speechQueue.find { it.utteranceId == utteranceId }?.let { item ->
+                  if (isPaused) {
+                    item.status = SpeechStatus.PAUSED
+                    emitOnPause(getEventData(utteranceId))
+                  } else {
+                    item.status = SpeechStatus.COMPLETED
+                    emitOnStopped(getEventData(utteranceId))
+                  }
+                }
+              }
+            }
+            override fun onRangeStart(utteranceId: String, start: Int, end: Int, frame: Int) {
+              synchronized(queueLock) {
+                speechQueue.find { it.utteranceId == utteranceId }?.let { item ->
+                  item.position = item.offset + start
+                  val data = Arguments.createMap().apply {
+                    putInt("id", utteranceId.hashCode())
+                    putInt("length", end - start)
+                    putInt("location", item.position)
+                  }
+                  emitOnProgress(data)
+                }
+              }
+            }
+          })
+          listenerSet = true
+          applyGlobalOptions()
+          isInitialized = true
+          isInitializing = false
+          processPendingOperations()
+        } else if (retryCount < maxRetries) {
+          Log.w(TAG, "TTS not ready yet (retry ${retryCount + 1}/$maxRetries), voices=${voices?.size ?: 0}, engines=${engines?.size ?: 0}")
+          verifyTTSReady(retryCount + 1, engine)
+        } else {
+          Log.e(TAG, "TTS failed to become ready after $maxRetries retries")
+          isInitialized = false
+          isInitializing = false
+          rejectPendingOperations()
+        }
+      } catch (e: Exception) {
+        Log.e(TAG, "Exception during TTS verification (retry $retryCount)", e)
+        if (retryCount < maxRetries) {
+          verifyTTSReady(retryCount + 1, engine)
+        } else {
+          isInitialized = false
+          isInitializing = false
+          rejectPendingOperations()
+        }
+      }
+    }, delay)
+  }
+
   private fun initializeTTS() {
     if (isInitializing) return
     isInitializing = true
 
     synthesizer = TextToSpeech(reactApplicationContext, { status ->
-      isInitialized = status == TextToSpeech.SUCCESS
-      isInitializing = false
-
-      if (isInitialized) {
-        cachedEngines = synthesizer.engines
-        synthesizer.setOnUtteranceProgressListener(object : UtteranceProgressListener() {
-          override fun onStart(utteranceId: String) {
-            synchronized(queueLock) {
-              speechQueue.find { it.utteranceId == utteranceId }?.let { item ->
-                item.status = SpeechStatus.SPEAKING
-                if (isResuming && item.position > 0) {
-                  emitOnResume(getEventData(utteranceId))
-                  isResuming = false
-                } else {
-                  emitOnStart(getEventData(utteranceId))
-                }
-              }
-            }
-          }
-          override fun onDone(utteranceId: String) {
-            synchronized(queueLock) {
-              speechQueue.find { it.utteranceId == utteranceId }?.let { item ->
-                item.status = SpeechStatus.COMPLETED
-                deactivateDuckingSession()
-                emitOnFinish(getEventData(utteranceId))
-                if (!isPaused) {
-                  currentQueueIndex++
-                  processNextQueueItem()
-                }
-              }
-            }
-          }
-          override fun onError(utteranceId: String) {
-            synchronized(queueLock) {
-              speechQueue.find { it.utteranceId == utteranceId }?.let { item ->
-                item.status = SpeechStatus.ERROR
-                deactivateDuckingSession()
-                emitOnError(getEventData(utteranceId))
-                if (!isPaused) {
-                  currentQueueIndex++
-                  processNextQueueItem()
-                }
-              }
-            }
-          }
-          override fun onStop(utteranceId: String, interrupted: Boolean) {
-            synchronized(queueLock) {
-              speechQueue.find { it.utteranceId == utteranceId }?.let { item ->
-                if (isPaused) {
-                  item.status = SpeechStatus.PAUSED
-                  emitOnPause(getEventData(utteranceId))
-                } else {
-                  item.status = SpeechStatus.COMPLETED
-                  emitOnStopped(getEventData(utteranceId))
-                }
-              }
-            }
-          }
-          override fun onRangeStart(utteranceId: String, start: Int, end: Int, frame: Int) {
-            synchronized(queueLock) {
-              speechQueue.find { it.utteranceId == utteranceId }?.let { item ->
-                item.position = item.offset + start
-                val data = Arguments.createMap().apply {
-                  putInt("id", utteranceId.hashCode())
-                  putInt("length", end - start)
-                  putInt("location", item.position)
-                }
-                emitOnProgress(data)
-              }
-            }
-          }
-        })
-        applyGlobalOptions()
-        processPendingOperations()
+      if (status == TextToSpeech.SUCCESS) {
+        // Don't set isInitialized yet - wait for verification
+        Log.d(TAG, "TTS engine callback SUCCESS, verifying voices...")
+        verifyTTSReady()
       } else {
+        Log.e(TAG, "TTS engine initialization failed with status: $status")
+        isInitialized = false
+        isInitializing = false
         rejectPendingOperations()
       }
     }, selectedEngine)
@@ -273,8 +332,12 @@ class RNSpeechModule(reactContext: ReactApplicationContext) :
       }
       else -> {
         pendingOperations.add(Pair(operation, promise))
+        // Shut down any zombie instance before reinitializing
+        if (::synthesizer.isInitialized) {
+          try { synthesizer.stop(); synthesizer.shutdown() } catch (e: Exception) { Log.w(TAG, "Error shutting down stale TTS instance", e) }
+        }
         initializeTTS()
       }
     }
   }
 
@@ -284,10 +347,10 @@ class RNSpeechModule(reactContext: ReactApplicationContext) :
       synthesizer.setLanguage(locale)
     }
     globalOptions["pitch"]?.let {
-      synthesizer.setPitch(it as Float)
+      synthesizer.setPitch((it as? Number)?.toFloat() ?: 1.0f)
     }
     globalOptions["rate"]?.let {
-      synthesizer.setSpeechRate(it as Float)
+      synthesizer.setSpeechRate((it as? Number)?.toFloat() ?: 0.5f)
     }
     globalOptions["voice"]?.let { voiceId ->
       synthesizer.voices?.forEach { voice ->
@@ -308,10 +371,10 @@ class RNSpeechModule(reactContext: ReactApplicationContext) :
       synthesizer.setLanguage(locale)
     }
     tempOptions["pitch"]?.let {
-      synthesizer.setPitch(it as Float)
+      synthesizer.setPitch((it as? Number)?.toFloat() ?: 1.0f)
     }
     tempOptions["rate"]?.let {
-      synthesizer.setSpeechRate(it as Float)
+      synthesizer.setSpeechRate((it as? Number)?.toFloat() ?: 0.5f)
     }
     tempOptions["voice"]?.let { voiceId ->
       synthesizer.voices?.forEach { voice ->
@@ -528,11 +591,11 @@ class RNSpeechModule(reactContext: ReactApplicationContext) :
       val queueItem = SpeechQueueItem(text = text, options = validatedOptions, utteranceId = utteranceId)
       synchronized(queueLock) {
         speechQueue.add(queueItem)
-        if (!synthesizer.isSpeaking && !isPaused) {
-          currentQueueIndex = speechQueue.size - 1
+        if (!synthesizer.isSpeaking && !isPaused && currentQueueIndex == -1) {
+          currentQueueIndex = 0 // always start from beginning if nothing is running
           processNextQueueItem()
         }
       }
       promise.resolve(null)
     }
   }
@@ -565,12 +628,24 @@ class RNSpeechModule(reactContext: ReactApplicationContext) :
         return
       }
     }
+
+    mainHandler.removeCallbacksAndMessages(null) // drop readiness polls queued for the old engine
+    if (::synthesizer.isInitialized) {
+      try {
+        synthesizer.stop()
+        synthesizer.shutdown()
+      } catch (e: Exception) {
+        Log.w(TAG, "Error shutting down TTS before engine switch", e)
+      }
+    }
     selectedEngine = engineName
+    isInitialized = false
+    isInitializing = false
+    listenerSet = false
+    resetQueueState()
 
-    invalidate()
-
-    pendingOperations.add(Pair({ promise.resolve(null) }, promise))
     initializeTTS()
+    promise.resolve(null)
   }
 
    override fun openVoiceDataInstaller(promise: Promise) {
@@ -594,11 +669,13 @@ class RNSpeechModule(reactContext: ReactApplicationContext) :
 
   override fun invalidate() {
     super.invalidate()
+    mainHandler.removeCallbacksAndMessages(null) // cancel readiness polls still queued by verifyTTSReady
     if (::synthesizer.isInitialized) {
       synthesizer.stop()
       synthesizer.shutdown()
       resetQueueState()
     }
     isInitialized = false
+    isInitializing = false // initializeTTS() returns early while this flag is set
   }
-}
+}
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@mhpdev/react-native-speech",
-  "version": "1.3.1",
+  "version": "1.3.2",
   "description": "A high-performance React Native library for text-to-speech on iOS and Android",
   "source": "./src/index.tsx",
   "main": "./lib/module/index.js",