diff --git a/android/src/main/java/com/speech/RNSpeechModule.kt b/android/src/main/java/com/speech/RNSpeechModule.kt index 1dc60e8..71f2e39 100644 --- a/android/src/main/java/com/speech/RNSpeechModule.kt +++ b/android/src/main/java/com/speech/RNSpeechModule.kt @@ -4,13 +4,14 @@ import java.util.UUID import java.util.Locale import android.os.Build import android.os.Bundle +import android.os.Handler +import android.os.Looper import android.content.Intent import android.content.Context import android.speech.tts.Voice import android.media.AudioManager import android.media.AudioAttributes import android.media.AudioFocusRequest -import android.annotation.SuppressLint import android.speech.tts.TextToSpeech import com.facebook.react.bridge.Promise import com.facebook.react.bridge.Arguments @@ -19,6 +20,7 @@ import com.facebook.react.bridge.ReadableMap import android.speech.tts.UtteranceProgressListener import com.facebook.react.bridge.ReactApplicationContext import com.facebook.react.module.annotations.ReactModule +import android.util.Log @ReactModule(name = RNSpeechModule.NAME) class RNSpeechModule(reactContext: ReactApplicationContext) : @@ -36,6 +38,7 @@ class RNSpeechModule(reactContext: ReactApplicationContext) : companion object { const val NAME = "RNSpeech" + private const val TAG = "RNSpeech" private val defaultOptions: Map = mapOf( "rate" to 0.5f, @@ -45,9 +48,11 @@ class RNSpeechModule(reactContext: ReactApplicationContext) : "language" to Locale.getDefault().toLanguageTag() ) } + private val queueLock = Any() private val maxInputLength = TextToSpeech.getMaxSpeechInputLength() private val isSupportedPausing = Build.VERSION.SDK_INT >= Build.VERSION_CODES.O + private val mainHandler = Handler(Looper.getMainLooper()) private lateinit var synthesizer: TextToSpeech @@ -56,6 +61,7 @@ class RNSpeechModule(reactContext: ReactApplicationContext) : private var isInitialized = false private var isInitializing = false + private var listenerSet = false private val pendingOperations = mutableListOf Unit, Promise>>() private var globalOptions: MutableMap = defaultOptions.toMutableMap() @@ -174,86 +180,139 @@ class RNSpeechModule(reactContext: ReactApplicationContext) : } } + private fun verifyTTSReady(retryCount: Int = 0) { + val maxRetries = 20 + val delay = when { + retryCount == 0 -> 500L + retryCount < 5 -> 1000L + else -> 2000L + } + + mainHandler.postDelayed({ + try { + val voices = synthesizer.voices + val engines = synthesizer.engines + + if (!voices.isNullOrEmpty() && !engines.isNullOrEmpty()) { + Log.d(TAG, "TTS verified ready with ${voices.size} voices and ${engines.size} engines") + cachedEngines = engines + + if (!listenerSet) { + synthesizer.setOnUtteranceProgressListener(object : UtteranceProgressListener() { + override fun onStart(utteranceId: String) { + synchronized(queueLock) { + speechQueue.find { it.utteranceId == utteranceId }?.let { item -> + item.status = SpeechStatus.SPEAKING + if (isResuming && item.position > 0) { + emitOnResume(getEventData(utteranceId)) + isResuming = false + } else { + emitOnStart(getEventData(utteranceId)) + } + } + } + } + + override fun onDone(utteranceId: String) { + synchronized(queueLock) { + speechQueue.find { it.utteranceId == utteranceId }?.let { item -> + item.status = SpeechStatus.COMPLETED + deactivateDuckingSession() + emitOnFinish(getEventData(utteranceId)) + if (!isPaused) { + currentQueueIndex++ + processNextQueueItem() + } + } + } + } + + override fun onError(utteranceId: String) { + synchronized(queueLock) { + speechQueue.find { it.utteranceId == utteranceId }?.let { item -> + item.status = SpeechStatus.ERROR + deactivateDuckingSession() + emitOnError(getEventData(utteranceId)) + if (!isPaused) { + currentQueueIndex++ + processNextQueueItem() + } + } + } + } + + override fun onStop(utteranceId: String, interrupted: Boolean) { + synchronized(queueLock) { + speechQueue.find { it.utteranceId == utteranceId }?.let { item -> + if (isPaused) { + item.status = SpeechStatus.PAUSED + emitOnPause(getEventData(utteranceId)) + } else { + item.status = SpeechStatus.COMPLETED + emitOnStopped(getEventData(utteranceId)) + } + } + } + } + + override fun onRangeStart(utteranceId: String, start: Int, end: Int, frame: Int) { + synchronized(queueLock) { + speechQueue.find { it.utteranceId == utteranceId }?.let { item -> + item.position = item.offset + start + val data = Arguments.createMap().apply { + putInt("id", utteranceId.hashCode()) + putInt("length", end - start) + putInt("location", item.position) + } + emitOnProgress(data) + } + } + } + }) + listenerSet = true + } + + applyGlobalOptions() + isInitialized = true + isInitializing = false + processPendingOperations() + + } else if (retryCount < maxRetries) { + Log.w(TAG, "TTS not ready yet (retry ${retryCount + 1}/$maxRetries), voices=${voices?.size ?: 0}, engines=${engines?.size ?: 0}") + verifyTTSReady(retryCount + 1) + } else { + Log.e(TAG, "TTS failed to become ready after $maxRetries retries") + isInitialized = false + isInitializing = false + rejectPendingOperations() + } + + } catch (e: Exception) { + Log.e(TAG, "Exception during TTS verification (retry $retryCount)", e) + if (retryCount < maxRetries) { + verifyTTSReady(retryCount + 1) + } else { + isInitialized = false + isInitializing = false + rejectPendingOperations() + } + } + }, delay) +} + private fun initializeTTS() { if (isInitializing) return isInitializing = true synthesizer = TextToSpeech(reactApplicationContext, { status -> - isInitialized = status == TextToSpeech.SUCCESS - isInitializing = false - - if (isInitialized) { - cachedEngines = synthesizer.engines - synthesizer.setOnUtteranceProgressListener(object : UtteranceProgressListener() { - override fun onStart(utteranceId: String) { - synchronized(queueLock) { - speechQueue.find { it.utteranceId == utteranceId }?.let { item -> - item.status = SpeechStatus.SPEAKING - if (isResuming && item.position > 0) { - emitOnResume(getEventData(utteranceId)) - isResuming = false - } else { - emitOnStart(getEventData(utteranceId)) - } - } - } - } - override fun onDone(utteranceId: String) { - synchronized(queueLock) { - speechQueue.find { it.utteranceId == utteranceId }?.let { item -> - item.status = SpeechStatus.COMPLETED - deactivateDuckingSession() - emitOnFinish(getEventData(utteranceId)) - if (!isPaused) { - currentQueueIndex++ - processNextQueueItem() - } - } - } - } - override fun onError(utteranceId: String) { - synchronized(queueLock) { - speechQueue.find { it.utteranceId == utteranceId }?.let { item -> - item.status = SpeechStatus.ERROR - deactivateDuckingSession() - emitOnError(getEventData(utteranceId)) - if (!isPaused) { - currentQueueIndex++ - processNextQueueItem() - } - } - } - } - override fun onStop(utteranceId: String, interrupted: Boolean) { - synchronized(queueLock) { - speechQueue.find { it.utteranceId == utteranceId }?.let { item -> - if (isPaused) { - item.status = SpeechStatus.PAUSED - emitOnPause(getEventData(utteranceId)) - } else { - item.status = SpeechStatus.COMPLETED - emitOnStopped(getEventData(utteranceId)) - } - } - } - } - override fun onRangeStart(utteranceId: String, start: Int, end: Int, frame: Int) { - synchronized(queueLock) { - speechQueue.find { it.utteranceId == utteranceId }?.let { item -> - item.position = item.offset + start - val data = Arguments.createMap().apply { - putInt("id", utteranceId.hashCode()) - putInt("length", end - start) - putInt("location", item.position) - } - emitOnProgress(data) - } - } - } - }) - applyGlobalOptions() - processPendingOperations() + if (status == TextToSpeech.SUCCESS) { + // Don't set isInitialized yet - wait for verification + Log.d(TAG, "TTS engine callback SUCCESS, verifying voices...") + verifyTTSReady() } else { + Log.e(TAG, "TTS engine initialization failed with status: $status") + isInitialized = false + isInitializing = false rejectPendingOperations() } }, selectedEngine) @@ -273,8 +332,12 @@ class RNSpeechModule(reactContext: ReactApplicationContext) : } else -> { pendingOperations.add(Pair(operation, promise)) + // Shut down any zombie instance before reinitializing + if (::synthesizer.isInitialized) { + try { synthesizer.stop(); synthesizer.shutdown() } catch (_: Exception) {} + } initializeTTS() - } +} } } @@ -284,10 +347,10 @@ class RNSpeechModule(reactContext: ReactApplicationContext) : synthesizer.setLanguage(locale) } globalOptions["pitch"]?.let { - synthesizer.setPitch(it as Float) + synthesizer.setPitch((it as? Number)?.toFloat() ?: 1.0f) } globalOptions["rate"]?.let { - synthesizer.setSpeechRate(it as Float) + synthesizer.setSpeechRate((it as? Number)?.toFloat() ?: 0.5f) } globalOptions["voice"]?.let { voiceId -> synthesizer.voices?.forEach { voice -> @@ -308,10 +371,10 @@ class RNSpeechModule(reactContext: ReactApplicationContext) : synthesizer.setLanguage(locale) } tempOptions["pitch"]?.let { - synthesizer.setPitch(it as Float) + synthesizer.setPitch((it as? Number)?.toFloat() ?: 1.0f) } tempOptions["rate"]?.let { - synthesizer.setSpeechRate(it as Float) + synthesizer.setSpeechRate((it as? Number)?.toFloat() ?: 0.5f) } tempOptions["voice"]?.let { voiceId -> synthesizer.voices?.forEach { voice -> @@ -528,11 +591,11 @@ class RNSpeechModule(reactContext: ReactApplicationContext) : val queueItem = SpeechQueueItem(text = text, options = validatedOptions, utteranceId = utteranceId) synchronized(queueLock) { speechQueue.add(queueItem) - if (!synthesizer.isSpeaking && !isPaused) { - currentQueueIndex = speechQueue.size - 1 - processNextQueueItem() + if (!synthesizer.isSpeaking && !isPaused && currentQueueIndex == -1) { + currentQueueIndex = 0 // always start from beginning if nothing is running + processNextQueueItem() } - } + } promise.resolve(null) } } @@ -565,12 +628,24 @@ class RNSpeechModule(reactContext: ReactApplicationContext) : return } } + + // Shutdown existing TTS before switching engines + if (::synthesizer.isInitialized) { + try { + synthesizer.stop() + synthesizer.shutdown() + } catch (e: Exception) { + Log.w(TAG, "Error shutting down TTS before engine switch", e) + } + } + selectedEngine = engineName + isInitialized = false + isInitializing = false + resetQueueState() - invalidate() - - pendingOperations.add(Pair({ promise.resolve(null) }, promise)) initializeTTS() + promise.resolve(null) } override fun openVoiceDataInstaller(promise: Promise) { @@ -594,11 +669,13 @@ class RNSpeechModule(reactContext: ReactApplicationContext) : override fun invalidate() { super.invalidate() + mainHandler.removeCallbacksAndMessages(null) if (::synthesizer.isInitialized) { synthesizer.stop() synthesizer.shutdown() resetQueueState() } isInitialized = false + isInitializing = false } -} +} \ No newline at end of file diff --git a/package.json b/package.json index f88564c..bece879 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@mhpdev/react-native-speech", - "version": "1.3.1", + "version": "1.3.2", "description": "A high-performance React Native library for text-to-speech on iOS and Android", "source": "./src/index.tsx", "main": "./lib/module/index.js",