Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
253 changes: 165 additions & 88 deletions android/src/main/java/com/speech/RNSpeechModule.kt
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@ import java.util.UUID
import java.util.Locale
import android.os.Build
import android.os.Bundle
import android.os.Handler
import android.os.Looper
import android.content.Intent
import android.content.Context
import android.speech.tts.Voice
import android.media.AudioManager
import android.media.AudioAttributes
import android.media.AudioFocusRequest
import android.annotation.SuppressLint
import android.speech.tts.TextToSpeech
import com.facebook.react.bridge.Promise
import com.facebook.react.bridge.Arguments
Expand All @@ -19,6 +20,7 @@ import com.facebook.react.bridge.ReadableMap
import android.speech.tts.UtteranceProgressListener
import com.facebook.react.bridge.ReactApplicationContext
import com.facebook.react.module.annotations.ReactModule
import android.util.Log

@ReactModule(name = RNSpeechModule.NAME)
class RNSpeechModule(reactContext: ReactApplicationContext) :
Expand All @@ -36,6 +38,7 @@ class RNSpeechModule(reactContext: ReactApplicationContext) :

companion object {
const val NAME = "RNSpeech"
private const val TAG = "RNSpeech"

private val defaultOptions: Map<String, Any> = mapOf(
"rate" to 0.5f,
Expand All @@ -45,9 +48,11 @@ class RNSpeechModule(reactContext: ReactApplicationContext) :
"language" to Locale.getDefault().toLanguageTag()
)
}

private val queueLock = Any()
private val maxInputLength = TextToSpeech.getMaxSpeechInputLength()
private val isSupportedPausing = Build.VERSION.SDK_INT >= Build.VERSION_CODES.O
private val mainHandler = Handler(Looper.getMainLooper())

private lateinit var synthesizer: TextToSpeech

Expand All @@ -56,6 +61,7 @@ class RNSpeechModule(reactContext: ReactApplicationContext) :

private var isInitialized = false
private var isInitializing = false
private var listenerSet = false
private val pendingOperations = mutableListOf<Pair<() -> Unit, Promise>>()

private var globalOptions: MutableMap<String, Any> = defaultOptions.toMutableMap()
Expand Down Expand Up @@ -174,86 +180,139 @@ class RNSpeechModule(reactContext: ReactApplicationContext) :
}
}

/**
 * Confirms that the TTS engine is actually usable after its init callback reported SUCCESS.
 *
 * A check is scheduled on [mainHandler]; the engine counts as ready once both
 * `synthesizer.voices` and `synthesizer.engines` are non-empty. Until then the check is
 * re-scheduled, waiting 500 ms before the first attempt, 1 s before retries 1-4 and 2 s
 * afterwards, for at most 20 retries. On success the progress listener is attached, global
 * options are applied and pending operations are processed; when the retries are exhausted
 * the pending operations are rejected.
 *
 * @param retryCount number of retries already performed (0 for the initial attempt).
 */
private fun verifyTTSReady(retryCount: Int = 0) {
  val maxRetries = 20
  val delay = when {
    retryCount == 0 -> 500L
    retryCount < 5 -> 1000L
    else -> 2000L
  }

  mainHandler.postDelayed({
    try {
      val voices = synthesizer.voices
      val engines = synthesizer.engines

      if (!voices.isNullOrEmpty() && !engines.isNullOrEmpty()) {
        Log.d(TAG, "TTS verified ready with ${voices.size} voices and ${engines.size} engines")
        cachedEngines = engines

        // Always attach the listener to the instance that was just verified. A listener
        // belongs to one TextToSpeech object, so guarding this with `listenerSet` left every
        // re-created synthesizer (engine switch, re-initialization) without callbacks.
        synthesizer.setOnUtteranceProgressListener(createUtteranceProgressListener())
        listenerSet = true

        applyGlobalOptions()
        isInitialized = true
        isInitializing = false
        processPendingOperations()

      } else if (retryCount < maxRetries) {
        Log.w(TAG, "TTS not ready yet (retry ${retryCount + 1}/$maxRetries), voices=${voices?.size ?: 0}, engines=${engines?.size ?: 0}")
        verifyTTSReady(retryCount + 1)
      } else {
        Log.e(TAG, "TTS failed to become ready after $maxRetries retries")
        isInitialized = false
        isInitializing = false
        rejectPendingOperations()
      }

    } catch (e: Exception) {
      // Querying voices/engines can throw while the engine is still binding; treat it like
      // a "not ready" result and retry until the budget is spent.
      Log.e(TAG, "Exception during TTS verification (retry $retryCount)", e)
      if (retryCount < maxRetries) {
        verifyTTSReady(retryCount + 1)
      } else {
        isInitialized = false
        isInitializing = false
        rejectPendingOperations()
      }
    }
  }, delay)
}

/**
 * Builds the listener that maps engine utterance callbacks onto queue state and JS events.
 * Every callback looks up its queue item by utterance id while holding [queueLock].
 */
private fun createUtteranceProgressListener(): UtteranceProgressListener =
  object : UtteranceProgressListener() {
    override fun onStart(utteranceId: String) {
      synchronized(queueLock) {
        speechQueue.find { it.utteranceId == utteranceId }?.let { item ->
          item.status = SpeechStatus.SPEAKING
          // A start that follows a resume of a partially spoken item is reported as "resume".
          if (isResuming && item.position > 0) {
            emitOnResume(getEventData(utteranceId))
            isResuming = false
          } else {
            emitOnStart(getEventData(utteranceId))
          }
        }
      }
    }

    override fun onDone(utteranceId: String) {
      synchronized(queueLock) {
        speechQueue.find { it.utteranceId == utteranceId }?.let { item ->
          item.status = SpeechStatus.COMPLETED
          deactivateDuckingSession()
          emitOnFinish(getEventData(utteranceId))
          // While paused the queue must not advance.
          if (!isPaused) {
            currentQueueIndex++
            processNextQueueItem()
          }
        }
      }
    }

    override fun onError(utteranceId: String) {
      synchronized(queueLock) {
        speechQueue.find { it.utteranceId == utteranceId }?.let { item ->
          item.status = SpeechStatus.ERROR
          deactivateDuckingSession()
          emitOnError(getEventData(utteranceId))
          // A failed item is skipped so the rest of the queue still plays.
          if (!isPaused) {
            currentQueueIndex++
            processNextQueueItem()
          }
        }
      }
    }

    override fun onStop(utteranceId: String, interrupted: Boolean) {
      synchronized(queueLock) {
        speechQueue.find { it.utteranceId == utteranceId }?.let { item ->
          // A stop while `isPaused` is set is reported as a pause, otherwise as a stop.
          if (isPaused) {
            item.status = SpeechStatus.PAUSED
            emitOnPause(getEventData(utteranceId))
          } else {
            item.status = SpeechStatus.COMPLETED
            emitOnStopped(getEventData(utteranceId))
          }
        }
      }
    }

    override fun onRangeStart(utteranceId: String, start: Int, end: Int, frame: Int) {
      synchronized(queueLock) {
        speechQueue.find { it.utteranceId == utteranceId }?.let { item ->
          // `start` is shifted by the item's offset before being stored and reported.
          item.position = item.offset + start
          val data = Arguments.createMap().apply {
            putInt("id", utteranceId.hashCode())
            putInt("length", end - start)
            putInt("location", item.position)
          }
          emitOnProgress(data)
        }
      }
    }
  }

private fun initializeTTS() {
if (isInitializing) return
isInitializing = true

synthesizer = TextToSpeech(reactApplicationContext, { status ->
isInitialized = status == TextToSpeech.SUCCESS
isInitializing = false

if (isInitialized) {
cachedEngines = synthesizer.engines
synthesizer.setOnUtteranceProgressListener(object : UtteranceProgressListener() {
override fun onStart(utteranceId: String) {
synchronized(queueLock) {
speechQueue.find { it.utteranceId == utteranceId }?.let { item ->
item.status = SpeechStatus.SPEAKING
if (isResuming && item.position > 0) {
emitOnResume(getEventData(utteranceId))
isResuming = false
} else {
emitOnStart(getEventData(utteranceId))
}
}
}
}
override fun onDone(utteranceId: String) {
synchronized(queueLock) {
speechQueue.find { it.utteranceId == utteranceId }?.let { item ->
item.status = SpeechStatus.COMPLETED
deactivateDuckingSession()
emitOnFinish(getEventData(utteranceId))
if (!isPaused) {
currentQueueIndex++
processNextQueueItem()
}
}
}
}
override fun onError(utteranceId: String) {
synchronized(queueLock) {
speechQueue.find { it.utteranceId == utteranceId }?.let { item ->
item.status = SpeechStatus.ERROR
deactivateDuckingSession()
emitOnError(getEventData(utteranceId))
if (!isPaused) {
currentQueueIndex++
processNextQueueItem()
}
}
}
}
override fun onStop(utteranceId: String, interrupted: Boolean) {
synchronized(queueLock) {
speechQueue.find { it.utteranceId == utteranceId }?.let { item ->
if (isPaused) {
item.status = SpeechStatus.PAUSED
emitOnPause(getEventData(utteranceId))
} else {
item.status = SpeechStatus.COMPLETED
emitOnStopped(getEventData(utteranceId))
}
}
}
}
override fun onRangeStart(utteranceId: String, start: Int, end: Int, frame: Int) {
synchronized(queueLock) {
speechQueue.find { it.utteranceId == utteranceId }?.let { item ->
item.position = item.offset + start
val data = Arguments.createMap().apply {
putInt("id", utteranceId.hashCode())
putInt("length", end - start)
putInt("location", item.position)
}
emitOnProgress(data)
}
}
}
})
applyGlobalOptions()
processPendingOperations()
if (status == TextToSpeech.SUCCESS) {
// Don't set isInitialized yet - wait for verification
Log.d(TAG, "TTS engine callback SUCCESS, verifying voices...")
verifyTTSReady()
} else {
Log.e(TAG, "TTS engine initialization failed with status: $status")
isInitialized = false
isInitializing = false
rejectPendingOperations()
}
}, selectedEngine)
Expand All @@ -273,8 +332,12 @@ class RNSpeechModule(reactContext: ReactApplicationContext) :
}
else -> {
pendingOperations.add(Pair(operation, promise))
// Shut down any zombie instance before reinitializing
if (::synthesizer.isInitialized) {
try { synthesizer.stop(); synthesizer.shutdown() } catch (_: Exception) {}
}
initializeTTS()
}
}
}
}

Expand All @@ -284,10 +347,10 @@ class RNSpeechModule(reactContext: ReactApplicationContext) :
synthesizer.setLanguage(locale)
}
globalOptions["pitch"]?.let {
synthesizer.setPitch(it as Float)
synthesizer.setPitch((it as? Number)?.toFloat() ?: 1.0f)
}
globalOptions["rate"]?.let {
synthesizer.setSpeechRate(it as Float)
synthesizer.setSpeechRate((it as? Number)?.toFloat() ?: 0.5f)
}
globalOptions["voice"]?.let { voiceId ->
synthesizer.voices?.forEach { voice ->
Expand All @@ -308,10 +371,10 @@ class RNSpeechModule(reactContext: ReactApplicationContext) :
synthesizer.setLanguage(locale)
}
tempOptions["pitch"]?.let {
synthesizer.setPitch(it as Float)
synthesizer.setPitch((it as? Number)?.toFloat() ?: 1.0f)
}
tempOptions["rate"]?.let {
synthesizer.setSpeechRate(it as Float)
synthesizer.setSpeechRate((it as? Number)?.toFloat() ?: 0.5f)
}
tempOptions["voice"]?.let { voiceId ->
synthesizer.voices?.forEach { voice ->
Expand Down Expand Up @@ -528,11 +591,11 @@ class RNSpeechModule(reactContext: ReactApplicationContext) :
val queueItem = SpeechQueueItem(text = text, options = validatedOptions, utteranceId = utteranceId)
synchronized(queueLock) {
speechQueue.add(queueItem)
if (!synthesizer.isSpeaking && !isPaused) {
currentQueueIndex = speechQueue.size - 1
processNextQueueItem()
if (!synthesizer.isSpeaking && !isPaused && currentQueueIndex == -1) {
currentQueueIndex = 0 // always start from beginning if nothing is running
processNextQueueItem()
}
}
}
promise.resolve(null)
}
}
Expand Down Expand Up @@ -565,12 +628,24 @@ class RNSpeechModule(reactContext: ReactApplicationContext) :
return
}
}

// Shutdown existing TTS before switching engines
if (::synthesizer.isInitialized) {
try {
synthesizer.stop()
synthesizer.shutdown()
} catch (e: Exception) {
Log.w(TAG, "Error shutting down TTS before engine switch", e)
}
}

selectedEngine = engineName
isInitialized = false
isInitializing = false
resetQueueState()

invalidate()

pendingOperations.add(Pair({ promise.resolve(null) }, promise))
initializeTTS()
promise.resolve(null)
}

override fun openVoiceDataInstaller(promise: Promise) {
Expand All @@ -594,11 +669,13 @@ class RNSpeechModule(reactContext: ReactApplicationContext) :

/**
 * Tears the module down: cancels everything scheduled on [mainHandler], stops and shuts
 * down the synthesizer if one was created, and resets the initialization flags so a later
 * initialization starts from a clean state.
 */
override fun invalidate() {
  super.invalidate()
  // Drop pending callbacks first so none of them runs against a shut-down engine.
  mainHandler.removeCallbacksAndMessages(null)
  if (::synthesizer.isInitialized) {
    try {
      synthesizer.stop()
      synthesizer.shutdown()
    } catch (e: Exception) {
      // Best effort, like the other shutdown paths in this module: a failing engine must
      // not prevent the state reset below.
      Log.w(TAG, "Error shutting down TTS during invalidate", e)
    }
    resetQueueState()
  }
  // The progress listener belonged to the instance that was just shut down, so the next
  // initialization has to attach a new one.
  listenerSet = false
  isInitialized = false
  isInitializing = false
}
}
}
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@mhpdev/react-native-speech",
"version": "1.3.1",
"version": "1.3.2",
"description": "A high-performance React Native library for text-to-speech on iOS and Android",
"source": "./src/index.tsx",
"main": "./lib/module/index.js",
Expand Down