Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@ jobs:
pip3 install scons
scons -s
cp bin/*.dll nvda/synthDrivers
cp bin/b32_helper.exe nvda/synthDrivers
mkdir b32_assets
cd bin
7z a -tzip ../b32_assets/b32_bin.zip *.dll b32_spk.exe
7z a -tzip ../b32_assets/b32_bin.zip *.dll b32_spk.exe b32_helper.exe
cd ..
cd nvda
7z a -tzip ../b32_assets/bestspeech.nvda-addon *
Expand Down
1 change: 1 addition & 0 deletions SConstruct
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ env.Program("bin/test", "obj/test.cpp", LIBS = ["user32", "winmm", "bin/b32_wrap
# Builds bin/test_rapid from obj/test_rapid.cpp, linked against user32, winmm and the b32_wrapper library in bin/.
env.Program("bin/test_rapid", "obj/test_rapid.cpp", LIBS = ["user32", "winmm", "bin/b32_wrapper"])
# Separate object build of the wrapper with b32w_export defined to an empty string.
# NOTE(review): presumably this strips the DLL export decoration so the wrapper can be linked statically - confirm in b32_wrapper.h.
b32_wrapper_static = env.Object("obj/b32_wrapper_static", "obj/b32_wrapper.cpp", CPPDEFINES = [("b32w_export", "")])
# Builds bin/b32_spk from the argparse source, the static wrapper object, obj/b32_spk.cpp and `sonic` (defined earlier in this file).
env.Program("bin/b32_spk", ["obj/argparse.c", b32_wrapper_static, "obj/b32_spk.cpp", sonic], CPPDEFINES = [("b32w_export", "")], LIBS = ["user32", "winmm", "bin/MinHook"])
# Builds bin/b32_helper from the same static wrapper object, obj/b32_helper.cpp and `sonic`, with the same defines and libraries as b32_spk.
env.Program("bin/b32_helper", [b32_wrapper_static, "obj/b32_helper.cpp", sonic], CPPDEFINES = [("b32w_export", "")], LIBS = ["user32", "winmm", "bin/MinHook"])
6 changes: 3 additions & 3 deletions nvda/manifest.ini
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
name = BestSpeech
summary = "BeSTSpeech TTS for NVDA"
version = 2025.5
author = "Rommix, Tamasg, Quin G, Mason, Sam Tupy"
version = 2026.1
author = "Rommix, Tamasg, Quin G, Mason, Sam Tupy, Christopher Duffley with Claude/GitHub Copilot"
description = """Keynote Gold software (BeStSpeech)"""
url = "https://github.com/samtupy/b32tts_wrapper"
minimumNVDAVersion = 2020.4
lastTestedNVDAVersion = 2025.1
lastTestedNVDAVersion = 2026.1
112 changes: 104 additions & 8 deletions nvda/synthDrivers/bestspeech.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import os
import struct
import subprocess
from synthDriverHandler import SynthDriver, synthIndexReached, synthDoneSpeaking, VoiceInfo
from speech.commands import IndexCommand, PitchCommand, CharacterModeCommand
import ctypes
Expand Down Expand Up @@ -98,18 +100,29 @@ def __init__(self):
super().__init__()
path = os.path.join(os.path.dirname(__file__), 'b32_tts.dll')
wrapper_path = os.path.join(os.path.dirname(__file__), 'b32_wrapper.dll')
self.dll = ctypes.cdll[wrapper_path]
self._dll_path = path
self.player = None
try:
currentSoundcardOutput = config.conf['speech']['outputDevice']
except:
currentSoundcardOutput = config.conf["audio"]["outputDevice"]
self.player = nvwave.WavePlayer(1, 11025, 16, outputDevice=currentSoundcardOutput)
self.dll.bst_init_w.argtypes = (ctypes.c_wchar_p,)
self.dll.bst_init_w.restype = c_void_p
self.dll.bst_free.argtypes = (c_void_p,)
self.dll.bst_speak_async.restype = c_void_p
self.handle = self.dll.bst_init_w(path)
self._helper = None
try:
self.dll = ctypes.cdll[wrapper_path]
self.dll.bst_init_w.argtypes = (ctypes.c_wchar_p,)
self.dll.bst_init_w.restype = c_void_p
self.dll.bst_free.argtypes = (c_void_p,)
self.dll.bst_speak_async.restype = c_void_p
self.handle = self.dll.bst_init_w(path)
self._use_helper = False
except (OSError, AttributeError):
# b32_wrapper.dll could not be loaded in-process (e.g. 32-bit DLL in
# 64-bit NVDA 2026+, or DLL simply absent). Fall back to the
# out-of-process 32-bit helper.
self.dll = None
self._use_helper = True
self._start_helper()
global bgQueue
bgQueue = queue.Queue()
self.bgThread = BgThread()
Expand All @@ -120,9 +133,19 @@ def __init__(self):
self.numberProcessing = False
self.abbreviations = True
self._phrasePrediction = True
self.table = str.maketrans("", "'")
self.table = str.maketrans("\u2019", "'")
self.canceled = False

def _start_helper(self):
    """Launch b32_helper.exe and store the process object in ``self._helper``.

    The executable is looked up next to this module and receives
    ``self._dll_path`` as its only argument. stdin and stdout are pipes,
    stderr is discarded, and no console window is created.
    """
    module_dir = os.path.dirname(__file__)
    command = [os.path.join(module_dir, 'b32_helper.exe'), self._dll_path]
    popen_options = dict(
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
        creationflags=subprocess.CREATE_NO_WINDOW,
    )
    self._helper = subprocess.Popen(command, **popen_options)

def loadSettings(self, onlyChanged = False):
# We can probably remove this in a bit, we override this to make sure people's excitation setting doesn't break across addon versions.
super().loadSettings(onlyChanged)
Expand Down Expand Up @@ -256,6 +279,12 @@ def speak(self, speechSequence):
_execWhenDone(self._speakBg, text, idx, mustBeAsync=True)

def _speakBg(self, text, idx):
    """Hand one utterance to the active backend.

    Uses the out-of-process helper when ``self._use_helper`` is true,
    otherwise the in-process DLL path.
    """
    backend = self._speakBg_helper if self._use_helper else self._speakBg_dll
    backend(text, idx)

def _speakBg_dll(self, text, idx):
@bst_async_callback
def on_audio(data, size, user):
if not self.speaking: return False
Expand All @@ -272,6 +301,54 @@ def on_audio(data, size, user):
self.player.feed(b"", 0, onDone=f)
self.player.idle()

def _speakBg_helper(self, text, idx):
    """Synthesize ``text`` via the helper process and feed the audio to the player.

    Wire format sent to the helper's stdin (little-endian):
    ``[uint32 text_len][float32 rate_mult][text bytes, windows-1252]``.
    The helper answers on stdout with ``[uint32 chunk_len][chunk bytes]``
    records, terminated by a record whose ``chunk_len`` is 0.

    Args:
        text: The text to speak.
        idx: List of index markers belonging to this utterance. When it has
            more than one entry, the first is popped and reported at once.

    Returns early, without scheduling ``self.done``, when writing to the
    helper fails or when ``self.speaking`` was cleared while audio was
    being received.
    """
    # Restart helper if it was never started or died unexpectedly.
    if self._helper is None or self._helper.poll() is not None:
        self._start_helper()
    self.speaking = True
    # As a dirty hack to make indent nav beeps mostly work, indicate that
    # we've reached the first index immediately.
    if idx and len(idx) > 1:
        synthIndexReached.notify(synth=self, index=idx.pop(0))
    txt = text.translate(self.table).encode('windows-1252', 'replace')
    # text_len == 0 is the CANCEL command on the wire. Sending an empty
    # SPEAK would be read as CANCEL, the rate_mult bytes would then be
    # parsed as the next command's length, and no end-of-utterance
    # sentinel would ever come back. Treat empty text as an utterance
    # that produced no audio instead.
    if txt:
        rate_mult = 4.0 if self._rateBoost else 1.0
        try:
            self._helper.stdin.write(struct.pack('<If', len(txt), rate_mult))
            self._helper.stdin.write(txt)
            self._helper.stdin.flush()
        except OSError:
            return
        # Read audio chunks until the end-of-utterance sentinel
        # (chunk_len == 0) or until the pipe breaks.
        while True:
            hdr = self._helper_read_exact(4)
            if hdr is None:
                break
            (chunk_len,) = struct.unpack('<I', hdr)
            if chunk_len == 0:
                break
            chunk = self._helper_read_exact(chunk_len)
            if chunk is None:
                break
            # Keep draining after a cancel so the stream stays in sync,
            # but stop handing audio to the player.
            if self.speaking:
                self.player.feed(chunk, len(chunk))
    if not self.speaking:
        return
    # Bind idx as a default so the callback reports this utterance's indexes.
    self.player.feed(b"", 0, onDone=lambda idx=idx: self.done(idx))
    self.player.idle()

def _helper_read_exact(self, n):
    """Read exactly ``n`` bytes from the helper's stdout.

    Args:
        n: Number of bytes wanted.

    Returns:
        A ``bytes`` object of length ``n``, or ``None`` if the stream ends
        or fails before ``n`` bytes have arrived. ``n == 0`` yields ``b""``.
    """
    pieces = []
    remaining = n
    while remaining > 0:
        try:
            piece = self._helper.stdout.read(remaining)
        except (OSError, ValueError):
            # ValueError is what a closed stream raises; report it the
            # same way as any other broken pipe.
            return None
        if not piece:
            # EOF before the full record arrived.
            return None
        pieces.append(piece)
        remaining -= len(piece)
    return b"".join(pieces)

def done(self, idx):
for i in idx:
synthIndexReached.notify(synth=self, index=i)
Expand All @@ -281,7 +358,19 @@ def terminate(self):
self.cancel()
bgQueue.put((None, None, None))
self.bgThread.join()
self.dll.bst_free(self.handle)
if self._use_helper:
if self._helper is not None:
# Send QUIT command then wait for clean exit.
try:
self._helper.stdin.write(struct.pack('<I', 0xFFFFFFFF))
self._helper.stdin.flush()
except OSError:
pass
self._helper.wait(timeout=2)
if self._helper.poll() is None:
self._helper.kill()
else:
self.dll.bst_free(self.handle)

def cancel(self):
self.speaking = False
Expand All @@ -292,6 +381,13 @@ def cancel(self):
break
if self.player:
self.player.stop()
if self._use_helper and self._helper is not None:
# Send CANCEL command: text_len == 0
try:
self._helper.stdin.write(struct.pack('<I', 0))
self._helper.stdin.flush()
except OSError:
pass

def pause(self, switch):
if self.player: self.player.pause(switch)
182 changes: 182 additions & 0 deletions src/b32_helper.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
// b32_helper.cpp
// 32-bit out-of-process TTS helper, used by the NVDA addon when running under
// 64-bit NVDA (2026+). Launched as a subprocess with the b32_tts.dll path as
// its sole command-line argument.
//
// === stdin protocol (binary, little-endian) ===
// SPEAK command : [uint32 text_len (> 0)] [float32 rate_mult] [text_len bytes, windows-1252]
// CANCEL command : [uint32 = 0]
// QUIT command : [uint32 = 0xFFFFFFFF]
// EOF on stdin is treated the same as QUIT.
//
// === stdout protocol (binary, little-endian) ===
// Audio chunk : [uint32 chunk_len (> 0)] [chunk_len bytes raw 16-bit mono PCM @ 11025 Hz]
// End-of-utter. : [uint32 = 0]
// One end-of-utterance sentinel is emitted after every SPEAK command
// (whether it completed normally or was cancelled).

#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <fcntl.h>
#include <io.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <atomic>
#include "b32_wrapper.h"

// Set to true by the stdin thread to abort the current synthesis callback;
// reset to false by the main thread before each utterance.
// std::atomic rather than volatile: the flag is shared between threads, and
// volatile gives no inter-thread visibility or ordering guarantees in
// standard C++.
static std::atomic<bool> g_cancel(false);
// Set to true by the stdin thread when the helper should exit cleanly;
// polled by the main loop.
static std::atomic<bool> g_quit(false);

// ---- Pending-command queue (capacity 1) ----------------------------------- //
// The stdin reader thread deposits a single pending speak command here.
// The main thread drains it before starting synthesis.

// One queued SPEAK request, handed from the stdin reader thread to the main thread.
struct PendingCmd {
// Heap buffer allocated with new[] by the reader thread and NUL-terminated.
// Whoever removes the command from the slot must delete[] it.
char* text;
// Number of text bytes, not counting the terminating NUL.
uint32_t text_len;
// Rate multiplier read from the SPEAK command; passed on to bst_speak_async.
float rate_mult;
// True while the slot holds a command that has not been consumed yet.
bool valid;
};

// The single pending-command slot; zero-initialised, so it starts out empty.
static PendingCmd g_pending = {};
// Guards access to g_pending from the reader thread and the main thread.
static CRITICAL_SECTION g_pending_cs;
// Signalled when g_pending.valid becomes true, or g_quit becomes true.
static HANDLE g_cmd_event;

// ---------------------------------------------------------------------------

// Synthesis callback: forwards one block of audio to stdout as a
// length-prefixed chunk ([uint32 chunk_len][chunk_len bytes]).
//   data, size : the audio bytes handed over by bst_speak_async
//   returns    : true to keep synthesizing; false to abort, either because a
//                cancel was requested or because writing to stdout failed
static bool audio_cb(char* data, long size, void* /*user*/)
{
    if (g_cancel) return false;

    // A chunk_len of 0 is the end-of-utterance sentinel on the wire, so an
    // empty block must never be framed; a negative size would turn into a
    // huge unsigned length. Skip such blocks and keep going.
    if (size <= 0) return true;

    const uint32_t len = (uint32_t)size;
    const size_t count = (size_t)size;
    if (fwrite(&len, sizeof(len), 1, stdout) != 1) return false;
    if (fwrite(data, 1, count, stdout) != count) return false;
    // Push the chunk out right away; a failed flush means the pipe is gone.
    if (fflush(stdout) != 0) return false;
    return !g_cancel;
}

// Fills `buf` with exactly `n` bytes taken from `fp`, looping over partial
// reads. Returns true once every byte has arrived, false as soon as fread
// delivers nothing (EOF or error). A request for 0 bytes succeeds trivially.
static bool read_exact(FILE* fp, void* buf, size_t n)
{
    char* cursor = static_cast<char*>(buf);
    for (size_t remaining = n; remaining != 0; ) {
        const size_t chunk = fread(cursor, 1, remaining, fp);
        if (chunk == 0) return false;
        cursor += chunk;
        remaining -= chunk;
    }
    return true;
}

// ---------------------------------------------------------------------------
// Stdin reader thread: runs for the lifetime of the process.
// Parses incoming commands and either sets g_cancel / g_quit, or enqueues a
// SPEAK command for the main thread.
// ---------------------------------------------------------------------------
static DWORD WINAPI stdin_reader(LPVOID /*unused*/)
{
while (true) {
uint32_t text_len = 0;
if (!read_exact(stdin, &text_len, sizeof(uint32_t))) {
// EOF or read error -> quit
g_quit = true;
SetEvent(g_cmd_event);
return 0;
}

if (text_len == 0xFFFFFFFFu) {
g_quit = true;
g_cancel = true;
SetEvent(g_cmd_event);
return 0;
}

if (text_len == 0) {
// Cancel: abort whatever synthesis is currently running.
g_cancel = true;
continue;
}

float rate_mult = 1.0f;
if (!read_exact(stdin, &rate_mult, sizeof(float))) {
g_quit = true;
SetEvent(g_cmd_event);
return 0;
}

char* text = new char[text_len + 1];
if (!read_exact(stdin, text, text_len)) {
delete[] text;
g_quit = true;
SetEvent(g_cmd_event);
return 0;
}
text[text_len] = '\0';

// Replace any unprocessed pending command (NVDA always cancels before
// issuing a new speak, but guard just in case).
EnterCriticalSection(&g_pending_cs);
if (g_pending.valid) {
delete[] g_pending.text;
}
g_pending.text = text;
g_pending.text_len = text_len;
g_pending.rate_mult = rate_mult;
g_pending.valid = true;
LeaveCriticalSection(&g_pending_cs);

SetEvent(g_cmd_event);
}
}

// ---------------------------------------------------------------------------

int main(int argc, const char** argv)
{
// Switch stdin/stdout to binary mode to avoid newline translation.
_setmode(_fileno(stdin), _O_BINARY);
_setmode(_fileno(stdout), _O_BINARY);

const char* dll_path = "b32_tts.dll";
if (argc >= 2) dll_path = argv[1];

bst_state* state = bst_init(dll_path);
if (!state) return 1;

InitializeCriticalSection(&g_pending_cs);
g_cmd_event = CreateEvent(NULL, /*manualReset=*/FALSE, /*initial=*/FALSE, NULL);

CreateThread(NULL, 0, stdin_reader, NULL, 0, NULL);

while (true) {
WaitForSingleObject(g_cmd_event, INFINITE);
if (g_quit) break;

PendingCmd cmd = {};
EnterCriticalSection(&g_pending_cs);
if (g_pending.valid) {
cmd = g_pending;
g_pending.valid = false;
g_pending.text = nullptr;
}
LeaveCriticalSection(&g_pending_cs);

if (!cmd.valid) continue;

g_cancel = false;
bst_speak_async(state, audio_cb, nullptr, cmd.text, -1, 0, cmd.rate_mult, 0);
delete[] cmd.text;

// Always emit end-of-utterance sentinel, even if cancelled.
uint32_t zero = 0;
fwrite(&zero, sizeof(uint32_t), 1, stdout);
fflush(stdout);
}

bst_free(state);
DeleteCriticalSection(&g_pending_cs);
CloseHandle(g_cmd_event);
return 0;
}