Skip to content

Commit 1f84fe5

Browse files
committed
Initial commit
0 parents  commit 1f84fe5

File tree

10 files changed

+298
-0
lines changed

10 files changed

+298
-0
lines changed

.dockerignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
.github
2+
docker-compose.yml
3+
Dockerfile

.github/workflows/publish.yml

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
---
2+
# This workflow uses actions that are not certified by GitHub.
3+
# They are provided by a third-party and are governed by
4+
# separate terms of service, privacy policy, and support
5+
# documentation.
6+
7+
# GitHub recommends pinning actions to a commit SHA.
8+
# To get a newer version, you will need to update the SHA.
9+
# You can also reference a tag or branch, but the action may change without warning.
10+
11+
name: Publish Docker image
12+
13+
on:
14+
release:
15+
types: [published]
16+
17+
jobs:
18+
push_to_registry:
19+
name: Push Docker image to Docker Hub
20+
runs-on: ubuntu-latest
21+
steps:
22+
- name: Check out the repo
23+
uses: actions/checkout@v3
24+
25+
- name: Log in to Docker Hub
26+
uses: docker/login-action@f4ef78c080cd8ba55a85445d5b36e214a81df20a
27+
with:
28+
username: ${{ secrets.DOCKER_USERNAME }}
29+
password: ${{ secrets.DOCKER_PASSWORD }}
30+
31+
- name: Extract metadata (tags, labels) for Docker
32+
id: meta
33+
uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
34+
with:
35+
images: samboo/wyoming-tts
36+
37+
- name: Build and push Docker image
38+
uses: docker/build-push-action@3b5e8027fcad23fda98b2e3ac259d8d67585f671
39+
with:
40+
context: .
41+
file: ./Dockerfile
42+
push: true
43+
tags: ${{ steps.meta.outputs.tags }}
44+
labels: ${{ steps.meta.outputs.labels }}

Dockerfile

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
FROM python:3.7
2+
3+
WORKDIR /app
4+
5+
RUN mkdir /data && mkdir -p /root/.local/share && ln -s /data /root/.local/share/tts
6+
7+
COPY requirements.txt requirements.txt
8+
9+
RUN pip3 install --no-cache-dir -r requirements.txt
10+
11+
COPY . .
12+
13+
VOLUME [ "/data" ]
14+
15+
ENTRYPOINT ["python3", "wyoming_tts"]

LICENSE

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2023 Sam Büth
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

README.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# wyoming TTS
2+
3+
coqui-ai TTS Wyoming protocol implementation.
4+
5+
## TODO
6+
7+
- [ ] Multi-lingual and multi-speaker selection via wyoming protocol (currently not transmitted by home-assistant)
8+
9+
- [ ] GPU support
10+
11+
## Contributions
12+
13+
Pull requests are very welcome.

docker-compose.yml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
version: '3.0'
2+
3+
services:
4+
tts:
5+
image: samboo/wyoming-tts
6+
restart: always
7+
command: --uri tcp://0.0.0.0:10201 --voice tts_models/de/thorsten/vits
8+
environment:
9+
- COQUI_STUDIO_TOKEN= #optional
10+
volumes:
11+
- ./tts:/data
12+
ports:
13+
- 10201:10201

requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
wyoming==0.0.1
2+
tts~=0.13.3

wyoming_tts/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
"""Wyoming server for tts."""

wyoming_tts/__main__.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
#!/usr/bin/env python3
2+
import argparse
3+
import asyncio
4+
import logging
5+
from functools import partial
6+
7+
from TTS.api import TTS
8+
from wyoming.info import Attribution, Info, TtsProgram, TtsVoice
9+
from wyoming.server import AsyncServer
10+
11+
from handler import PiperEventHandler
12+
13+
_LOGGER = logging.getLogger(__name__)
14+
15+
async def main() -> None:
    """Parse the command line, load the TTS model and serve Wyoming clients.

    Without ``--voice`` the available model names are logged and the process
    exits successfully. If the selected model is multi-lingual or
    multi-speaker and the matching ``--language`` / ``--speaker`` option is
    missing, the valid choices are logged and the process exits with status 1.

    Raises:
        SystemExit: when only listing voices (status 0), when a required
            language/speaker option is missing (status 1), or when argparse
            rejects the command line (status 2).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--voice",
        default=None,
        help="The Voice to use for TTS",
    )
    parser.add_argument(
        "--speaker",
        help="Set the target speaker",
    )
    parser.add_argument(
        "--language",
        help="Set the target language",
    )
    parser.add_argument("--samples-per-chunk", type=int, default=1024)
    parser.add_argument("--uri", required=True, help="unix:// or tcp://")
    parser.add_argument("--debug", action="store_true", help="Log DEBUG messages")
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    # The event handler divides by the chunk size, so reject nonsensical
    # values up front instead of failing on every synthesis request.
    if args.samples_per_chunk < 1:
        parser.error("--samples-per-chunk must be a positive integer")

    if args.voice is None:
        _LOGGER.info("The following voices are available (specify with --voice [model_name]): ")
        _LOGGER.info("\n".join(TTS.list_models()))
        # Listing voices is a normal outcome, hence a zero exit status.
        # SystemExit is raised directly because the ``exit`` helper is only
        # injected by the ``site`` module and is not guaranteed to exist.
        raise SystemExit(0)

    tts = TTS(args.voice)

    if tts.is_multi_lingual and args.language is None:
        _LOGGER.error("The following languages are available (specify with --language [lang]): ")
        _LOGGER.info("\n".join(tts.languages))
        raise SystemExit(1)
    if tts.is_multi_speaker and args.speaker is None:
        _LOGGER.error("The following speakers are available (specify with --speaker [speaker]): ")
        _LOGGER.info("\n".join(tts.speakers))
        raise SystemExit(1)

    language = None
    if not tts.is_multi_lingual:
        # Model names look like "tts_models/de/thorsten/vits"; the second
        # path component is taken as the language of a single-language model.
        language = args.voice.split("/")[1]
        _LOGGER.info("Using language: %s", language)

    _LOGGER.info("TTS ready")

    attribution = Attribution(name="coqui-ai", url="https://github.com/coqui-ai/TTS")
    # Preparation for multi speaker support in wyoming event
    speakers = [args.speaker] if tts.is_multi_speaker else ["Default"]
    languages = tts.languages if tts.is_multi_lingual else [language]

    wyoming_info = Info(
        tts=[
            TtsProgram(
                name="coqui-ai TTS",
                attribution=attribution,
                installed=True,
                voices=[
                    TtsVoice(
                        name=speaker,
                        attribution=attribution,
                        installed=True,
                        languages=languages,
                    )
                    for speaker in speakers
                ],
            )
        ],
    )

    server = AsyncServer.from_uri(args.uri)
    _LOGGER.info("Ready")
    # The server is handed a factory; the remaining handler arguments are
    # supplied by the server when it instantiates the handler.
    await server.run(
        partial(
            PiperEventHandler,
            wyoming_info,
            args,
            tts,
        )
    )
92+
93+
94+
95+
# -----------------------------------------------------------------------------
96+
97+
if __name__ == "__main__":
98+
asyncio.run(main())

wyoming_tts/handler.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
"""Event handler for clients of the server."""
2+
import argparse
3+
import logging
4+
import math
5+
import wave
6+
7+
from TTS.api import TTS
8+
from wyoming.audio import AudioChunk, AudioStart, AudioStop
9+
from wyoming.event import Event
10+
from wyoming.info import Describe, Info
11+
from wyoming.server import AsyncEventHandler
12+
from wyoming.tts import Synthesize
13+
14+
_LOGGER = logging.getLogger(__name__)
15+
16+
class PiperEventHandler(AsyncEventHandler):
    """Wyoming event handler that answers requests using a coqui-ai TTS model.

    ``Describe`` events are answered with the server's info event.
    ``Synthesize`` events are rendered to a WAV file by the TTS model and
    streamed back as ``AudioStart`` / ``AudioChunk`` / ``AudioStop`` events.
    Any other event is logged and ignored.
    """

    def __init__(
        self,
        wyoming_info: Info,
        cli_args: argparse.Namespace,
        tts: TTS,
        *args,
    ) -> None:
        """Store the shared server state.

        Args:
            wyoming_info: Service description sent in reply to ``Describe``.
            cli_args: Parsed command line; ``language``, ``speaker`` and
                ``samples_per_chunk`` are read from it.
            tts: Loaded TTS model used for synthesis.
            *args: Forwarded unchanged to ``AsyncEventHandler.__init__``.
        """
        super().__init__(*args)
        self.cli_args = cli_args
        # Built once because the description does not change per request.
        self.wyoming_info_event = wyoming_info.event()
        self.tts = tts

    async def handle_event(self, event: Event) -> bool:
        """Handle a single client event.

        Args:
            event: The event received from the client.

        Returns:
            Always ``True``.
        """
        # Imported locally so this block does not depend on changes to the
        # module's import section.
        import os
        import tempfile

        if Describe.is_type(event.type):
            await self.write_event(self.wyoming_info_event)
            _LOGGER.debug("Sent info")
            return True

        if not Synthesize.is_type(event.type):
            _LOGGER.warning("Unexpected event: %s", event)
            return True

        synthesize = Synthesize.from_event(event)
        text = synthesize.text.strip()
        _LOGGER.debug(event)

        tts_args = {}
        if self.tts.is_multi_lingual:
            tts_args["language"] = self.cli_args.language
        if self.tts.is_multi_speaker:
            tts_args["speaker"] = self.cli_args.speaker

        # Each request gets its own scratch directory. A fixed shared path
        # could be overwritten by a request on another connection while this
        # one is suspended in an ``await``. The directory (and the WAV file)
        # is removed as soon as the audio has been loaded.
        with tempfile.TemporaryDirectory() as tmp_dir:
            output_path = os.path.join(tmp_dir, "output.wav")
            self.tts.tts_to_file(text, **tts_args, file_path=output_path)
            with wave.open(output_path, "rb") as wav_file:
                rate = wav_file.getframerate()
                width = wav_file.getsampwidth()
                channels = wav_file.getnchannels()
                # Read everything before the first await so no file handle
                # is held open while other tasks may run.
                audio_bytes = wav_file.readframes(wav_file.getnframes())

        await self.write_event(
            AudioStart(
                rate=rate,
                width=width,
                channels=channels,
            ).event(),
        )

        # Split into chunks of ``samples_per_chunk`` frames each; the final
        # chunk may be shorter. The chunk size is clamped to at least one
        # frame so a zero or negative setting cannot break the loop.
        bytes_per_sample = width * channels
        bytes_per_chunk = bytes_per_sample * max(1, self.cli_args.samples_per_chunk)
        for offset in range(0, len(audio_bytes), bytes_per_chunk):
            await self.write_event(
                AudioChunk(
                    audio=audio_bytes[offset : offset + bytes_per_chunk],
                    rate=rate,
                    width=width,
                    channels=channels,
                ).event(),
            )

        await self.write_event(AudioStop().event())
        _LOGGER.debug("Completed request")

        return True

0 commit comments

Comments
 (0)