47 changes: 23 additions & 24 deletions .github/workflows/build-and-push-to-ghcr.yml
@@ -4,7 +4,7 @@ on:
branches: [main]
pull_request:
jobs:
build-and-push-to-ghcr-cuda117:
build-and-push-to-ghcr-cuda118:
runs-on: ubuntu-22.04
steps:
-
@@ -27,7 +27,7 @@ jobs:
sudo rm -rf "/usr/local/share/boost"
sudo rm -rf "$AGENT_TOOLSDIRECTORY"

- name: Build only for PR Cuda 11.7
- name: Build only for PR Cuda 11.8
if: github.ref != 'refs/heads/main'
uses: docker/build-push-action@v5
with:
@@ -37,7 +37,7 @@
cache-from: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-latest; type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-pr-${{ github.event.number }}
cache-to: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-pr-${{ github.event.number }}

- name: Build and Push image Cuda 11.7
- name: Build and Push image Cuda 11.8
if: github.ref == 'refs/heads/main'
uses: docker/build-push-action@v5
with:
@@ -49,7 +49,7 @@
tags: ghcr.io/coqui-ai/xtts-streaming-server:latest, ghcr.io/coqui-ai/xtts-streaming-server:main-${{ github.sha }}
#build-args:

build-and-push-to-ghcr-cuda118:
build-and-push-to-ghcr-cuda121:
runs-on: ubuntu-22.04
steps:
-
@@ -72,29 +72,28 @@ jobs:
sudo rm -rf "/usr/local/share/boost"
sudo rm -rf "$AGENT_TOOLSDIRECTORY"

- name: Build only for PR cuda 11.8
- name: Build only for PR cuda 12.1
if: github.ref != 'refs/heads/main'
uses: docker/build-push-action@v5
with:
context: "{{defaultContext}}:server"
file: Dockerfile.cuda118
file: Dockerfile.cuda121
push: false # Do not push image for PR
cache-from: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-latest-cuda118; type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-pr-cuda118-${{ github.event.number }}
cache-to: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-pr-cuda118-${{ github.event.number }}
cache-from: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-latest-cuda121; type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-pr-cuda121-${{ github.event.number }}
cache-to: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-pr-cuda121-${{ github.event.number }}

- name: Build and Push image cuda 11.8
- name: Build and Push image cuda 12.1
if: github.ref == 'refs/heads/main'
uses: docker/build-push-action@v5
with:
context: "{{defaultContext}}:server"
file: Dockerfile.cuda118
file: Dockerfile.cuda121
push: true # Push if merged
cache-from: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-latest-cuda118
cache-to: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-latest-cuda118
tags: ghcr.io/coqui-ai/xtts-streaming-server:latest-cuda118, ghcr.io/coqui-ai/xtts-streaming-server:main-cuda118-${{ github.sha }}
cache-from: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-latest-cuda121
cache-to: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-latest-cuda121
tags: ghcr.io/coqui-ai/xtts-streaming-server:latest-cuda121, ghcr.io/coqui-ai/xtts-streaming-server:main-cuda121-${{ github.sha }}
#build-args:

build-and-push-to-ghcr-cuda121:
build-and-push-to-ghcr-cpu:
runs-on: ubuntu-22.04
steps:
-
@@ -117,24 +116,24 @@
sudo rm -rf "/usr/local/share/boost"
sudo rm -rf "$AGENT_TOOLSDIRECTORY"

- name: Build only for PR cuda 12.1
- name: Build only for PR CPU
if: github.ref != 'refs/heads/main'
uses: docker/build-push-action@v5
with:
context: "{{defaultContext}}:server"
file: Dockerfile.cuda121
file: Dockerfile.cpu
push: false # Do not push image for PR
cache-from: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-latest-cuda121; type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-pr-cuda121-${{ github.event.number }}
cache-to: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-pr-cuda121-${{ github.event.number }}
cache-from: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-latest-cpu; type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-pr-cpu-${{ github.event.number }}
cache-to: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-pr-cpu-${{ github.event.number }}

- name: Build and Push image cuda 12.1
- name: Build and Push image CPU
if: github.ref == 'refs/heads/main'
uses: docker/build-push-action@v5
with:
context: "{{defaultContext}}:server"
file: Dockerfile.cuda121
file: Dockerfile.cpu
push: true # Push if merged
cache-from: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-latest-cuda121
cache-to: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-latest-cuda121
tags: ghcr.io/coqui-ai/xtts-streaming-server:latest-cuda121, ghcr.io/coqui-ai/xtts-streaming-server:main-cuda121-${{ github.sha }}
cache-from: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-latest-cpu
cache-to: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-latest-cpu
tags: ghcr.io/coqui-ai/xtts-streaming-server:latest-cpu, ghcr.io/coqui-ai/xtts-streaming-server:main-cpu-${{ github.sha }}
#build-args:
1 change: 1 addition & 0 deletions .gitignore
@@ -0,0 +1 @@
demo_outputs
78 changes: 54 additions & 24 deletions README.md
@@ -1,53 +1,83 @@
# XTTS streaming server
*Warning: XTTS-streaming-server doesn't support concurrent streaming requests; it is a demo server, not meant for production.*

## Running the server
https://github.com/coqui-ai/xtts-streaming-server/assets/17219561/7220442a-e88a-4288-8a73-608c4b39d06c

To run a pre-built container (CUDA 11.7):

## 1) Run the server

### Use a pre-built image

CUDA 12.1:

```bash
$ docker run --gpus=all -e COQUI_TOS_AGREED=1 --rm -p 8000:80 ghcr.io/coqui-ai/xtts-streaming-server:latest-cuda121
```

CUDA 11.8 (for older cards):

```bash
$ docker run --gpus=all -e COQUI_TOS_AGREED=1 --rm -p 8000:80 ghcr.io/coqui-ai/xtts-streaming-server:latest
```

CUDA 11.8 version (for newer cards, tested on 4060 and L4 instance)
CPU (not recommended):

```bash
$ docker run --gpus=all -e COQUI_TOS_AGREED=1 --rm -p 8000:80 ghcr.io/coqui-ai/xtts-streaming-server:latest-cuda118
$ docker run -e COQUI_TOS_AGREED=1 --rm -p 8000:80 ghcr.io/coqui-ai/xtts-streaming-server:latest-cpu
```

If you have already downloaded the v1.1 model and would like to use it with this server (on Ubuntu), replace YOUR_USER_NAME in the path below with your user name:
Run with a fine-tuned model:

Make sure the model folder `/path/to/model/folder` contains the following files:
- `config.json`
- `model.pth`
- `vocab.json`

```bash
$ docker run -v /home/YOUR_USER_NAME/.local/share/tts/tts_models--multilingual--multi-dataset--xtts_v1.1:/root/.local/share/tts/tts_models--multilingual--multi-dataset--xtts_v1.1 --env NVIDIA_DISABLE_REQUIRE=1 --gpus=all -e COQUI_TOS_AGREED=1 --rm -p 8000:80 ghcr.io/coqui-ai/xtts-streaming-server:latest-cuda118
$ docker run -v /path/to/model/folder:/app/tts_models --gpus=all -e COQUI_TOS_AGREED=1 --rm -p 8000:80 ghcr.io/coqui-ai/xtts-streaming-server:latest
```

Setting the `COQUI_TOS_AGREED` environment variable to `1` indicates you have read and agreed to
the terms of the [CPML license](https://coqui.ai/cpml).
the terms of the [CPML license](https://coqui.ai/cpml). (Fine-tuned XTTS models are also covered by the [CPML license](https://coqui.ai/cpml).)

### Build the image yourself

## Testing the server
To build the Docker image yourself (PyTorch 2.1, CUDA 11.8):

1. Generate audio with the test script:
`DOCKERFILE` may be `Dockerfile`, `Dockerfile.cpu`, `Dockerfile.cuda121`, or your own custom Dockerfile.

```bash
$ cd test
$ python -m pip install -r requirements.txt
$ python test_streaming.py
$ git clone git@github.com:coqui-ai/xtts-streaming-server.git
$ cd xtts-streaming-server/server
$ docker build -t xtts-stream . -f DOCKERFILE
$ docker run --gpus all -e COQUI_TOS_AGREED=1 --rm -p 8000:80 xtts-stream
```

## Building the container
Setting the `COQUI_TOS_AGREED` environment variable to `1` indicates you have read and agreed to
the terms of the [CPML license](https://coqui.ai/cpml). (Fine-tuned XTTS models are also covered by the [CPML license](https://coqui.ai/cpml).)

## 2) Testing the running server

Once your Docker container is running, you can test that it's working properly. You will need to run the following code from a fresh terminal.

1. To build the Docker container (Pytorch 2.01 Cuda 11.7) :
### Clone `xtts-streaming-server` if you haven't already

```bash
$ cd server
$ docker build -t xtts-stream .
$ git clone git@github.com:coqui-ai/xtts-streaming-server.git
```
For Pytorch 2.1 and CUDA 11.8 version (when running set NVIDIA_DISABLE_REQUIRE=1 if you have Cuda < 11.8 drivers)

### Using the gradio demo

```bash
$ cd server
# docker build -t xtts-stream . -f Dockerfile.cuda118
$ cd xtts-streaming-server
$ python -m pip install -r test/requirements.txt
$ python demo.py
```
2. Run the server container:

### Using the test script

```bash
$ docker run --gpus=all -e COQUI_TOS_AGREED=1 --rm -p 8000:80 xtts-stream
$ cd xtts-streaming-server/test
$ python -m pip install -r requirements.txt
$ python test_streaming.py
```
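
If you want to exercise the HTTP API directly rather than through the test script, the sketch below mirrors what `demo.py` in this PR does: it fetches the studio speakers and languages, posts to `/tts`, and decodes the base64-encoded WAV in the response. It assumes the server is reachable at `localhost:8000`, as in the `docker run -p 8000:80` commands above.

```python
import base64
import requests

SERVER_URL = "http://localhost:8000"  # assumes docker run ... -p 8000:80

# Endpoint names and payload fields follow demo.py in this PR.
speakers = requests.get(SERVER_URL + "/studio_speakers").json()
languages = requests.get(SERVER_URL + "/languages").json()
speaker_name, embeddings = next(iter(speakers.items()))

resp = requests.post(
    SERVER_URL + "/tts",
    json={
        "text": "Hello from the XTTS streaming server.",
        "language": "en" if "en" in languages else languages[0],
        "speaker_embedding": embeddings["speaker_embedding"],
        "gpt_cond_latent": embeddings["gpt_cond_latent"],
    },
)
resp.raise_for_status()

# The /tts endpoint returns base64-encoded WAV bytes.
with open("sample.wav", "wb") as fp:
    fp.write(base64.b64decode(resp.content))
print(f"Wrote sample.wav using studio speaker '{speaker_name}'.")
```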

Setting the `COQUI_TOS_AGREED` environment variable to `1` indicates you have read and agreed to
the terms of the [CPML license](https://coqui.ai/cpml).
121 changes: 121 additions & 0 deletions demo.py
@@ -0,0 +1,121 @@
import gradio as gr
import requests
import base64
import tempfile
import json
import os


SERVER_URL = 'http://localhost:8000'
OUTPUT = "./demo_outputs"
cloned_speakers = {}

print("Preparing file structure...")
if not os.path.exists(OUTPUT):
os.mkdir(OUTPUT)
os.mkdir(os.path.join(OUTPUT, "cloned_speakers"))
os.mkdir(os.path.join(OUTPUT, "generated_audios"))
elif os.path.exists(os.path.join(OUTPUT, "cloned_speakers")):
print("Loading existing cloned speakers...")
for file in os.listdir(os.path.join(OUTPUT, "cloned_speakers")):
if file.endswith(".json"):
with open(os.path.join(OUTPUT, "cloned_speakers", file), "r") as fp:
cloned_speakers[file[:-5]] = json.load(fp)
print("Available cloned speakers:", ", ".join(cloned_speakers.keys()))

try:
print("Getting metadata from server ...")
    LANGUAGES = requests.get(SERVER_URL + "/languages").json()
    print("Available languages:", ", ".join(LANGUAGES))
STUDIO_SPEAKERS = requests.get(SERVER_URL + "/studio_speakers").json()
print("Available studio speakers:", ", ".join(STUDIO_SPEAKERS.keys()))
except Exception as e:
    raise Exception("Please make sure the server is running first.") from e


def clone_speaker(upload_file, clone_speaker_name, cloned_speaker_names):
files = {"wav_file": ("reference.wav", open(upload_file, "rb"))}
embeddings = requests.post(SERVER_URL + "/clone_speaker", files=files).json()
with open(os.path.join(OUTPUT, "cloned_speakers", clone_speaker_name + ".json"), "w") as fp:
json.dump(embeddings, fp)
cloned_speakers[clone_speaker_name] = embeddings
cloned_speaker_names.append(clone_speaker_name)
return upload_file, clone_speaker_name, cloned_speaker_names, gr.Dropdown.update(choices=cloned_speaker_names)

def tts(text, speaker_type, speaker_name_studio, speaker_name_custom, lang):
embeddings = STUDIO_SPEAKERS[speaker_name_studio] if speaker_type == 'Studio' else cloned_speakers[speaker_name_custom]
generated_audio = requests.post(
SERVER_URL + "/tts",
json={
"text": text,
"language": lang,
"speaker_embedding": embeddings["speaker_embedding"],
"gpt_cond_latent": embeddings["gpt_cond_latent"]
}
).content
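    # The /tts endpoint returns base64-encoded WAV bytes, so decode before writing to disk.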
generated_audio_path = os.path.join("demo_outputs", "generated_audios", next(tempfile._get_candidate_names()) + ".wav")
with open(generated_audio_path, "wb") as fp:
fp.write(base64.b64decode(generated_audio))
return fp.name

with gr.Blocks() as demo:
cloned_speaker_names = gr.State(list(cloned_speakers.keys()))
with gr.Tab("TTS"):
with gr.Column() as row4:
with gr.Row() as col4:
speaker_name_studio = gr.Dropdown(
label="Studio speaker",
choices=STUDIO_SPEAKERS.keys(),
value="Asya Anara" if "Asya Anara" in STUDIO_SPEAKERS.keys() else None,
)
speaker_name_custom = gr.Dropdown(
label="Cloned speaker",
choices=cloned_speaker_names.value,
value=cloned_speaker_names.value[0] if len(cloned_speaker_names.value) != 0 else None,
)
speaker_type = gr.Dropdown(label="Speaker type", choices=["Studio", "Cloned"], value="Studio")
with gr.Column() as col2:
                lang = gr.Dropdown(label="Language", choices=LANGUAGES, value="en")
text = gr.Textbox(label="text", value="A quick brown fox jumps over the lazy dog.")
tts_button = gr.Button(value="TTS")
with gr.Column() as col3:
generated_audio = gr.Audio(label="Generated audio", autoplay=True)
with gr.Tab("Clone a new speaker"):
with gr.Column() as col1:
upload_file = gr.Audio(label="Upload reference audio", type="filepath")
clone_speaker_name = gr.Textbox(label="Speaker name", value="default_speaker")
clone_button = gr.Button(value="Clone speaker")

clone_button.click(
fn=clone_speaker,
inputs=[upload_file, clone_speaker_name, cloned_speaker_names],
outputs=[upload_file, clone_speaker_name, cloned_speaker_names, speaker_name_custom],
)

tts_button.click(
fn=tts,
inputs=[text, speaker_type, speaker_name_studio, speaker_name_custom, lang],
outputs=[generated_audio],
)

if __name__ == "__main__":
print("Warming up server...")
with open("test/default_speaker.json", "r") as fp:
warmup_speaker = json.load(fp)
resp = requests.post(
SERVER_URL + "/tts",
json={
"text": "This is a warmup request.",
"language": "en",
"speaker_embedding": warmup_speaker["speaker_embedding"],
"gpt_cond_latent": warmup_speaker["gpt_cond_latent"],
}
)
resp.raise_for_status()
print("Starting the demo...")
demo.launch(
share=False,
debug=False,
server_port=3009,
server_name="0.0.0.0",
)
20 changes: 20 additions & 0 deletions server/Dockerfile.cpu
@@ -0,0 +1,20 @@
FROM python:3.11.7
ARG DEBIAN_FRONTEND=noninteractive

RUN apt-get update && \
apt-get install --no-install-recommends -y sox libsox-fmt-all curl wget gcc git git-lfs build-essential libaio-dev libsndfile1 ssh ffmpeg && \
apt-get clean && apt-get -y autoremove

WORKDIR /app
COPY requirements_cpu.txt .
RUN python -m pip install --use-deprecated=legacy-resolver -r requirements_cpu.txt \
&& python -m pip cache purge

RUN python -m unidic download
RUN mkdir -p /app/tts_models

COPY main.py .
ENV USE_CPU=1

EXPOSE 80
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"]
11 changes: 11 additions & 0 deletions server/requirements_cpu.txt
@@ -0,0 +1,11 @@
TTS @ git+https://github.com/coqui-ai/TTS@fa28f99f1508b5b5366539b2149963edcb80ba62
uvicorn[standard]==0.23.2
fastapi==0.95.2
pydantic==1.10.13
python-multipart==0.0.6
typing-extensions>=4.8.0
numpy==1.24.3
cutlet
mecab-python3==1.0.6
unidic-lite==1.0.8
unidic==1.1.0
1 change: 1 addition & 0 deletions test/requirements.txt
@@ -1 +1,2 @@
requests==2.31.0
gradio==3.50.2