From e1b33bc0bce96d7cdb6cce7e34b460474ea16d53 Mon Sep 17 00:00:00 2001
From: Julian Weber
Date: Tue, 13 Feb 2024 17:47:13 +0100
Subject: [PATCH 1/4] Update README.md

Disable threading to allow production use. The server was accidentally set
up as if it were Flask rather than FastAPI; use locks instead.
---
 README.md      |  2 +-
 server/main.py | 90 +++++++++++++++++++++++++++-----------------------
 2 files changed, 50 insertions(+), 42 deletions(-)

diff --git a/README.md b/README.md
index 5506d88..31d7e73 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
 # XTTS streaming server
-
+*Warning: XTTS-streaming-server doesn't support concurrent streaming requests, it's a demo server, not meant for production.*
 
 https://github.com/coqui-ai/xtts-streaming-server/assets/17219561/7220442a-e88a-4288-8a73-608c4b39d06c
 
diff --git a/server/main.py b/server/main.py
index 9a6aeb7..0e7a2ad 100644
--- a/server/main.py
+++ b/server/main.py
@@ -5,6 +5,7 @@
 import wave
 
 import torch
 import numpy as np
+import threading
 from typing import List
 from pydantic import BaseModel
@@ -19,7 +20,7 @@
 torch.set_num_threads(int(os.environ.get("NUM_THREADS", os.cpu_count())))
 device = torch.device("cuda" if os.environ.get("USE_CPU", "0") == "0" else "cpu")
 if not torch.cuda.is_available() and device == "cuda":
-    raise RuntimeError("CUDA device unavailable, please use Dockerfile.cpu instead.")
+    raise RuntimeError("CUDA device unavailable, please use Dockerfile.cpu instead.")
 
 custom_model_path = os.environ.get("CUSTOM_MODEL_PATH", "/app/tts_models")
 
@@ -44,6 +45,9 @@
 print("Running XTTS Server ...", flush=True)
+lock = threading.Lock() # Create a lock object
+print("Establishing lock ...", flush=True)
+
 
 
 ##### Run fastapi #####
 app = FastAPI(
     title="XTTS Streaming server",
@@ -52,20 +56,20 @@
     docs_url="/",
 )
 
-
 @app.post("/clone_speaker")
 def predict_speaker(wav_file: UploadFile):
-    """Compute conditioning inputs from reference audio file."""
-    temp_audio_name = next(tempfile._get_candidate_names())
-    with open(temp_audio_name, "wb") as temp, torch.inference_mode():
-        temp.write(io.BytesIO(wav_file.file.read()).getbuffer())
-        gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(
-            temp_audio_name
-        )
-    return {
-        "gpt_cond_latent": gpt_cond_latent.cpu().squeeze().half().tolist(),
-        "speaker_embedding": speaker_embedding.cpu().squeeze().half().tolist(),
-    }
+    with lock:
+        """Compute conditioning inputs from reference audio file."""
+        temp_audio_name = next(tempfile._get_candidate_names())
+        with open(temp_audio_name, "wb") as temp, torch.inference_mode():
+            temp.write(io.BytesIO(wav_file.file.read()).getbuffer())
+            gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(
+                temp_audio_name
+            )
+        return {
+            "gpt_cond_latent": gpt_cond_latent.cpu().squeeze().half().tolist(),
+            "speaker_embedding": speaker_embedding.cpu().squeeze().half().tolist(),
+        }
 
 
 def postprocess(wav):
@@ -137,10 +141,11 @@ def predict_streaming_generator(parsed_input: dict = Body(...)):
 
 @app.post("/tts_stream")
 def predict_streaming_endpoint(parsed_input: StreamingInputs):
-    return StreamingResponse(
-        predict_streaming_generator(parsed_input),
-        media_type="audio/wav",
-    )
+    with lock:
+        return StreamingResponse(
+            predict_streaming_generator(parsed_input),
+            media_type="audio/wav",
+        )
 
 class TTSInputs(BaseModel):
     speaker_embedding: List[float]
@@ -150,36 +155,39 @@ class TTSInputs(BaseModel):
 
 @app.post("/tts")
 def predict_speech(parsed_input: TTSInputs):
-    speaker_embedding = torch.tensor(parsed_input.speaker_embedding).unsqueeze(0).unsqueeze(-1)
-    gpt_cond_latent = torch.tensor(parsed_input.gpt_cond_latent).reshape((-1, 1024)).unsqueeze(0)
-    text = parsed_input.text
-    language = parsed_input.language
-
-    out = model.inference(
-        text,
-        language,
-        gpt_cond_latent,
-        speaker_embedding,
-    )
+    with lock:
+        speaker_embedding = torch.tensor(parsed_input.speaker_embedding).unsqueeze(0).unsqueeze(-1)
+        gpt_cond_latent = torch.tensor(parsed_input.gpt_cond_latent).reshape((-1, 1024)).unsqueeze(0)
+        text = parsed_input.text
+        language = parsed_input.language
+
+        out = model.inference(
+            text,
+            language,
+            gpt_cond_latent,
+            speaker_embedding,
+        )
 
-    wav = postprocess(torch.tensor(out["wav"]))
+        wav = postprocess(torch.tensor(out["wav"]))
 
-    return encode_audio_common(wav.tobytes())
+        return encode_audio_common(wav.tobytes())
 
 
 @app.get("/studio_speakers")
 def get_speakers():
-    if hasattr(model, "speaker_manager") and hasattr(model.speaker_manager, "speakers"):
-        return {
-            speaker: {
-                "speaker_embedding": model.speaker_manager.speakers[speaker]["speaker_embedding"].cpu().squeeze().half().tolist(),
-                "gpt_cond_latent": model.speaker_manager.speakers[speaker]["gpt_cond_latent"].cpu().squeeze().half().tolist(),
+    with lock:
+        if hasattr(model, "speaker_manager") and hasattr(model.speaker_manager, "speakers"):
+            return {
+                speaker: {
+                    "speaker_embedding": model.speaker_manager.speakers[speaker]["speaker_embedding"].cpu().squeeze().half().tolist(),
+                    "gpt_cond_latent": model.speaker_manager.speakers[speaker]["gpt_cond_latent"].cpu().squeeze().half().tolist(),
+                }
+                for speaker in model.speaker_manager.speakers.keys()
             }
-            for speaker in model.speaker_manager.speakers.keys()
-        }
-    else:
-        return {}
-
+        else:
+            return {}
+
 @app.get("/languages")
 def get_languages():
-    return config.languages
\ No newline at end of file
+    with lock:
+        return config.languages
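The first patch serializes every endpoint behind a single `threading.Lock`: FastAPI runs plain `def` endpoints on a worker thread pool, so without the lock concurrent requests could call the XTTS model from several threads at once. A minimal sketch of that pattern, with a hypothetical `synthesize()` standing in for the real `model.inference()` call (the names and timing here are illustrative, not part of the patch):

```python
import threading
import time

from fastapi import FastAPI

app = FastAPI()
lock = threading.Lock()  # one process-wide lock guarding the (non-thread-safe) model


def synthesize(text: str) -> bytes:
    """Hypothetical stand-in for model.inference(); assume it must not run concurrently."""
    time.sleep(0.1)  # pretend to do heavy GPU work
    return text.encode("utf-8")


@app.post("/tts")
def tts(text: str):
    # FastAPI executes sync endpoints in a thread pool, so two requests can
    # arrive in parallel; the lock makes them run strictly one at a time.
    with lock:
        audio = synthesize(text)
    return {"audio_bytes": len(audio)}
```

The trade-off is throughput: with one global lock the server handles exactly one request at a time, which is why the warning added to the README still applies.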
From 777839055d006e6c6ff3ab0746600cfb97093d11 Mon Sep 17 00:00:00 2001
From: Joe Meyer
Date: Tue, 30 Apr 2024 19:23:33 -0500
Subject: [PATCH 2/4] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 31d7e73..4dd97c9 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
 # XTTS streaming server
-*Warning: XTTS-streaming-server doesn't support concurrent streaming requests, it's a demo server, not meant for production.*
+*Warning: XTTS-streaming-server is a demo server, not meant for production.*
 
 https://github.com/coqui-ai/xtts-streaming-server/assets/17219561/7220442a-e88a-4288-8a73-608c4b39d06c
 
"/usr/local/share/boost" @@ -34,8 +40,8 @@ jobs: context: "{{defaultContext}}:server" file: Dockerfile push: false # Do not push image for PR - cache-from: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-latest; type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-pr-${{ github.event.number }} - cache-to: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-pr-${{ github.event.number }} + cache-from: type=registry,ref=ghcr.io/${{ env.OWNER_LC }}/xtts-streaming-server:cache-latest; type=registry,ref=ghcr.io/${{ env.OWNER_LC }}/xtts-streaming-server:cache-pr-${{ github.event.number }} + cache-to: type=registry,ref=ghcr.io/${{ env.OWNER_LC }}/xtts-streaming-server:cache-pr-${{ github.event.number }} - name: Build and Push image Cuda 11.8 if: github.ref == 'refs/heads/main' @@ -44,14 +50,20 @@ jobs: context: "{{defaultContext}}:server" file: Dockerfile push: true # Push if merged - cache-from: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-latest - cache-to: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-latest - tags: ghcr.io/coqui-ai/xtts-streaming-server:latest, ghcr.io/coqui-ai/xtts-streaming-server:main-${{ github.sha }} + cache-from: type=registry,ref=ghcr.io/${{ env.OWNER_LC }}/xtts-streaming-server:cache-latest + cache-to: type=registry,ref=ghcr.io/${{ env.OWNER_LC }}/xtts-streaming-server:cache-latest + tags: ghcr.io/${{ env.OWNER_LC }}/xtts-streaming-server:latest, ghcr.io/${{ env.OWNER_LC }}/xtts-streaming-server:main-${{ github.sha }} #build-args: build-and-push-to-ghcr-cuda121: runs-on: ubuntu-22.04 steps: + - + name: Set owner name to lower case + run: | + echo "OWNER_LC=${OWNER,,}" >>${GITHUB_ENV} + env: + OWNER: '${{ github.repository_owner }}' - name: Checkout uses: actions/checkout@v3 @@ -66,7 +78,7 @@ jobs: docker login --username ${{ github.actor }} --password ${{ secrets.GITHUB_TOKEN }} ghcr.io - name: 'Remove cache' - run: | + run: | sudo rm -rf /usr/share/dotnet sudo rm -rf /opt/ghc sudo rm -rf "/usr/local/share/boost" @@ -79,8 +91,8 @@ jobs: context: "{{defaultContext}}:server" file: Dockerfile.cuda121 push: false # Do not push image for PR - cache-from: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-latest-cuda121; type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-pr-cuda121-${{ github.event.number }} - cache-to: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-pr-cuda121-${{ github.event.number }} + cache-from: type=registry,ref=ghcr.io/${{ env.OWNER_LC }}/xtts-streaming-server:cache-latest-cuda121; type=registry,ref=ghcr.io/${{ env.OWNER_LC }}/xtts-streaming-server:cache-pr-cuda121-${{ github.event.number }} + cache-to: type=registry,ref=ghcr.io/${{ env.OWNER_LC }}/xtts-streaming-server:cache-pr-cuda121-${{ github.event.number }} - name: Build and Push image cuda 12.1 if: github.ref == 'refs/heads/main' @@ -89,13 +101,19 @@ jobs: context: "{{defaultContext}}:server" file: Dockerfile.cuda121 push: true # Push if merged - cache-from: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-latest-cuda121 - cache-to: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-latest-cuda121 - tags: ghcr.io/coqui-ai/xtts-streaming-server:latest-cuda121, ghcr.io/coqui-ai/xtts-streaming-server:main-cuda121-${{ github.sha }} + cache-from: type=registry,ref=ghcr.io/${{ env.OWNER_LC }}/xtts-streaming-server:cache-latest-cuda121 + cache-to: type=registry,ref=ghcr.io/${{ env.OWNER_LC }}/xtts-streaming-server:cache-latest-cuda121 + tags: ghcr.io/${{ env.OWNER_LC 
From 5f28a53cbe02e1fe4193333b57f3fd1653ae8570 Mon Sep 17 00:00:00 2001
From: Joe Meyer
Date: Wed, 26 Jun 2024 16:25:38 -0500
Subject: [PATCH 4/4] update readme

---
 README.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/README.md b/README.md
index 4dd97c9..8c06946 100644
--- a/README.md
+++ b/README.md
@@ -81,3 +81,6 @@ $ cd xtts-streaming-server/test
 $ python -m pip install -r requirements.txt
 $ python test_streaming.py
 ```
+
+### Forked Repos
+If you fork this repo, the GitHub Action will automatically build and push a Docker image to your container registry, so it will be available as ghcr.io/yourusername/xtts-streaming-server.
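For reference, the endpoints touched by the first patch can be exercised end to end with a short client. A minimal sketch using `requests`, assuming a server is already running at `http://localhost:80` with studio speakers available, and assuming `/tts` returns a base64-encoded WAV string (as the repo's test client expects); the URL, speaker choice, and output filename are assumptions, not part of the patches:

```python
import base64

import requests

SERVER = "http://localhost:80"  # assumed address of a running xtts-streaming-server

# Grab the conditioning latents of any studio speaker exposed by /studio_speakers.
speakers = requests.get(f"{SERVER}/studio_speakers").json()
speaker_name, latents = next(iter(speakers.items()))

# /tts expects the TTSInputs fields defined in server/main.py.
resp = requests.post(
    f"{SERVER}/tts",
    json={
        "speaker_embedding": latents["speaker_embedding"],
        "gpt_cond_latent": latents["gpt_cond_latent"],
        "text": "Hello from the XTTS streaming server.",
        "language": "en",
    },
)
resp.raise_for_status()

# Assumed: the response body is a JSON string holding base64-encoded WAV bytes.
with open("out.wav", "wb") as f:
    f.write(base64.b64decode(resp.json()))
print(f"Wrote out.wav using studio speaker: {speaker_name}")
```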