-
Notifications
You must be signed in to change notification settings - Fork 13
Expand file tree
/
Copy pathDockerfile.cuda
More file actions
45 lines (32 loc) · 1.1 KB
/
Copy pathDockerfile.cuda
File metadata and controls
45 lines (32 loc) · 1.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# Base image: CUDA 12.8.1 "devel" flavour on Ubuntu 24.04.
FROM nvidia/cuda:12.8.1-devel-ubuntu24.04

# Build-time only: keeps apt/debconf from prompting during the RUN steps of
# this stage. Declared as ARG rather than ENV so the setting is not baked into
# the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive

# OS packages: the Python 3.12 interpreter with venv support, plus the ffmpeg,
# libsndfile1 and sox audio packages. The list is kept in alphabetical order
# for clean diffs, and the apt index is removed in the same layer so it never
# contributes to the image size.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ffmpeg \
        libsndfile1 \
        python3.12 \
        python3.12-venv \
        sox \
    && rm -rf /var/lib/apt/lists/*
# Copy the uv and uvx binaries out of the ghcr.io/astral-sh/uv image.
# NOTE(review): the `latest` tag is not reproducible; consider pinning a uv
# release tag or digest compatible with the uv version that wrote uv.lock.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/

WORKDIR /app

# Have uv use the python3.12 interpreter.
ENV UV_PYTHON=python3.12

# Copy only the dependency manifests so the install layer below is reused
# until pyproject.toml or uv.lock changes.
COPY pyproject.toml uv.lock ./

# Install all dependencies (CPU torch from lock file).
# The uv cache is kept in a BuildKit cache mount: it survives between builds
# but is never written into an image layer. --link-mode=copy is required
# because the cache mount is a separate filesystem from /app/.venv, so uv
# cannot hardlink files out of the cache.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --frozen --no-dev --link-mode=copy
# Activate the project virtual environment for the remaining build steps and
# for the running container, so `uv pip` and `python` target /app/.venv.
ENV VIRTUAL_ENV=/app/.venv \
    PATH="/app/.venv/bin:$PATH"

# Replace CPU torch with CUDA 12.8 variant.
# torch and torchaudio are already present in the venv from the earlier
# `uv sync`, and an unversioned requirement is satisfied by whatever build is
# installed, so a plain `uv pip install torch torchaudio` can be a no-op.
# --reinstall-package forces both to be fetched again from the cu128 index.
# The uv cache lives in a BuildKit cache mount (hence --link-mode=copy).
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --link-mode=copy \
        --reinstall-package torch \
        --reinstall-package torchaudio \
        --index-url https://download.pytorch.org/whl/cu128 \
        torch torchaudio

# Install flash attention for reduced GPU memory usage.
# --no-build-isolation makes the build use the packages already installed in
# the venv (including the torch installed above) instead of an isolated
# build environment.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --link-mode=copy --no-build-isolation flash-attn
# Application entry point. Copied after the dependency layers so that editing
# main.py does not invalidate them.
COPY main.py ./

# Service defaults; each can be overridden at `docker run` time with -e.
# (Presumably read by main.py -- confirm against the application code.)
ENV QWEN_TTS_DEVICE=cuda:0 \
    QWEN_TTS_DTYPE=bfloat16 \
    QWEN_TTS_ATTN=flash_attention_2 \
    HOST=0.0.0.0 \
    PORT=8000

# NVIDIA container runtime: make every GPU visible, with the compute and
# utility driver capabilities.
ENV NVIDIA_VISIBLE_DEVICES=all \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility

# Suppress Python SyntaxWarning messages.
ENV PYTHONWARNINGS="ignore::SyntaxWarning"

# Documentation only (does not publish the port); matches the PORT default.
EXPOSE 8000

# NOTE(review): no USER is set, so the process runs as root -- consider a
# dedicated non-root user once writable paths and volume mounts are confirmed.
# Exec form: the venv interpreter runs main.py directly, without a shell.
CMD ["/app/.venv/bin/python", "main.py"]