-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdocker-compose.yml
More file actions
67 lines (65 loc) · 2.16 KB
/
docker-compose.yml
File metadata and controls
67 lines (65 loc) · 2.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# Compose file for ZimaOS / CasaOS / generic Docker hosts.
# Import it via the CasaOS dashboard ("Custom Install"), or run it directly:
#   docker compose up -d
# To install CasaOS itself first, use its one-line installer:
#   curl -fsSL https://get.casaos.io | sudo bash
#
# Persistent data lives in named volumes, so model downloads and datasets
# survive container restarts. GPU support requires the NVIDIA Container
# Toolkit on the host. Comment out the `deploy` block if you only have a CPU.
services:
  # Single-service stack: the trainer dashboard, built from
  # dashboard/Dockerfile and tagged ml-stack-trainer-dashboard:latest.
  trainer-dashboard:
    image: ml-stack-trainer-dashboard:latest
    build:
      context: .
      dockerfile: dashboard/Dockerfile
    container_name: trainer-dashboard
    restart: unless-stopped
    # Host:container port mappings, quoted so YAML never retypes them.
    ports:
      - "8888:8888"  # same port as DASHBOARD_PORT below
      - "1234:1234"  # same port as LMS_API_PORT below
    # All values are quoted so they reach the container as strings.
    environment:
      AUTO_LAUNCH_LMSTUDIO: "0"  # headless container; llama-server is the provider
      DASHBOARD_HOST: "0.0.0.0"
      DASHBOARD_PORT: "8888"
      LMS_API_PORT: "1234"
      LLAMA_BIND: "0.0.0.0"
      LLAMA_DIR: "/opt/llama.cpp-bin"
      LLAMA_BIN: "/opt/llama.cpp-bin/llama-server"
      GGUF_MODELS_DIR: "/app/models"  # backed by the `models` volume
      HF_HOME: "/app/hf-cache"  # backed by the `hf-cache` volume
    # Named volumes (declared at the bottom of this file) mounted under /app.
    volumes:
      - models:/app/models
      - data:/app/data
      - runs:/app/runs
      - hf-cache:/app/hf-cache
    # Multi-GPU note: `count: all` exposes every visible NVIDIA GPU to the
    # container, which is what DDP/FSDP/ZeRO-3 need. To pin specific GPUs use
    # `device_ids: ['0','1','2','3']` instead of `count`. NVIDIA Container
    # Toolkit must be installed on the host. Comment the whole `deploy` block
    # for CPU-only operation.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    # NCCL needs shared-mem >64MB and IPC=host for stable multi-GPU collectives.
    # NOTE(review): with `ipc: host` the container shares the host's /dev/shm,
    # so `shm_size` likely only takes effect if `ipc: host` is removed; confirm
    # before relying on the 8gb figure.
    shm_size: "8gb"
    ipc: host
    ulimits:
      memlock: -1  # a single value sets both soft and hard limits; -1 = unlimited
      stack: 67108864  # 64 MiB
    labels:
      # CasaOS metadata (shows up in the CasaOS dashboard)
      casaos.icon: "https://cdn-icons-png.flaticon.com/512/8055/8055598.png"
      casaos.name: "ML Trainer"
      casaos.title: "ML Stack Trainer"
      casaos.category: "AI"
      # Keep in sync with the published dashboard port (DASHBOARD_PORT). This
      # was "8765", a port nothing in this file listens on or publishes.
      # NOTE(review): assumes CasaOS uses port_map as the web-UI port; confirm.
      casaos.port_map: "8888"
volumes:
  # Named volumes mounted by trainer-dashboard under /app. Each is declared
  # with an empty mapping, i.e. no driver or options are overridden.
  models: {}
  data: {}
  runs: {}
  hf-cache: {}