|
# Base image: CUDA + cuDNN development image on Ubuntu 22.04.
# CUDA_VERSION is declared ahead of FROM so it can select the base image tag.
ARG CUDA_VERSION=12.6.1
FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04

# Build-time parameters for the Python toolchain installed below.
ARG PYTHON_VERSION=3.10
ARG MAMBA_VERSION=24.7.1-0
# Declared without a default; read by the platform checks in later RUN steps.
ARG TARGETPLATFORM

# Put the conda installation (created under /opt/conda below) first on PATH.
ENV CONDA_PREFIX=/opt/conda \
    PATH=/opt/conda/bin:${PATH}
# Install the base build toolchain (compiler, make, git, curl, TLS bits).
#
# /tmp is set to mode 1777 (world-writable WITH the sticky bit) instead of a
# recursive 777: a plain 777 removes the sticky bit, which lets any user delete
# or replace other users' temporary files.
# The apt package index is removed in the same layer so it is not baked into
# the image; later steps that need apt run `apt-get update` themselves.
RUN chmod 1777 /tmp \
 && apt-get update \
 && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        g++ \
        git \
        libssl-dev \
        make \
 && rm -rf /var/lib/apt/lists/*
# Download and run the Mambaforge installer into /opt/conda.
# The installer architecture is derived from TARGETPLATFORM: linux/arm64 maps
# to aarch64, anything else (including an unset value) maps to x86_64.
RUN case "${TARGETPLATFORM}" in \
        linux/arm64) MAMBA_ARCH=aarch64 ;; \
        *)           MAMBA_ARCH=x86_64 ;; \
    esac \
 && installer_url="https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" \
 && curl -fsSL -v -o ~/mambaforge.sh "${installer_url}" \
 && bash ~/mambaforge.sh -b -p /opt/conda \
 && rm ~/mambaforge.sh
# Update conda and pin the interpreter to PYTHON_VERSION, then drop conda's
# package caches in the same layer.
# linux/arm64 builds are rejected here: the step exits with status 1.
RUN if [ "${TARGETPLATFORM}" = "linux/arm64" ]; then \
        exit 1; \
    fi \
 && /opt/conda/bin/conda update -y conda \
 && /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" \
 && /opt/conda/bin/conda clean -ya
WORKDIR /root

# Only requirements.txt is copied at this point, so the dependency layer is
# reused until that file changes. pip's download cache lives in a BuildKit
# cache mount and therefore never ends up in an image layer.
COPY ./requirements.txt /lightllm/requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install \
        --ignore-installed \
        --extra-index-url https://download.pytorch.org/whl/cu124 \
        -r /lightllm/requirements.txt

# vLLM pre-release build from the nightly wheel index.
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install --pre --extra-index-url https://wheels.vllm.ai/nightly vllm

# LightKernel is built from source; --no-deps keeps the already-installed
# dependency set untouched.
RUN --mount=type=cache,target=/root/.cache/pip \
    git clone https://github.com/ModelTC/LightKernel.git \
 && cd LightKernel \
 && pip install --no-deps -v .
# Install Debian packaging tools (used by the gdrcopy .deb build below) and the
# RDMA / InfiniBand userspace stack.
#
# `apt-get update` and every `apt-get install` share one RUN so a cached
# `update` layer can never be paired with a newer install list (stale-index
# failure). DEBIAN_FRONTEND=noninteractive stops packages with configuration
# prompts from blocking the build. The package index is removed in the same
# layer; later steps that need apt run `apt-get update` themselves.
RUN apt-get update \
 && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        build-essential \
        debhelper \
        devscripts \
        dh-make \
        dkms \
        ibverbs-providers \
        infiniband-diags \
        libibverbs-dev \
        libnuma-dev \
        librdmacm-dev \
        perftest \
        rdma-core \
        wget \
 && rm -rf /var/lib/apt/lists/*
ENV CUDA_HOME=/usr/local/cuda
ENV GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/

# Build the gdrcopy v2.4.4 Debian packages from source, install them with dpkg,
# and delete the checkout in the same layer.
RUN git clone -b v2.4.4 https://github.com/NVIDIA/gdrcopy.git /tmp/gdrcopy \
 && cd /tmp/gdrcopy/packages \
 && CUDA=/usr/local/cuda ./build-deb-packages.sh \
 && dpkg -i gdrdrv-dkms_*.deb libgdrapi_*.deb gdrcopy-tests_*.deb gdrcopy_*.deb \
 && cd / \
 && rm -rf /tmp/gdrcopy
# Fix DeepEP IBGDA symlink: make the unversioned libmlx5.so name point at
# libmlx5.so.1 (x86_64 library directory only).
RUN ln --symbolic --force \
        /usr/lib/x86_64-linux-gnu/libmlx5.so.1 \
        /usr/lib/x86_64-linux-gnu/libmlx5.so
# Build NVSHMEM 3.3.9 from the source tarball and install it under
# /root/nvshmem/install (relies on the current working directory being /root).
# The NVSHMEM_* variables are passed only to the cmake configure invocation:
# IBGDA and GDRCopy are switched on, the other listed options are set to 0.
RUN nvshmem_tarball=nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
 && wget "https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/${nvshmem_tarball}" \
 && tar -xf "${nvshmem_tarball}" \
 && mv nvshmem_src nvshmem \
 && rm -f "/root/${nvshmem_tarball}" \
 && cd nvshmem \
 && NVSHMEM_SHMEM_SUPPORT=0 \
    NVSHMEM_UCX_SUPPORT=0 \
    NVSHMEM_USE_NCCL=0 \
    NVSHMEM_MPI_SUPPORT=0 \
    NVSHMEM_IBGDA_SUPPORT=1 \
    NVSHMEM_PMIX_SUPPORT=0 \
    NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
    NVSHMEM_USE_GDRCOPY=1 \
    cmake -S . -B build/ \
        -DCMAKE_INSTALL_PREFIX=/root/nvshmem/install \
        -DCMAKE_CUDA_ARCHITECTURES=90 \
 && cmake --build build --target install -j64
# DeepEP is pinned to an exact commit; override DEEPEP_COMMIT to build another.
ARG DEEPEP_COMMIT=b6ce310bb0b75079682d09bc2ebc063a074fbd58
RUN git clone https://github.com/deepseek-ai/DeepEP.git \
 && git -C DeepEP checkout "${DEEPEP_COMMIT}"

WORKDIR /root/DeepEP
# NVSHMEM_DIR points at the NVSHMEM install prefix; it is set with ENV, so it
# is in the environment of the install step below and of the final image.
ENV NVSHMEM_DIR=/root/nvshmem/install
RUN python setup.py install
# Build UCX (v1.19.x branch) from source with CUDA, verbs, gdrcopy and EFA
# support and install it under the default prefix.
#
# Every step is chained with `&&`, so a failed apt-get, clone or configure
# aborts the build instead of being silently ignored. Both apt-get invocations
# run non-interactively, and the package index is removed in the same layer.
# Build parallelism is bounded to the number of available CPUs: a bare
# `make -j` places no limit on concurrent jobs and can exhaust memory.
RUN apt-get update \
 && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        automake \
        autotools-dev \
        cmake \
        libtool \
        libz-dev \
 && DEBIAN_FRONTEND=noninteractive apt-get install -y --reinstall \
        ibverbs-utils \
        libibumad-dev \
        libibverbs-dev \
        rdma-core \
 && rm -rf /var/lib/apt/lists/* \
 && rm -rf /usr/lib/ucx /opt/hpcx/ucx \
 && cd /usr/local/src \
 && git clone https://github.com/openucx/ucx.git \
 && cd ucx \
 && git checkout v1.19.x \
 && ./autogen.sh \
 && ./configure \
        --enable-shared \
        --disable-static \
        --disable-doxygen-doc \
        --enable-optimizations \
        --enable-cma \
        --enable-devel-headers \
        --with-cuda=/usr/local/cuda \
        --with-verbs=yes \
        --with-dm \
        --with-gdrcopy=/usr/local \
        --with-efa \
        --enable-mt \
 && make -j"$(nproc)" \
 && make -j"$(nproc)" install-strip \
 && ldconfig
# Build NIXL from the main branch with meson/ninja, install it under
# /usr/local/nixl, then install its Python package without dependencies.
#
# Every step is chained with `&&`, so a failure in apt-get, `cd`, or the
# build-tool upgrade stops the build instead of being ignored. apt runs
# non-interactively and its package index is removed in the same layer.
RUN apt-get update \
 && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        net-tools \
        pkg-config \
        tmux \
 && rm -rf /var/lib/apt/lists/* \
 && pip install --upgrade meson pybind11 patchelf \
 && cd /usr/local/src \
 && git clone https://github.com/ai-dynamo/nixl.git -b main \
 && cd nixl \
 && rm -rf build \
 && mkdir build \
 && meson setup build/ --prefix=/usr/local/nixl --buildtype=release \
 && cd build \
 && ninja \
 && ninja install \
 && cd .. \
 && pip install . --no-deps
# Copy the full build context last, so source edits only invalidate the layers
# from here on, then install lightllm in editable mode.
COPY . /lightllm
RUN pip install --no-cache-dir -e /lightllm
0 commit comments