From 95159ad4bc5cdb923e0d63eca97eb5ace2c323b6 Mon Sep 17 00:00:00 2001
From: hzjane <a1015616934@qq.com>
Date: Tue, 26 Nov 2024 14:36:37 +0800
Subject: [PATCH 1/7] build image to test prefix caching

---
 docker/llm/serving/xpu/docker/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/llm/serving/xpu/docker/Dockerfile b/docker/llm/serving/xpu/docker/Dockerfile
index 0b1e7267047..267430a99a6 100644
--- a/docker/llm/serving/xpu/docker/Dockerfile
+++ b/docker/llm/serving/xpu/docker/Dockerfile
@@ -85,7 +85,7 @@ RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRO
     rm -rf /tmp/neo && \
     mkdir -p /llm && \
     cd /llm && \
-    git clone -b 0.6.2 https://github.com/analytics-zoo/vllm.git /llm/vllm && \
+    git clone -b add_prefix_caching https://github.com/hzjane/vllm.git /llm/vllm && \
     cd /llm/vllm && \
     pip install setuptools-scm && \
     pip install --upgrade cmake && \

From ac2335472b0752d409ec90dc45c4a8525a406659 Mon Sep 17 00:00:00 2001
From: hzjane <a1015616934@qq.com>
Date: Wed, 18 Dec 2024 16:08:55 +0800
Subject: [PATCH 2/7] serving/xpu: test vLLM add_logits_attn_softcap branch

---
 docker/llm/serving/xpu/docker/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/llm/serving/xpu/docker/Dockerfile b/docker/llm/serving/xpu/docker/Dockerfile
index 267430a99a6..aa234505376 100644
--- a/docker/llm/serving/xpu/docker/Dockerfile
+++ b/docker/llm/serving/xpu/docker/Dockerfile
@@ -85,7 +85,7 @@ RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRO
     rm -rf /tmp/neo && \
     mkdir -p /llm && \
     cd /llm && \
-    git clone -b add_prefix_caching https://github.com/hzjane/vllm.git /llm/vllm && \
+    git clone -b add_logits_attn_softcap https://github.com/hzjane/vllm.git /llm/vllm && \
     cd /llm/vllm && \
     pip install setuptools-scm && \
     pip install --upgrade cmake && \

From ba52ae2a92edd122934a7b5760e52a8fee804def Mon Sep 17 00:00:00 2001
From: hzjane <a1015616934@qq.com>
Date: Tue, 24 Dec 2024 14:21:33 +0800
Subject: [PATCH 3/7] test torch 2.5

---
 docker/llm/serving/xpu/docker/Dockerfile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docker/llm/serving/xpu/docker/Dockerfile b/docker/llm/serving/xpu/docker/Dockerfile
index aa234505376..4d3482f668a 100644
--- a/docker/llm/serving/xpu/docker/Dockerfile
+++ b/docker/llm/serving/xpu/docker/Dockerfile
@@ -1,4 +1,4 @@
-FROM intel/oneapi-basekit:2024.1.1-devel-ubuntu22.04
+FROM intel/oneapi-basekit:2025.0.1-0-devel-ubuntu22.04
 
 ARG http_proxy
 ARG https_proxy
@@ -62,7 +62,7 @@ RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRO
     rm -rf ./ipex-llm && \
     # Install torch-ccl
     cd /tmp/ && \
-    pip install torch==2.1.0.post2 torchvision==0.16.0.post2 torchaudio==2.1.0.post2 intel-extension-for-pytorch==2.1.30.post0 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ && \
+    pip install torch==2.5.1+cxx11.abi torchvision==0.20.1+cxx11.abi torchaudio==2.5.1+cxx11.abi intel-extension-for-pytorch==2.5.10+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ && \
     # Internal oneccl
     wget https://sourceforge.net/projects/oneccl-wks/files/2024.0.0.6.2-release/oneccl_wks_installer_2024.0.0.6.2.sh && \
     bash oneccl_wks_installer_2024.0.0.6.2.sh && \

From 0ae5dca79865c285e7c7c671850541f7f1693e34 Mon Sep 17 00:00:00 2001
From: hzjane <a1015616934@qq.com>
Date: Tue, 24 Dec 2024 14:34:58 +0800
Subject: [PATCH 4/7] inference/xpu: test torch 2.5 on oneapi-basekit 2025.0.1

---
 docker/llm/inference/xpu/docker/Dockerfile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docker/llm/inference/xpu/docker/Dockerfile b/docker/llm/inference/xpu/docker/Dockerfile
index b96f9203f90..1215c4153e3 100644
--- a/docker/llm/inference/xpu/docker/Dockerfile
+++ b/docker/llm/inference/xpu/docker/Dockerfile
@@ -1,4 +1,4 @@
-FROM intel/oneapi:2024.2.1-0-devel-ubuntu22.04
+FROM intel/oneapi-basekit:2025.0.1-0-devel-ubuntu22.04
 
 ARG http_proxy
 ARG https_proxy
@@ -56,6 +56,7 @@ RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRO
     pip install --upgrade requests argparse urllib3 && \
     pip install --pre --upgrade ipex-llm[xpu_arc] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ && \
     pip install --pre pytorch-triton-xpu==3.0.0+1b2f15840e --index-url https://download.pytorch.org/whl/nightly/xpu && \
+    pip install torch==2.5.1+cxx11.abi torchvision==0.20.1+cxx11.abi torchaudio==2.5.1+cxx11.abi intel-extension-for-pytorch==2.5.10+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ && \
     # Fix Trivy CVE Issues
     pip install transformers_stream_generator einops tiktoken && \
     # Install opencl-related repos

From 7c737ca24e14a0535bad655894b4362c6a0737ca Mon Sep 17 00:00:00 2001
From: hzjane <a1015616934@qq.com>
Date: Wed, 25 Dec 2024 11:05:02 +0800
Subject: [PATCH 5/7] serving/xpu: switch base image to oneapi-basekit 2024.2.1

---
 docker/llm/serving/xpu/docker/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/llm/serving/xpu/docker/Dockerfile b/docker/llm/serving/xpu/docker/Dockerfile
index 4d3482f668a..e6a176159a3 100644
--- a/docker/llm/serving/xpu/docker/Dockerfile
+++ b/docker/llm/serving/xpu/docker/Dockerfile
@@ -1,4 +1,4 @@
-FROM intel/oneapi-basekit:2025.0.1-0-devel-ubuntu22.04
+FROM intel/oneapi-basekit:2024.2.1-0-devel-ubuntu22.04
 
 ARG http_proxy
 ARG https_proxy

From 4bc9563b49fed330fc2c69803ff72394483f843c Mon Sep 17 00:00:00 2001
From: hzjane <a1015616934@qq.com>
Date: Wed, 25 Dec 2024 13:39:25 +0800
Subject: [PATCH 6/7] serving/xpu: restore torch 2.1 / IPEX 2.1.30 pins to debug on docker

---
 docker/llm/serving/xpu/docker/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/llm/serving/xpu/docker/Dockerfile b/docker/llm/serving/xpu/docker/Dockerfile
index e6a176159a3..bb3d5d18e0d 100644
--- a/docker/llm/serving/xpu/docker/Dockerfile
+++ b/docker/llm/serving/xpu/docker/Dockerfile
@@ -62,7 +62,7 @@ RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRO
     rm -rf ./ipex-llm && \
     # Install torch-ccl
     cd /tmp/ && \
-    pip install torch==2.5.1+cxx11.abi torchvision==0.20.1+cxx11.abi torchaudio==2.5.1+cxx11.abi intel-extension-for-pytorch==2.5.10+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ && \
+    pip install torch==2.1.0.post2 torchvision==0.16.0.post2 torchaudio==2.1.0.post2 intel-extension-for-pytorch==2.1.30.post0 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/ && \
     # Internal oneccl
     wget https://sourceforge.net/projects/oneccl-wks/files/2024.0.0.6.2-release/oneccl_wks_installer_2024.0.0.6.2.sh && \
     bash oneccl_wks_installer_2024.0.0.6.2.sh && \

From cb5dc598f12a7f56df62a6154f8814ba04a8b929 Mon Sep 17 00:00:00 2001
From: hzjane <a1015616934@qq.com>
Date: Mon, 30 Dec 2024 11:18:32 +0800
Subject: [PATCH 7/7] serving/xpu: disable driver and vLLM build steps, use oneAPI 2024.2 lib paths

---
 docker/llm/serving/xpu/docker/Dockerfile | 36 ++++++++++++------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/docker/llm/serving/xpu/docker/Dockerfile b/docker/llm/serving/xpu/docker/Dockerfile
index bb3d5d18e0d..912cea006bb 100644
--- a/docker/llm/serving/xpu/docker/Dockerfile
+++ b/docker/llm/serving/xpu/docker/Dockerfile
@@ -73,23 +73,23 @@ RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRO
     apt-get update && \
     apt-get install -y --no-install-recommends libfabric-dev wrk libaio-dev numactl && \
     # apt-get install -y intel-opencl-icd intel-level-zero-gpu=1.3.26241.33-647~22.04 level-zero level-zero-dev --allow-downgrades && \
-    mkdir -p /tmp/neo && \
-    cd /tmp/neo && \
-    wget https://github.com/oneapi-src/level-zero/releases/download/v1.18.5/level-zero_1.18.5+u22.04_amd64.deb && \
-    wget https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17791.9/intel-igc-core_1.0.17791.9_amd64.deb && \
-    wget https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17791.9/intel-igc-opencl_1.0.17791.9_amd64.deb && \
-    wget https://github.com/intel/compute-runtime/releases/download/24.39.31294.12/intel-level-zero-gpu_1.6.31294.12_amd64.deb && \
-    wget https://github.com/intel/compute-runtime/releases/download/24.39.31294.12/intel-opencl-icd_24.39.31294.12_amd64.deb && \
-    wget https://github.com/intel/compute-runtime/releases/download/24.39.31294.12/libigdgmm12_22.5.2_amd64.deb && \
-    dpkg -i *.deb && \
-    rm -rf /tmp/neo && \
-    mkdir -p /llm && \
-    cd /llm && \
-    git clone -b add_logits_attn_softcap https://github.com/hzjane/vllm.git /llm/vllm && \
-    cd /llm/vllm && \
-    pip install setuptools-scm && \
-    pip install --upgrade cmake && \
-    VLLM_TARGET_DEVICE=xpu pip install --no-build-isolation -v /llm/vllm && \
+    # mkdir -p /tmp/neo && \
+    # cd /tmp/neo && \
+    # wget https://github.com/oneapi-src/level-zero/releases/download/v1.18.5/level-zero_1.18.5+u22.04_amd64.deb && \
+    # wget https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17791.9/intel-igc-core_1.0.17791.9_amd64.deb && \
+    # wget https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17791.9/intel-igc-opencl_1.0.17791.9_amd64.deb && \
+    # wget https://github.com/intel/compute-runtime/releases/download/24.39.31294.12/intel-level-zero-gpu_1.6.31294.12_amd64.deb && \
+    # wget https://github.com/intel/compute-runtime/releases/download/24.39.31294.12/intel-opencl-icd_24.39.31294.12_amd64.deb && \
+    # wget https://github.com/intel/compute-runtime/releases/download/24.39.31294.12/libigdgmm12_22.5.2_amd64.deb && \
+    # dpkg -i *.deb && \
+    # rm -rf /tmp/neo && \
+    # mkdir -p /llm && \
+    # cd /llm && \
+    # git clone -b add_logits_attn_softcap https://github.com/hzjane/vllm.git /llm/vllm && \
+    # cd /llm/vllm && \
+    # pip install setuptools-scm && \
+    # pip install --upgrade cmake && \
+    # VLLM_TARGET_DEVICE=xpu pip install --no-build-isolation -v /llm/vllm && \
     # pip install -r /llm/vllm/requirements-xpu.txt && \
     # VLLM_TARGET_DEVICE=xpu python setup.py install && \
     pip install mpi4py fastapi uvicorn openai && \
@@ -109,6 +109,6 @@ COPY ./start-fastchat-service.sh       /llm/
 COPY ./start-pp_serving-service.sh       /llm/
 COPY ./start-lightweight_serving-service.sh       /llm/
 
-ENV LD_LIBRARY_PATH /usr/local/lib/python3.11/dist-packages/intel_extension_for_pytorch/lib/:/opt/intel/oneapi/tbb/2021.12/env/../lib/intel64/gcc4.8:/opt/intel/oneapi/mpi/2021.12/opt/mpi/libfabric/lib:/opt/intel/oneapi/mpi/2021.12/lib:/opt/intel/oneapi/mkl/2024.1/lib:/opt/intel/oneapi/ippcp/2021.11/lib/:/opt/intel/oneapi/ipp/2021.11/lib:/opt/intel/oneapi/dpl/2022.5/lib:/opt/intel/oneapi/dnnl/2024.1/lib:/opt/intel/oneapi/debugger/2024.1/opt/debugger/lib:/opt/intel/oneapi/dal/2024.2/lib:/opt/intel/oneapi/compiler/2024.1/opt/oclfpga/host/linux64/lib:/opt/intel/oneapi/compiler/2024.1/opt/compiler/lib:/opt/intel/oneapi/compiler/2024.1/lib:/opt/intel/oneapi/ccl/2021.12/lib/
+ENV LD_LIBRARY_PATH /usr/local/lib/python3.11/dist-packages/intel_extension_for_pytorch/lib/:/opt/intel/oneapi/tbb/2021.12/env/../lib/intel64/gcc4.8:/opt/intel/oneapi/mpi/2021.12/opt/mpi/libfabric/lib:/opt/intel/oneapi/mpi/2021.12/lib:/opt/intel/oneapi/mkl/2024.2/lib:/opt/intel/oneapi/ippcp/2021.11/lib/:/opt/intel/oneapi/ipp/2021.11/lib:/opt/intel/oneapi/dpl/2022.5/lib:/opt/intel/oneapi/dnnl/2024.2/lib:/opt/intel/oneapi/debugger/2024.2/opt/debugger/lib:/opt/intel/oneapi/dal/2024.2/lib:/opt/intel/oneapi/compiler/2024.2/opt/oclfpga/host/linux64/lib:/opt/intel/oneapi/compiler/2024.2/opt/compiler/lib:/opt/intel/oneapi/compiler/2024.2/lib:/opt/intel/oneapi/ccl/2021.12/lib/
 
 WORKDIR /llm/