
Commit b4ecd7f

Merge remote-tracking branch 'upstream/master'
2 parents f9ba819 + 7d77f07 commit b4ecd7f

128 files changed (+26726, -10714 lines)


.devops/cann.Dockerfile

Lines changed: 5 additions & 6 deletions
@@ -3,17 +3,15 @@
 # ==============================================================================
 
 # Define the CANN base image for easier version updates later
-ARG CANN_BASE_IMAGE=quay.io/ascend/cann:8.1.rc1-910b-openeuler22.03-py3.10
+ARG CHIP_TYPE=910b
+ARG CANN_BASE_IMAGE=quay.io/ascend/cann:8.3.rc1.alpha001-${CHIP_TYPE}-openeuler22.03-py3.11
 
 # ==============================================================================
 # BUILD STAGE
 # Compile all binary files and libraries
 # ==============================================================================
 FROM ${CANN_BASE_IMAGE} AS build
 
-# Define the Ascend chip model for compilation. Default is Ascend910B3
-ARG ASCEND_SOC_TYPE=Ascend910B3
-
 # -- Install build dependencies --
 RUN yum install -y gcc g++ cmake make git libcurl-devel python3 python3-pip && \
     yum clean all && \
@@ -36,13 +34,14 @@ ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH
 # For brevity, only core variables are listed here. You can paste the original ENV list here.
 
 # -- Build llama.cpp --
-# Use the passed ASCEND_SOC_TYPE argument and add general build options
+# Use the passed CHIP_TYPE argument and add general build options
+ARG CHIP_TYPE
 RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh --force \
     && \
     cmake -B build \
        -DGGML_CANN=ON \
        -DCMAKE_BUILD_TYPE=Release \
-       -DSOC_TYPE=${ASCEND_SOC_TYPE} \
+       -DSOC_TYPE=ascend${CHIP_TYPE} \
        . && \
     cmake --build build --config Release -j$(nproc)
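The chip model is now selected through a single CHIP_TYPE build argument instead of the previous ASCEND_SOC_TYPE string, with the SOC type passed to CMake derived as ascend${CHIP_TYPE}. A minimal sketch of building the image for the 310P variant, assuming the matching base-image tag is published (the output image name is illustrative):

    # Build the CANN image for an Ascend 310P target by overriding CHIP_TYPE.
    docker build -f .devops/cann.Dockerfile \
        --build-arg CHIP_TYPE=310p \
        -t llama-cpp-cann:310p .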

.github/copilot-instructions.md

Lines changed: 1 addition & 1 deletion
@@ -9,7 +9,7 @@ llama.cpp is a large-scale C/C++ project for efficient LLM (Large Language Model
 - **Size**: ~200k+ lines of code across 1000+ files
 - **Architecture**: Modular design with main library (`libllama`) and 40+ executable tools/examples
 - **Core dependency**: ggml tensor library (vendored in `ggml/` directory)
-- **Backends supported**: CPU (AVX/NEON optimized), CUDA, Metal, Vulkan, SYCL, ROCm, MUSA
+- **Backends supported**: CPU (AVX/NEON/RVV optimized), CUDA, Metal, Vulkan, SYCL, ROCm, MUSA
 - **License**: MIT
 
 ## Build Instructions

.github/workflows/build-amd.yml

Lines changed: 0 additions & 52 deletions
This file was deleted.

.github/workflows/build.yml

Lines changed: 32 additions & 8 deletions
@@ -1430,14 +1430,10 @@ jobs:
     strategy:
       matrix:
         arch: [x86, aarch64]
-        cann:
-          - '8.1.RC1.alpha001-910b-openeuler22.03-py3.10'
-        device:
-          - 'ascend910b3'
-        build:
-          - 'Release'
+        chip_type: ['910b', '310p']
+        build: ['Release']
     runs-on: ${{ matrix.arch == 'aarch64' && 'ubuntu-24.04-arm' || 'ubuntu-24.04' }}
-    container: ascendai/cann:${{ matrix.cann }}
+    container: ascendai/cann:${{ matrix.chip_type == '910b' && '8.3.rc1.alpha001-910b-openeuler22.03-py3.11' || '8.2.rc1-310p-openeuler22.03-py3.11' }}
     steps:
       - name: Checkout
         uses: actions/checkout@v4
@@ -1454,7 +1450,7 @@ jobs:
           cmake -S . -B build \
             -DCMAKE_BUILD_TYPE=${{ matrix.build }} \
             -DGGML_CANN=on \
-            -DSOC_TYPE=${{ matrix.device }}
+            -DSOC_TYPE=ascend${{ matrix.chip_type }}
           cmake --build build -j $(nproc)
 
       # TODO: simplify the following workflows using a matrix
@@ -1639,6 +1635,34 @@ jobs:
       run: |
         bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
 
+  ggml-ci-x64-amd-vulkan:
+    runs-on: [self-hosted, Linux, X64, AMD]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          vulkaninfo --summary
+          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+
+  ggml-ci-x64-amd-rocm:
+    runs-on: [self-hosted, Linux, X64, AMD]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          amd-smi static
+          GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+
   ggml-ci-mac-metal:
     runs-on: [self-hosted, macOS, ARM64]
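The CANN job now keys everything off a single chip_type matrix value: the container tag comes from the conditional expression above, and the SOC type is derived as ascend${{ matrix.chip_type }}. A rough local equivalent of the 310p matrix entry, assuming Docker access to the same ascendai/cann image (dependency list abbreviated):

    docker run --rm -v "$PWD":/src -w /src \
        ascendai/cann:8.2.rc1-310p-openeuler22.03-py3.11 \
        bash -c '
            yum install -y git gcc gcc-c++ make cmake libcurl-devel
            cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_CANN=on -DSOC_TYPE=ascend310p
            cmake --build build -j $(nproc)
        '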

.github/workflows/release.yml

Lines changed: 46 additions & 0 deletions
@@ -693,6 +693,51 @@ jobs:
           path: llama-${{ steps.tag.outputs.name }}-xcframework.zip
           name: llama-${{ steps.tag.outputs.name }}-xcframework
 
+  openEuler-cann:
+    strategy:
+      matrix:
+        arch: [x86, aarch64]
+        chip_type: ['910b', '310p']
+        build: ['Release']
+    runs-on: ${{ matrix.arch == 'aarch64' && 'ubuntu-24.04-arm' || 'ubuntu-24.04' }}
+    container: ascendai/cann:${{ matrix.chip_type == '910b' && '8.3.rc1.alpha001-910b-openeuler22.03-py3.11' || '8.2.rc1-310p-openeuler22.03-py3.11' }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Dependencies
+        run: |
+          yum update -y
+          yum install -y git gcc gcc-c++ make cmake libcurl-devel
+          git config --global --add safe.directory "$GITHUB_WORKSPACE"
+
+      - name: Build
+        run: |
+          export LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/$(uname -m)-linux/devlib/:${LD_LIBRARY_PATH}
+
+          cmake -S . -B build \
+            -DCMAKE_BUILD_TYPE=${{ matrix.build }} \
+            -DGGML_CANN=on \
+            -DSOC_TYPE=ascend${{ matrix.chip_type }}
+          cmake --build build -j $(nproc)
+
+      - name: Determine tag name
+        id: tag
+        uses: ./.github/actions/get-tag-name
+
+      - name: Pack artifacts
+        run: |
+          cp LICENSE ./build/bin/
+          zip -r llama-${{ steps.tag.outputs.name }}-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}.zip ./build/bin/*
+
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          path: llama-${{ steps.tag.outputs.name }}-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}.zip
+          name: llama-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}.zip
+
   release:
     if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
 
@@ -714,6 +759,7 @@ jobs:
       - macOS-arm64
       - macOS-x64
       - ios-xcode-build
+      - openEuler-cann
 
     steps:
       - name: Clone
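With openEuler-cann added to the release matrix (two architectures by two chip types), each release run should produce four CANN archives. For a hypothetical tag name b1234, the packed artifacts would be:

    llama-b1234-bin-910b-openEuler-x86.zip
    llama-b1234-bin-910b-openEuler-aarch64.zip
    llama-b1234-bin-310p-openEuler-x86.zip
    llama-b1234-bin-310p-openEuler-aarch64.zip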

.gitignore

Lines changed: 44 additions & 62 deletions
@@ -20,52 +20,40 @@
 *.so
 *.swp
 *.tmp
+*.DS_Store
 
 # IDE / OS
 
-.cache/
-.ccls-cache/
-.direnv/
-.DS_Store
-.envrc
-.idea/
-.swiftpm
-.vs/
-.vscode/
-nppBackup
+/.cache/
+/.ccls-cache/
+/.direnv/
+/.envrc
+/.idea/
+/.swiftpm
+/.vs/
+/.vscode/
+/nppBackup
 
 
 # Coverage
 
-gcovr-report/
-lcov-report/
+/gcovr-report/
+/lcov-report/
 
 # Build Artifacts
 
-tags
-.build/
-build*
-release
-debug
-!build-info.cmake
-!build-info.cpp.in
-!build-info.sh
-!build.zig
-!docs/build.md
+/tags
+/.build/
+/build*
+/release
+/debug
 /libllama.so
 /llama-*
 /vulkan-shaders-gen
-android-ndk-*
-arm_neon.h
-cmake-build-*
-CMakeSettings.json
-compile_commands.json
-ggml-metal-embed.metal
-llama-batched-swift
 /rpc-server
-out/
-tmp/
-autogen-*.md
+/out/
+/tmp/
+/autogen-*.md
 
 # Deprecated
 
@@ -74,44 +62,38 @@ autogen-*.md
 
 # CI
 
-!.github/workflows/*.yml
+!/.github/workflows/*.yml
 
 # Models
 
-models/*
-models-mnt
-!models/.editorconfig
-!models/ggml-vocab-*.gguf*
-!models/templates
+/models/*
+/models-mnt
+!/models/.editorconfig
+!/models/ggml-vocab-*.gguf*
+!/models/templates
 
 # Zig
-zig-out/
-zig-cache/
-
-# Logs
-
-ppl-*.txt
-qnt-*.txt
-perf-*.txt
+/zig-out/
+/zig-cache/
 
 # Examples
 
-examples/jeopardy/results.txt
-tools/server/*.css.hpp
-tools/server/*.html.hpp
-tools/server/*.js.hpp
-tools/server/*.mjs.hpp
-tools/server/*.gz.hpp
-!build_64.sh
-!examples/*.bat
-!examples/*/*.kts
-!examples/*/*/*.kts
-!examples/sycl/*.bat
-!examples/sycl/*.sh
+/examples/jeopardy/results.txt
+/tools/server/*.css.hpp
+/tools/server/*.html.hpp
+/tools/server/*.js.hpp
+/tools/server/*.mjs.hpp
+/tools/server/*.gz.hpp
+!/build_64.sh
+!/examples/*.bat
+!/examples/*/*.kts
+!/examples/*/*/*.kts
+!/examples/sycl/*.bat
+!/examples/sycl/*.sh
 
 # Server Web UI temporary files
-node_modules
-tools/server/webui/dist
+/tools/server/webui/node_modules
+/tools/server/webui/dist
 
 # Python
 
@@ -147,10 +129,10 @@ poetry.toml
 # Local scripts
 /run-vim.sh
 /run-chat.sh
-.ccache/
+/.ccache/
 
 # IDE
 *.code-workspace
-.windsurf/
+/.windsurf/
 # emscripten
 a.out.*
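Most of this change is root-anchoring: a leading / restricts a pattern to the repository root, whereas the bare form matched at any depth. A quick way to verify the new behavior, with hypothetical paths:

    # Matched: /.vscode/ still applies at the repository root.
    git check-ignore -v .vscode/settings.json
    # Exit status 1: nested .vscode/ directories are no longer ignored.
    git check-ignore -v examples/demo/.vscode/settings.json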

README.md

Lines changed: 1 addition & 0 deletions
@@ -61,6 +61,7 @@ range of hardware - locally and in the cloud.
 - Plain C/C++ implementation without any dependencies
 - Apple silicon is a first-class citizen - optimized via ARM NEON, Accelerate and Metal frameworks
 - AVX, AVX2, AVX512 and AMX support for x86 architectures
+- RVV, ZVFH, ZFH and ZICBOP support for RISC-V architectures
 - 1.5-bit, 2-bit, 3-bit, 4-bit, 5-bit, 6-bit, and 8-bit integer quantization for faster inference and reduced memory use
 - Custom CUDA kernels for running LLMs on NVIDIA GPUs (support for AMD GPUs via HIP and Moore Threads GPUs via MUSA)
 - Vulkan and SYCL backend support

common/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
@@ -50,6 +50,8 @@ add_library(${TARGET} STATIC
     base64.hpp
     chat-parser.cpp
     chat-parser.h
+    chat-parser-xml-toolcall.h
+    chat-parser-xml-toolcall.cpp
     chat.cpp
     chat.h
     common.cpp
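The two new chat-parser-xml-toolcall sources are compiled into the common static library, so a reconfigure and rebuild of that one target picks them up; the target name common is assumed from the surrounding file:

    cmake -B build
    cmake --build build --target common -j $(nproc)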
