
Commit b4ecd7f

Merge remote-tracking branch 'upstream/master'
2 parents f9ba819 + 7d77f07 commit b4ecd7f

128 files changed (+26726, -10714 lines)


.devops/cann.Dockerfile

Lines changed: 5 additions & 6 deletions
@@ -3,17 +3,15 @@
 # ==============================================================================
 
 # Define the CANN base image for easier version updates later
-ARG CANN_BASE_IMAGE=quay.io/ascend/cann:8.1.rc1-910b-openeuler22.03-py3.10
+ARG CHIP_TYPE=910b
+ARG CANN_BASE_IMAGE=quay.io/ascend/cann:8.3.rc1.alpha001-${CHIP_TYPE}-openeuler22.03-py3.11
 
 # ==============================================================================
 # BUILD STAGE
 # Compile all binary files and libraries
 # ==============================================================================
 FROM ${CANN_BASE_IMAGE} AS build
 
-# Define the Ascend chip model for compilation. Default is Ascend910B3
-ARG ASCEND_SOC_TYPE=Ascend910B3
-
 # -- Install build dependencies --
 RUN yum install -y gcc g++ cmake make git libcurl-devel python3 python3-pip && \
     yum clean all && \
@@ -36,13 +34,14 @@ ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH
 # For brevity, only core variables are listed here. You can paste the original ENV list here.
 
 # -- Build llama.cpp --
-# Use the passed ASCEND_SOC_TYPE argument and add general build options
+# Use the passed CHIP_TYPE argument and add general build options
+ARG CHIP_TYPE
 RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh --force \
     && \
     cmake -B build \
        -DGGML_CANN=ON \
        -DCMAKE_BUILD_TYPE=Release \
-       -DSOC_TYPE=${ASCEND_SOC_TYPE} \
+       -DSOC_TYPE=ascend${CHIP_TYPE} \
        . && \
     cmake --build build --config Release -j$(nproc)
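The chip model is now selected through a single CHIP_TYPE build argument instead of the previous ASCEND_SOC_TYPE string, with the SOC type passed to CMake derived as ascend${CHIP_TYPE}. A minimal sketch of building the image for the 310P variant, assuming the matching base-image tag is published (the output image name is illustrative):

    # Build the CANN image for an Ascend 310P target by overriding CHIP_TYPE.
    docker build -f .devops/cann.Dockerfile \
        --build-arg CHIP_TYPE=310p \
        -t llama-cpp-cann:310p .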

.github/copilot-instructions.md

Lines changed: 1 addition & 1 deletion
@@ -9,7 +9,7 @@ llama.cpp is a large-scale C/C++ project for efficient LLM (Large Language Model
 - **Size**: ~200k+ lines of code across 1000+ files
 - **Architecture**: Modular design with main library (`libllama`) and 40+ executable tools/examples
 - **Core dependency**: ggml tensor library (vendored in `ggml/` directory)
-- **Backends supported**: CPU (AVX/NEON optimized), CUDA, Metal, Vulkan, SYCL, ROCm, MUSA
+- **Backends supported**: CPU (AVX/NEON/RVV optimized), CUDA, Metal, Vulkan, SYCL, ROCm, MUSA
 - **License**: MIT
 
 ## Build Instructions

.github/workflows/build-amd.yml

Lines changed: 0 additions & 52 deletions
This file was deleted.

.github/workflows/build.yml

Lines changed: 32 additions & 8 deletions
@@ -1430,14 +1430,10 @@ jobs:
     strategy:
       matrix:
         arch: [x86, aarch64]
-        cann:
-          - '8.1.RC1.alpha001-910b-openeuler22.03-py3.10'
-        device:
-          - 'ascend910b3'
-        build:
-          - 'Release'
+        chip_type: ['910b', '310p']
+        build: ['Release']
     runs-on: ${{ matrix.arch == 'aarch64' && 'ubuntu-24.04-arm' || 'ubuntu-24.04' }}
-    container: ascendai/cann:${{ matrix.cann }}
+    container: ascendai/cann:${{ matrix.chip_type == '910b' && '8.3.rc1.alpha001-910b-openeuler22.03-py3.11' || '8.2.rc1-310p-openeuler22.03-py3.11' }}
     steps:
       - name: Checkout
         uses: actions/checkout@v4
@@ -1454,7 +1450,7 @@ jobs:
           cmake -S . -B build \
             -DCMAKE_BUILD_TYPE=${{ matrix.build }} \
             -DGGML_CANN=on \
-            -DSOC_TYPE=${{ matrix.device }}
+            -DSOC_TYPE=ascend${{ matrix.chip_type }}
           cmake --build build -j $(nproc)
 
       # TODO: simplify the following workflows using a matrix
@@ -1639,6 +1635,34 @@ jobs:
       run: |
         bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
 
+  ggml-ci-x64-amd-vulkan:
+    runs-on: [self-hosted, Linux, X64, AMD]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          vulkaninfo --summary
+          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+
+  ggml-ci-x64-amd-rocm:
+    runs-on: [self-hosted, Linux, X64, AMD]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          amd-smi static
+          GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+
   ggml-ci-mac-metal:
     runs-on: [self-hosted, macOS, ARM64]
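The CANN job now keys everything off a single chip_type matrix value: the container tag comes from the conditional expression above, and the SOC type is derived as ascend${{ matrix.chip_type }}. A rough local equivalent of the 310p matrix entry, assuming Docker access to the same ascendai/cann image (dependency list abbreviated):

    docker run --rm -v "$PWD":/src -w /src \
        ascendai/cann:8.2.rc1-310p-openeuler22.03-py3.11 \
        bash -c '
            yum install -y git gcc gcc-c++ make cmake libcurl-devel
            cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_CANN=on -DSOC_TYPE=ascend310p
            cmake --build build -j $(nproc)
        '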

.github/workflows/release.yml

Lines changed: 46 additions & 0 deletions
@@ -693,6 +693,51 @@ jobs:
           path: llama-${{ steps.tag.outputs.name }}-xcframework.zip
           name: llama-${{ steps.tag.outputs.name }}-xcframework
 
+  openEuler-cann:
+    strategy:
+      matrix:
+        arch: [x86, aarch64]
+        chip_type: ['910b', '310p']
+        build: ['Release']
+    runs-on: ${{ matrix.arch == 'aarch64' && 'ubuntu-24.04-arm' || 'ubuntu-24.04' }}
+    container: ascendai/cann:${{ matrix.chip_type == '910b' && '8.3.rc1.alpha001-910b-openeuler22.03-py3.11' || '8.2.rc1-310p-openeuler22.03-py3.11' }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Dependencies
+        run: |
+          yum update -y
+          yum install -y git gcc gcc-c++ make cmake libcurl-devel
+          git config --global --add safe.directory "$GITHUB_WORKSPACE"
+
+      - name: Build
+        run: |
+          export LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/$(uname -m)-linux/devlib/:${LD_LIBRARY_PATH}
+
+          cmake -S . -B build \
+            -DCMAKE_BUILD_TYPE=${{ matrix.build }} \
+            -DGGML_CANN=on \
+            -DSOC_TYPE=ascend${{ matrix.chip_type }}
+          cmake --build build -j $(nproc)
+
+      - name: Determine tag name
+        id: tag
+        uses: ./.github/actions/get-tag-name
+
+      - name: Pack artifacts
+        run: |
+          cp LICENSE ./build/bin/
+          zip -r llama-${{ steps.tag.outputs.name }}-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}.zip ./build/bin/*
+
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          path: llama-${{ steps.tag.outputs.name }}-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}.zip
+          name: llama-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}.zip
+
   release:
     if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
 
@@ -714,6 +759,7 @@ jobs:
       - macOS-arm64
       - macOS-x64
       - ios-xcode-build
+      - openEuler-cann
 
     steps:
       - name: Clone
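With openEuler-cann added to the release matrix (two architectures by two chip types), each release run should produce four CANN archives. For a hypothetical tag name b1234, the packed artifacts would be:

    llama-b1234-bin-910b-openEuler-x86.zip
    llama-b1234-bin-910b-openEuler-aarch64.zip
    llama-b1234-bin-310p-openEuler-x86.zip
    llama-b1234-bin-310p-openEuler-aarch64.zip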

.gitignore

Lines changed: 44 additions & 62 deletions
@@ -20,52 +20,40 @@
 *.so
 *.swp
 *.tmp
+*.DS_Store
 
 # IDE / OS
 
-.cache/
-.ccls-cache/
-.direnv/
-.DS_Store
-.envrc
-.idea/
-.swiftpm
-.vs/
-.vscode/
-nppBackup
+/.cache/
+/.ccls-cache/
+/.direnv/
+/.envrc
+/.idea/
+/.swiftpm
+/.vs/
+/.vscode/
+/nppBackup
 
 
 # Coverage
 
-gcovr-report/
-lcov-report/
+/gcovr-report/
+/lcov-report/
 
 # Build Artifacts
 
-tags
-.build/
-build*
-release
-debug
-!build-info.cmake
-!build-info.cpp.in
-!build-info.sh
-!build.zig
-!docs/build.md
+/tags
+/.build/
+/build*
+/release
+/debug
 /libllama.so
 /llama-*
 /vulkan-shaders-gen
-android-ndk-*
-arm_neon.h
-cmake-build-*
-CMakeSettings.json
-compile_commands.json
-ggml-metal-embed.metal
-llama-batched-swift
 /rpc-server
-out/
-tmp/
-autogen-*.md
+/out/
+/tmp/
+/autogen-*.md
 
 # Deprecated
 
@@ -74,44 +62,38 @@ autogen-*.md
 
 # CI
 
-!.github/workflows/*.yml
+!/.github/workflows/*.yml
 
 # Models
 
-models/*
-models-mnt
-!models/.editorconfig
-!models/ggml-vocab-*.gguf*
-!models/templates
+/models/*
+/models-mnt
+!/models/.editorconfig
+!/models/ggml-vocab-*.gguf*
+!/models/templates
 
 # Zig
-zig-out/
-zig-cache/
-
-# Logs
-
-ppl-*.txt
-qnt-*.txt
-perf-*.txt
+/zig-out/
+/zig-cache/
 
 # Examples
 
-examples/jeopardy/results.txt
-tools/server/*.css.hpp
-tools/server/*.html.hpp
-tools/server/*.js.hpp
-tools/server/*.mjs.hpp
-tools/server/*.gz.hpp
-!build_64.sh
-!examples/*.bat
-!examples/*/*.kts
-!examples/*/*/*.kts
-!examples/sycl/*.bat
-!examples/sycl/*.sh
+/examples/jeopardy/results.txt
+/tools/server/*.css.hpp
+/tools/server/*.html.hpp
+/tools/server/*.js.hpp
+/tools/server/*.mjs.hpp
+/tools/server/*.gz.hpp
+!/build_64.sh
+!/examples/*.bat
+!/examples/*/*.kts
+!/examples/*/*/*.kts
+!/examples/sycl/*.bat
+!/examples/sycl/*.sh
 
 # Server Web UI temporary files
-node_modules
-tools/server/webui/dist
+/tools/server/webui/node_modules
+/tools/server/webui/dist
 
 # Python
 
@@ -147,10 +129,10 @@ poetry.toml
 # Local scripts
 /run-vim.sh
 /run-chat.sh
-.ccache/
+/.ccache/
 
 # IDE
 *.code-workspace
-.windsurf/
+/.windsurf/
 # emscripten
 a.out.*
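Most of this change is root-anchoring: a leading / restricts a pattern to the repository root, whereas the bare form matched at any depth. A quick way to verify the new behavior, with hypothetical paths:

    # Matched: /.vscode/ still applies at the repository root.
    git check-ignore -v .vscode/settings.json
    # Exit status 1: nested .vscode/ directories are no longer ignored.
    git check-ignore -v examples/demo/.vscode/settings.json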

README.md

Lines changed: 1 addition & 0 deletions
@@ -61,6 +61,7 @@ range of hardware - locally and in the cloud.
 - Plain C/C++ implementation without any dependencies
 - Apple silicon is a first-class citizen - optimized via ARM NEON, Accelerate and Metal frameworks
 - AVX, AVX2, AVX512 and AMX support for x86 architectures
+- RVV, ZVFH, ZFH and ZICBOP support for RISC-V architectures
 - 1.5-bit, 2-bit, 3-bit, 4-bit, 5-bit, 6-bit, and 8-bit integer quantization for faster inference and reduced memory use
 - Custom CUDA kernels for running LLMs on NVIDIA GPUs (support for AMD GPUs via HIP and Moore Threads GPUs via MUSA)
 - Vulkan and SYCL backend support

common/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
@@ -50,6 +50,8 @@ add_library(${TARGET} STATIC
     base64.hpp
     chat-parser.cpp
     chat-parser.h
+    chat-parser-xml-toolcall.h
+    chat-parser-xml-toolcall.cpp
     chat.cpp
     chat.h
     common.cpp
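The two new chat-parser-xml-toolcall sources are compiled into the common static library, so a reconfigure and rebuild of that one target picks them up; the target name common is assumed from the surrounding file:

    cmake -B build
    cmake --build build --target common -j $(nproc)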
