Skip to content

Commit 904dbe3

Browse files
committed
Merge remote-tracking branch 'origin/master' into mul_mat_opt
2 parents 9edfcc9 + f0cfae4 commit 904dbe3

File tree

296 files changed

+43616
-11535
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

296 files changed

+43616
-11535
lines changed

.github/workflows/build.yml

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,39 @@ jobs:
387387
cd build
388388
ctest -L main --verbose
389389
390+
# Configures and builds the project on Ubuntu 24.04 with the Vulkan backend
# enabled, using the Vulkan toolchain (glslc, libvulkan-dev) installed from
# apt. This job only configures and builds; it runs no tests.
# NOTE(review): the "-deb" suffix presumably refers to using the distro
# packages rather than a separately installed Vulkan SDK - confirm.
ubuntu-24-cmake-vulkan-deb:
  runs-on: ubuntu-24.04

  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v4

    - name: ccache
      # NOTE(review): the scraped page showed this reference as
      # "ggml-org/[email protected]" (e-mail obfuscation artifact). Restored
      # to the ccache action pin used upstream - confirm the exact version.
      uses: ggml-org/ccache-action@v1.2.16
      with:
        key: ubuntu-24-cmake-vulkan-deb
        evict-old-files: 1d

    - name: Dependencies
      id: depends
      run: |
        # Refresh the package index first so the install does not fail on
        # stale mirrors (same pattern as the ggml-ci-arm64-cpu-kleidiai job).
        sudo apt-get update
        sudo apt-get install -y glslc libvulkan-dev libcurl4-openssl-dev

    - name: Configure
      id: cmake_configure
      run: |
        cmake -B build \
          -DCMAKE_BUILD_TYPE=RelWithDebInfo \
          -DGGML_BACKEND_DL=ON \
          -DGGML_CPU_ALL_VARIANTS=ON \
          -DGGML_VULKAN=ON

    - name: Build
      id: cmake_build
      run: |
        cmake --build build -j $(nproc)
422+
390423
ubuntu-24-cmake-vulkan:
391424
runs-on: ubuntu-24.04
392425

@@ -1272,6 +1305,81 @@ jobs:
12721305
cd examples/llama.android
12731306
./gradlew build --no-daemon
12741307
1308+
# Cross-compiles the project for Android arm64 with the NDK pointed to by
# ANDROID_NDK_ROOT, in two matrix variants:
#   - arm64-cpu:        plain CPU build configured through explicit -D flags
#   - arm64-snapdragon: preset-based build that first installs OpenCL
#                       headers/loader and the Hexagon SDK
# Nothing is executed on a device; the Test step is a placeholder.
# NOTE(review): ANDROID_NDK_ROOT is assumed to be provided by the runner
# image - it is not set anywhere in this job.
android-ndk-build:
  runs-on: ubuntu-latest

  env:
    # Release tag used for all three Khronos OpenCL downloads below.
    # Quoted so the value can never be re-typed by a YAML parser.
    OPENCL_VERSION: '2025.07.22'

  strategy:
    matrix:
      include:
        - build: 'arm64-cpu'
          defines: '-D ANDROID_ABI=arm64-v8a -D ANDROID_PLATFORM=android-31 -D CMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake -D GGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8.5-a+fp16+i8mm -G Ninja -D LLAMA_CURL=OFF -D GGML_OPENMP=OFF'
        - build: 'arm64-snapdragon'
          defines: '--preset arm64-android-snapdragon-release'

  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v4

    - name: Install OpenCL Headers and Libs
      id: install_opencl
      if: ${{ matrix.build == 'arm64-snapdragon' }}
      run: |
        mkdir opencl
        # --fail makes curl exit non-zero on an HTTP error instead of saving
        # the error page as if it were the archive.
        curl -fL -o opencl/clhpp.tar.gz https://github.com/KhronosGroup/OpenCL-CLHPP/archive/refs/tags/v${OPENCL_VERSION}.tar.gz
        curl -fL -o opencl/headers.tar.gz https://github.com/KhronosGroup/OpenCL-Headers/archive/refs/tags/v${OPENCL_VERSION}.tar.gz
        curl -fL -o opencl/icd-loader.tar.gz https://github.com/KhronosGroup/OpenCL-ICD-Loader/archive/refs/tags/v${OPENCL_VERSION}.tar.gz
        tar -xaf opencl/headers.tar.gz -C opencl
        tar -xaf opencl/clhpp.tar.gz -C opencl
        tar -xaf opencl/icd-loader.tar.gz -C opencl
        # Copy the C and C++ OpenCL headers into the NDK sysroot.
        sudo cp -r opencl/OpenCL-Headers-${OPENCL_VERSION}/CL ${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include
        sudo cp -r opencl/OpenCL-CLHPP-${OPENCL_VERSION}/include/CL/* ${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include/CL
        # Build the ICD loader out-of-tree with -S/-B rather than `cd`-ing
        # into its source directory: the working directory stays at the
        # checkout root, so the final `rm -rf opencl` really removes the
        # scratch directory (after a `cd` it resolved to a non-existent
        # path and silently did nothing).
        cmake -S opencl/OpenCL-ICD-Loader-${OPENCL_VERSION} -B opencl/icd-build -G Ninja \
          -DCMAKE_BUILD_TYPE=Release \
          -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake \
          -DOPENCL_ICD_LOADER_HEADERS_DIR=${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include \
          -DANDROID_ABI=arm64-v8a \
          -DANDROID_PLATFORM=31 \
          -DANDROID_STL=c++_shared
        cmake --build opencl/icd-build
        sudo cp opencl/icd-build/libOpenCL.so ${ANDROID_NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/lib/aarch64-linux-android
        rm -rf opencl

    - name: Install Hexagon SDK
      id: install_hexsdk
      if: ${{ matrix.build == 'arm64-snapdragon' }}
      env:
        HEXSDK_VER: '6.4.0.2'
        HEXTLS_VER: '19.0.04'
      run: |
        # The release asset is an xz-compressed tarball; keep the matching
        # suffix on the local file name.
        curl -fL -o hex-sdk.tar.xz https://github.com/snapdragon-toolchain/hexagon-sdk/releases/download/v$HEXSDK_VER/hexagon-sdk-v$HEXSDK_VER-amd64-lnx.tar.xz
        mkdir hex-sdk
        tar -xaf hex-sdk.tar.xz -C hex-sdk
        ls -l hex-sdk
        sudo mv hex-sdk /opt/hexagon
        # Export the SDK locations and defaults to the following steps.
        # NOTE(review): the paths assume the archive unpacks into a
        # top-level directory named after $HEXSDK_VER - confirm.
        echo "HEXAGON_SDK_ROOT=/opt/hexagon/$HEXSDK_VER" >> "$GITHUB_ENV"
        echo "HEXAGON_TOOLS_ROOT=/opt/hexagon/$HEXSDK_VER/tools/HEXAGON_Tools/$HEXTLS_VER" >> "$GITHUB_ENV"
        echo "DEFAULT_HLOS_ARCH=64" >> "$GITHUB_ENV"
        echo "DEFAULT_TOOLS_VARIANT=toolv19" >> "$GITHUB_ENV"
        echo "DEFAULT_NO_QURT_INC=0" >> "$GITHUB_ENV"
        echo "DEFAULT_DSP_ARCH=v73" >> "$GITHUB_ENV"

    - name: Update CMake presets
      id: update_presets
      if: ${{ matrix.build == 'arm64-snapdragon' }}
      run: |
        # Put the preset file at the repository root so `cmake --preset`
        # from the snapdragon matrix entry can find it.
        cp docs/backend/hexagon/CMakeUserPresets.json .

    - name: Build
      id: ndk_build
      run: |
        # matrix.defines is deliberately left unquoted: it holds several
        # space-separated CMake arguments that the shell must split.
        cmake ${{ matrix.defines }} -B build
        cmake --build build
        cmake --install build --prefix pkg-adb/llama.cpp

    - name: Test
      id: cmake_test
      run: |
        echo "FIXME: test on devices"
1382+
12751383
openEuler-latest-cmake-cann:
12761384
if: ${{ github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'Ascend NPU') }}
12771385
defaults:
@@ -1515,3 +1623,29 @@ jobs:
15151623
run: |
15161624
vulkaninfo --summary
15171625
GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
1626+
1627+
# Runs the ci/run.sh pipeline on an ubuntu-22.04-arm runner with
# GG_BUILD_KLEIDIAI=1, which makes ci/run.sh add the KleidiAI CMake options.
ggml-ci-arm64-cpu-kleidiai:
  runs-on: ubuntu-22.04-arm

  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v4

    - name: ccache
      # NOTE(review): the scraped page showed this reference as
      # "ggml-org/[email protected]" (e-mail obfuscation artifact). Restored
      # to the ccache action pin used upstream - confirm the exact version.
      uses: ggml-org/ccache-action@v1.2.16
      with:
        key: ggml-ci-arm64-cpu-kleidiai
        evict-old-files: 1d

    - name: Dependencies
      id: depends
      run: |
        sudo apt-get update
        sudo apt-get install -y build-essential libcurl4-openssl-dev

    - name: Test
      id: ggml-ci
      run: |
        # Arguments are <output-dir> <mnt-dir>, per the ci/run.sh usage text.
        # NOTE(review): GG_BUILD_EXTRA_TESTS_0 is consumed by ci/run.sh; its
        # exact effect is not visible here - confirm.
        GG_BUILD_KLEIDIAI=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
1651+

.github/workflows/release.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,8 @@ jobs:
134134
include:
135135
- build: 'x64'
136136
os: ubuntu-22.04
137+
- build: 's390x-z15' # z15 because our CI runners are on z15
138+
os: ubuntu-22.04-s390x
137139
# GGML_BACKEND_DL and GGML_CPU_ALL_VARIANTS are not currently supported on arm
138140
# - build: 'arm64'
139141
# os: ubuntu-22.04-arm

.github/workflows/update-ops-docs.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,12 @@ name: Update Operations Documentation
33
on:
44
push:
55
paths:
6+
- 'docs/ops.md'
67
- 'docs/ops/**'
78
- 'scripts/create_ops_docs.py'
89
pull_request:
910
paths:
11+
- 'docs/ops.md'
1012
- 'docs/ops/**'
1113
- 'scripts/create_ops_docs.py'
1214

CODEOWNERS

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@
5555
/ggml/src/ggml-cuda/common.cuh @slaren
5656
/ggml/src/ggml-cuda/fattn* @JohannesGaessler
5757
/ggml/src/ggml-cuda/ggml-cuda.cu @slaren
58-
/ggml/src/ggml-cuda/mmf.* @JohannesGaessler
58+
/ggml/src/ggml-cuda/mmf.* @JohannesGaessler @am17an
5959
/ggml/src/ggml-cuda/mmq.* @JohannesGaessler
6060
/ggml/src/ggml-cuda/mmvf.* @JohannesGaessler
6161
/ggml/src/ggml-cuda/mmvq.* @JohannesGaessler
@@ -65,6 +65,7 @@
6565
/ggml/src/ggml-impl.h @ggerganov @slaren
6666
/ggml/src/ggml-metal/ @ggerganov
6767
/ggml/src/ggml-opencl/ @lhez @max-krasnyansky
68+
/ggml/src/ggml-hexagon/ @max-krasnyansky
6869
/ggml/src/ggml-opt.cpp @JohannesGaessler
6970
/ggml/src/ggml-quants.* @ggerganov
7071
/ggml/src/ggml-rpc/ @rgerganov

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
8484
- [X] [Mistral 7B](https://huggingface.co/mistralai/Mistral-7B-v0.1)
8585
- [x] [Mixtral MoE](https://huggingface.co/models?search=mistral-ai/Mixtral)
8686
- [x] [DBRX](https://huggingface.co/databricks/dbrx-instruct)
87+
- [x] [Jamba](https://huggingface.co/ai21labs)
8788
- [X] [Falcon](https://huggingface.co/models?search=tiiuae/falcon)
8889
- [X] [Chinese LLaMA / Alpaca](https://github.com/ymcui/Chinese-LLaMA-Alpaca) and [Chinese LLaMA-2 / Alpaca-2](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2)
8990
- [X] [Vigogne (French)](https://github.com/bofenghuang/vigogne)
@@ -138,6 +139,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
138139
- [x] [Ling models](https://huggingface.co/collections/inclusionAI/ling-67c51c85b34a7ea0aba94c32)
139140
- [x] [LFM2 models](https://huggingface.co/collections/LiquidAI/lfm2-686d721927015b2ad73eaa38)
140141
- [x] [Hunyuan models](https://huggingface.co/collections/tencent/hunyuan-dense-model-6890632cda26b19119c9c5e7)
142+
- [x] [BailingMoeV2 (Ring/Ling 2.0) models](https://huggingface.co/collections/inclusionAI/ling-v2-68bf1dd2fc34c306c1fa6f86)
141143

142144
#### Multimodal
143145

@@ -187,6 +189,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
187189
- Swift [srgtuszy/llama-cpp-swift](https://github.com/srgtuszy/llama-cpp-swift)
188190
- Swift [ShenghaiWang/SwiftLlama](https://github.com/ShenghaiWang/SwiftLlama)
189191
- Delphi [Embarcadero/llama-cpp-delphi](https://github.com/Embarcadero/llama-cpp-delphi)
192+
- Go (no CGo needed): [hybridgroup/yzma](https://github.com/hybridgroup/yzma)
190193

191194
</details>
192195

@@ -278,6 +281,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
278281
| [IBM zDNN](docs/backend/zDNN.md) | IBM Z & LinuxONE |
279282
| [WebGPU [In Progress]](docs/build.md#webgpu) | All |
280283
| [RPC](https://github.com/ggml-org/llama.cpp/tree/master/tools/rpc) | All |
284+
| [Hexagon [In Progress]](docs/backend/hexagon/README.md) | Snapdragon |
281285

282286
## Obtaining and quantizing models
283287

ci/run.sh

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@
2222
# # with MUSA support
2323
# GG_BUILD_MUSA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
2424
#
25+
# # with KLEIDIAI support
26+
# GG_BUILD_KLEIDIAI=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
27+
#
2528

2629
if [ -z "$2" ]; then
2730
echo "usage: $0 <output-dir> <mnt-dir>"
@@ -72,7 +75,7 @@ if [ ! -z ${GG_BUILD_ROCM} ]; then
7275
exit 1
7376
fi
7477

75-
CMAKE_EXTRA="${CMAKE_EXTRA} -DAMDGPU_TARGETS=${GG_BUILD_AMDGPU_TARGETS}"
78+
CMAKE_EXTRA="${CMAKE_EXTRA} -DGPU_TARGETS=${GG_BUILD_AMDGPU_TARGETS}"
7679
fi
7780

7881
if [ ! -z ${GG_BUILD_SYCL} ]; then
@@ -115,6 +118,34 @@ if [ ! -z ${GG_BUILD_NO_SVE} ]; then
115118
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8.5-a+fp16+i8mm"
116119
fi
117120

121+
# KleidiAI build: pick the first -march baseline the C++ compiler accepts
# (candidates are tried in the listed order) and append the matching CMake
# options to CMAKE_EXTRA. Aborts the script when no candidate compiles.
if [[ -n "${GG_BUILD_KLEIDIAI}" ]]; then
    echo ">>===== Enabling KleidiAI support"

    CPU=""
    CANDIDATES=(
        "armv9-a+dotprod+i8mm"
        "armv8.6-a+dotprod+i8mm"
        "armv8.2-a+dotprod"
    )

    for cpu in "${CANDIDATES[@]}"; do
        # Probe: compile an empty program from stdin with this -march value;
        # compiler output is discarded, only the exit status matters.
        if ! ${CXX:-c++} -march="$cpu" -x c++ - -c -o /dev/null <<< 'int main(){}' >/dev/null 2>&1; then
            continue
        fi

        CPU="$cpu"
        break
    done

    if [[ -z "$CPU" ]]; then
        echo "ERROR: None of the required ARM baselines (armv9/armv8.6/armv8.2 + dotprod) are supported by this compiler."
        exit 1
    fi

    echo ">>===== Using ARM baseline: ${CPU}"

    # Keep whatever is already in CMAKE_EXTRA and append the KleidiAI flags.
    CMAKE_EXTRA="${CMAKE_EXTRA:+$CMAKE_EXTRA } -DGGML_NATIVE=OFF -DGGML_CPU_KLEIDIAI=ON -DGGML_CPU_AARCH64=ON -DGGML_CPU_ARM_ARCH=${CPU} -DBUILD_SHARED_LIBS=OFF"
fi
148+
118149
## helpers
119150

120151
# download a file if it does not exist or if it is outdated

0 commit comments

Comments
 (0)