Skip to content

Commit 62e98ad

Browse files
authored
Merge branch 'master' into amd-warp-reduce
2 parents a37d885 + 8c570c9 commit 62e98ad

File tree

257 files changed

+75054
-55161
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

257 files changed

+75054
-55161
lines changed

.devops/main-intel.Dockerfile

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,12 @@ WORKDIR /app
1010

1111
COPY . .
1212

13-
RUN mkdir build && \
14-
cd build && \
15-
if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
13+
RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
1614
echo "LLAMA_SYCL_F16 is set" && \
1715
export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
1816
fi && \
19-
cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
20-
cmake --build . --config Release --target main
17+
cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
18+
cmake --build build --config Release --target main
2119

2220
FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime
2321

.devops/main-vulkan.Dockerfile

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,8 @@ RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key
1414
# Build it
1515
WORKDIR /app
1616
COPY . .
17-
RUN mkdir build && \
18-
cd build && \
19-
cmake .. -DLLAMA_VULKAN=1 && \
20-
cmake --build . --config Release --target main
17+
RUN cmake -B build -DLLAMA_VULKAN=1 && \
18+
cmake --build build --config Release --target main
2119

2220
# Clean up
2321
WORKDIR /

.devops/server-intel.Dockerfile

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,12 @@ WORKDIR /app
1010

1111
COPY . .
1212

13-
RUN mkdir build && \
14-
cd build && \
15-
if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
13+
RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
1614
echo "LLAMA_SYCL_F16 is set" && \
1715
export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
1816
fi && \
19-
cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
20-
cmake --build . --config Release --target server
17+
cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
18+
cmake --build build --config Release --target server
2119

2220
FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime
2321

.devops/server-vulkan.Dockerfile

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,8 @@ RUN apt-get update && \
1818
# Build it
1919
WORKDIR /app
2020
COPY . .
21-
RUN mkdir build && \
22-
cd build && \
23-
cmake .. -DLLAMA_VULKAN=1 -DLLAMA_CURL=1 && \
24-
cmake --build . --config Release --target server
21+
RUN cmake -B build -DLLAMA_VULKAN=1 -DLLAMA_CURL=1 && \
22+
cmake --build build --config Release --target server
2523

2624
# Clean up
2725
WORKDIR /

.flake8

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,17 @@
11
[flake8]
22
max-line-length = 125
3-
ignore = W503
3+
ignore = E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704,W503
4+
exclude =
5+
# Do not traverse examples
6+
examples,
7+
# Do not include package initializers
8+
__init__.py,
9+
# No need to traverse our git directory
10+
.git,
11+
# There's no value in checking cache directories
12+
__pycache__,
13+
# No need to include the build path
14+
build,
15+
# This contains builds that we don't want to check
16+
dist # This is generated with `python build .` for package releases
17+
# max-complexity = 10

.github/workflows/bench.yml

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ on:
3232
- cron: '04 2 * * *'
3333

3434
concurrency:
35-
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}-${{ github.event.inputs.sha }}
35+
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}-${{ github.event.inputs.sha }}
3636
cancel-in-progress: true
3737

3838
jobs:
@@ -52,7 +52,19 @@ jobs:
5252
ftype: q4_0
5353
pr_comment_enabled: "true"
5454

55-
if: ${{ github.event.inputs.gpu-series == 'Standard_NC4as_T4_v3' || github.event.schedule || github.event.pull_request || github.head_ref == 'master' || github.ref_name == 'master' || github.event.push.ref == 'refs/heads/master' }}
55+
if: |
56+
inputs.gpu-series == 'Standard_NC4as_T4_v3'
57+
|| (
58+
github.event_name == 'schedule'
59+
&& github.ref_name == 'master'
60+
&& github.repository_owner == 'ggerganov'
61+
)
62+
|| github.event_name == 'pull_request_target'
63+
|| (
64+
github.event_name == 'push'
65+
&& github.event.ref == 'refs/heads/master'
66+
&& github.repository_owner == 'ggerganov'
67+
)
5668
steps:
5769
- name: Clone
5870
id: checkout
@@ -96,9 +108,7 @@ jobs:
96108
id: cmake_build
97109
run: |
98110
set -eux
99-
mkdir build
100-
cd build
101-
cmake .. \
111+
cmake -B build \
102112
-DLLAMA_NATIVE=OFF \
103113
-DLLAMA_BUILD_SERVER=ON \
104114
-DLLAMA_CURL=ON \
@@ -109,7 +119,7 @@ jobs:
109119
-DLLAMA_FATAL_WARNINGS=OFF \
110120
-DLLAMA_ALL_WARNINGS=OFF \
111121
-DCMAKE_BUILD_TYPE=Release;
112-
cmake --build . --config Release -j $(nproc) --target server
122+
cmake --build build --config Release -j $(nproc) --target server
113123
114124
- name: Download the dataset
115125
id: download_dataset

.github/workflows/build.yml

Lines changed: 105 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ jobs:
3232
- name: Clone
3333
id: checkout
3434
uses: actions/checkout@v4
35+
with:
36+
fetch-depth: 0
3537

3638
- name: Dependencies
3739
id: depends
@@ -52,7 +54,7 @@ jobs:
5254
id: cmake_test
5355
run: |
5456
cd build
55-
ctest -L main --verbose --timeout 900
57+
ctest -L 'main|curl' --verbose --timeout 900
5658
5759
- name: Determine tag name
5860
id: tag
@@ -88,6 +90,8 @@ jobs:
8890
- name: Clone
8991
id: checkout
9092
uses: actions/checkout@v4
93+
with:
94+
fetch-depth: 0
9195

9296
- name: Dependencies
9397
id: depends
@@ -101,7 +105,9 @@ jobs:
101105
sysctl -a
102106
mkdir build
103107
cd build
104-
cmake -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_METAL_EMBED_LIBRARY=ON -DLLAMA_CURL=ON ..
108+
# Metal is disabled due to intermittent failures with Github runners not having a GPU:
109+
# https://github.com/ggerganov/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
110+
cmake -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_METAL=OFF -DLLAMA_CURL=ON ..
105111
cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
106112
107113
- name: Test
@@ -204,26 +210,28 @@ jobs:
204210
- name: Clone
205211
id: checkout
206212
uses: actions/checkout@v4
213+
with:
214+
fetch-depth: 0
207215

208216
- name: Dependencies
209217
id: depends
210218
run: |
211219
sudo apt-get update
212-
sudo apt-get install build-essential
220+
sudo apt-get install build-essential libcurl4-openssl-dev
213221
214222
- name: Build
215223
id: cmake_build
216224
run: |
217225
mkdir build
218226
cd build
219-
cmake .. -DLLAMA_FATAL_WARNINGS=ON
227+
cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON
220228
cmake --build . --config Release -j $(nproc)
221229
222230
- name: Test
223231
id: cmake_test
224232
run: |
225233
cd build
226-
ctest -L main --verbose --timeout 900
234+
ctest -L 'main|curl' --verbose --timeout 900
227235
228236
- name: Test llama2c conversion
229237
id: llama2c_test
@@ -236,6 +244,33 @@ jobs:
236244
./bin/convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf
237245
./bin/main -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256
238246
247+
- name: Determine tag name
248+
id: tag
249+
shell: bash
250+
run: |
251+
BUILD_NUMBER="$(git rev-list --count HEAD)"
252+
SHORT_HASH="$(git rev-parse --short=7 HEAD)"
253+
if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
254+
echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
255+
else
256+
SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
257+
echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
258+
fi
259+
260+
- name: Pack artifacts
261+
id: pack_artifacts
262+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
263+
run: |
264+
cp LICENSE ./build/bin/
265+
zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip ./build/bin/*
266+
267+
- name: Upload artifacts
268+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
269+
uses: actions/upload-artifact@v4
270+
with:
271+
path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip
272+
name: llama-bin-ubuntu-x64.zip
273+
239274
# ubuntu-latest-cmake-sanitizer:
240275
# runs-on: ubuntu-latest
241276
#
@@ -558,6 +593,63 @@ jobs:
558593
run: |
559594
make swift
560595
596+
windows-msys2:
597+
runs-on: windows-latest
598+
599+
strategy:
600+
fail-fast: false
601+
matrix:
602+
include:
603+
- { sys: UCRT64, env: ucrt-x86_64, build: Release }
604+
- { sys: CLANG64, env: clang-x86_64, build: Release }
605+
606+
steps:
607+
- name: Clone
608+
uses: actions/checkout@v4
609+
610+
- name: Setup ${{ matrix.sys }}
611+
uses: msys2/setup-msys2@v2
612+
with:
613+
update: true
614+
msystem: ${{matrix.sys}}
615+
install: >-
616+
base-devel
617+
mingw-w64-${{matrix.env}}-toolchain
618+
mingw-w64-${{matrix.env}}-cmake
619+
mingw-w64-${{matrix.env}}-openblas
620+
621+
- name: Build using make
622+
shell: msys2 {0}
623+
run: |
624+
make -j $(nproc)
625+
626+
- name: Clean after building using make
627+
shell: msys2 {0}
628+
run: |
629+
make clean
630+
631+
- name: Build using make w/ OpenBLAS
632+
shell: msys2 {0}
633+
run: |
634+
make LLAMA_OPENBLAS=1 -j $(nproc)
635+
636+
- name: Build using CMake
637+
shell: msys2 {0}
638+
run: |
639+
cmake -B build
640+
cmake --build build --config ${{ matrix.build }} -j $(nproc)
641+
642+
- name: Clean after building using CMake
643+
shell: msys2 {0}
644+
run: |
645+
rm -rf build
646+
647+
- name: Build using CMake w/ OpenBLAS
648+
shell: msys2 {0}
649+
run: |
650+
cmake -B build -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
651+
cmake --build build --config ${{ matrix.build }} -j $(nproc)
652+
561653
windows-latest-cmake:
562654
runs-on: windows-latest
563655

@@ -938,6 +1030,12 @@ jobs:
9381030
- name: Download artifacts
9391031
id: download-artifact
9401032
uses: actions/download-artifact@v4
1033+
with:
1034+
path: ./artifact
1035+
1036+
- name: Move artifacts
1037+
id: move_artifacts
1038+
run: mkdir -p ./artifact/release && mv ./artifact/*/*.zip ./artifact/release
9411039

9421040
- name: Create release
9431041
id: create_release
@@ -956,15 +1054,15 @@ jobs:
9561054
const path = require('path');
9571055
const fs = require('fs');
9581056
const release_id = '${{ steps.create_release.outputs.id }}';
959-
for (let file of await fs.readdirSync('./artifact')) {
1057+
for (let file of await fs.readdirSync('./artifact/release')) {
9601058
if (path.extname(file) === '.zip') {
9611059
console.log('uploadReleaseAsset', file);
9621060
await github.repos.uploadReleaseAsset({
9631061
owner: context.repo.owner,
9641062
repo: context.repo.repo,
9651063
release_id: release_id,
9661064
name: file,
967-
data: await fs.readFileSync(`./artifact/${file}`)
1065+
data: await fs.readFileSync(`./artifact/release/${file}`)
9681066
});
9691067
}
9701068
}

.github/workflows/close-issue.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ jobs:
1212
steps:
1313
- uses: actions/stale@v5
1414
with:
15-
exempt-issue-labels: "refactor,help wanted,good first issue,research"
15+
exempt-issue-labels: "refactor,help wanted,good first issue,research,bug"
1616
days-before-issue-stale: 30
1717
days-before-issue-close: 14
1818
stale-issue-label: "stale"

.github/workflows/docker.yml

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,14 +91,20 @@ jobs:
9191
echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
9292
fi
9393
94+
- name: Downcase github.repository_owner
95+
run: |
96+
echo "repository_owner_lowercase=${GITHUB_REPOSITORY_OWNER@L}" >> $GITHUB_ENV
97+
env:
98+
GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'
99+
94100
- name: Build and push Docker image (versioned)
95101
if: github.event_name == 'push'
96102
uses: docker/build-push-action@v4
97103
with:
98104
context: .
99105
push: true
100106
platforms: ${{ matrix.config.platforms }}
101-
tags: "ghcr.io/${{ github.repository_owner }}/llama.cpp:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
107+
tags: "ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
102108
file: ${{ matrix.config.dockerfile }}
103109

104110
- name: Build and push Docker image (tagged)
@@ -107,5 +113,5 @@ jobs:
107113
context: .
108114
push: ${{ github.event_name == 'push' }}
109115
platforms: ${{ matrix.config.platforms }}
110-
tags: "ghcr.io/${{ github.repository_owner }}/llama.cpp:${{ matrix.config.tag }},ghcr.io/${{ github.repository_owner }}/llama.cpp:${{ matrix.config.tag }}-${{ steps.tag.outputs.name }}"
116+
tags: "ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }},ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }}-${{ steps.tag.outputs.name }}"
111117
file: ${{ matrix.config.dockerfile }}

.github/workflows/python-lint.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,5 +20,4 @@ jobs:
2020
- name: flake8 Lint
2121
uses: py-actions/flake8@v2
2222
with:
23-
ignore: "E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704,W503"
24-
exclude: "examples/*,examples/*/**,*/**/__init__.py"
23+
plugins: "flake8-no-print"

0 commit comments

Comments
 (0)