Skip to content

Commit 4bb238c

Browse files
committed
sync with llama.cpp cf348a6
1 parent d763926 commit 4bb238c

File tree

7 files changed

+778
-342
lines changed

7 files changed

+778
-342
lines changed

CMakeLists.txt

Lines changed: 126 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ add_subdirectory(llama.cpp)
1515
# add_subdirectory(ggml)
1616

1717
file (GLOB CPP_FILES "llama.cpp/*.cpp")
18-
file (GLOB C_FILES "llama.cpp/*.c")
19-
file (GLOB H_FILES "llama.cpp/*.h")
18+
file (GLOB C_FILES "llama.cpp/ggml.c")
19+
file (GLOB H_FILES "llama.cpp/ggml.h" "llama.cpp/llama.h")
2020

2121
# ---------------------------------------------------------------------------------------------
2222

@@ -76,11 +76,19 @@ option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer"
7676
option(LLAMA_AVX "llama: enable AVX" ON)
7777
option(LLAMA_AVX2 "llama: enable AVX2" ON)
7878
option(LLAMA_AVX512 "llama: enable AVX512" OFF)
79+
option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF)
80+
option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF)
7981
option(LLAMA_FMA "llama: enable FMA" ON)
82+
# with MSVC, F16C is implied by AVX2/AVX512
83+
if (NOT MSVC)
84+
option(LLAMA_F16C "llama: enable F16C" ON)
85+
endif()
8086

8187
# 3rd party libs
8288
option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
8389
option(LLAMA_OPENBLAS "llama: use OpenBLAS" OFF)
90+
option(LLAMA_CUBLAS "llama: use cuBLAS" OFF)
91+
option(LLAMA_CLBLAST "llama: use CLBlast" OFF)
8492

8593
option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
8694
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
@@ -89,6 +97,11 @@ option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
8997
# Compile flags
9098
#
9199

100+
set(CMAKE_CXX_STANDARD 11)
101+
set(CMAKE_CXX_STANDARD_REQUIRED true)
102+
set(CMAKE_C_STANDARD 11)
103+
set(CMAKE_C_STANDARD_REQUIRED true)
104+
set(THREADS_PREFER_PTHREAD_FLAG ON)
92105
find_package(Threads REQUIRED)
93106

94107
if (NOT MSVC)
@@ -119,6 +132,7 @@ if (APPLE AND LLAMA_ACCELERATE)
119132
message(WARNING "Accelerate framework not found")
120133
endif()
121134
endif()
135+
122136
if (LLAMA_OPENBLAS)
123137
if (LLAMA_STATIC)
124138
set(BLA_STATIC ON)
@@ -131,11 +145,66 @@ if (LLAMA_OPENBLAS)
131145

132146
add_compile_definitions(GGML_USE_OPENBLAS)
133147
add_link_options(${BLAS_LIBRARIES})
148+
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} openblas)
149+
150+
# find header file
151+
set(OPENBLAS_INCLUDE_SEARCH_PATHS
152+
/usr/include
153+
/usr/include/openblas
154+
/usr/include/openblas-base
155+
/usr/local/include
156+
/usr/local/include/openblas
157+
/usr/local/include/openblas-base
158+
/opt/OpenBLAS/include
159+
$ENV{OpenBLAS_HOME}
160+
$ENV{OpenBLAS_HOME}/include
161+
)
162+
find_path(OPENBLAS_INC NAMES cblas.h PATHS ${OPENBLAS_INCLUDE_SEARCH_PATHS})
163+
add_compile_options(-I${OPENBLAS_INC})
134164
else()
135165
message(WARNING "OpenBLAS not found")
136166
endif()
137167
endif()
138168

169+
if (LLAMA_CUBLAS)
170+
cmake_minimum_required(VERSION 3.17)
171+
172+
find_package(CUDAToolkit)
173+
if (CUDAToolkit_FOUND)
174+
message(STATUS "cuBLAS found")
175+
176+
enable_language(CUDA)
177+
178+
set(GGML_CUDA_SOURCES ggml-cuda.cu ggml-cuda.h)
179+
180+
add_compile_definitions(GGML_USE_CUBLAS)
181+
182+
if (LLAMA_STATIC)
183+
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
184+
else()
185+
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart CUDA::cublas CUDA::cublasLt)
186+
endif()
187+
188+
else()
189+
message(WARNING "cuBLAS not found")
190+
endif()
191+
endif()
192+
193+
if (LLAMA_CLBLAST)
194+
find_package(CLBlast)
195+
if (CLBlast_FOUND)
196+
message(STATUS "CLBlast found")
197+
198+
set(GGML_OPENCL_SOURCES ggml-opencl.c ggml-opencl.h)
199+
200+
add_compile_definitions(GGML_USE_CLBLAST)
201+
202+
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} clblast)
203+
else()
204+
message(WARNING "CLBlast not found")
205+
endif()
206+
endif()
207+
139208
if (LLAMA_ALL_WARNINGS)
140209
if (NOT MSVC)
141210
set(c_flags
@@ -147,14 +216,14 @@ if (LLAMA_ALL_WARNINGS)
147216
-Wshadow
148217
-Wstrict-prototypes
149218
-Wpointer-arith
150-
-Wno-unused-function
151219
)
152220
set(cxx_flags
153221
-Wall
154222
-Wextra
155223
-Wpedantic
156224
-Wcast-qual
157225
-Wno-unused-function
226+
-Wno-multichar
158227
)
159228
else()
160229
# todo : msvc
@@ -167,6 +236,14 @@ if (LLAMA_ALL_WARNINGS)
167236

168237
endif()
169238

239+
if (MSVC)
240+
add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
241+
242+
if (BUILD_SHARED_LIBS)
243+
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
244+
endif()
245+
endif()
246+
170247
if (LLAMA_LTO)
171248
include(CheckIPOSupported)
172249
check_ipo_supported(RESULT result OUTPUT output)
@@ -202,22 +279,52 @@ if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES
202279
# TODO: arm msvc?
203280
else()
204281
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64")
282+
# Apple M1, M2, etc.
283+
# Raspberry Pi 3, 4, Zero 2 (64-bit)
205284
add_compile_options(-mcpu=native)
206285
endif()
207-
# TODO: armv6,7,8 version specific flags
286+
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6")
287+
# Raspberry Pi 1, Zero
288+
add_compile_options(-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access)
289+
endif()
290+
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7")
291+
# Raspberry Pi 2
292+
add_compile_options(-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations)
293+
endif()
294+
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8")
295+
# Raspberry Pi 3, 4, Zero 2 (32-bit)
296+
add_compile_options(-mfp16-format=ieee -mno-unaligned-access)
297+
endif()
208298
endif()
209299
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$")
210300
message(STATUS "x86 detected")
211301
if (MSVC)
212302
if (LLAMA_AVX512)
213-
add_compile_options(/arch:AVX512)
303+
add_compile_options($<$<COMPILE_LANGUAGE:C>:/arch:AVX512>)
304+
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/arch:AVX512>)
305+
# MSVC has no compile-time flags enabling specific
306+
# AVX512 extensions, nor does it define the
307+
# macros corresponding to the extensions.
308+
# Do it manually.
309+
if (LLAMA_AVX512_VBMI)
310+
add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VBMI__>)
311+
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VBMI__>)
312+
endif()
313+
if (LLAMA_AVX512_VNNI)
314+
add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VNNI__>)
315+
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>)
316+
endif()
214317
elseif (LLAMA_AVX2)
215-
add_compile_options(/arch:AVX2)
318+
add_compile_options($<$<COMPILE_LANGUAGE:C>:/arch:AVX2>)
319+
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/arch:AVX2>)
216320
elseif (LLAMA_AVX)
217-
add_compile_options(/arch:AVX)
321+
add_compile_options($<$<COMPILE_LANGUAGE:C>:/arch:AVX>)
322+
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/arch:AVX>)
218323
endif()
219324
else()
220-
add_compile_options(-mf16c)
325+
if (LLAMA_F16C)
326+
add_compile_options(-mf16c)
327+
endif()
221328
if (LLAMA_FMA)
222329
add_compile_options(-mfma)
223330
endif()
@@ -229,13 +336,20 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$")
229336
endif()
230337
if (LLAMA_AVX512)
231338
add_compile_options(-mavx512f)
232-
# add_compile_options(-mavx512cd)
233-
# add_compile_options(-mavx512dq)
234-
# add_compile_options(-mavx512bw)
339+
add_compile_options(-mavx512bw)
340+
endif()
341+
if (LLAMA_AVX512_VBMI)
342+
add_compile_options(-mavx512vbmi)
343+
endif()
344+
if (LLAMA_AVX512_VNNI)
345+
add_compile_options(-mavx512vnni)
235346
endif()
236347
endif()
348+
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
349+
message(STATUS "PowerPC detected")
350+
add_compile_options(-mcpu=native -mtune=native)
351+
# TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10 (MMA), and query for big-endian systems (ppc64/le/be)
237352
else()
238-
# TODO: support PowerPC
239353
message(STATUS "Unknown architecture")
240354
endif()
241355

README.md

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -168,10 +168,7 @@ while True:
168168
```
169169

170170
# Supported models
171-
172-
Fully tested with [GPT4All](https://github.com/nomic-ai/gpt4all) model, see [PyGPT4All](https://github.com/nomic-ai/pygpt4all).
173-
174-
But all models supported by `llama.cpp` should be supported as well:
171+
In principle, all models supported by `llama.cpp` should be supported as well:
175172

176173
<blockquote>
177174

llama.cpp

0 commit comments

Comments
 (0)