@@ -15,8 +15,8 @@ add_subdirectory(llama.cpp)
1515# add_subdirectory(ggml)
1616
# Collect the llama.cpp sources used by this build into CPP_FILES, C_FILES
# and H_FILES. The change narrows the C and header lists from wildcards to the
# specific files ggml.c, ggml.h and llama.h.
# Glob patterns must not contain stray whitespace: "llama.cpp/ggml .c" would
# only match a file literally named "ggml .c", leaving the list empty.
1717file (GLOB CPP_FILES "llama.cpp/*.cpp" )
18- file (GLOB C_FILES "llama.cpp/*.c" )
19- file (GLOB H_FILES "llama.cpp/*.h" )
18+ file (GLOB C_FILES "llama.cpp/ggml.c" )
19+ file (GLOB H_FILES "llama.cpp/ggml.h" "llama.cpp/llama.h" )
2020
2121# ---------------------------------------------------------------------------------------------
2222
@@ -76,11 +76,19 @@ option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer"
# User-facing build switches (all cached BOOL options).
# Instruction-set toggles: read later by the x86 branch of the architecture
# section to choose /arch:* (MSVC) or -m* (other compilers) flags.
7676option (LLAMA_AVX "llama: enable AVX" ON )
7777option (LLAMA_AVX2 "llama: enable AVX2" ON )
7878option (LLAMA_AVX512 "llama: enable AVX512" OFF )
79+ option (LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF )
80+ option (LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF )
7981option (LLAMA_FMA "llama: enable FMA" ON )
82+ # in MSVC F16C is implied with AVX2/AVX512
# so LLAMA_F16C is only declared for non-MSVC compilers; under MSVC the
# variable stays undefined (its only visible use is in the non-MSVC branch).
83+ if (NOT MSVC )
84+ option (LLAMA_F16C "llama: enable F16C" ON )
85+ endif ()
8086
8187# 3rd party libs
# Each backend switch below gates a matching if (LLAMA_<NAME>) detection block.
8288option (LLAMA_ACCELERATE "llama: enable Accelerate framework" ON )
8389option (LLAMA_OPENBLAS "llama: use OpenBLAS" OFF )
90+ option (LLAMA_CUBLAS "llama: use cuBLAS" OFF )
91+ option (LLAMA_CLBLAST "llama: use CLBlast" OFF )
8492
# Tests and examples default to the value of LLAMA_STANDALONE, which is
# defined outside this excerpt.
8593option (LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE} )
8694option (LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE} )
@@ -89,6 +97,11 @@ option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
8997# Compile flags
9098#
9199
# Project-wide language levels: C++11 and C11, both mandatory (no silent
# fallback to an older standard when the compiler lacks support).
100+ set (CMAKE_CXX_STANDARD 11)
101+ set (CMAKE_CXX_STANDARD_REQUIRED true )
102+ set (CMAKE_C_STANDARD 11)
103+ set (CMAKE_C_STANDARD_REQUIRED true )
# Must be set before find_package(Threads): asks FindThreads to prefer the
# -pthread compiler/linker flag where the toolchain supports it.
104+ set (THREADS_PREFER_PTHREAD_FLAG ON )
92105find_package (Threads REQUIRED)
93106
94107if (NOT MSVC )
@@ -119,6 +132,7 @@ if (APPLE AND LLAMA_ACCELERATE)
119132 message (WARNING "Accelerate framework not found" )
120133 endif ()
121134endif ()
135+
122136if (LLAMA_OPENBLAS)
123137 if (LLAMA_STATIC)
124138 set (BLA_STATIC ON )
@@ -131,11 +145,66 @@ if (LLAMA_OPENBLAS)
131145
132146 add_compile_definitions (GGML_USE_OPENBLAS)
133147 add_link_options (${BLAS_LIBRARIES} )
148+ set (LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} openblas)
149+
150+ # find header file
# Candidate directories that may contain cblas.h. Every entry is a single
# path token: a space inside an entry would split it into two list items
# (e.g. "/usr/include" plus a bogus root directory "/openblas").
# $ENV{OpenBLAS_HOME} is read at configure time only.
151+ set (OPENBLAS_INCLUDE_SEARCH_PATHS
152+ /usr/include
153+ /usr/include/openblas
154+ /usr/include/openblas-base
155+ /usr/local/include
156+ /usr/local/include/openblas
157+ /usr/local/include/openblas-base
158+ /opt/OpenBLAS/include
159+ $ENV{OpenBLAS_HOME}
160+ $ENV{OpenBLAS_HOME}/include
161+ )
# OPENBLAS_INC receives the directory holding cblas.h, or
# OPENBLAS_INC-NOTFOUND when none of the locations has it.
162+ find_path (OPENBLAS_INC NAMES cblas.h PATHS ${OPENBLAS_INCLUDE_SEARCH_PATHS} )
# Only add the include flag when the header was actually located; $<BOOL:...>
# evaluates to 0 for values ending in -NOTFOUND, so no
# "-IOPENBLAS_INC-NOTFOUND" reaches the compiler.
163+ add_compile_options ("$<$<BOOL:${OPENBLAS_INC}>:-I${OPENBLAS_INC}>" )
134164 else ()
135165 message (WARNING "OpenBLAS not found" )
136166 endif ()
137167endif ()
138168
# Optional cuBLAS backend. When LLAMA_CUBLAS is ON and the CUDA toolkit is
# found: enables the CUDA language, records the CUDA source files, defines
# GGML_USE_CUBLAS for all targets in this directory and appends the CUDA
# runtime / cuBLAS imported targets to LLAMA_EXTRA_LIBS. If the toolkit is
# missing, only a warning is printed and the build continues without it.
169+ if (LLAMA_CUBLAS)
# The FindCUDAToolkit module used below requires CMake 3.17 or newer.
170+ cmake_minimum_required (VERSION 3.17)
171+
172+ find_package (CUDAToolkit)
173+ if (CUDAToolkit_FOUND)
174+ message (STATUS "cuBLAS found" )
175+
176+ enable_language (CUDA)
177+
# Relative file names; presumably resolved where the library target is
# declared (not visible in this excerpt) - verify against that call site.
178+ set (GGML_CUDA_SOURCES ggml-cuda.cu ggml-cuda.h)
179+
180+ add_compile_definitions (GGML_USE_CUBLAS)
181+
# Pick the static or shared flavour of the CUDA libraries to match LLAMA_STATIC.
182+ if (LLAMA_STATIC)
183+ set (LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
184+ else ()
185+ set (LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart CUDA::cublas CUDA::cublasLt)
186+ endif ()
187+
188+ else ()
189+ message (WARNING "cuBLAS not found" )
190+ endif ()
191+ endif ()
192+
# Optional CLBlast (OpenCL) backend. When LLAMA_CLBLAST is ON and the CLBlast
# package is found: records the OpenCL source files, defines GGML_USE_CLBLAST
# and appends clblast to LLAMA_EXTRA_LIBS. Otherwise only a warning is issued.
193+ if (LLAMA_CLBLAST)
194+ find_package (CLBlast)
195+ if (CLBlast_FOUND)
196+ message (STATUS "CLBlast found" )
197+
# Relative file names; presumably consumed where the library target is
# declared (not visible in this excerpt).
198+ set (GGML_OPENCL_SOURCES ggml-opencl.c ggml-opencl.h)
199+
200+ add_compile_definitions (GGML_USE_CLBLAST)
201+
# NOTE(review): links by the plain name "clblast"; confirm whether the
# package also exports an imported target that should be preferred.
202+ set (LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} clblast)
203+ else ()
204+ message (WARNING "CLBlast not found" )
205+ endif ()
206+ endif ()
207+
139208if (LLAMA_ALL_WARNINGS)
140209 if (NOT MSVC )
141210 set (c_flags
@@ -147,14 +216,14 @@ if (LLAMA_ALL_WARNINGS)
147216 -Wshadow
148217 -Wstrict-prototypes
149218 -Wpointer-arith
150- -Wno-unused-function
151219 )
152220 set (cxx_flags
153221 -Wall
154222 -Wextra
155223 -Wpedantic
156224 -Wcast-qual
157225 -Wno-unused-function
226+ -Wno-multichar
158227 )
159228 else ()
160229 # todo : msvc
@@ -167,6 +236,14 @@ if (LLAMA_ALL_WARNINGS)
167236
168237endif ()
169238
# MSVC-only settings.
239+ if (MSVC )
# Defines _CRT_SECURE_NO_WARNINGS for every target in this directory
# (silences the CRT "unsafe function" deprecation warnings).
240+ add_compile_definitions (_CRT_SECURE_NO_WARNINGS)
241+
# For shared-library builds, have CMake export all symbols automatically so
# DLLs work without explicit __declspec(dllexport) annotations.
242+ if (BUILD_SHARED_LIBS )
243+ set (CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON )
244+ endif ()
245+ endif ()
246+
170247if (LLAMA_LTO)
171248 include (CheckIPOSupported)
172249 check_ipo_supported(RESULT result OUTPUT output )
@@ -202,22 +279,52 @@ if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES
202279 # TODO: arm msvc?
203280 else ()
204281 if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64" )
282+ # Apple M1, M2, etc.
283+ # Raspberry Pi 3, 4, Zero 2 (64-bit)
205284 add_compile_options (-mcpu=native)
206285 endif ()
207- # TODO: armv6,7,8 version specific flags
# 32-bit ARM: add version-specific compiler flags based on a substring match
# of CMAKE_SYSTEM_PROCESSOR. The three checks are independent if() blocks,
# not an if/elseif chain.
# NOTE(review): armv6 and armv7 both request -mfpu=neon-fp-armv8; confirm the
# listed boards' FPUs actually support that setting.
286+ if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6" )
287+ # Raspberry Pi 1, Zero
288+ add_compile_options (-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access)
289+ endif ()
290+ if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7" )
291+ # Raspberry Pi 2
# armv7 additionally enables -funsafe-math-optimizations.
292+ add_compile_options (-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations)
293+ endif ()
294+ if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8" )
295+ # Raspberry Pi 3, 4, Zero 2 (32-bit)
# No -mfpu override here, unlike the armv6/armv7 cases.
296+ add_compile_options (-mfp16-format=ieee -mno-unaligned-access)
297+ endif ()
208298 endif ()
209299elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$" )
210300 message (STATUS "x86 detected" )
211301 if (MSVC )
# MSVC: select a single /arch level, highest enabled option first
# (AVX512, then AVX2, then AVX). The change wraps each flag in a
# COMPILE_LANGUAGE generator expression so it is applied to C and C++ sources
# only - presumably to keep it away from other enabled languages such as CUDA.
212302 if (LLAMA_AVX512)
213- add_compile_options (/arch:AVX512)
303+ add_compile_options ($<$<COMPILE_LANGUAGE:C>:/arch:AVX512>)
304+ add_compile_options ($<$<COMPILE_LANGUAGE:CXX>:/arch:AVX512>)
305+ # MSVC has no compile-time flags enabling specific
306+ # AVX512 extensions, nor does it define the
307+ # macros corresponding to the extensions.
308+ # Define them manually.
309+ if (LLAMA_AVX512_VBMI)
310+ add_compile_definitions ($<$<COMPILE_LANGUAGE:C>:__AVX512VBMI__>)
311+ add_compile_definitions ($<$<COMPILE_LANGUAGE:CXX>:__AVX512VBMI__>)
312+ endif ()
313+ if (LLAMA_AVX512_VNNI)
314+ add_compile_definitions ($<$<COMPILE_LANGUAGE:C>:__AVX512VNNI__>)
315+ add_compile_definitions ($<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>)
316+ endif ()
214317 elseif (LLAMA_AVX2)
215- add_compile_options (/arch:AVX2)
318+ add_compile_options ($<$<COMPILE_LANGUAGE:C>:/arch:AVX2>)
319+ add_compile_options ($<$<COMPILE_LANGUAGE:CXX>:/arch:AVX2>)
216320 elseif (LLAMA_AVX)
217- add_compile_options (/arch:AVX)
321+ add_compile_options ($<$<COMPILE_LANGUAGE:C>:/arch:AVX>)
322+ add_compile_options ($<$<COMPILE_LANGUAGE:CXX>:/arch:AVX>)
218323 endif ()
219324 else ()
220- add_compile_options (-mf16c)
325+ if (LLAMA_F16C)
326+ add_compile_options (-mf16c)
327+ endif ()
221328 if (LLAMA_FMA)
222329 add_compile_options (-mfma)
223330 endif ()
@@ -229,13 +336,20 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$")
229336 endif ()
# Non-MSVC compilers: AVX512 flags. LLAMA_AVX512 enables the foundation (F)
# set and, after this change, byte/word (BW) instructions as well.
# VBMI and VNNI are separate if() blocks keyed only on their own options, so
# they are not nested under LLAMA_AVX512 here (unlike the MSVC branch).
230337 if (LLAMA_AVX512)
231338 add_compile_options (-mavx512f)
232- # add_compile_options(-mavx512cd)
233- # add_compile_options(-mavx512dq)
234- # add_compile_options(-mavx512bw)
339+ add_compile_options (-mavx512bw)
340+ endif ()
341+ if (LLAMA_AVX512_VBMI)
342+ add_compile_options (-mavx512vbmi)
343+ endif ()
344+ if (LLAMA_AVX512_VNNI)
345+ add_compile_options (-mavx512vnni)
235346 endif ()
236347 endif ()
348+ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64" )
349+ message (STATUS "PowerPC detected" )
350+ add_compile_options (-mcpu=native -mtune=native)
351+ #TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be)
237352else ()
238- # TODO: support PowerPC
239353 message (STATUS "Unknown architecture" )
240354endif ()
241355
0 commit comments