diff --git a/CMakeLists.txt b/CMakeLists.txt index 7b2a1845e5c7c..a24eb66eefa64 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,11 @@ cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories. project("llama.cpp" C CXX) + + +if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + add_compile_options(-mfma -mavx2) +endif() + include(CheckIncludeFileCXX) #set(CMAKE_WARN_DEPRECATED YES) diff --git a/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp b/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp index c24fd56e20886..3f1cc1ad77a30 100644 --- a/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +++ b/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp @@ -114,7 +114,7 @@ static inline __m512 __avx512_repeat_f32cx16_load(__m128i x) { return _mm512_loadu_ps(tmp); } #endif -static inline __m256 __avx_f32cx8_load(ggml_fp16_t *x) { +static inline __m256 __avx_f32cx8_load(const ggml_fp16_t *x) { float tmp[8]; for (int i = 0; i < 8; i++) { @@ -123,7 +123,7 @@ static inline __m256 __avx_f32cx8_load(ggml_fp16_t *x) { return _mm256_loadu_ps(tmp); } -static inline __m256 __avx_repeat_f32cx8_load(ggml_fp16_t *x) { +static inline __m256 __avx_repeat_f32cx8_load(const ggml_fp16_t *x) { float tmp[8]; for (int i = 0; i < 4; i++) { @@ -133,7 +133,7 @@ static inline __m256 __avx_repeat_f32cx8_load(ggml_fp16_t *x) { return _mm256_loadu_ps(tmp); } -static inline __m256 __avx_rearranged_f32cx8_load(ggml_fp16_t *x, __m128i arrangeMask) { +static inline __m256 __avx_rearranged_f32cx8_load(const ggml_fp16_t *x, __m128i arrangeMask) { uint16_t tmphalf[8]; float tmp[8]; diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 084240331ef93..484d1527e602e 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -2,6 +2,7 @@ #define _USE_MATH_DEFINES // For M_PI on MSVC #include "ggml-backend.h" +#include #include "ggml-impl.h" #include "ggml-threading.h" #include "ggml.h"