project: sync with upstream(PR-14501:remove kompute backend)

zhouwg · zhouwg · commit fe95d3b828dd · 2025-07-05T18:08:08.000+08:00
diff --git a/ggml/include/ggml-hexagon.h b/ggml/include/ggml-hexagon.h
@@ -1,6 +1,3 @@
- /*
- * Copyright (c) 2024-2025 The ggml authors
- */
 #pragma once
 
 #include "ggml.h"
@@ -44,6 +41,8 @@ GGML_BACKEND_API void               ggml_backend_hexagon_set_cfg(int new_hexagon
 
 GGML_BACKEND_API int                ggml_backend_hexagon_get_mulmat_algotype(void);
 
+GGML_BACKEND_API void               ggml_backend_hexagon_set_mulmat_algotype(int new_mulmat_algotype);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/ggml/src/ggml-hexagon/CMakeLists.txt b/ggml/src/ggml-hexagon/CMakeLists.txt
@@ -32,7 +32,7 @@ endif()
 #v69 --- Snapdragon 8 Gen1
 #v73 --- Snapdragon 8 Gen2
 #v75 --- Snapdragon 8 Gen3
-#v79 --- Snapdragon 8 Elite(aka Gen4)
+#v79 --- Snapdragon 8 Elite
 if(NOT DEFINED HTP_ARCH_VERSION)
     message(FATAL_ERROR "HTP_ARCH_VERSION not defined, valid htp arch: v68,v69,v73,v75,v79")
 endif()
diff --git a/ggml/src/ggml-hexagon/ggml-hexagon.cpp b/ggml/src/ggml-hexagon/ggml-hexagon.cpp
diff --git a/ggml/src/ggml-hexagon/kernels/add.c b/ggml/src/ggml-hexagon/kernels/add.c
@@ -134,7 +134,6 @@ static void ggml_compute_forward_add_f32(
     GGMLHEXAGON_LOG_DEBUG("leave %s", __func__ );
 }
 
-//FIXME: why failed with test-backend-ops when disable ion rpc mempool
 int ggmlop_dsp_add(remote_handle64 h, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
     GGMLHEXAGON_LOG_DEBUG("enter %s\n", __func__);
     ggml_compute_forward_add_f32(src0, src1, dst);
diff --git a/ggml/src/ggml-hexagon/kernels/ggml-dsp.c b/ggml/src/ggml-hexagon/kernels/ggml-dsp.c
@@ -1,31 +1,3 @@
-/*
- * Copyright (c) 2025 The ggml authors
- *
- * Qualcomm Hexagon SDK and reference tech guides could be found at:
- * https://developer.qualcomm.com/software/hexagon-dsp-sdk/tools
- *
- * this single-source-file or self-contained file is implementation of ggml-dsp:
- *    - a customized tiny ggml running on Qualcomm Hexagon cDSP
- *    - ported from original ggml
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
 #include "ggml-dsp.h"
 
 void ggmlhexagon_log_internal(int level, const char *file, const char *func, int line, const char *format, ...) {
diff --git a/ggml/src/ggml-hexagon/kernels/mulmat.c b/ggml/src/ggml-hexagon/kernels/mulmat.c
@@ -1,46 +1,5 @@
 #include "ggml-dsp.h"
 
-// 128 byte vectors
-#define VSIZE_BYTES 128
-#define VSIZE_WORDS VSIZE_BYTES/4
-
-union ui32f { int32_t i; float f; };
-
-// create a vector of floats from a float
-static __attribute__((always_inline)) HVX_Vector create_sfv_from_sf(float value) {
-    union ui32f cvt;
-    cvt.f = value;
-    HVX_Vector tmp = Q6_V_vsplat_R(cvt.i);
-    return tmp;
-}
-
-// create a vector of qf32's from a float
-static __attribute__((always_inline)) HVX_Vector create_qf32v_from_sf(float value) {
-    HVX_Vector tmp = Q6_Vqf32_vadd_Vqf32Vsf(Q6_V_vsplat_R(0), create_sfv_from_sf(value));
-    return tmp;
-}
-
-// convert qf32 vector to float vector
-static __attribute__((always_inline)) HVX_Vector convert_qf32v_to_fltv(HVX_Vector vect) {
-    HVX_Vector tmp = Q6_Vsf_equals_Vqf32(vect);
-    return tmp;
-}
-
-// get lowest float from a vector of floats
-static __attribute__((always_inline)) float get_flt0_from_fltv(HVX_Vector vect) {
-    union ui32f cvt;
-    cvt.i = vect[0];
-    return cvt.f;
-}
-
-// get lowest float from a vector of qf32's
-static __attribute__((always_inline)) float get_flt0_from_qf32v(HVX_Vector vect) {
-    union ui32f cvt;
-    HVX_Vector tmp = convert_qf32v_to_fltv(vect);
-    cvt.i = tmp[0];
-    return cvt.f;
-}
-
 static void vec_dot_f32(int n, float *GGML_RESTRICT s, size_t bs, const float *GGML_RESTRICT x,
                     size_t bx, const float *GGML_RESTRICT y, size_t by, int nrc) {
     assert(nrc == 1);
@@ -145,7 +104,6 @@ static void ggml_compute_forward_mul_mat_one_chunk(const ggml_tensor *src0, cons
     }
 }
 
-//TODO: only support fp32 mulmat on cDSP
 static int ggmlop_dsp_mulmat_singlethread(remote_handle64 h, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
     GGMLHEXAGON_LOG_DEBUG("enter %s", __func__ );
     ggmlhexagon_dump_tensor(src0, 0);
@@ -274,7 +232,6 @@ static int ggmlop_dsp_mulmat_singlethread(remote_handle64 h, const ggml_tensor *
     return 0;
 }
 
-//TODO:multithreading mulmat
 static int ggmlop_dsp_mulmat_multithread(remote_handle64 h, const struct dsptensor * src0, const struct dsptensor * src1, dsptensor * dst) {
     GGMLHEXAGON_LOG_DEBUG("enter %s", __func__ );
     GGMLHEXAGON_LOG_DEBUG("leave %s", __func__ );
diff --git a/scripts/build-run-android.sh b/scripts/build-run-android.sh
@@ -14,7 +14,7 @@ PROJECT_ROOT_PATH=${PROJECT_HOME_PATH}
 HOST_CPU_COUNTS=`cat /proc/cpuinfo | grep "processor" | wc | awk '{print int($1)}'`
 
 #running path on Android phone
-REMOTE_PATH=/data/local/tmp/
+REMOTE_PATH=/data/local/tmp
 
 #Android NDK can be found at:
 #https://developer.android.com/ndk/downloads
@@ -31,9 +31,11 @@ QNN_SDK_VERSION=2.32.0.250228
 QNN_SDK_VERSION=2.33.0.250327
 QNN_SDK_VERSION=2.34.0.250424
 QNN_SDK_VERSION=2.35.0.250530
+QNN_SDK_VERSION=2.36.0.250627
 #fully official QNN SDK, will be downloaded automatically via this script
-QNN_SDK_PATH=${PROJECT_ROOT_PATH}/prebuilts/QNN_SDK/qairt/2.34.0.250424/
-QNN_SDK_PATH=${PROJECT_ROOT_PATH}/prebuilts/QNN_SDK/qairt/2.35.0.250530/
+QNN_SDK_PATH=${PROJECT_ROOT_PATH}/prebuilts/QNN_SDK/qairt/2.34.0.250424
+QNN_SDK_PATH=${PROJECT_ROOT_PATH}/prebuilts/QNN_SDK/qairt/2.35.0.250530
+QNN_SDK_PATH=${PROJECT_ROOT_PATH}/prebuilts/QNN_SDK/qairt/2.36.0.250627
 
 #Qualcomm Hexagon SDK can be found at:
 #https://developer.qualcomm.com/software/hexagon-dsp-sdk/tools
@@ -42,8 +44,8 @@ HEXAGON_SDK_PATH=/opt/qcom/Hexagon_SDK/6.2.0.1
 #customized/tailored Hexagon SDK from the offcial Hexagon SDK for simplify workflow
 HEXAGON_SDK_PATH=${PROJECT_ROOT_PATH}/prebuilts/Hexagon_SDK/6.2.0.1
 
-#running_params="- ngl 99 -t 8 -n 256 --no-warmup -fa 1 "
-running_params=" -ngl 99 -t 8 -n 256 --no-warmup "
+#running_params=" -ngl 99 -t 4 -n 256 --no-warmup -fa 1 "
+running_params=" -ngl 99 -t 4 -n 256 --no-warmup "
 
 ######## part-2 ########
 
@@ -52,8 +54,9 @@ PROMPT_STRING="introduce the movie Once Upon a Time in America briefly.\n"
 #1.12 GiB, will be downloadded automatically via this script
 GGUF_MODEL_NAME=/sdcard/qwen1_5-1_8b-chat-q4_0.gguf
 
-#ref: https://github.com/quic/ai-hub-apps/tree/main/tutorials/llm_on_genie
 #supported htp arch version:
+#v68 --- Snapdragon 888
+#v69 --- Snapdragon 8 Gen1
 #v73 --- Snapdragon 8 Gen2
 #v75 --- Snapdragon 8 Gen3
 #v79 --- Snapdragon 8 Elite
diff --git a/scripts/ggml-hexagon.cfg b/scripts/ggml-hexagon.cfg
@@ -1,6 +1,6 @@
 [general]
 #version of ggml-hexagon.cpp on ARM-AP side
-version = "1.12"
+version = "1.13"
 #version of ggml-dsp.c on cDSP side
 ggmldsp_version = "0.63"
 

Original file line number	Diff line number	Diff line change
`@@ -134,7 +134,6 @@ static void ggml_compute_forward_add_f32(`
`134`	`134`	`GGMLHEXAGON_LOG_DEBUG("leave %s", __func__ );`
`135`	`135`	`}`
`136`	`136`
`137`		`-//FIXME: why failed with test-backend-ops when disable ion rpc mempool`
`138`	`137`	`int ggmlop_dsp_add(remote_handle64 h, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {`
`139`	`138`	`GGMLHEXAGON_LOG_DEBUG("enter %s\n", __func__);`
`140`	`139`	`ggml_compute_forward_add_f32(src0, src1, dst);`