
Commit b9e805f

Minor updates for raising PR
1 parent 556fa6d commit b9e805f

File tree

3 files changed (+4, -40 lines changed)


CMakePresets.json

Lines changed: 0 additions & 20 deletions
@@ -1,26 +1,6 @@
 {
     "version": 4,
     "configurePresets": [
-        {
-            "name": "ReleaseOV",
-            "generator": "Ninja",
-            "binaryDir": "${sourceDir}/build/${presetName}",
-            "installDir": "${sourceDir}/build/install/${presetName}",
-            "cacheVariables": {
-                "CMAKE_BUILD_TYPE": "Release",
-                "GGML_OPENVINO": true,
-                "OpenVINO_DIR": "$env{OPENVINO_LLAMA_PATH}/build/Release"
-            }
-        },
-        {
-            "name": "ReleaseCPU",
-            "generator": "Ninja",
-            "binaryDir": "${sourceDir}/build/${presetName}",
-            "installDir": "${sourceDir}/build/install/${presetName}",
-            "cacheVariables": {
-                "CMAKE_BUILD_TYPE": "Release"
-            }
-        },
         {
             "name": "base",
             "hidden": true,

docs/build.md

Lines changed: 3 additions & 18 deletions
@@ -595,7 +595,7 @@ To read documentation for how to build on IBM Z & LinuxONE, [click here](./build
 
 ## OpenVINO
 
-[OpenVINO](https://docs.openvino.ai/2025/index.html) is an open-source toolkit for optimizing and deploying high-performance AI inference, specifically designed for Intel hardware, including CPUs, GPUs, and NPUs, in the cloud, on-premises, and on the edge.
+[OpenVINO](https://docs.openvino.ai/2025/index.html) is an open-source toolkit for optimizing and deploying high-performance AI inference, specifically designed for Intel hardware, including CPUs, GPUs, and NPUs, in the cloud, on-premises, and on the edge.
 The OpenVINO backend enhances performance by leveraging hardware-specific optimizations and can be enabled for use with llama.cpp.
 
 Follow the instructions below to install OpenVINO runtime and build llama.cpp with OpenVINO support.
@@ -697,9 +697,8 @@ export GGML_OPENVINO_CACHE_DIR=/tmp/ov_cache
 
 Control OpenVINO behavior using these environment variables:
 
-- **`GGML_OPENVINO_DEVICE`**: Specify the target device for OpenVINO inference. If not set, automatically selects the first available device in priority order: GPU, CPU, NPU. When set to `NPU` to use Intel NPUs, it enables static compilation mode for optimal performance.
-- **`GGML_OPENVINO_CACHE_DIR`**: Directory for model caching (recommended: `/tmp/ov_cache`). If set, enables model caching in OpenVINO. Note: Not supported when using NPU devices yet.
-- **`GGML_OPENVINO_WEIGHT_AS_INPUT`**: Pass the weights as input to the OpenVINO model instead of creating Constant nodes for them.
+- **`GGML_OPENVINO_DEVICE`**: Specify the target device for OpenVINO inference. If not set, automatically selects the first available device in priority order: GPU, CPU, NPU. When set to `NPU` to use Intel NPUs, it enables static compilation mode for optimal performance.
+- **`GGML_OPENVINO_CACHE_DIR`**: Directory for model caching (recommended: `/tmp/ov_cache`). If set, enables model caching in OpenVINO. Note: Not supported when using NPU devices yet.
 - **`GGML_OPENVINO_PROFILING`**: Enable execution time profiling.
 - **`GGML_OPENVINO_DUMP_CGRAPH`**: Save compute graph to `cgraph.txt`.
 - **`GGML_OPENVINO_DUMP_IR`**: Export OpenVINO IR files with timestamps.
@@ -714,20 +713,6 @@ export GGML_OPENVINO_PROFILING=1
 
 ./build/ReleaseOV/bin/llama-simple -m ~/models/Llama-3.2-1B-Instruct.fp16.gguf -n 50 "The story of AI is "
 ```
-> **Note:** To apply your code changes, clear the `GGML_OPENVINO_CACHE_DIR` directory and rebuild the project.
-
-### Using Llama.cpp's Built-in CPU Backend (for Comparison)
-
-To compare performance with the default CPU backend:
-
-```bash
-# Build CPU-only version
-cmake --preset ReleaseCPU
-cmake --build build/ReleaseCPU --parallel
-
-# Run with the default CPU backend
-./build/ReleaseCPU/bin/llama-simple -m ~/models/Llama-3.2-1B-Instruct.fp16.gguf -n 50 "The story of AI is "
-```
 
 ## Notes about GPU-accelerated backends
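The environment variables documented in this file are plain runtime switches read by the backend. A usage sketch, assuming the OpenVINO build sits in `build/ReleaseOV` and the model path used elsewhere in the docs:

```bash
# Select the target device explicitly and enable model caching
# (caching is not yet supported on NPU, per the docs above).
export GGML_OPENVINO_DEVICE=GPU
export GGML_OPENVINO_CACHE_DIR=/tmp/ov_cache

# Optional: per-op execution time profiling
export GGML_OPENVINO_PROFILING=1

./build/ReleaseOV/bin/llama-simple -m ~/models/Llama-3.2-1B-Instruct.fp16.gguf -n 50 "The story of AI is "
```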

ggml/src/ggml-openvino/ggml-decoder.cpp

Lines changed: 1 addition & 2 deletions
@@ -57,8 +57,7 @@ GgmlOvDecoder::GgmlOvDecoder(struct ggml_cgraph* cgraph,
     }
 
     if (getenv("GGML_OPENVINO_DUMP_CGRAPH")) {
-        auto timestamp = (long long) ggml_time_us();
-        std::string filename = "cgraph_" + std::to_string(timestamp) + ".txt";
+        std::string filename = "cgraph.txt";
         dump_cgraph(cgraph, filename);
     }
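With the timestamp dropped, each dump overwrites one fixed file instead of accumulating `cgraph_<timestamp>.txt` files. A quick check, assuming the relative path above resolves to the working directory:

```bash
# Enable the compute-graph dump and run once; the graph lands in cgraph.txt,
# and repeated runs simply overwrite it.
export GGML_OPENVINO_DUMP_CGRAPH=1
./build/ReleaseOV/bin/llama-simple -m ~/models/Llama-3.2-1B-Instruct.fp16.gguf -n 50 "The story of AI is "
less cgraph.txt
```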
