diff --git a/.github/scripts/get_cpu_info.sh b/.github/scripts/get_cpu_info.sh new file mode 100755 index 00000000..82c46b84 --- /dev/null +++ b/.github/scripts/get_cpu_info.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# Copyright 2025 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Script to get CPU information using platform-agnostic python packages + +# Install python packages if not present in the environment +if ! python -m pip show archspec > /dev/null 2>&1; then + python -m pip install archspec +fi + +if ! python -m pip show py-cpuinfo > /dev/null 2>&1; then + python -m pip install py-cpuinfo +fi + +# Print host microarchitecture +python -c "import archspec.cpu; \ + print('Host Microarchitecture[archspec]:', archspec.cpu.host().name)" + +# Print full CPU information +python -c "import pprint, cpuinfo; \ + print('CPU info[py-cpuinfo]:'); \ + pprint.pprint(cpuinfo.get_cpu_info(), indent=4, compact=True)" diff --git a/.github/scripts/install_sde.sh b/.github/scripts/install_sde.sh new file mode 100755 index 00000000..25310c5a --- /dev/null +++ b/.github/scripts/install_sde.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# Copyright 2025 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +wget --content-disposition "https://downloadmirror.intel.com/850782/sde-external-9.53.0-2025-03-16-lin.tar.xz" +tar -xf sde-external-*-lin.tar.xz +cd sde-external-*/ +echo "$PWD" >> $GITHUB_PATH diff --git a/.github/workflows/build-linux-arm.yml b/.github/workflows/build-linux-arm.yml index 07811f00..7ea8c425 100644 --- a/.github/workflows/build-linux-arm.yml +++ b/.github/workflows/build-linux-arm.yml @@ -47,6 +47,10 @@ jobs: steps: - uses: actions/checkout@v4 + - name: Get CPU info + run: | + bash ${GITHUB_WORKSPACE}/.github/scripts/get_cpu_info.sh + - name: Configure build working-directory: ${{ runner.temp }} env: @@ -70,3 +74,17 @@ jobs: CTEST_OUTPUT_ON_FAILURE: 1 working-directory: ${{ runner.temp }}/build/tests run: ctest -C ${{ matrix.build_type }} + + - name: Build Python Bindings + env: + CXX: ${{ matrix.cxx }} + CC: ${{ matrix.cc }} + run: | + cd bindings/python + python -m pip install . + + - name: Run Python Microarch Test + run: | + cd bindings/python + python -c "import svs; svs.microarch.describe()" + python -m unittest discover -p "test_microarch.py" -s . 
diff --git a/.github/workflows/build-linux.yml b/.github/workflows/build-linux.yml index a03bdd78..23d9a90a 100644 --- a/.github/workflows/build-linux.yml +++ b/.github/workflows/build-linux.yml @@ -56,6 +56,12 @@ jobs: source /opt/intel/oneapi/setvars.sh printenv >> $GITHUB_ENV + - name: Install Intel(R) SDE + run: source ${GITHUB_WORKSPACE}/.github/scripts/install_sde.sh + + - name: Get CPU info + run: bash ${GITHUB_WORKSPACE}/.github/scripts/get_cpu_info.sh + - name: Configure build working-directory: ${{ runner.temp }} env: @@ -86,3 +92,21 @@ jobs: CTEST_OUTPUT_ON_FAILURE: 1 working-directory: ${{ runner.temp }}/build/examples/cpp run: ctest -C RelWithDebugInfo + + - name: Build Python Bindings + env: + CXX: ${{ matrix.cxx }} + CC: ${{ matrix.cc }} + run: | + cd bindings/python + python -m pip install . + + - name: Run Python Microarch Test with SDE + run: | + cd bindings/python + for flag in nhm hsw skx clx icl; do + echo "SDE emulation: $flag" + export SDE_FLAG=$flag + sde64 -$flag -- python -c "import svs; svs.microarch.describe()" + sde64 -$flag -- python -m unittest discover -p "test_microarch.py" -s . + done diff --git a/.github/workflows/build-macos.yaml b/.github/workflows/build-macos.yaml index a382d525..9069fda1 100644 --- a/.github/workflows/build-macos.yaml +++ b/.github/workflows/build-macos.yaml @@ -46,6 +46,10 @@ jobs: steps: - uses: actions/checkout@v4 + - name: Get CPU info + run: | + bash ${GITHUB_WORKSPACE}/.github/scripts/get_cpu_info.sh + - name: Install Compiler run: | echo "Installing ${{ matrix.package }}..." 
@@ -83,3 +87,28 @@ jobs: CTEST_OUTPUT_ON_FAILURE: 1 working-directory: ${{ runner.temp }}/build/tests run: ctest -C ${{ matrix.build_type }} + + - name: Build Python Bindings + env: + CXX: ${{ matrix.cxx }} + CC: ${{ matrix.cc }} + run: | + if [[ "${{ matrix.needs_prefix }}" == "true" ]]; then + # For non-default packages like llvm@15, get the install prefix + COMPILER_PREFIX=$(brew --prefix ${{ matrix.package }}) + export CC="${COMPILER_PREFIX}/bin/${{ matrix.cc_name }}" + export CXX="${COMPILER_PREFIX}/bin/${{ matrix.cxx_name }}" + else + # For versioned GCC installs, the name is usually directly available + export CC="${{ matrix.cc_name }}" + export CXX="${{ matrix.cxx_name }}" + fi + + cd bindings/python + python -m pip install . + + - name: Run Python Microarch Test + run: | + cd bindings/python + python -c "import svs; svs.microarch.describe()" + python -m unittest discover -p "test_microarch.py" -s . diff --git a/.github/workflows/cibuildwheel.yml b/.github/workflows/cibuildwheel.yml index 87198b98..8ca5dc92 100644 --- a/.github/workflows/cibuildwheel.yml +++ b/.github/workflows/cibuildwheel.yml @@ -43,6 +43,10 @@ jobs: - name: Install cibuildwheel run: python -m pip install cibuildwheel + - name: Get CPU info + run: | + bash ${GITHUB_WORKSPACE}/.github/scripts/get_cpu_info.sh + # Install inside the temporary working directory. 
- name: Build Wheel env: diff --git a/.licenserc.yaml b/.licenserc.yaml index 815de7ee..48f920b2 100644 --- a/.licenserc.yaml +++ b/.licenserc.yaml @@ -45,6 +45,9 @@ header: - 'THIRD-PARTY-PROGRAMS' - '.github/renovate.json' - 'cmake/mkl_functions' + - 'cmake/microarch_targets_aarch64' + - 'cmake/microarch_targets_aarch64_darwin' + - 'cmake/microarch_targets_x86_64' - 'cmake/patches/tomlplusplus_v330.patch' - 'docker/x86_64/manylinux2014/oneAPI.repo' - 'docs/cpp/index/loader-compatibility.csv' diff --git a/CMakeLists.txt b/CMakeLists.txt index 314a6b33..a89230e5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -67,6 +67,7 @@ target_compile_options( include("cmake/options.cmake") +include("cmake/microarch.cmake") include("cmake/clang-tidy.cmake") include("cmake/eve.cmake") include("cmake/pthread.cmake") @@ -80,6 +81,8 @@ include("cmake/toml.cmake") ##### Build Objects ##### +create_microarch_instantiations() + if(SVS_BUILD_BINARIES) add_subdirectory(utils) endif() @@ -105,7 +108,7 @@ endif() ##### Install Logic ##### -include(GNUInstallDirs) +include(GNUInstallDirs) # Location of auxiliary generated cmake files to help consumers of this package. 
set(LIB_CONFIG_INSTALL_DIR "${CMAKE_INSTALL_LIBDIR}/cmake/svs") diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 5c042c29..67a49a46 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -48,7 +48,7 @@ set(SHARED_LIBRARY_FILES src/inverted/memory/executables/memory_test.cpp ) -add_library(svs_benchmark_library SHARED ${SHARED_LIBRARY_FILES}) +add_library(svs_benchmark_library SHARED ${SHARED_LIBRARY_FILES} ${MICROARCH_OBJECT_FILES}) target_include_directories(svs_benchmark_library PUBLIC ${CMAKE_CURRENT_LIST_DIR}/include) # Minimal @@ -104,7 +104,7 @@ target_link_libraries( PUBLIC ${SVS_LIB} svs_compile_options - svs_native_options + ${MICROARCH_OBJECT_FILES} fmt::fmt ) diff --git a/bindings/python/CMakeLists.txt b/bindings/python/CMakeLists.txt index 495eec2c..6bc333c6 100644 --- a/bindings/python/CMakeLists.txt +++ b/bindings/python/CMakeLists.txt @@ -24,68 +24,9 @@ FetchContent_Declare( ) FetchContent_MakeAvailable(pybind11) -# Try to find the Python executable. -# -# If it's given as part of the Cmake arguments given by "scikit build", then use that. -# Otherwise, fall back to using plain old "python". -# If *THAT* doesn't work, give up. -if(DEFINED PYTHON_EXECUTABLE) - set(SVS_PYTHON_EXECUTABLE "${PYTHON_EXECUTABLE}") -else() - set(SVS_PYTHON_EXECUTABLE "python") -endif() - -# The micro architectures to compile for. -if(NOT DEFINED SVS_MICROARCHS) - set(SVS_MICROARCHS native) -endif() - # Include the SVS library directly. add_subdirectory("../.." "${CMAKE_CURRENT_BINARY_DIR}/svs") -# Run the python script to get optimization flags for the desired back-ends. -# -# FLAGS_SCRIPT - Path to the Python script that will take the compiler, compiler version, -# and list of desired microarchitectures and generate optimization flags for each -# microarchitecture. -# -# FLAGS_TEXT_FILE - List of optimization flags for each architecture. -# Expected format: -# -march=arch1,-mtune=arch1 -# -march=arch2,-mtune=arch2 -# ... 
-# -march=archN,-mtune=archN -# -# The number of lines should be equal to the number of microarchitectures. -# NOTE: The entries within each line are separated by a comma on purpose to allow CMake -# to read the whole file as a List and then use string replacement on the commas to turn -# each line into a list in its own right. -# -# TEMP_JSON - JSON Manifest file describing the generated binaries. This is meant to be -# included in the Python package to allow the Python code to reason about the packaged -# libraries and select the correct one for loading. -# -set(FLAGS_SCRIPT "${CMAKE_CURRENT_LIST_DIR}/microarch.py") -set(FLAGS_TEXT_FILE "${CMAKE_CURRENT_BINARY_DIR}/optimization_flags.txt") -set(FLAGS_MANIFEST_JSON "${CMAKE_CURRENT_BINARY_DIR}/flags_manifest.json") - -execute_process( - COMMAND - ${SVS_PYTHON_EXECUTABLE} - ${FLAGS_SCRIPT} - ${FLAGS_TEXT_FILE} - ${FLAGS_MANIFEST_JSON} - --compiler ${CMAKE_CXX_COMPILER_ID} - --compiler-version ${CMAKE_CXX_COMPILER_VERSION} - --microarchitectures ${SVS_MICROARCHS} - COMMAND_ERROR_IS_FATAL ANY -) - -file(STRINGS "${FLAGS_TEXT_FILE}" OPTIMIZATION_FLAGS) -message("Flags: ${OPTIMIZATION_FLAGS}") -list(LENGTH OPTIMIZATION_FLAGS OPT_FLAGS_LENGTH) -message("Length of flags: ${OPT_FLAGS_LENGTH}") - # C++ files makind up the python bindings. set(CPP_FILES src/allocator.cpp @@ -98,51 +39,33 @@ set(CPP_FILES src/svs_mkl.cpp ) -# Generate a shared library for each target microarchitecture. 
-foreach(MICRO OPT_FLAGS IN ZIP_LISTS SVS_MICROARCHS OPTIMIZATION_FLAGS) - set(LIB_NAME "_svs_${MICRO}") +set(LIB_NAME "_svs") +pybind11_add_module(${LIB_NAME} MODULE ${CPP_FILES} ${MICROARCH_OBJECT_FILES}) +target_link_libraries(${LIB_NAME} PRIVATE pybind11::module) +target_link_libraries(${LIB_NAME} PUBLIC svs::svs) +# Dependency "fmt::fmt" obtained from "svs" +target_link_libraries(${LIB_NAME} PRIVATE svs::compile_options fmt::fmt svs::microarch_options_base) +target_include_directories( + ${LIB_NAME} + PUBLIC $ +) - pybind11_add_module(${LIB_NAME} MODULE ${CPP_FILES}) - target_link_libraries(${LIB_NAME} PUBLIC svs::svs) - # Dependency "fmt::fmt" obtained from "svs" - target_link_libraries(${LIB_NAME} PRIVATE svs::compile_options fmt::fmt) +if(DEFINED SKBUILD) + install(TARGETS ${LIB_NAME} DESTINATION .) - string(REPLACE "," ";" OPT_FLAGS ${OPT_FLAGS}) - message("OPT Flags: ${OPT_FLAGS}") - target_compile_options(${LIB_NAME} PRIVATE ${OPT_FLAGS}) + # The extension module may need to load build or included libraries when loaded. - # Header files. - target_include_directories( + # Placing build depedencies in the package and using relative RPATHs that + # don't point outside of the package means that the built package is + # relocatable. This allows for safe binary redistribution. + set_target_properties( ${LIB_NAME} - PUBLIC $ + PROPERTIES + INSTALL_RPATH "$ORIGIN/${CMAKE_INSTALL_LIBDIR}" ) - - # Comunicate to the C++ library the desired name of the library - target_compile_options(${LIB_NAME} PRIVATE "-DSVS_MODULE_NAME=${LIB_NAME}") - - # If scikit build is running the compilation process, - if(DEFINED SKBUILD) - install(TARGETS ${LIB_NAME} DESTINATION .) - - # The extension module may need to load build or included libraries when loaded. - - # Placing build depedencies in the package and using relative RPATHs that - # don't point outside of the package means that the built package is - # relocatable. This allows for safe binary redistribution. 
- set_target_properties( - ${LIB_NAME} - PROPERTIES - INSTALL_RPATH "$ORIGIN/${CMAKE_INSTALL_LIBDIR}" - ) - endif() -endforeach() +endif() if(DEFINED SKBUILD) - # Install the manifest JSON file. - # This is kind of a hack to avoid the needing to explicitly move JSON file into the - # source folder of the python library. - install(FILES ${FLAGS_MANIFEST_JSON} DESTINATION .) - # Install header files. install( DIRECTORY "${CMAKE_CURRENT_LIST_DIR}/include/svs" diff --git a/bindings/python/setup.py b/bindings/python/setup.py index 5d310749..7e8aa254 100644 --- a/bindings/python/setup.py +++ b/bindings/python/setup.py @@ -13,7 +13,6 @@ # limitations under the License. from skbuild import setup -import archspec.cpu as cpu import os # If building in a cibuildwheel context, compile multiple versions of the library for @@ -25,27 +24,6 @@ "-DCMAKE_EXPORT_COMPILE_COMMANDS=YES", ] -# Utility to convert micro-architecture strings to -def target(arch): - return cpu.TARGETS[arch] - -# N.B.: cibuildwheel must configure the multi-arch environment variable. -# Also, the micro-architectures defined below should be in order of preference. -if os.environ.get("SVS_MULTIARCH", None) is not None: - svs_microarchs = [ - "cascadelake", - "x86_64_v3", # conservative base CPU for x86 CPUs. - ] - - # Add the current host to the list of micro-architecture if it doesn't already exist. 
- last_target = target(svs_microarchs[-1]) - host_name = cpu.host().name - if host_name not in svs_microarchs and target(host_name) < last_target: - svs_microarchs.append(host_name) - - cmake_array = ";".join(svs_microarchs) - cmake_args.append(f"-DSVS_MICROARCHS={cmake_array}") - # Determine the root of the repository base_dir = os.path.relpath(os.path.join(os.path.dirname(__file__), '..', '..')) diff --git a/bindings/python/src/python_bindings.cpp b/bindings/python/src/python_bindings.cpp index e1ac92b6..dfbf3e88 100644 --- a/bindings/python/src/python_bindings.cpp +++ b/bindings/python/src/python_bindings.cpp @@ -26,6 +26,7 @@ // SVS dependencies #include "svs/core/distance.h" #include "svs/core/io.h" +#include "svs/lib/arch.h" #include "svs/lib/array.h" #include "svs/lib/datatype.h" #include "svs/lib/float16.h" @@ -43,15 +44,7 @@ // stl #include #include - -// Get the expected name of the library -// Make sure CMake stays up to date with defining this parameter. -// -// The variable allows us to customize the name of the python module to support -// micro-architecture versioning. -#if !defined(SVS_MODULE_NAME) -#define SVS_MODULE_NAME _svs_native -#endif +#include namespace py = pybind11; @@ -144,7 +137,7 @@ class ScopedModuleNameOverride { } // namespace -PYBIND11_MODULE(SVS_MODULE_NAME, m) { +PYBIND11_MODULE(_svs, m) { // Internall, the top level `__init__.py` imports everything from the C++ module named // `_svs`. 
// @@ -196,6 +189,90 @@ Convert the `fvecs` file on disk with 32-bit floating point entries to a `fvecs` wrap_conversion(m); + m.def( + "_print_cpu_extensions_status", + []() { + svs::arch::write_extensions_status(std::cout); + } + ); + + // Wrapper for svs::arch::MicroArchEnvironment + py::class_(m, "microarch", "Microarchitecture management singleton") + .def_static( + "get", + []() -> svs::arch::MicroArchEnvironment& { + return svs::arch::MicroArchEnvironment::get_instance(); + }, + py::return_value_policy::reference + ) + .def_property_static( + "current", + [](py::object) { + auto& env = svs::arch::MicroArchEnvironment::get_instance(); + return svs::arch::microarch_to_string(env.get_microarch()); + }, + [](py::object, const std::string& arch_name) { + auto& env = svs::arch::MicroArchEnvironment::get_instance(); + auto arch = svs::arch::string_to_microarch(arch_name); + env.set_microarch(arch); + }, + "Gets or sets the current microarchitecture." + ) + .def_property_readonly_static( + "supported", + [](py::object) { + auto& env = svs::arch::MicroArchEnvironment::get_instance(); + std::vector result; + for (const auto& arch : env.get_supported_microarchs()) { + result.push_back(svs::arch::microarch_to_string(arch)); + } + return result; + }, + "Returns a list of supported microarchitectures." + ) + .def_property_readonly_static( + "compiled", + [](py::object) { + auto& env = svs::arch::MicroArchEnvironment::get_instance(); + std::vector result; + for (const auto& arch : env.get_compiled_microarchs()) { + result.push_back(svs::arch::microarch_to_string(arch)); + } + return result; + }, + "Returns a list of compiled microarchitectures." 
+ ) + .def_static( + "describe", + []() { + std::ostream& out = std::cout; + auto& arch_env = svs::arch::MicroArchEnvironment::get_instance(); + + // Print support status for all ISA extensions + svs::arch::write_extensions_status(out); + + // Print current microarchitecture + auto current_arch = arch_env.get_microarch(); + out << "\nCurrent µarch: " << svs::arch::microarch_to_string(current_arch) << std::endl; + + // Print all supported microarchitectures + const auto& supported_archs = arch_env.get_supported_microarchs(); + out << "\nSupported µarchs: "; + for (const auto& arch : supported_archs) { + out << svs::arch::microarch_to_string(arch) << " "; + } + out << std::endl; + + // Print all compiled microarchitectures + const auto& compiled_archs = arch_env.get_compiled_microarchs(); + out << "\nCompiled µarchs: "; + for (const auto& arch : compiled_archs) { + out << svs::arch::microarch_to_string(arch) << " "; + } + out << std::endl; + } + ); + // Allocators svs::python::allocators::wrap(m); diff --git a/bindings/python/src/svs/__init__.py b/bindings/python/src/svs/__init__.py index dd9948e7..6379826b 100644 --- a/bindings/python/src/svs/__init__.py +++ b/bindings/python/src/svs/__init__.py @@ -13,7 +13,7 @@ # limitations under the License. # Dynamic loading logic. -from .loader import library, current_backend, available_backends +from .loader import library # Reexport all public functions and structs from the inner module. lib = library() diff --git a/bindings/python/src/svs/loader.py b/bindings/python/src/svs/loader.py index 1390cf79..06d057c6 100644 --- a/bindings/python/src/svs/loader.py +++ b/bindings/python/src/svs/loader.py @@ -12,163 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-# dep pre-coms -import archspec.cpu as cpu - -# standard library -import json import importlib -import os -from pathlib import Path - -# Get environment variables for configuring warnings and overriding backend selection. -def _is_quiet(): - """ - Return whether or not backend loading should be "quiet". - In this context, "quiet" means not warning for older architectures. - """ - return os.environ.get("SVS_QUIET", False) - -def _override_backend(): - """ - Return a manual override for the backend. - If no override is set, return `None`. - """ - return os.environ.get("SVS_OVERRIDE_BACKEND", None) - - -# The name of the manifest file. -FLAGS_MANIFEST = "flags_manifest.json" # Keep in-sync with CMakeLists.txt - -def _library_from_suffix(suffix): - return f"._svs_{suffix}" - -def _message_prehook(spec, host = cpu.host()): - """ - Emit any special messages for the given microarchitecture spec. - """ - if _is_quiet(): - return - - if isinstance(spec, str): - spec = cpu.TARGETS[spec] - - import warnings - if spec <= cpu.TARGETS["skylake_avx512"]: - message = f""" - Loading library for an older CPU architecture ({spec}). - Performance may be degraded. - """ - warnings.warn(message, RuntimeWarning) - - if host < spec: - message = """ - Override backend is target for a newer CPU than the one you're currently using. - Application may crash. - """ - warnings.warn(message, RuntimeWarning) - - -# The backend being used for this session -__CURRENT_BACKEND__ = None -def current_backend(): - """ - Return the name of the current backend. - """ - return __CURRENT_BACKEND__ - -def __set_backend_once__(suffix: str, spec): - global __CURRENT_BACKEND__ - if __CURRENT_BACKEND__ == None: - _message_prehook(spec) - __CURRENT_BACKEND__ = str(suffix) - - return current_backend() -# The dynamically loaded module. -__LIBRARY__ = None - -def _load_manifest(): - """ - Determine which shared library to load to supply the C++ extentions. 
- """ - json_file = Path(__file__).parent / FLAGS_MANIFEST - json_file_alternate = Path(__file__).parent.parent / FLAGS_MANIFEST - - # Try to give a somewhat helpful error message if the JSON manifest file was not - # generated properly by Scikit-build/CMake - if json_file.exists(): - with open(json_file, "r") as io: - return json.load(io) - elif json_file_alternate.exists(): - with open(json_file_alternate, "r") as io: - return json.load(io) - else: - print(Path(str(json_file).replace("ai.similarity-search.gss/", ""))) - raise RuntimeError(f""" - Expected a file {FLAGS_MANIFEST} to exist in the source directory to describe the - attributes of the libraries bundled with this application. - - No such file was found. - - Please report this to the project maintainer! - """) - -def available_backends(): - """ - Return a list of the available backends that where compiled when this module was built. - - Each backend in the list may be used to initialize ``SVS_OVERRIDE_BACKEND`` - environment variable prior to application start to override the default loading logic. - """ - return list(_load_manifest()["libraries"].keys()) - -def _find_library(): - """ - Find the appropriate library to load for this micro architecture. - """ - - # Get the current CPU and the manifest of compiled libraries that ship with this - # library. - host = cpu.host() - manifest = _load_manifest() - - # Respect override requests. - # Down stream loading will fail if the given option doesn't exist. - # - # However, if an override is explicitly given, then we can assume that the use knows - # what they're doing and can respond to a loading failure correctly. - override = _override_backend() - if override is not None: - spec = cpu.TARGETS[manifest["libraries"][override]] - return __set_backend_once__(override, spec) - - # Assume architectures in the manifest are place in order of preference. - # TODO: Revisit this assumption. 
- for (suffix, microarch) in manifest["libraries"].items(): - # Are we compatible with this micro architecture? - spec = cpu.TARGETS[microarch] - if spec <= host: - return __set_backend_once__(suffix, spec) - - raise RuntimeError(f""" - Could not find a suitable backend for your machine ({host}). - Please contact the project maintainers! - """) - -def __load_module_once__(): - global __LIBRARY__ - if __LIBRARY__ is None: - library_name = _library_from_suffix(_find_library()) - __LIBRARY__ = importlib.import_module(library_name, package = "svs") def library(): - """ - Return the library backend as a module. Dynamically loads the library when first called. - - Dynamically loading the library may trigger warnings related to correctness or - performance. If you really **really** don't want these warnings, they can be suppressed - by defining the environemtn variable ``SVS_QUIET=YES`` prior to application start. - """ - __load_module_once__() - return __LIBRARY__ + return importlib.import_module("._svs", package = "svs") diff --git a/bindings/python/tests/test_loader.py b/bindings/python/tests/test_loader.py deleted file mode 100644 index c9abb886..00000000 --- a/bindings/python/tests/test_loader.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright 2023 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# Test the dynamic loading logic -import archspec.cpu as cpu -import unittest -import os -import warnings - -import svs.loader as loader - -def set_quiet(): - os.environ["SVS_QUIET"] = "YES" - -def clear_quiet(): - os.environ.pop("SVS_QUIET", None) - -def set_override(override: str): - os.environ["SVS_OVERRIDE_BACKEND"] = override - -def clear_override(): - os.environ.pop("SVS_OVERRIDE_BACKEND", None) - -class LoadingTester(unittest.TestCase): - def __unset_environment_variables__(self): - clear_quiet() - clear_override() - - def tearDown(self): - self.__unset_environment_variables__() - - def test_environment_variables(self): - # Clear the environment variables in question. - self.__unset_environment_variables__() - - # Make sure "is_quiet" behaves correctly. - self.assertFalse(loader._is_quiet()) - set_quiet() - self.assertTrue(loader._is_quiet()) - self.__unset_environment_variables__() - self.assertFalse(loader._is_quiet()) - - # Now, check that "override_backend" works. - self.assertEqual(loader._override_backend(), None) - set_override("hello") - self.assertEqual(loader._override_backend(), "hello") - set_override("north") - self.assertEqual(loader._override_backend(), "north") - clear_override() - self.assertEqual(loader._override_backend(), None) - self.__unset_environment_variables__() - - def test_suffix(self): - self.assertEqual(loader._library_from_suffix("native"), "._svs_native") - self.assertEqual(loader._library_from_suffix("cascadelake"), "._svs_cascadelake") - - def test_available_backends(self): - self.assertGreaterEqual(len(loader.available_backends()), 1) - - def test_manifest(self): - manifest = loader._load_manifest() - self.assertTrue("toolchain" in manifest) - self.assertTrue("libraries" in manifest) - - toolchain = manifest["toolchain"] - self.assertTrue("compiler" in toolchain) - self.assertTrue("compiler_version" in toolchain) - - libraries = manifest["libraries"] - self.assertGreaterEqual(len(libraries), 1) - - def 
test_message_prehook(self): - # Cause all warnings to always be triggered. - warnings.simplefilter("always") - - # Refer to - # https://docs.python.org/3/library/warnings.html#testing-warnings - # for how to test warnings. - - # Warning for the host being greater than the spec. - spec = cpu.TARGETS["icelake"] - host = cpu.TARGETS["skylake"] - with warnings.catch_warnings(record = True) as w: - loader._message_prehook(spec, host) - self.assertTrue(len(w) == 1) - self.assertTrue(issubclass(w[-1].category, RuntimeWarning)) - self.assertTrue("Override" in str(w[-1].message)) - - # Running again with "quiet" enabled should suppress the warning - set_quiet() - with warnings.catch_warnings(record = True) as w: - loader._message_prehook(spec, host) - self.assertTrue(len(w) == 0) - - # Warning for using an old architecture. - clear_quiet() - archs = ["haswell", "skylake", "skylake_avx512"] - for arch in archs: - with warnings.catch_warnings(record = True) as w: - loader._message_prehook(arch) - # Number of warnings can exceed 1 if running on an older CPU. - # In this latter case, we get a "newer CPU" warning as well. - self.assertTrue(len(w) >= 1) - self.assertTrue(issubclass(w[0].category, RuntimeWarning)) - self.assertTrue("older CPU" in str(w[0].message)) - - def test_loaded(self): - libraries = loader._load_manifest()["libraries"] - self.assertTrue(loader.current_backend() in libraries) - self.assertNotEqual(loader.library(), None) diff --git a/bindings/python/tests/test_microarch.py b/bindings/python/tests/test_microarch.py new file mode 100644 index 00000000..670ae779 --- /dev/null +++ b/bindings/python/tests/test_microarch.py @@ -0,0 +1,37 @@ +# Copyright 2025 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import svs +import archspec.cpu as cpu +import os + + +class MicroarchTester(unittest.TestCase): + def test_microarch(self): + # Get emulated microarch from SDE_FLAG or use the host CPU + host_microarch = os.environ.get("SDE_FLAG", cpu.host().name) + mapping = { + "nhm": "nehalem", + "hsw": "haswell", + "skx": "skylake_avx512", + "clx": "cascadelake", + "icl": "icelake_client", + "icelake": "icelake_client", + "spr": "sapphirerapids", + } + host_microarch = mapping.get(host_microarch, host_microarch) + + if host_microarch in svs.microarch.compiled: + self.assertEqual(host_microarch, svs.microarch.current) diff --git a/cmake/microarch.cmake b/cmake/microarch.cmake new file mode 100644 index 00000000..f14bc128 --- /dev/null +++ b/cmake/microarch.cmake @@ -0,0 +1,175 @@ +# Copyright 2025 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +if(svs_microarch_cmake_included) + return() +endif() +set(svs_microarch_cmake_included true) + +# N.B.: first microarch listed in targets file is treated as "base" microarch +# which is used to build base object files, shared libs and executables +if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64") + file(STRINGS "${CMAKE_CURRENT_LIST_DIR}/microarch_targets_x86_64" SVS_MICROARCHS) +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64" OR CMAKE_SYSTEM_PROCESSOR MATCHES "arm64") + if(APPLE) + file(STRINGS "${CMAKE_CURRENT_LIST_DIR}/microarch_targets_aarch64_darwin" SVS_MICROARCHS) + else() + file(STRINGS "${CMAKE_CURRENT_LIST_DIR}/microarch_targets_aarch64" SVS_MICROARCHS) + endif() +else() + message(FATAL_ERROR "Unknown CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}") +endif() + + +# Try to find the Python executable. +# +# If it's given as part of the Cmake arguments given by "scikit build", then use that. +# Otherwise, fall back to using plain old "python". +# If *THAT* doesn't work, give up. +if(DEFINED PYTHON_EXECUTABLE) + set(SVS_PYTHON_EXECUTABLE "${PYTHON_EXECUTABLE}") +else() + set(SVS_PYTHON_EXECUTABLE "python") +endif() + +# Run the python script to get optimization flags for the desired back-ends. +# +# FLAGS_SCRIPT - Path to the Python script that will take the compiler, compiler version, +# and list of desired microarchitectures and generate optimization flags for each +# microarchitecture. +# +# FLAGS_TEXT_FILE - List of optimization flags for each architecture. +# Expected format: +# -march=arch1,-mtune=arch1 +# -march=arch2,-mtune=arch2 +# ... +# -march=archN,-mtune=archN +# +# The number of lines should be equal to the number of microarchitectures. +# NOTE: The entries within each line are separated by a comma on purpose to allow CMake +# to read the whole file as a List and then use string replacement on the commas to turn +# each line into a list in its own right. +# + +##### +##### Generate optimization flags for the given microarchitectures. 
+##### + +set(FLAGS_SCRIPT "${CMAKE_CURRENT_LIST_DIR}/scripts/microarch.py") +set(FLAGS_TEXT_FILE "${CMAKE_CURRENT_BINARY_DIR}/optimization_flags.txt") + +execute_process( + COMMAND + ${SVS_PYTHON_EXECUTABLE} + ${FLAGS_SCRIPT} + ${FLAGS_TEXT_FILE} + --compiler ${CMAKE_CXX_COMPILER_ID} + --compiler-version ${CMAKE_CXX_COMPILER_VERSION} + --microarchitectures ${SVS_MICROARCHS} + COMMAND_ERROR_IS_FATAL ANY +) + +file(STRINGS "${FLAGS_TEXT_FILE}" OPTIMIZATION_FLAGS) +message("Flags: ${OPTIMIZATION_FLAGS}") +list(LENGTH OPTIMIZATION_FLAGS OPT_FLAGS_LENGTH) +message("Length of flags: ${OPT_FLAGS_LENGTH}") + +##### +##### Generate instantiations for each microarch. +##### + +# Path to the Python generator script +set(SVS_GENERATOR_SCRIPT "${CMAKE_CURRENT_LIST_DIR}/scripts/generate_microarch_source_files.py") + +# Output directory inside the build tree +set(SVS_GENERATED_DIR "${CMAKE_BINARY_DIR}/generated") +file(MAKE_DIRECTORY ${SVS_GENERATED_DIR}) + +# Define which files will be generated by the script +set(SVS_GENERATED_HEADER "${SVS_GENERATED_DIR}/distance_instantiations.h") +set(SVS_GENERATED_SOURCES) +foreach(MICROARCH ${SVS_MICROARCHS}) + list(APPEND SVS_GENERATED_SOURCES "${SVS_GENERATED_DIR}/microarch_${MICROARCH}.cpp") +endforeach() + +# Generate sources +execute_process( + COMMAND ${SVS_PYTHON_EXECUTABLE} ${SVS_GENERATOR_SCRIPT} ${SVS_GENERATED_DIR} + RESULT_VARIABLE GEN_RESULT + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + ERROR_VARIABLE GEN_ERROR +) + +if(NOT GEN_RESULT EQUAL 0) + message(FATAL_ERROR "Failed to generate distance instantiations:\n${GEN_ERROR}") +endif() + +# Group the generated sources into a dummy custom target so they are rerun if missing +add_custom_target(svs_generate_distance_instantiations + DEPENDS ${SVS_GENERATED_HEADER} ${SVS_GENERATED_SOURCES} +) + +# Include the generated header +include_directories(${SVS_GENERATED_DIR}) + + +##### +##### Helper targets to support required microarchs and apply relevant compiler optimizations. 
+##### + +# Create base meta-target to collect common defines +add_library(svs_microarch_options_base INTERFACE) +add_library(svs::microarch_options_base ALIAS svs_microarch_options_base) + +# Add support and compiled defines to base target +foreach(MICROARCH IN LISTS SVS_MICROARCHS) + target_compile_options(svs_microarch_options_base INTERFACE + -DSVS_MICROARCH_SUPPORT_${MICROARCH} + -DSVS_MICROARCH_COMPILED_${MICROARCH} + ) +endforeach() + +# Add per-microarch optimization targets +foreach(MICROARCH OPT_FLAGS IN ZIP_LISTS SVS_MICROARCHS OPTIMIZATION_FLAGS) + string(REPLACE "," ";" OPT_FLAGS ${OPT_FLAGS}) + + add_library(svs_microarch_options_${MICROARCH} INTERFACE) + add_library(svs::microarch_options_${MICROARCH} ALIAS svs_microarch_options_${MICROARCH}) + target_compile_options(svs_microarch_options_${MICROARCH} INTERFACE + ${OPT_FLAGS} + -DSVS_TARGET_MICROARCH=${MICROARCH} + -DSVS_MICROARCH_COMPILED_${MICROARCH} + ) +endforeach() + +function(create_microarch_instantiations) + set(MICROARCH_OBJECT_FILES "") + foreach(MICROARCH OPT_FLAGS IN ZIP_LISTS SVS_MICROARCHS OPTIMIZATION_FLAGS) + set(OBJ_NAME "microarch_${MICROARCH}") + set(SRC_FILE "${SVS_GENERATED_DIR}/microarch_${MICROARCH}.cpp") + + if(NOT EXISTS "${SRC_FILE}") + message(FATAL_ERROR "Missing source file for microarch: ${SRC_FILE}") + endif() + + add_library(${OBJ_NAME} OBJECT ${SRC_FILE}) + target_link_libraries(${OBJ_NAME} + PRIVATE ${SVS_LIB} svs::compile_options fmt::fmt svs_microarch_options_${MICROARCH} + ) + + list(APPEND MICROARCH_OBJECT_FILES $) + endforeach() + + set(MICROARCH_OBJECT_FILES "${MICROARCH_OBJECT_FILES}" PARENT_SCOPE) +endfunction() \ No newline at end of file diff --git a/cmake/microarch_targets_aarch64 b/cmake/microarch_targets_aarch64 new file mode 100644 index 00000000..fe512ced --- /dev/null +++ b/cmake/microarch_targets_aarch64 @@ -0,0 +1,2 @@ +neoverse_v1 +neoverse_n2 diff --git a/cmake/microarch_targets_aarch64_darwin b/cmake/microarch_targets_aarch64_darwin new file mode 
100644
index 00000000..b5692e52
--- /dev/null
+++ b/cmake/microarch_targets_aarch64_darwin
@@ -0,0 +1,2 @@
+m1
+m2
diff --git a/cmake/microarch_targets_x86_64 b/cmake/microarch_targets_x86_64
new file mode 100644
index 00000000..74e49edc
--- /dev/null
+++ b/cmake/microarch_targets_x86_64
@@ -0,0 +1,6 @@
+haswell
+x86_64_v4
+skylake_avx512
+cascadelake
+icelake_client
+sapphirerapids
diff --git a/cmake/scripts/generate_microarch_source_files.py b/cmake/scripts/generate_microarch_source_files.py
new file mode 100644
index 00000000..e5f4c81e
--- /dev/null
+++ b/cmake/scripts/generate_microarch_source_files.py
@@ -0,0 +1,185 @@
+# Copyright 2025 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import os
+import sys
+from itertools import product
+
+# Base types
+types = ["float const", "int8_t const", "uint8_t const", "svs::float16::Float16 const"]
+
+# Extra mixed type combinations
+extra_type_pairs = [
+    ("float const", "uint8_t const"),
+    ("float const", "int8_t const"),
+    ("float const", "svs::float16::Float16 const"),
+    ("svs::float16::Float16 const", "float const"),
+    ("svs::float16::Float16 const", "svs::float16::Float16 const"),
+]
+
+# Unique (ea, eb) pairs only
+type_pairs = set(product(types, repeat=2)).union(extra_type_pairs)
+
+extents = ["Dynamic", "64", "96", "100", "128", "512", "768"]
+
+arch_platform_map = {
+    # "x86_64_v2": "defined(__x86_64__)",
+    # "nehalem": "defined(__x86_64__)",
+    # "westmere": "defined(__x86_64__)",
+    # "sandybridge": "defined(__x86_64__)",
+    # "ivybridge": "defined(__x86_64__)",
+    "haswell": "defined(__x86_64__)",
+    # "broadwell": "defined(__x86_64__)",
+    # "skylake": "defined(__x86_64__)",
+    "x86_64_v4": "defined(__x86_64__)",
+    "skylake_avx512": "defined(__x86_64__)",
+    "cascadelake": "defined(__x86_64__)",
+    # "cooperlake": "defined(__x86_64__)",
+    "icelake_client": "defined(__x86_64__)",
+    # "icelake_server": "defined(__x86_64__)",
+    "sapphirerapids": "defined(__x86_64__)",
+    # "graniterapids": "defined(__x86_64__)",
+    # "graniterapids_d": "defined(__x86_64__)",
+    "m1": "defined(__aarch64__) && defined(__APPLE__)",
+    "m2": "defined(__aarch64__) && defined(__APPLE__)",
+    "neoverse_v1": "defined(__aarch64__) && !defined(__APPLE__)",
+    "neoverse_n2": "defined(__aarch64__) && !defined(__APPLE__)",
+}
+
+
+HEADER = """#include <span>
+
+#include "svs/core/distance/euclidean.h"
+#include "svs/core/distance/inner_product.h"
+#include "svs/core/distance/cosine.h"
+#include "svs/core/distance/distance_core.h"
+
+// clang-format off"""
+
+FOOTER = """// clang-format on
+#endif // ARCH GUARD
+"""
+
+
+def write_cpp_file(arch: str, output_dir: str):
+    """Write ``microarch_{arch}.cpp`` with explicit ``compute`` instantiations.
+
+    The file instantiates ``compute`` for DistanceL2, DistanceIP and
+    DistanceCosineSimilarity over every pair in ``type_pairs`` and every entry
+    of ``extents``, wrapped in the preprocessor guard registered for ``arch``.
+
+    Args:
+        arch: Key of ``arch_platform_map``; a ``KeyError`` is raised otherwise.
+        output_dir: Existing directory that receives the generated file.
+    """
+    filename = f"microarch_{arch}.cpp"
+    path = os.path.join(output_dir, filename)
+
+    guard = arch_platform_map[arch]
+    arch_enum = f"svs::arch::MicroArch::{arch}"
+
+    lines = [HEADER]
+    lines.append(f"#if {guard}")
+    lines.append("namespace svs::distance {\n")
+
+    for ea, eb in sorted(type_pairs):
+        for extent in extents:
+            lines.append(
+                f"template float compute<{ea}, {eb}, {extent}, {extent}, {arch_enum}>(DistanceL2<{arch_enum}>, std::span<{ea}, {extent}>, std::span<{eb}, {extent}>);"
+            )
+            lines.append(
+                f"template float compute<{ea}, {eb}, {extent}, {extent}, {arch_enum}>(DistanceIP<{arch_enum}>, std::span<{ea}, {extent}>, std::span<{eb}, {extent}>);"
+            )
+            lines.append(
+                f"template float compute<{ea}, {eb}, {extent}, {extent}, {arch_enum}>(DistanceCosineSimilarity<{arch_enum}>, std::span<{ea}, {extent}>, std::span<{eb}, {extent}>);"
+            )
+
+    lines.append("} // namespace svs::distance")
+    lines.append(FOOTER)
+
+    with open(path, "w") as f:
+        f.write("\n".join(lines))
+
+    print(f"Generated: {path}")
+
+
+def write_header_file(output_dir: str):
+    """Write ``distance_instantiations.h`` into ``output_dir``.
+
+    The header declares, as ``extern template``, the instantiations emitted by
+    ``write_cpp_file`` for every architecture in ``arch_platform_map``, each
+    group wrapped in that architecture's preprocessor guard.
+    """
+    path = os.path.join(output_dir, "distance_instantiations.h")
+    lines = [
+        "#pragma once",
+        "#include <span>",
+        '#include "svs/core/distance/euclidean.h"',
+        '#include "svs/core/distance/inner_product.h"',
+        '#include "svs/core/distance/cosine.h"',
+        '#include "svs/core/distance/distance_core.h"',
+        "namespace svs::distance {",
+    ]
+
+    for arch, guard in arch_platform_map.items():
+        arch_enum = f"svs::arch::MicroArch::{arch}"
+        lines.append(f"#if {guard}")
+        # Iterate the same pairs as write_cpp_file so declarations match definitions.
+        for ea, eb in sorted(type_pairs):
+            for extent in extents:
+                lines.append(
+                    f"extern template float compute<{ea}, {eb}, {extent}, {extent}, {arch_enum}>(DistanceL2<{arch_enum}>, std::span<{ea}, {extent}>, std::span<{eb}, {extent}>);"
+                )
+                lines.append(
+                    f"extern template float compute<{ea}, {eb}, {extent}, {extent}, {arch_enum}>(DistanceIP<{arch_enum}>, std::span<{ea}, {extent}>, std::span<{eb}, {extent}>);"
+                )
+                lines.append(
+                    f"extern template float compute<{ea}, {eb}, {extent}, {extent}, {arch_enum}>(DistanceCosineSimilarity<{arch_enum}>, std::span<{ea}, {extent}>, std::span<{eb}, {extent}>);"
+                )
+        lines.append(f"#endif // {guard}")
+    lines.append("} // namespace svs::distance")
+
+    with open(path, "w") as f:
+        f.write("\n".join(lines))
+
+    print(f"Generated: {path}")
+
+
+def main():
+    """Generate every per-microarchitecture source file plus the shared header.
+
+    The output directory is taken from the first command-line argument and is
+    created when missing. Without it, a usage message is printed to stderr and
+    the process exits with status 1.
+    """
+    # Only the output directory is required; the CMake build passes nothing else.
+    if len(sys.argv) < 2:
+        print(
+            "Usage: generate_microarch_source_files.py <output_dir>",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+
+    output_dir = sys.argv[1]
+    os.makedirs(output_dir, exist_ok=True)
+
+    for arch in arch_platform_map:
+        write_cpp_file(arch, output_dir)
+
+    write_header_file(output_dir)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/bindings/python/microarch.py b/cmake/scripts/microarch.py
similarity index 76%
rename from bindings/python/microarch.py
rename to cmake/scripts/microarch.py
index 99a4ae36..0d4c8d06 100644
--- a/bindings/python/microarch.py
+++ b/cmake/scripts/microarch.py
@@ -17,13 +17,9 @@
 # (1) A text file with compiler optimization flags for each microarchitecture formatted for
 #     relatively easy consumption by CMake.
 #
-# (2) A JSON manifest file describing the micreoarchitecture for each compiled library
-#     that the python library can use to select the correct shared library.
-#
-import archspec
 import archspec.cpu as cpu
 import argparse
-import json
+
 
 def build_parser():
     parser = argparse.ArgumentParser()
@@ -31,7 +27,6 @@ def build_parser():
         "cmake_flags_text_file",
         help = "file path to where CMake's text file will go."
     )
-    parser.add_argument("python_output_json_file")
     parser.add_argument("--compiler", required = True)
     parser.add_argument("--compiler-version", required = True)
     parser.add_argument(
@@ -48,6 +43,7 @@ def resolve_microarch(name: str):
     """
     custom_aliases = {
         "native": cpu.host().name,
+        "icelake_client": "icelake",
     }
     # Allow the custom aliases to override the current name.
     # If an alias doesn't exist, juse pass the name straight through.
@@ -55,7 +51,7 @@ def resolve_microarch(name: str): def dump_flags_for_cmake(flags: list, path: str): """ - Save the optimization flags to a text file suitable for CMake to injest easily. + Save the optimization flags to a text file suitable for CMake to ingest easily. Each entry in `flags` will be interpreted as a set of compiler flags for some microarchitecture. By default, archspec passes this as a space-delimited string. @@ -70,13 +66,10 @@ def dump_flags_for_cmake(flags: list, path: str): flags - A list of optimization flags. path - The file path where the output text file will be generated. """ + # white-space separated to comma-separated & one architecture per line + string = "\n".join([",".join(f.split()) for f in flags]) with open(path, "w") as file: - num_flags = len(flags) - for i, flag_set in enumerate(flags): - file.write(",".join(flag_set.split())) - # Add a new line if not the last flag set. - if i != (num_flags - 1): - file.write('\n') + file.write(string) def resolve_compiler(name: str): """ @@ -85,6 +78,7 @@ def resolve_compiler(name: str): aliases = { "GNU": "gcc", "Clang": "clang", + "AppleClang": "clang", "IntelLLVM": "oneapi", } return aliases.get(name, name) @@ -96,40 +90,22 @@ def run(): # Extract elements from the parser architectures = args.microarchitectures output_text = args.cmake_flags_text_file - output_json = args.python_output_json_file compiler = resolve_compiler(args.compiler) compiler_version = args.compiler_version - # Communicate the compiler environment to the python runtime. - toolchain = { - "compiler": compiler, - "compiler_version": compiler_version, - } + # Generate optimization flags. suffix_to_microarch = {} optimization_flags = [] - # Generate optimization flags. 
for arch in architectures: resolved = resolve_microarch(arch) suffix_to_microarch[arch] = resolved flags = cpu.TARGETS[resolved].optimization_flags(compiler, compiler_version) optimization_flags.append(flags) - # Dump the JSON output - pre_json_dict = { - "toolchain": toolchain, - "libraries": suffix_to_microarch, - } - with open(output_json, "w") as file: - file.write(json.dumps(pre_json_dict, indent = 4)) - # Safe flags to file dump_flags_for_cmake(optimization_flags, output_text) - # Print flags to stdout - for flags in optimization_flags: - print(flags) - ##### ##### Execute as script. ##### diff --git a/examples/cpp/CMakeLists.txt b/examples/cpp/CMakeLists.txt index b9f1c98e..30423576 100644 --- a/examples/cpp/CMakeLists.txt +++ b/examples/cpp/CMakeLists.txt @@ -24,10 +24,10 @@ endif() # # [1] A simple executable is one that takes no commandline arguments. function(create_simple_example exe test file) - add_executable(${exe} ${file}) + add_executable(${exe} ${file} ${MICROARCH_OBJECT_FILES}) target_include_directories(${exe} PRIVATE ${CMAKE_CURRENT_LIST_DIR}) # Link to our library - target_link_libraries(${exe} ${SVS_LIB} svs_compile_options svs_native_options) + target_link_libraries(${exe} ${SVS_LIB} svs_compile_options ${MICROARCH_OBJECT_FILES}) # Create a test. # No-op if the `include(CTest)` line above is not executed. add_test(${test} ${exe}) @@ -37,6 +37,7 @@ endfunction() create_simple_example(saveload test_saveload saveload.cpp) create_simple_example(types test_types types.cpp) create_simple_example(vamana_iterator test_vamana_iterator vamana_iterator.cpp) +create_simple_example(microarch_info test_microarch_info microarch_info.cpp) ## More complicated examples involving more extensive setup. @@ -49,9 +50,9 @@ configure_file(../../data/test_dataset/queries_f32.fvecs . COPYONLY) configure_file(../../data/test_dataset/groundtruth_euclidean.ivecs . COPYONLY) # The vamana test executable. 
-add_executable(vamana vamana.cpp) +add_executable(vamana vamana.cpp ${MICROARCH_OBJECT_FILES}) target_include_directories(vamana PRIVATE ${CMAKE_CURRENT_LIST_DIR}) -target_link_libraries(vamana ${SVS_LIB} svs_compile_options svs_native_options) +target_link_libraries(vamana ${SVS_LIB} svs_compile_options svs_native_options svs_microarch_options_base) add_test( NAME test_vamana COMMAND @@ -64,7 +65,7 @@ add_test( # The custom thread pool executable. add_executable(custom_thread_pool custom_thread_pool.cpp) target_include_directories(custom_thread_pool PRIVATE ${CMAKE_CURRENT_LIST_DIR}) -target_link_libraries(custom_thread_pool ${SVS_LIB} svs_compile_options svs_native_options) +target_link_libraries(custom_thread_pool ${SVS_LIB} svs_compile_options ${MICROARCH_OBJECT_FILES}) add_test( NAME test_custom_thread_pool COMMAND @@ -79,9 +80,9 @@ add_test( ##### Dispatcher ##### -add_executable(dispatcher dispatcher.cpp) +add_executable(dispatcher dispatcher.cpp ${MICROARCH_OBJECT_FILES}) target_include_directories(dispatcher PRIVATE ${CMAKE_CURRENT_LIST_DIR}) -target_link_libraries(dispatcher ${SVS_LIB} svs_compile_options svs_native_options) +target_link_libraries(dispatcher ${SVS_LIB} svs_compile_options ${MICROARCH_OBJECT_FILES}) # Here we go. add_test( diff --git a/examples/cpp/microarch_info.cpp b/examples/cpp/microarch_info.cpp new file mode 100644 index 00000000..b70954bf --- /dev/null +++ b/examples/cpp/microarch_info.cpp @@ -0,0 +1,49 @@ +/* + * Copyright 2025 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "svs/lib/arch.h" +#include "svs/lib/cpuid.h" +#include + +int main() { + std::ostream& out = std::cout; + auto& arch_env = svs::arch::MicroArchEnvironment::get_instance(); + + // Print support status for all ISA extensions + svs::arch::write_extensions_status(out); + + // Print current microarchitecture + auto current_arch = arch_env.get_microarch(); + out << "\nCurrent µarch: " << svs::arch::microarch_to_string(current_arch) << std::endl; + + // Print all supported microarchitectures + const auto& supported_archs = arch_env.get_supported_microarchs(); + out << "\nSupported µarchs: "; + for (const auto& arch : supported_archs) { + out << svs::arch::microarch_to_string(arch) << " "; + } + out << std::endl; + + // Print all compiled microarchitectures + const auto& compiled_archs = arch_env.get_compiled_microarchs(); + out << "\nCompiled µarchs: "; + for (const auto& arch : compiled_archs) { + out << svs::arch::microarch_to_string(arch) << " "; + } + out << std::endl; + + return 0; +} diff --git a/include/svs/core/distance.h b/include/svs/core/distance.h index e69cf415..0398bdbf 100644 --- a/include/svs/core/distance.h +++ b/include/svs/core/distance.h @@ -17,6 +17,7 @@ #pragma once #include "svs/core/distance/cosine.h" +#include "svs/core/distance/distance_core.h" #include "svs/core/distance/euclidean.h" #include "svs/core/distance/inner_product.h" #include "svs/lib/dispatcher.h" @@ -30,23 +31,6 @@ namespace svs { -// Documentation for these classes lives with the classes themselves. -using DistanceL2 = distance::DistanceL2; -using DistanceIP = distance::DistanceIP; -using DistanceCosineSimilarity = distance::DistanceCosineSimilarity; - -/// -/// @brief Runtime selector for built-in distance functions. -/// -enum DistanceType { - /// Minimize squared L2 distance. See: ``svs::distance::DistanceL2``. - L2, - /// Maximize inner product. 
See: ``svs::distance::DistanceIP``. - MIP, - /// Minimize cosine similarity. See: ``svs::distance::DistanceCosineSimilarity``. - Cosine -}; - inline constexpr std::string_view name(DistanceType type) { switch (type) { case DistanceType::L2: { @@ -76,13 +60,16 @@ inline DistanceType parse_distance_type(std::string_view str) { namespace detail { template struct DistanceTypeEnumMap; -template <> struct DistanceTypeEnumMap { +template +struct DistanceTypeEnumMap> { static constexpr DistanceType value = DistanceType::L2; }; -template <> struct DistanceTypeEnumMap { +template +struct DistanceTypeEnumMap> { static constexpr DistanceType value = DistanceType::MIP; }; -template <> struct DistanceTypeEnumMap { +template +struct DistanceTypeEnumMap> { static constexpr DistanceType value = DistanceType::Cosine; }; } // namespace detail @@ -103,13 +90,15 @@ template struct DistanceConverter { static std::string_view description() { return name(distance_type_v); } }; -template <> -struct lib::DispatchConverter : DistanceConverter {}; -template <> -struct lib::DispatchConverter : DistanceConverter {}; -template <> -struct lib::DispatchConverter - : DistanceConverter {}; +template +struct lib::DispatchConverter> + : DistanceConverter> {}; +template +struct lib::DispatchConverter> + : DistanceConverter> {}; +template +struct lib::DispatchConverter> + : DistanceConverter> {}; // Saving and Loading. namespace lib { @@ -125,6 +114,23 @@ template <> struct Loader { }; } // namespace lib +// Factory for per-architecture distance dispatching +template struct DistanceTag {}; + +template struct DistanceFactory; + +template struct DistanceFactory { + using type = svs::distance::DistanceL2; +}; + +template struct DistanceFactory { + using type = svs::distance::DistanceIP; +}; + +template struct DistanceFactory { + using type = svs::distance::DistanceCosineSimilarity; +}; + /// /// @brief Dynamically dispatch from an distance enum to a distance functor. 
/// @@ -153,22 +159,52 @@ class DistanceDispatcher { /// @param f A function who takes distance functor for its first argument. The /// dispatcher will call ``f`` with the functor corresponding to the enum used /// to construct the dispatcher. + /// For MicroArch-dispatching, all of this functionality is wrapped in a lambda + /// which utilizes the DistanceFactory above to instantiate the distance with + /// the correct MicroArch. /// /// All other arguments will be forwarded to ``f`` beginning at argument position 2. - /// @param args Arguements to forward to ``f``. + /// @param args Arguments to forward to ``f``. /// template auto operator()(F&& f, Args&&... args) { switch (distance_type_) { - case DistanceType::L2: { - return f(DistanceL2{}, std::forward(args)...); - } - case DistanceType::MIP: { - return f(DistanceIP{}, std::forward(args)...); - } - case DistanceType::Cosine: { - return f(DistanceCosineSimilarity{}, std::forward(args)...); - } + case DistanceType::L2: + return svs::arch::dispatch_by_arch( + [&](auto&&... inner_args) -> decltype(auto) { + using Distance = + typename DistanceFactory::type; + return f( + Distance{}, std::forward(inner_args)... + ); + }, + std::forward(args)... + ); + + case DistanceType::MIP: + return svs::arch::dispatch_by_arch( + [&](auto&&... inner_args) -> decltype(auto) { + using Distance = + typename DistanceFactory::type; + return f( + Distance{}, std::forward(inner_args)... + ); + }, + std::forward(args)... + ); + + case DistanceType::Cosine: + return svs::arch::dispatch_by_arch( + [&](auto&&... inner_args) -> decltype(auto) { + using Distance = + typename DistanceFactory::type; + return f( + Distance{}, std::forward(inner_args)... + ); + }, + std::forward(args)... 
+ ); } + throw ANNEXCEPTION("unreachable reached"); // Make GCC happy } diff --git a/include/svs/core/distance/cosine.h b/include/svs/core/distance/cosine.h index 9738e881..071fd2f5 100644 --- a/include/svs/core/distance/cosine.h +++ b/include/svs/core/distance/cosine.h @@ -19,6 +19,7 @@ // svs #include "svs/core/distance/distance_core.h" #include "svs/core/distance/simd_utils.h" +#include "svs/lib/arch.h" #include "svs/lib/saveload.h" #include "svs/lib/static.h" @@ -32,7 +33,8 @@ namespace svs::distance { // Forward declare implementation to allow entry point to be near the top. -template struct CosineSimilarityImpl; +template +struct CosineSimilarityImpl; // Generic Entry Point // Call as one of either: @@ -41,18 +43,18 @@ template struct CosineSimilarityImpl; // (2) CosineSimilarity::compute(a, b) // ``` // Where (2) is when length is known at compile time and (1) is when length is not. -class CosineSimilarity { +template class CosineSimilarity { public: template static constexpr float compute(const Ea* a, const Eb* b, float a_norm, size_t N) { - return CosineSimilarityImpl::compute( + return CosineSimilarityImpl::compute( a, b, a_norm, lib::MaybeStatic(N) ); } template static constexpr float compute(const Ea* a, const Eb* b, float a_norm) { - return CosineSimilarityImpl::compute( + return CosineSimilarityImpl::compute( a, b, a_norm, lib::MaybeStatic() ); } @@ -66,8 +68,11 @@ class CosineSimilarity { /// \ref compute_distancecosine "compute" method and is thus capable of being extended /// externally. /// +template struct DistanceCosineSimilarity { public: + static constexpr svs::DistanceType distance_type = svs::DistanceType::Cosine; + /// Vectors are more similar if their similarity is greater. 
using compare = std::greater<>; @@ -106,8 +111,11 @@ struct DistanceCosineSimilarity { } }; -inline constexpr bool operator==(DistanceCosineSimilarity, DistanceCosineSimilarity) { - return true; +template +inline constexpr bool +operator==(DistanceCosineSimilarity, DistanceCosineSimilarity) { + constexpr bool same = std::is_same_v; + return same; } /// @@ -121,6 +129,8 @@ inline constexpr bool operator==(DistanceCosineSimilarity, DistanceCosineSimilar /// this is to be discovered during runtime. /// @tparam Db The compile-time length of right-hand argument. May be ``svs::Dynamic`` if /// this is to be discovered during runtime. +/// @tparam MicroArch The desired microarch. One specialization per supported microarch will +/// be compiled for run-time dispatching. /// /// @param distance The cosine similarity distance functor. Must have had ``fix_argument`` /// called previously with left-hand argument ``a``. @@ -134,14 +144,20 @@ inline constexpr bool operator==(DistanceCosineSimilarity, DistanceCosineSimilar /// - Specifying the size parameters ``Da`` and ``Db`` can greatly improve performance. /// - Compiling and executing on an Intel(R) AVX-512 system will improve performance. 
/// -template -float compute(DistanceCosineSimilarity distance, std::span a, std::span b) { +template +float compute( + DistanceCosineSimilarity distance, std::span a, std::span b +) { assert(a.size() == b.size()); constexpr size_t extent = lib::extract_extent(Da, Db); if constexpr (extent == Dynamic) { - return CosineSimilarity::compute(a.data(), b.data(), distance.norm_, a.size()); + return CosineSimilarity::compute( + a.data(), b.data(), distance.norm_, a.size() + ); } else { - return CosineSimilarity::compute(a.data(), b.data(), distance.norm_); + return CosineSimilarity::template compute( + a.data(), b.data(), distance.norm_ + ); } } @@ -166,7 +182,8 @@ float generic_cosine_similarity( return result / (a_norm * std::sqrt(accum)); }; -template struct CosineSimilarityImpl { +template +struct CosineSimilarityImpl { static float compute( const Ea* a, const Eb* b, @@ -224,7 +241,7 @@ template <> struct CosineFloatOp<16> : public svs::simd::ConvertToFloat<16> { // Small Integers SVS_VALIDATE_BOOL_ENV(SVS_AVX512_VNNI) #if SVS_AVX512_VNNI -template struct CosineSimilarityImpl { +template struct CosineSimilarityImpl { SVS_NOINLINE static float compute(const int8_t* a, const int8_t* b, float a_norm, lib::MaybeStatic length) { auto sum = _mm512_setzero_epi32(); @@ -250,7 +267,7 @@ template struct CosineSimilarityImpl { } }; -template struct CosineSimilarityImpl { +template struct CosineSimilarityImpl { SVS_NOINLINE static float compute(const uint8_t* a, const uint8_t* b, float a_norm, lib::MaybeStatic length) { auto sum = _mm512_setzero_epi32(); @@ -278,7 +295,7 @@ template struct CosineSimilarityImpl { #endif // Floating and Mixed Types -template struct CosineSimilarityImpl { +template struct CosineSimilarityImpl { SVS_NOINLINE static float compute(const float* a, const float* b, float a_norm, lib::MaybeStatic length) { auto [sum, norm] = simd::generic_simd_op(CosineFloatOp<16>(), a, b, length); @@ -286,7 +303,7 @@ template struct CosineSimilarityImpl { } }; -template 
struct CosineSimilarityImpl { +template struct CosineSimilarityImpl { SVS_NOINLINE static float compute(const float* a, const uint8_t* b, float a_norm, lib::MaybeStatic length) { auto [sum, norm] = simd::generic_simd_op(CosineFloatOp<16>(), a, b, length); @@ -294,7 +311,7 @@ template struct CosineSimilarityImpl { }; }; -template struct CosineSimilarityImpl { +template struct CosineSimilarityImpl { SVS_NOINLINE static float compute(const float* a, const int8_t* b, float a_norm, lib::MaybeStatic length) { auto [sum, norm] = simd::generic_simd_op(CosineFloatOp<16>(), a, b, length); @@ -302,7 +319,7 @@ template struct CosineSimilarityImpl { }; }; -template struct CosineSimilarityImpl { +template struct CosineSimilarityImpl { SVS_NOINLINE static float compute(const float* a, const Float16* b, float a_norm, lib::MaybeStatic length) { auto [sum, norm] = simd::generic_simd_op(CosineFloatOp<16>{}, a, b, length); @@ -310,7 +327,7 @@ template struct CosineSimilarityImpl { } }; -template struct CosineSimilarityImpl { +template struct CosineSimilarityImpl { SVS_NOINLINE static float compute(const Float16* a, const float* b, float a_norm, lib::MaybeStatic length) { auto [sum, norm] = simd::generic_simd_op(CosineFloatOp<16>{}, a, b, length); @@ -318,7 +335,7 @@ template struct CosineSimilarityImpl { } }; -template struct CosineSimilarityImpl { +template struct CosineSimilarityImpl { SVS_NOINLINE static float compute(const Float16* a, const Float16* b, float a_norm, lib::MaybeStatic length) { auto [sum, norm] = simd::generic_simd_op(CosineFloatOp<16>{}, a, b, length); diff --git a/include/svs/core/distance/distance_core.h b/include/svs/core/distance/distance_core.h index af4c5e33..373855b7 100644 --- a/include/svs/core/distance/distance_core.h +++ b/include/svs/core/distance/distance_core.h @@ -24,6 +24,28 @@ #include #include +namespace svs { +// TODO: Figure out how these statements were used for docs + +// // Documentation for these classes lives with the classes themselves. 
+// using DistanceL2 = distance::DistanceL2; +// using DistanceIP = distance::DistanceIP; +// using DistanceCosineSimilarity = distance::DistanceCosineSimilarity; + +// TODO: Figure out if it's okay to move this to distance_core.h +/// +/// @brief Runtime selector for built-in distance functions. +/// +enum DistanceType { + /// Minimize squared L2 distance. See: ``svs::distance::DistanceL2``. + L2, + /// Maximize inner product. See: ``svs::distance::DistanceIP``. + MIP, + /// Minimize cosine similarity. See: ``svs::distance::DistanceCosineSimilarity``. + Cosine +}; +} // namespace svs + namespace svs::distance { using default_accum_type = float; diff --git a/include/svs/core/distance/euclidean.h b/include/svs/core/distance/euclidean.h index 2fc86986..b5352cd7 100644 --- a/include/svs/core/distance/euclidean.h +++ b/include/svs/core/distance/euclidean.h @@ -19,6 +19,7 @@ // svs #include "svs/core/distance/distance_core.h" #include "svs/core/distance/simd_utils.h" +#include "svs/lib/arch.h" #include "svs/lib/float16.h" #include "svs/lib/preprocessor.h" #include "svs/lib/saveload.h" @@ -71,7 +72,7 @@ namespace svs::distance { // Forward declare implementation to allow entry point to be near the top. -template struct L2Impl; +template struct L2Impl; // Generic Entry Point // Call as one of either: @@ -80,16 +81,16 @@ template struct L2Impl; // (2) L2::compute(a, b) // ``` // Where (2) is when length is known at compile time and (1) is when length is not. 
-class L2 { +template class L2 { public: template static constexpr float compute(const Ea* a, const Eb* b, size_t N) { - return L2Impl::compute(a, b, lib::MaybeStatic(N)); + return L2Impl::compute(a, b, lib::MaybeStatic(N)); } template static constexpr float compute(const Ea* a, const Eb* b) { - return L2Impl::compute(a, b, lib::MaybeStatic()); + return L2Impl::compute(a, b, lib::MaybeStatic()); } }; @@ -101,7 +102,10 @@ class L2 { /// \ref compute_distancel2 "compute" method and is thus capable of being extended /// externally. /// -struct DistanceL2 { +template struct DistanceL2 { + static constexpr svs::arch::MicroArch arch = Arch; + static constexpr svs::DistanceType distance_type = svs::DistanceType::L2; + /// Vectors are more similar if their distance is smaller. using compare = std::less<>; @@ -126,7 +130,11 @@ struct DistanceL2 { } }; -inline constexpr bool operator==(DistanceL2, DistanceL2) { return true; } +template +inline constexpr bool operator==(DistanceL2, DistanceL2) { + constexpr bool same = std::is_same_v; + return same; +} /// /// @ingroup distance_overload @@ -139,6 +147,8 @@ inline constexpr bool operator==(DistanceL2, DistanceL2) { return true; } /// this is to be discovered during runtime. /// @tparam Db The compile-time length of right-hand argument. May be ``svs::Dynamic`` if /// this is to be discovered during runtime. +/// @tparam MicroArch The desired microarch. One specialization per supported microarch will +/// be compiled for run-time dispatching. /// /// @param a The left-hand vector. Typically, this position is used for the query. /// @param b The right-hand vector. Typically, this position is used for a dataset vector. @@ -150,14 +160,14 @@ inline constexpr bool operator==(DistanceL2, DistanceL2) { return true; } /// - Specifying the size parameters ``Da`` and ``Db`` can greatly improve performance. /// - Compiling and executing on an Intel(R) AVX-512 system will improve performance. 
/// -template -float compute(DistanceL2 /*unused*/, std::span a, std::span b) { +template +float compute(DistanceL2 /*unused*/, std::span a, std::span b) { assert(a.size() == b.size()); constexpr size_t extent = lib::extract_extent(Da, Db); if constexpr (extent == Dynamic) { - return L2::compute(a.data(), b.data(), a.size()); + return L2::compute(a.data(), b.data(), a.size()); } else { - return L2::compute(a.data(), b.data()); + return L2::template compute(a.data(), b.data()); } } @@ -169,6 +179,7 @@ template float generic_l2( const Ea* a, const Eb* b, lib::MaybeStatic length = lib::MaybeStatic() ) { + std::cout << "generic" << std::endl; float result = 0; for (size_t i = 0; i < length.size(); ++i) { auto temp = static_cast(a[i]) - static_cast(b[i]); @@ -177,7 +188,7 @@ float generic_l2( return result; } -template struct L2Impl { +template struct L2Impl { static constexpr float compute(const Ea* a, const Eb* b, lib::MaybeStatic length = lib::MaybeStatic()) { return generic_l2(a, b, length); @@ -252,14 +263,14 @@ template <> struct L2VNNIOp : public svs::simd::ConvertForVNNI struct L2Impl { +template struct L2Impl { SVS_NOINLINE static float compute(const int8_t* a, const int8_t* b, lib::MaybeStatic length) { return simd::generic_simd_op(L2VNNIOp(), a, b, length); } }; -template struct L2Impl { +template struct L2Impl { SVS_NOINLINE static float compute(const uint8_t* a, const uint8_t* b, lib::MaybeStatic length) { return simd::generic_simd_op(L2VNNIOp(), a, b, length); @@ -269,42 +280,43 @@ template struct L2Impl { #endif // Floating and Mixed Types -template struct L2Impl { +template struct L2Impl { SVS_NOINLINE static float compute(const float* a, const float* b, lib::MaybeStatic length) { + std::cout << "optimized" << std::endl; return simd::generic_simd_op(L2FloatOp<16>{}, a, b, length); } }; -template struct L2Impl { +template struct L2Impl { SVS_NOINLINE static float compute(const float* a, const uint8_t* b, lib::MaybeStatic length) { return 
simd::generic_simd_op(L2FloatOp<16>{}, a, b, length); }; }; -template struct L2Impl { +template struct L2Impl { SVS_NOINLINE static float compute(const float* a, const int8_t* b, lib::MaybeStatic length) { return simd::generic_simd_op(L2FloatOp<16>{}, a, b, length); }; }; -template struct L2Impl { +template struct L2Impl { SVS_NOINLINE static float compute(const float* a, const Float16* b, lib::MaybeStatic length) { return simd::generic_simd_op(L2FloatOp<16>{}, a, b, length); } }; -template struct L2Impl { +template struct L2Impl { SVS_NOINLINE static float compute(const Float16* a, const float* b, lib::MaybeStatic length) { return simd::generic_simd_op(L2FloatOp<16>{}, a, b, length); } }; -template struct L2Impl { +template struct L2Impl { SVS_NOINLINE static float compute(const Float16* a, const Float16* b, lib::MaybeStatic length) { return simd::generic_simd_op(L2FloatOp<16>{}, a, b, length); @@ -320,7 +332,7 @@ template struct L2Impl { SVS_VALIDATE_BOOL_ENV(SVS_AVX512_F) SVS_VALIDATE_BOOL_ENV(SVS_AVX2) #if !SVS_AVX512_F && SVS_AVX2 -template struct L2Impl { +template struct L2Impl { SVS_NOINLINE static float compute(const float* a, const float* b, lib::MaybeStatic length) { constexpr size_t vector_size = 8; @@ -340,7 +352,7 @@ template struct L2Impl { } }; -template struct L2Impl { +template struct L2Impl { SVS_NOINLINE static float compute(const Float16* a, const Float16* b, lib::MaybeStatic length) { constexpr size_t vector_size = 8; @@ -362,7 +374,7 @@ template struct L2Impl { } }; -template struct L2Impl { +template struct L2Impl { SVS_NOINLINE static float compute(const float* a, const Float16* b, lib::MaybeStatic length) { constexpr size_t vector_size = 8; @@ -383,7 +395,7 @@ template struct L2Impl { } }; -template struct L2Impl { +template struct L2Impl { SVS_NOINLINE static float compute(const float* a, const int8_t* b, lib::MaybeStatic length) { constexpr size_t vector_size = 8; @@ -407,7 +419,7 @@ template struct L2Impl { } }; -template struct L2Impl 
{ +template struct L2Impl { SVS_NOINLINE static float compute(const int8_t* a, const int8_t* b, lib::MaybeStatic length) { constexpr size_t vector_size = 8; @@ -434,7 +446,7 @@ template struct L2Impl { } }; -template struct L2Impl { +template struct L2Impl { SVS_NOINLINE static float compute(const uint8_t* a, const uint8_t* b, lib::MaybeStatic length) { constexpr size_t vector_size = 8; diff --git a/include/svs/core/distance/inner_product.h b/include/svs/core/distance/inner_product.h index 2ad51e17..538b54ae 100644 --- a/include/svs/core/distance/inner_product.h +++ b/include/svs/core/distance/inner_product.h @@ -19,6 +19,7 @@ // svs #include "svs/core/distance/distance_core.h" #include "svs/core/distance/simd_utils.h" +#include "svs/lib/arch.h" #include "svs/lib/float16.h" #include "svs/lib/preprocessor.h" #include "svs/lib/saveload.h" @@ -32,7 +33,7 @@ namespace svs::distance { // Forward declare implementation to allow entry point to be near the top. -template struct IPImpl; +template struct IPImpl; // Generic Entry Point // Call as one of either: @@ -41,16 +42,16 @@ template struct IPImpl; // (2) IP::compute(a, b) // ``` // Where (2) is when length is known at compile time and (1) is when length is not. -class IP { +template class IP { public: template static constexpr float compute(const Ea* a, const Eb* b, size_t N) { - return IPImpl::compute(a, b, lib::MaybeStatic(N)); + return IPImpl::compute(a, b, lib::MaybeStatic(N)); } template static constexpr float compute(const Ea* a, const Eb* b) { - return IPImpl::compute(a, b, lib::MaybeStatic()); + return IPImpl::compute(a, b, lib::MaybeStatic()); } }; @@ -62,7 +63,9 @@ class IP { /// \ref compute_distanceip "compute" method and is thus capable of being extended /// externally. /// -struct DistanceIP { +template struct DistanceIP { + static constexpr svs::DistanceType distance_type = svs::DistanceType::MIP; + /// Vectors are more similar if their similarity is greater. 
using compare = std::greater<>; @@ -88,7 +91,11 @@ struct DistanceIP { } }; -inline constexpr bool operator==(DistanceIP, DistanceIP) { return true; } +template +inline constexpr bool operator==(DistanceIP, DistanceIP) { + constexpr bool same = std::is_same_v; + return same; +} /// /// @ingroup distance_overload @@ -101,6 +108,8 @@ inline constexpr bool operator==(DistanceIP, DistanceIP) { return true; } /// this is to be discovered during runtime. /// @tparam Db The compile-time length of right-hand argument. May be ``svs::Dynamic`` if /// this is to be discovered during runtime. +/// @tparam MicroArch The desired microarch. One specialization per supported microarch will +/// be compiled for run-time dispatching. /// /// @param a The left-hand vector. Typically, this position is used for the query. /// @param b The right-hand vector. Typically, this position is used for a dataset vector. @@ -112,14 +121,14 @@ inline constexpr bool operator==(DistanceIP, DistanceIP) { return true; } /// - Specifying the size parameters ``Da`` and ``Db`` can greatly improve performance. /// - Compiling and executing on an Intel(R) AVX-512 system will improve performance. 
/// -template -float compute(DistanceIP /*unused*/, std::span a, std::span b) { +template +float compute(DistanceIP /*unused*/, std::span a, std::span b) { assert(a.size() == b.size()); constexpr size_t extent = lib::extract_extent(Da, Db); if constexpr (extent == Dynamic) { - return IP::compute(a.data(), b.data(), a.size()); + return IP::compute(a.data(), b.data(), a.size()); } else { - return IP::compute(a.data(), b.data()); + return IP::template compute(a.data(), b.data()); } } @@ -138,7 +147,7 @@ float generic_ip( return result; } -template struct IPImpl { +template struct IPImpl { static float compute(const Ea* a, const Eb* b, lib::MaybeStatic length = lib::MaybeStatic()) { return generic_ip(a, b, length); @@ -207,14 +216,14 @@ template <> struct IPVNNIOp : public svs::simd::ConvertForVNNI struct IPImpl { +template struct IPImpl { SVS_NOINLINE static float compute(const int8_t* a, const int8_t* b, lib::MaybeStatic length) { return simd::generic_simd_op(IPVNNIOp(), a, b, length); } }; -template struct IPImpl { +template struct IPImpl { SVS_NOINLINE static float compute(const uint8_t* a, const uint8_t* b, lib::MaybeStatic length) { return simd::generic_simd_op(IPVNNIOp(), a, b, length); @@ -224,42 +233,42 @@ template struct IPImpl { #endif // Floating and Mixed Types -template struct IPImpl { +template struct IPImpl { SVS_NOINLINE static float compute(const float* a, const float* b, lib::MaybeStatic length) { return svs::simd::generic_simd_op(IPFloatOp<16>{}, a, b, length); } }; -template struct IPImpl { +template struct IPImpl { SVS_NOINLINE static float compute(const float* a, const uint8_t* b, lib::MaybeStatic length) { return svs::simd::generic_simd_op(IPFloatOp<16>{}, a, b, length); }; }; -template struct IPImpl { +template struct IPImpl { SVS_NOINLINE static float compute(const float* a, const int8_t* b, lib::MaybeStatic length) { return svs::simd::generic_simd_op(IPFloatOp<16>{}, a, b, length); }; }; -template struct IPImpl { +template struct IPImpl { 
SVS_NOINLINE static float compute(const float* a, const Float16* b, lib::MaybeStatic length) { return svs::simd::generic_simd_op(IPFloatOp<16>{}, a, b, length); } }; -template struct IPImpl { +template struct IPImpl { SVS_NOINLINE static float compute(const Float16* a, const float* b, lib::MaybeStatic length) { return svs::simd::generic_simd_op(IPFloatOp<16>{}, a, b, length); } }; -template struct IPImpl { +template struct IPImpl { SVS_NOINLINE static float compute(const Float16* a, const Float16* b, lib::MaybeStatic length) { return svs::simd::generic_simd_op(IPFloatOp<16>{}, a, b, length); @@ -274,7 +283,7 @@ template struct IPImpl { SVS_VALIDATE_BOOL_ENV(SVS_AVX512_F) SVS_VALIDATE_BOOL_ENV(SVS_AVX2) #if !SVS_AVX512_F && SVS_AVX2 -template struct IPImpl { +template struct IPImpl { SVS_NOINLINE static float compute(const float* a, const float* b, lib::MaybeStatic length) { constexpr size_t vector_size = 8; @@ -293,7 +302,7 @@ template struct IPImpl { } }; -template struct IPImpl { +template struct IPImpl { SVS_NOINLINE static float compute(const Float16* a, const Float16* b, lib::MaybeStatic length) { constexpr size_t vector_size = 8; @@ -314,7 +323,7 @@ template struct IPImpl { } }; -template struct IPImpl { +template struct IPImpl { SVS_NOINLINE static float compute(const float* a, const Float16* b, lib::MaybeStatic length) { constexpr size_t vector_size = 8; @@ -334,7 +343,7 @@ template struct IPImpl { } }; -template struct IPImpl { +template struct IPImpl { SVS_NOINLINE static float compute(const float* a, const int8_t* b, lib::MaybeStatic length) { constexpr size_t vector_size = 8; @@ -357,7 +366,7 @@ template struct IPImpl { } }; -template struct IPImpl { +template struct IPImpl { SVS_NOINLINE static float compute(const int8_t* a, const int8_t* b, lib::MaybeStatic length) { constexpr size_t vector_size = 8; @@ -383,7 +392,7 @@ template struct IPImpl { } }; -template struct IPImpl { +template struct IPImpl { SVS_NOINLINE static float compute(const uint8_t* a, 
const uint8_t* b, lib::MaybeStatic length) { constexpr size_t vector_size = 8; diff --git a/include/svs/core/distance/instantiations/distance_instantiations.h b/include/svs/core/distance/instantiations/distance_instantiations.h new file mode 100644 index 00000000..799261fc --- /dev/null +++ b/include/svs/core/distance/instantiations/distance_instantiations.h @@ -0,0 +1,3404 @@ +// Copyright 2025 Intel Corporation +// +// Licensed under the Apache License, Version 2.0(the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// clang-format off + +#pragma once +#include +#include "svs/core/distance/euclidean.h" +#include "svs/core/distance/inner_product.h" +#include "svs/core/distance/cosine.h" +#include "svs/core/distance/distance_core.h" +namespace svs::distance { +#if defined(__x86_64__) +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float 
compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float 
compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float 
compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, 
std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, 
std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern 
template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float 
compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float 
compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, 
std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, 
std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern 
template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float 
compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +#endif // defined(__x86_64__) +#if defined(__x86_64__) +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, 
std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); 
+extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float 
compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float 
compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, 
std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, 
std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern 
template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float 
compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float 
compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, 
std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, 
std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern 
template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +#endif // defined(__x86_64__) +#if defined(__x86_64__) +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, 
std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, 
std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern 
template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float 
compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float 
compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, 
std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, 
std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern 
template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float 
compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float 
compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, 
std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, 
std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +#endif // defined(__x86_64__) +#if defined(__x86_64__) +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float 
compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, 
std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, 
std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern 
template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float 
compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float 
compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, 
std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, 
std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern 
template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float 
compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float 
compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, 
std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +#endif // defined(__x86_64__) +#if defined(__x86_64__) +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern 
template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template 
float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, 
std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, 
std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern 
template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float 
compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float 
compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, 
std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, 
std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern 
template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float 
compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float 
compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +#endif // defined(__x86_64__) +#if defined(__x86_64__) +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, 
std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern 
template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template 
float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, 
std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, 
std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern 
template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float 
compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float 
compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, 
std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, 
std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern 
template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float 
compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +#endif // defined(__x86_64__) +#if defined(__aarch64__) && defined(__APPLE__) +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float 
compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float 
compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, 
std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, 
std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern 
template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float 
compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float 
compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, 
std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, 
std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern 
template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float 
compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float 
compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +#endif // defined(__aarch64__) && defined(__APPLE__) +#if defined(__aarch64__) && defined(__APPLE__) +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern 
template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float 
compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float 
compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float 
compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, 
std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, 
std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern 
template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float 
compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float 
compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, 
std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, 
std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern 
template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +#endif // defined(__aarch64__) && defined(__APPLE__) +#if defined(__aarch64__) && !defined(__APPLE__) +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, 
std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern 
template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template 
float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, 
std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, 
std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern 
template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float 
compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float 
compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, 
std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, 
std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern 
template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float 
compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +#endif // defined(__aarch64__) && !defined(__APPLE__) +#if defined(__aarch64__) && !defined(__APPLE__) +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern 
template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template 
float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, 
std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, 
std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern 
template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float 
compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float 
compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, 
std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, 
std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern 
template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float 
compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +extern template float 
compute(DistanceL2, std::span, std::span); +extern template float compute(DistanceIP, std::span, std::span); +extern template float compute(DistanceCosineSimilarity, std::span, std::span); +#endif // defined(__aarch64__) && !defined(__APPLE__) +} // namespace svs::distance \ No newline at end of file diff --git a/include/svs/index/vamana/index.h b/include/svs/index/vamana/index.h index 89710b67..78effeb1 100644 --- a/include/svs/index/vamana/index.h +++ b/include/svs/index/vamana/index.h @@ -18,6 +18,8 @@ // svs #include "svs/core/data.h" +#include "svs/core/distance/cosine.h" +#include "svs/core/distance/distance_core.h" #include "svs/core/graph.h" #include "svs/core/loading.h" #include "svs/core/medioid.h" @@ -1007,13 +1009,13 @@ void verify_and_set_default_index_parameters( } } - // Check supported distance type using std::is_same type trait + // Check supported distance type using DistanceType tags using dist_type = std::decay_t; + // Create type flags for each distance type - constexpr bool is_L2 = std::is_same_v; - constexpr bool is_IP = std::is_same_v; - constexpr bool is_Cosine = - std::is_same_v; + constexpr bool is_L2 = dist_type::distance_type == svs::DistanceType::L2; + constexpr bool is_IP = dist_type::distance_type == svs::DistanceType::MIP; + constexpr bool is_Cosine = dist_type::distance_type == svs::DistanceType::Cosine; // Handle alpha based on distance type if constexpr (is_L2) { diff --git a/include/svs/index/vamana/prune.h b/include/svs/index/vamana/prune.h index aeab27ac..e6f5508f 100644 --- a/include/svs/index/vamana/prune.h +++ b/include/svs/index/vamana/prune.h @@ -36,15 +36,16 @@ struct LegacyPruneStrategy {}; template struct PruneStrategy; // Strategy for L2 -template <> struct PruneStrategy { +template struct PruneStrategy> { using type = ProgressivePruneStrategy; }; // Specialize IP to use the iterative strategy. 
-template <> struct PruneStrategy { +template struct PruneStrategy> { using type = IterativePruneStrategy; }; -template <> struct PruneStrategy { +template +struct PruneStrategy> { using type = IterativePruneStrategy; }; diff --git a/include/svs/lib/arch.h b/include/svs/lib/arch.h new file mode 100644 index 00000000..ac955dbd --- /dev/null +++ b/include/svs/lib/arch.h @@ -0,0 +1,420 @@ +/* + * Copyright 2025 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "svs/lib/cpuid.h" +#include +#include +#include +#include + +// microarch optimization selected for the current translation unit +#ifndef SVS_TARGET_MICROARCH +// default to max available microarch +#define SVS_TUNIT_MICROARCH svs::arch::MicroArch::max +#else +#define SVS_TUNIT_MICROARCH svs::arch::MicroArch::SVS_TARGET_MICROARCH +#endif + +namespace svs::arch { + +enum class MicroArch { +#if defined(__x86_64__) + // Refer to the GCC docs for the list of targeted architectures: + // https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html + x86_64_v2, + nehalem, + westmere, + sandybridge, + ivybridge, + haswell, + broadwell, + skylake, + x86_64_v4, + skylake_avx512, + cascadelake, + cooperlake, + icelake_client, + icelake_server, + sapphirerapids, + graniterapids, + graniterapids_d, +#elif defined(__aarch64__) +#if defined(__APPLE__) + m1, + m2, +#else + neoverse_v1, + neoverse_n2, +#endif +#endif + max, + baseline = 0, +}; + +struct MicroArchInfo { + 
std::optional parent; + std::vector extensions; + std::string name; +}; + +// Unordered map with MicroArch to MicroArchInfo mapping +inline const std::unordered_map& get_microarch_info_map() { + static const std::unordered_map microarch_info = { +#if defined(__x86_64__) + {MicroArch::x86_64_v2, + {std::nullopt, + {ISAExt::SSE3, + ISAExt::SSSE3, + ISAExt::SSE4_1, + ISAExt::SSE4_2, + ISAExt::POPCNT, + ISAExt::CX16, + ISAExt::SAHF}, + "x86_64_v2"}}, + {MicroArch::nehalem, + {std::nullopt, + {ISAExt::MMX, + ISAExt::SSE, + ISAExt::SSE2, + ISAExt::SSE3, + ISAExt::SSSE3, + ISAExt::SSE4_1, + ISAExt::SSE4_2, + ISAExt::POPCNT, + ISAExt::CX16, + ISAExt::SAHF, + ISAExt::FXSR}, + "nehalem"}}, + {MicroArch::westmere, {MicroArch::nehalem, {ISAExt::PCLMUL}, "westmere"}}, + {MicroArch::sandybridge, + {MicroArch::westmere, {ISAExt::AVX, ISAExt::XSAVE}, "sandybridge"}}, + {MicroArch::ivybridge, + {MicroArch::sandybridge, + {ISAExt::FSGSBASE, ISAExt::RDRND, ISAExt::F16C}, + "ivybridge"}}, + {MicroArch::haswell, + {MicroArch::sandybridge, + {ISAExt::AVX2, + ISAExt::BMI, + ISAExt::BMI2, + ISAExt::LZCNT, + ISAExt::FMA, + ISAExt::MOVBE}, + "haswell"}}, + {MicroArch::broadwell, + {MicroArch::haswell, + {ISAExt::RDSEED, ISAExt::ADCX, ISAExt::PREFETCHW}, + "broadwell"}}, + {MicroArch::skylake, + {MicroArch::broadwell, + {ISAExt::AES, ISAExt::CLFLUSHOPT, ISAExt::XSAVEC, ISAExt::XSAVES, ISAExt::SGX}, + "skylake"}}, + {MicroArch::x86_64_v4, + {std::nullopt, + {ISAExt::AVX512_F, + ISAExt::AVX512_VL, + ISAExt::AVX512_BW, + ISAExt::AVX512_DQ, + ISAExt::AVX512_CD}, + "x86_64_v4"}}, + {MicroArch::skylake_avx512, + {MicroArch::skylake, + {ISAExt::AVX512_F, + ISAExt::CLWB, + ISAExt::AVX512_VL, + ISAExt::AVX512_BW, + ISAExt::AVX512_DQ, + ISAExt::AVX512_CD}, + "skylake_avx512"}}, + {MicroArch::cascadelake, + {MicroArch::skylake_avx512, {ISAExt::AVX512_VNNI}, "cascadelake"}}, + {MicroArch::cooperlake, + {MicroArch::cascadelake, {ISAExt::AVX512_BF16}, "cooperlake"}}, + {MicroArch::icelake_client, + 
{MicroArch::cascadelake, + {ISAExt::PKU, + ISAExt::AVX512_VBMI, + ISAExt::AVX512_IFMA, + ISAExt::SHA, + ISAExt::GFNI, + ISAExt::VAES, + ISAExt::AVX512_VBMI2, + ISAExt::VPCLMULQDQ, + ISAExt::AVX512_BITALG, + ISAExt::RDPID, + ISAExt::AVX512_VPOPCNTDQ}, + "icelake_client"}}, + {MicroArch::icelake_server, + {MicroArch::icelake_client, + {ISAExt::PCONFIG, ISAExt::WBNOINVD, ISAExt::CLWB}, + "icelake_server"}}, + {MicroArch::sapphirerapids, + {MicroArch::icelake_server, + {ISAExt::MOVDIRI, + ISAExt::MOVDIR64B, + ISAExt::ENQCMD, + ISAExt::CLDEMOTE, + ISAExt::PTWRITE, + ISAExt::WAITPKG, + ISAExt::SERIALIZE, + ISAExt::TSXLDTRK, + ISAExt::UINTR, + ISAExt::AMX_BF16, + ISAExt::AMX_TILE, + ISAExt::AMX_INT8, + ISAExt::AVX_VNNI, + ISAExt::AVX512_FP16, + ISAExt::AVX512_BF16}, + "sapphirerapids"}}, + {MicroArch::graniterapids, + {MicroArch::sapphirerapids, + {ISAExt::AMX_FP16, ISAExt::PREFETCHI}, + "graniterapids"}}, + {MicroArch::graniterapids_d, + {MicroArch::graniterapids, {ISAExt::AMX_COMPLEX}, "graniterapids_d"}}, +#elif defined(__aarch64__) +#if defined(__APPLE__) + {MicroArch::m1, {std::nullopt, {ISAExt::M1}, "m1"}}, + {MicroArch::m2, {std::nullopt, {ISAExt::M2}, "m2"}}, +#else + {MicroArch::neoverse_v1, {std::nullopt, {ISAExt::SVE}, "neoverse_v1"}}, + {MicroArch::neoverse_n2, {MicroArch::neoverse_v1, {ISAExt::SVE2}, "neoverse_n2"}}, +#endif +#endif + {MicroArch::baseline, {std::nullopt, {}, "baseline"}} + }; + return microarch_info; +} + +inline bool arch_is_supported(MicroArch arch) { + const auto& info_map = get_microarch_info_map(); + auto it = info_map.find(arch); + if (it == info_map.end()) { + return false; + } + + const auto& info = it->second; + + // First check if parent architecture is supported + if (info.parent.has_value() && !arch_is_supported(info.parent.value())) { + return false; + } + + // Then check additional extensions + return check_extensions(info.extensions); +} + +inline std::string microarch_to_string(MicroArch arch) { + const auto& info_map = 
get_microarch_info_map(); + auto it = info_map.find(arch); + if (it != info_map.end()) { + return it->second.name; + } + return "unknown"; +} + +inline MicroArch string_to_microarch(const std::string& arch_name) { + const auto& info_map = get_microarch_info_map(); + for (const auto& [arch, info] : info_map) { + if (info.name == arch_name) { + return arch; + } + } + throw std::invalid_argument("Unknown microarchitecture name: " + arch_name); +} + +class MicroArchEnvironment { + public: + // Delete constructors for singleton + MicroArchEnvironment(const MicroArchEnvironment&) = delete; + MicroArchEnvironment& operator=(const MicroArchEnvironment&) = delete; + MicroArchEnvironment(MicroArchEnvironment&&) = delete; + MicroArchEnvironment& operator=(MicroArchEnvironment&&) = delete; + ~MicroArchEnvironment() = default; + + // Singleton instance + static MicroArchEnvironment& get_instance() { + // TODO: ensure thread safety + static MicroArchEnvironment instance{}; + return instance; + } + MicroArch get_microarch() const { return max_arch_; } + + void set_microarch(MicroArch arch) { + if (arch_is_supported(arch)) { + max_arch_ = arch; + } else { + throw std::invalid_argument("Unsupported microarchitecture"); + } + } + + const std::vector& get_supported_microarchs() const { + return supported_archs_; + } + + static const std::vector get_compiled_microarchs() { + return { +#if defined(SVS_MICROARCH_COMPILED_x86_64_v2) + MicroArch::x86_64_v2, +#endif +#if defined(SVS_MICROARCH_COMPILED_nehalem) + MicroArch::nehalem, +#endif +#if defined(SVS_MICROARCH_COMPILED_westmere) + MicroArch::westmere, +#endif +#if defined(SVS_MICROARCH_COMPILED_sandybridge) + MicroArch::sandybridge, +#endif +#if defined(SVS_MICROARCH_COMPILED_ivybridge) + MicroArch::ivybridge, +#endif +#if defined(SVS_MICROARCH_COMPILED_haswell) + MicroArch::haswell, +#endif +#if defined(SVS_MICROARCH_COMPILED_broadwell) + MicroArch::broadwell, +#endif +#if defined(SVS_MICROARCH_COMPILED_skylake) + 
MicroArch::skylake, +#endif +#if defined(SVS_MICROARCH_COMPILED_x86_64_v4) + MicroArch::x86_64_v4, +#endif +#if defined(SVS_MICROARCH_COMPILED_skylake_avx512) + MicroArch::skylake_avx512, +#endif +#if defined(SVS_MICROARCH_COMPILED_cascadelake) + MicroArch::cascadelake, +#endif +#if defined(SVS_MICROARCH_COMPILED_cooperlake) + MicroArch::cooperlake, +#endif +#if defined(SVS_MICROARCH_COMPILED_icelake_client) + MicroArch::icelake_client, +#endif +#if defined(SVS_MICROARCH_COMPILED_icelake_server) + MicroArch::icelake_server, +#endif +#if defined(SVS_MICROARCH_COMPILED_sapphirerapids) + MicroArch::sapphirerapids, +#endif +#if defined(SVS_MICROARCH_COMPILED_graniterapids) + MicroArch::graniterapids, +#endif +#if defined(SVS_MICROARCH_COMPILED_graniterapids_d) + MicroArch::graniterapids_d, +#endif +#if defined(SVS_MICROARCH_COMPILED_m1) + MicroArch::m1, +#endif +#if defined(SVS_MICROARCH_COMPILED_m2) + MicroArch::m2, +#endif +#if defined(SVS_MICROARCH_COMPILED_neoverse_v1) + MicroArch::neoverse_v1, +#endif +#if defined(SVS_MICROARCH_COMPILED_neoverse_n2) + MicroArch::neoverse_n2, +#endif + }; + } + + private: + MicroArchEnvironment() { + max_arch_ = MicroArch::baseline; + for (const auto& arch : get_compiled_microarchs()) { + if (arch_is_supported(arch)) { + supported_archs_.push_back(arch); + if (static_cast(arch) > static_cast(max_arch_)) { + max_arch_ = arch; + } + } + } + } + + std::vector supported_archs_; + MicroArch max_arch_; +}; + +template +auto dispatch_by_arch(Functor&& f, Args&&... 
args) { + auto& arch_env = MicroArchEnvironment::get_instance(); + auto arch = arch_env.get_microarch(); + std::cout << "Dispatch to " << microarch_to_string(arch) << std::endl; + + // clang-format off + switch (arch) { +#if defined(__x86_64__) + case MicroArch::x86_64_v2: + return f.template operator()(std::forward(args)...); + case MicroArch::nehalem: + return f.template operator()(std::forward(args)...); + case MicroArch::westmere: + return f.template operator()(std::forward(args)...); + case MicroArch::sandybridge: + return f.template operator()(std::forward(args)...); + case MicroArch::ivybridge: + return f.template operator()(std::forward(args)...); + case MicroArch::haswell: + return f.template operator()(std::forward(args)...); + case MicroArch::broadwell: + return f.template operator()(std::forward(args)...); + case MicroArch::skylake: + return f.template operator()(std::forward(args)...); + case MicroArch::x86_64_v4: + return f.template operator()(std::forward(args)...); + case MicroArch::skylake_avx512: + return f.template operator()(std::forward(args)...); + case MicroArch::cascadelake: + return f.template operator()(std::forward(args)...); + case MicroArch::cooperlake: + return f.template operator()(std::forward(args)...); + case MicroArch::icelake_client: + return f.template operator()(std::forward(args)...); + case MicroArch::icelake_server: + return f.template operator()(std::forward(args)...); + case MicroArch::sapphirerapids: + return f.template operator()(std::forward(args)...); + case MicroArch::graniterapids: + return f.template operator()(std::forward(args)...); + case MicroArch::graniterapids_d: + return f.template operator()(std::forward(args)...); +#endif // __x86_64__ + +#if defined(__aarch64__) && defined(__APPLE__) + case MicroArch::m1: + return f.template operator()(std::forward(args)...); + case MicroArch::m2: + return f.template operator()(std::forward(args)...); +#endif // __aarch64__ && __APPLE__ + +#if defined(__aarch64__) && !defined(__APPLE__) +
case MicroArch::neoverse_v1:
+            return f.template operator()(std::forward(args)...);
+        case MicroArch::neoverse_n2:
+            return f.template operator()(std::forward(args)...);
+#endif // __aarch64__
+
+        default:
+            throw std::invalid_argument("Unsupported microarchitecture");
+
+    }
+    // clang-format on
+}
+} // namespace svs::arch
diff --git a/include/svs/lib/cpuid.h b/include/svs/lib/cpuid.h
new file mode 100644
index 00000000..0ed67b0c
--- /dev/null
+++ b/include/svs/lib/cpuid.h
@@ -0,0 +1,375 @@
+/*
+ * Copyright 2025 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <array>
+#include <cstdint>
+#include <iostream>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#if defined(__x86_64__)
+#include <cpuid.h>
+#endif
+
+#if defined(__aarch64__) && defined(__APPLE__)
+#include <sys/sysctl.h>
+#endif
+
+namespace svs::arch {
+
+#if defined(__x86_64__)
+
+// x86-64 instruction-set extensions that can be detected through CPUID.
+enum class ISAExt {
+    // Common extensions
+    MMX,
+    SSE,
+    SSE2,
+    SSE3,
+    SSSE3,
+    SSE4_1,
+    SSE4_2,
+    POPCNT,
+    CX16,
+    SAHF,
+    FXSR,
+    AVX,
+    XSAVE,
+    PCLMUL,
+    FSGSBASE,
+    RDRND,
+    F16C,
+    AVX2,
+    BMI,
+    BMI2,
+    LZCNT,
+    FMA,
+    MOVBE,
+    RDSEED,
+    ADCX,
+    PREFETCHW,
+    AES,
+    CLFLUSHOPT,
+    XSAVEC,
+    XSAVES,
+    SGX,
+    CLWB,
+    PKU,
+    SHA,
+    GFNI,
+    VAES,
+    VPCLMULQDQ,
+    RDPID,
+    PCONFIG,
+    WBNOINVD,
+    MOVDIRI,
+    MOVDIR64B,
+    ENQCMD,
+    CLDEMOTE,
+    PTWRITE,
+    WAITPKG,
+    SERIALIZE,
+    TSXLDTRK,
+    UINTR,
+    PREFETCHI,
+
+    // AVX family
+    AVX_VNNI,
+
+    // AVX512 family
+    AVX512_F,
+    AVX512_VL,
+    AVX512_BW,
+    AVX512_DQ,
+    AVX512_CD,
+    AVX512_VBMI,
+    AVX512_IFMA,
+    AVX512_VNNI,
+    AVX512_VBMI2,
+    AVX512_BITALG,
+    AVX512_VPOPCNTDQ,
+    AVX512_BF16,
+    AVX512_FP16,
+
+    // AMX family
+    AMX_BF16,
+    AMX_TILE,
+    AMX_INT8,
+    AMX_FP16,
+    AMX_COMPLEX
+};
+
+// Location of a single feature bit in the CPUID output, plus a printable name.
+struct CPUIDFlag {
+    const uint32_t function;    // EAX input for CPUID
+    const uint32_t subfunction; // ECX input for CPUID
+    const uint32_t reg;         // Register index (0=EAX, 1=EBX, 2=ECX, 3=EDX)
+    const uint32_t bit;         // Bit position in the register
+    const char* name;
+
+    // Returns true when the feature bit is set in the CPUID output of this CPU.
+    //
+    // The query goes through `__get_cpuid_count`, which returns 0 when `function`
+    // is above the highest leaf the CPU implements. Such a flag is reported as not
+    // supported instead of being read from whatever CPUID returns for that leaf.
+    //
+    // NOTE(review): only the CPUID bit is inspected; whether the OS has enabled the
+    // matching register state (OSXSAVE/XGETBV) is not checked here -- confirm that
+    // callers do not depend on that for AVX, AVX-512 or AMX.
+    bool get_value() const {
+        std::array<unsigned int, 4> regs{};
+        const int valid = __get_cpuid_count(
+            function, subfunction, &regs[0], &regs[1], &regs[2], &regs[3]
+        );
+        if (valid == 0 || reg >= regs.size()) {
+            return false;
+        }
+        // Unsigned shift-and-mask keeps bit 31 (AVX512_VL) out of signed arithmetic.
+        return ((regs[reg] >> bit) & 1U) != 0;
+    }
+};
+
+// Returns the table mapping every x86-64 ISAExt to its CPUID bit location.
+inline const std::unordered_map<ISAExt, CPUIDFlag>& get_isa_ext_info() {
+    static const std::unordered_map<ISAExt, CPUIDFlag> isa_ext_info = {
+        // flags are sorted by function, subfunction, register and bit
+        {ISAExt::MMX, {1, 0, 3, 23, "MMX"}},
+        {ISAExt::FXSR, {1, 0, 3, 24, "FXSR"}},
+        {ISAExt::SSE, {1, 0, 3, 25, "SSE"}},
+        {ISAExt::SSE2, {1, 0, 3, 26, "SSE2"}},
+        {ISAExt::SSE3, {1, 0, 2, 0, "SSE3"}},
+        {ISAExt::PCLMUL, {1, 0, 2, 1, "PCLMUL"}},
+        {ISAExt::SSSE3, {1, 0, 2, 9, "SSSE3"}},
+        {ISAExt::FMA, {1, 0, 2, 12, "FMA"}},
+        {ISAExt::CX16, {1, 0, 2, 13, "CX16"}},
+        {ISAExt::SSE4_1, {1, 0, 2, 19, "SSE4_1"}},
+        {ISAExt::SSE4_2, {1, 0, 2, 20, "SSE4_2"}},
+        {ISAExt::MOVBE, {1, 0, 2, 22, "MOVBE"}},
+        {ISAExt::POPCNT, {1, 0, 2, 23, "POPCNT"}},
+        {ISAExt::AES, {1, 0, 2, 25, "AES"}},
+        {ISAExt::XSAVE, {1, 0, 2, 26, "XSAVE"}},
+        {ISAExt::AVX, {1, 0, 2, 28, "AVX"}},
+        {ISAExt::F16C, {1, 0, 2, 29, "F16C"}},
+        {ISAExt::RDRND, {1, 0, 2, 30, "RDRND"}},
+        {ISAExt::FSGSBASE, {7, 0, 1, 0, "FSGSBASE"}},
+        {ISAExt::SGX, {7, 0, 1, 2, "SGX"}},
+        {ISAExt::BMI, {7, 0, 1, 3, "BMI"}},
+        {ISAExt::AVX2, {7, 0, 1, 5, "AVX2"}},
+        {ISAExt::BMI2, {7, 0, 1, 8, "BMI2"}},
+        {ISAExt::AVX512_F, {7, 0, 1, 16, "AVX512_F"}},
+        {ISAExt::AVX512_DQ, {7, 0, 1, 17, "AVX512_DQ"}},
+        {ISAExt::RDSEED, {7, 0, 1, 18, "RDSEED"}},
+        {ISAExt::ADCX, {7, 0, 1, 19, "ADCX"}},
+        {ISAExt::AVX512_IFMA, {7, 0, 1, 21, "AVX512_IFMA"}},
+        {ISAExt::CLFLUSHOPT, {7, 0, 1, 23, "CLFLUSHOPT"}},
+        {ISAExt::CLWB, {7, 0, 1, 24, "CLWB"}},
+        {ISAExt::AVX512_CD, {7, 0, 1, 28, "AVX512_CD"}},
+        {ISAExt::SHA, {7, 0, 1, 29, "SHA"}},
+        {ISAExt::AVX512_BW, {7, 0, 1, 30, "AVX512_BW"}},
+        {ISAExt::AVX512_VL, {7, 0, 1, 31, "AVX512_VL"}},
+        {ISAExt::AVX512_VBMI, {7, 0, 2, 1, "AVX512_VBMI"}},
+        {ISAExt::PKU, {7, 0, 2, 3, "PKU"}},
+        {ISAExt::WAITPKG, {7, 0, 2, 5, "WAITPKG"}},
+        {ISAExt::AVX512_VBMI2, {7, 0, 2, 6, "AVX512_VBMI2"}},
+        {ISAExt::GFNI, {7, 0, 2, 8, "GFNI"}},
+        {ISAExt::VAES, {7, 0, 2, 9, "VAES"}},
+        {ISAExt::VPCLMULQDQ, {7, 0, 2, 10, "VPCLMULQDQ"}},
+        {ISAExt::AVX512_VNNI, {7, 0, 2, 11, "AVX512_VNNI"}},
+        {ISAExt::AVX512_BITALG, {7, 0, 2, 12, "AVX512_BITALG"}},
+        {ISAExt::AVX512_VPOPCNTDQ, {7, 0, 2, 14, "AVX512_VPOPCNTDQ"}},
+        {ISAExt::RDPID, {7, 0, 2, 22, "RDPID"}},
+        {ISAExt::CLDEMOTE, {7, 0, 2, 25, "CLDEMOTE"}},
+        {ISAExt::MOVDIRI, {7, 0, 2, 27, "MOVDIRI"}},
+        {ISAExt::MOVDIR64B, {7, 0, 2, 28, "MOVDIR64B"}},
+        {ISAExt::ENQCMD, {7, 0, 2, 29, "ENQCMD"}},
+        {ISAExt::UINTR, {7, 0, 3, 5, "UINTR"}},
+        {ISAExt::SERIALIZE, {7, 0, 3, 14, "SERIALIZE"}},
+        {ISAExt::TSXLDTRK, {7, 0, 3, 16, "TSXLDTRK"}},
+        {ISAExt::PCONFIG, {7, 0, 3, 18, "PCONFIG"}},
+        {ISAExt::AMX_BF16, {7, 0, 3, 22, "AMX_BF16"}},
+        {ISAExt::AVX512_FP16, {7, 0, 3, 23, "AVX512_FP16"}},
+        {ISAExt::AMX_TILE, {7, 0, 3, 24, "AMX_TILE"}},
+        {ISAExt::AMX_INT8, {7, 0, 3, 25, "AMX_INT8"}},
+        {ISAExt::AVX_VNNI, {7, 1, 0, 4, "AVX_VNNI"}},
+        {ISAExt::AVX512_BF16, {7, 1, 0, 5, "AVX512_BF16"}},
+        {ISAExt::AMX_FP16, {7, 1, 0, 21, "AMX_FP16"}},
+        {ISAExt::AMX_COMPLEX, {7, 1, 3, 8, "AMX_COMPLEX"}},
+        {ISAExt::PREFETCHI, {7, 1, 3, 14, "PREFETCHI"}},
+        {ISAExt::XSAVEC, {0xD, 1, 0, 1, "XSAVEC"}},
+        {ISAExt::XSAVES, {0xD, 1, 0, 3, "XSAVES"}},
+        {ISAExt::PTWRITE, {0x14, 0, 1, 4, "PTWRITE"}},
+        {ISAExt::WBNOINVD, {0x80000008, 0, 1, 9, "WBNOINVD"}},
+        {ISAExt::SAHF, {0x80000001, 0, 2, 0, "SAHF"}},
+        {ISAExt::LZCNT, {0x80000001, 0, 2, 5, "LZCNT"}},
+        {ISAExt::PREFETCHW, {0x80000001, 0, 2, 8, "PREFETCHW"}},
+    };
+    return isa_ext_info;
+}
+
+#elif defined(__aarch64__)
+
+#if defined(__APPLE__)
+
+// Apple silicon generations, told apart by the CPU brand string.
+enum class ISAExt {
+    M1,
+    M2,
+};
+
+// Brand-string marker for one Apple chip generation.
+struct BrandInfo {
+    const char* name; // Substring searched for in the brand string
+
+    // Returns true when "machdep.cpu.brand_string" contains `name`, and false when
+    // it does not or when the sysctl query fails.
+    bool get_value() const {
+        // Zero-filled and queried with one byte held back, so the buffer is always
+        // NUL-terminated when it is converted to std::string.
+        char buffer[256] = {};
+        size_t size = sizeof(buffer) - 1;
+
+        if (sysctlbyname("machdep.cpu.brand_string", buffer, &size, nullptr, 0) == 0) {
+            std::string brand(buffer);
+            return brand.find(name) != std::string::npos;
+        }
+
+        return false;
+    }
+};
+
+// Returns the table mapping every Apple ISAExt to its brand-string marker.
+inline const std::unordered_map<ISAExt, BrandInfo>& get_isa_ext_info() {
+    static const std::unordered_map<ISAExt, BrandInfo> isa_ext_info = {
+        {ISAExt::M1, {"M1"}},
+        {ISAExt::M2, {"M2"}},
+    };
+    return isa_ext_info;
+}
+
+#else
+
+// AArch64 (non-Apple) extensions detected from the ID system registers.
+enum class ISAExt {
+    SVE,
+    SVE2,
+};
+
+// Define register ID values for ARM features detection
+#define ID_AA64PFR0_EL1 0
+#define ID_AA64ZFR0_EL1 1
+
+// Helper template to read system registers with mrs instruction.
+// IDs other than the two defined above read as 0.
+template <unsigned int ID> inline uint64_t read_system_reg() {
+    uint64_t val;
+    if constexpr (ID == ID_AA64PFR0_EL1) {
+        asm("mrs %0, id_aa64pfr0_el1" : "=r"(val));
+    } else if constexpr (ID == ID_AA64ZFR0_EL1) {
+        asm("mrs %0, id_aa64zfr0_el1" : "=r"(val));
+    } else {
+        val = 0;
+    }
+    return val;
+}
+
+// Extract the len-bit field that starts at bit pos of a register value.
+inline uint64_t extract_bits(uint64_t val, int pos, int len) {
+    // Shifting 1ULL by 64 is undefined, so a full-width field uses an all-ones mask.
+    const uint64_t mask = len >= 64 ? ~0ULL : (1ULL << len) - 1;
+    return (val >> pos) & mask;
+}
+
+// ID-register field that advertises one feature, plus a printable name.
+struct MSRFlag {
+    unsigned int reg_id;   // System register ID
+    int bit_pos;           // Bit position in the register
+    int bit_len;           // Number of bits to check
+    uint64_t expected_val; // Minimum field value for the feature to be present
+    const char* name;      // Feature name
+
+    // Returns true when the described field holds at least `expected_val`.
+    //
+    // ID-register fields are versioned: a larger value denotes a superset of the
+    // smaller ones (ID_AA64ZFR0_EL1.SVEver is 1 for SVE2 and 2 for SVE2.1), so an
+    // exact match would report SVE2 as missing on newer cores.
+    //
+    // No try/catch around the reads: a trapping `mrs` raises a signal rather than a
+    // C++ exception, so a handler here could never run.
+    bool get_value() const {
+        uint64_t reg_val = 0;
+
+        switch (reg_id) {
+            case ID_AA64PFR0_EL1:
+                reg_val = read_system_reg<ID_AA64PFR0_EL1>();
+                break;
+            case ID_AA64ZFR0_EL1:
+                // Only read when ID_AA64PFR0_EL1 reports SVE (bits 35:32 non-zero).
+                if (extract_bits(read_system_reg<ID_AA64PFR0_EL1>(), 32, 4) != 0) {
+                    reg_val = read_system_reg<ID_AA64ZFR0_EL1>();
+                }
+                break;
+            default:
+                return false;
+        }
+
+        return extract_bits(reg_val, bit_pos, bit_len) >= expected_val;
+    }
+};
+
+// Returns the table mapping every AArch64 ISAExt to its ID-register field.
+inline const std::unordered_map<ISAExt, MSRFlag>& get_isa_ext_info() {
+    static const std::unordered_map<ISAExt, MSRFlag> isa_ext_info = {
+        {ISAExt::SVE, {ID_AA64PFR0_EL1, 32, 4, 1, "sve"}},
+        {ISAExt::SVE2, {ID_AA64ZFR0_EL1, 0, 4, 1, "sve2"}},
+    };
+    return isa_ext_info;
+}
+
+#endif // __APPLE__
+#else
+#error "svs/lib/cpuid.h supports only x86-64 and AArch64 targets"
+#endif // __x86_64__ / __aarch64__
+
+// Returns true when `ext` is reported as available on the host CPU.
+inline bool check_extension(ISAExt ext) { return get_isa_ext_info().at(ext).get_value(); }
+
+// Returns true when every extension in `exts` is available (true for an empty list).
+inline bool check_extensions(const std::vector<ISAExt>& exts) {
+    for (const auto& ext : exts) {
+        if (!check_extension(ext)) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// Writes one "<name>: Supported" / "<name>: Not supported" line per table entry.
+template <typename StreamType> inline void write_extensions_status(StreamType& stream) {
+    const auto& ext_info = get_isa_ext_info();
+
+    stream << "CPU Extensions Support Status:" << std::endl;
+    stream << "-----------------------------" << std::endl;
+
+    for (const auto& [ext, info] : ext_info) {
+        stream << info.name << ": "
+               << (check_extension(ext) ? "Supported" : "Not supported") << std::endl;
+    }
+}
+
+} // namespace svs::arch
diff --git a/include/svs/quantization/scalar/scalar.h b/include/svs/quantization/scalar/scalar.h index e1fdf264..268fe859 100644 --- a/include/svs/quantization/scalar/scalar.h +++ b/include/svs/quantization/scalar/scalar.h @@ -144,7 +144,7 @@ class InnerProductCompressed { float offset_ = 0; }; -class CosineSimilarityCompressed { +template class CosineSimilarityCompressed { public: using compare = std::greater<>; @@ -180,7 +180,7 @@ class CosineSimilarityCompressed { float scale_; float bias_; - distance::DistanceCosineSimilarity inner_; + distance::DistanceCosineSimilarity inner_; }; namespace detail { @@ -288,19 +288,19 @@ template struct Compressor { // Map from baseline distance functors to the local versions. template struct CompressedDistance; -template -struct CompressedDistance { +template +struct CompressedDistance, ElementType> { using type = EuclideanCompressed; }; -template -struct CompressedDistance { +template +struct CompressedDistance, ElementType> { using type = InnerProductCompressed; }; -template -struct CompressedDistance { - using type = CosineSimilarityCompressed; +template +struct CompressedDistance, ElementType> { + using type = CosineSimilarityCompressed; }; // Trait to identify whether a type has `uses_compressed_data` diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 45054826..85692faf 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -186,7 +186,7 @@ if (SVS_EXPERIMENTAL_ENABLE_NUMA) list(APPEND TEST_SOURCES ${NUMA_TESTS}) endif() -add_executable(tests ${TEST_SOURCES}) +add_executable(tests ${TEST_SOURCES} ${MICROARCH_OBJECT_FILES}) # Path to the test dataset. 
set(DATA_DIRECTORY "${PROJECT_SOURCE_DIR}/data") @@ -196,7 +196,7 @@ target_compile_definitions(tests PRIVATE SVS_TEST_DATA_DIR="${DATA_DIRECTORY}") target_link_libraries(tests PRIVATE ${SVS_LIB}) target_link_libraries( - tests PRIVATE svs_compile_options svs_native_options svs_benchmark_library + tests PRIVATE svs_compile_options ${MICROARCH_OBJECT_FILES} svs_benchmark_library ) target_link_libraries(tests PRIVATE Catch2::Catch2WithMain) diff --git a/tests/svs/core/distances/cosine.cpp b/tests/svs/core/distances/cosine.cpp index 8c68ebc1..24915d97 100644 --- a/tests/svs/core/distances/cosine.cpp +++ b/tests/svs/core/distances/cosine.cpp @@ -85,11 +85,16 @@ void test_types(T lo, T hi, size_t num_tests) { // Statically Sized Computation auto a_norm = svs::distance::norm(std::span{a.data(), a.size()}); CATCH_REQUIRE( - (svs::distance::CosineSimilarity::compute(a.data(), b.data(), a_norm) == - expected) + // TODO: replace baseline with something else? + (svs::distance::CosineSimilarity::compute( + a.data(), b.data(), a_norm + ) == expected) ); // Dynamically Sized Computation - auto dist = svs::distance::CosineSimilarity::compute(a.data(), b.data(), a_norm, N); + auto dist = + svs::distance::CosineSimilarity::compute( + a.data(), b.data(), a_norm, N + ); CATCH_REQUIRE((dist == expected)); } } diff --git a/tests/svs/core/distances/distance_euclidean.cpp b/tests/svs/core/distances/distance_euclidean.cpp index 88c23fe4..1e375b43 100644 --- a/tests/svs/core/distances/distance_euclidean.cpp +++ b/tests/svs/core/distances/distance_euclidean.cpp @@ -68,9 +68,17 @@ void test_types(T lo, T hi, size_t num_tests) { auto expected = Catch::Approx(euclidean_reference(a, b)); // Statically Sized Computation - CATCH_REQUIRE((svs::distance::L2::compute(a.data(), b.data()) == expected)); + CATCH_REQUIRE( + (svs::distance::L2::compute( + a.data(), b.data() + ) == expected) + ); // Dynamically Sized Computation - CATCH_REQUIRE((svs::distance::L2::compute(a.data(), b.data(), N) == 
expected)); + CATCH_REQUIRE( + (svs::distance::L2::compute( + a.data(), b.data(), N + ) == expected) + ); } } } // namespace diff --git a/tests/svs/core/distances/inner_product.cpp b/tests/svs/core/distances/inner_product.cpp index a074a058..b5f0462e 100644 --- a/tests/svs/core/distances/inner_product.cpp +++ b/tests/svs/core/distances/inner_product.cpp @@ -76,9 +76,17 @@ void test_types(T lo, T hi, size_t num_tests) { .margin(INNERPRODUCT_MARGIN); // Statically Sized Computation - CATCH_REQUIRE((svs::distance::IP::compute(a.data(), b.data()) == expected)); + CATCH_REQUIRE( + (svs::distance::IP::compute( + a.data(), b.data() + ) == expected) + ); // Dynamically Sized Computation - CATCH_REQUIRE((svs::distance::IP::compute(a.data(), b.data(), N) == expected)); + CATCH_REQUIRE( + (svs::distance::IP::compute( + a.data(), b.data(), N + ) == expected) + ); } } } // anonymous namespace diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt index 85c6b316..6e104b24 100644 --- a/utils/CMakeLists.txt +++ b/utils/CMakeLists.txt @@ -13,7 +13,7 @@ # limitations under the License. function(create_utility exe file) - add_executable(${exe} ${file}) + add_executable(${exe} ${file} ${MICROARCH_OBJECT_FILES}) target_include_directories( ${exe} PRIVATE ${CMAKE_CURRENT_LIST_DIR} @@ -22,7 +22,7 @@ function(create_utility exe file) target_link_libraries(${exe} PRIVATE ${SVS_LIB}) # Get common compiler options with the unit tests. - target_link_libraries(${exe} PRIVATE svs_compile_options svs_native_options) + target_link_libraries(${exe} PRIVATE svs_compile_options ${MICROARCH_OBJECT_FILES}) # Link with third-party executables. target_link_libraries(${exe} PRIVATE fmt::fmt)