Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 23 additions & 4 deletions build2cmake/src/config/v2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ impl Build {
Kernel::Cuda { .. } => Backend::Cuda,
Kernel::Metal { .. } => Backend::Metal,
Kernel::Rocm { .. } => Backend::Rocm,
Kernel::Xpu { .. } => Backend::Xpu,
})
.collect()
}
Expand Down Expand Up @@ -111,22 +112,32 @@ pub enum Kernel {
include: Option<Vec<String>>,
src: Vec<String>,
},
#[serde(rename_all = "kebab-case")]
Xpu {
cxx_flags: Option<Vec<String>>,
depends: Vec<Dependencies>,
sycl_flags: Option<Vec<String>>,
include: Option<Vec<String>>,
src: Vec<String>,
},
}

impl Kernel {
/// Returns the `cxx_flags` list configured for this kernel, or `None` when
/// the kernel does not set one.
///
/// Every variant carries a `cxx_flags` field, so all variants share one arm.
/// Each variant is listed exactly once; a repeated `Kernel::Rocm` pattern
/// would be unreachable.
pub fn cxx_flags(&self) -> Option<&[String]> {
    match self {
        Kernel::Cuda { cxx_flags, .. }
        | Kernel::Metal { cxx_flags, .. }
        | Kernel::Rocm { cxx_flags, .. }
        | Kernel::Xpu { cxx_flags, .. } => cxx_flags.as_deref(),
    }
}

/// Returns the `include` list configured for this kernel, or `None` when
/// the kernel does not set one.
///
/// Every variant carries an `include` field, so all variants share one arm.
/// Each variant is listed exactly once; a repeated `Kernel::Rocm` pattern
/// would be unreachable.
pub fn include(&self) -> Option<&[String]> {
    match self {
        Kernel::Cuda { include, .. }
        | Kernel::Metal { include, .. }
        | Kernel::Rocm { include, .. }
        | Kernel::Xpu { include, .. } => include.as_deref(),
    }
}

Expand All @@ -135,20 +146,25 @@ impl Kernel {
Kernel::Cuda { .. } => Backend::Cuda,
Kernel::Metal { .. } => Backend::Metal,
Kernel::Rocm { .. } => Backend::Rocm,
Kernel::Xpu { .. } => Backend::Xpu,
}
}

/// Returns the dependencies declared by this kernel as a slice.
///
/// Every variant carries a `depends` field, so all variants share one arm.
/// Each variant is listed exactly once; a repeated `Kernel::Rocm` pattern
/// would be unreachable.
pub fn depends(&self) -> &[Dependencies] {
    match self {
        Kernel::Cuda { depends, .. }
        | Kernel::Metal { depends, .. }
        | Kernel::Rocm { depends, .. }
        | Kernel::Xpu { depends, .. } => depends,
    }
}

/// Returns the `src` entries declared by this kernel as a slice.
///
/// Every variant carries a `src` field, so a single arm covers them all.
/// Keeping one arm avoids unreachable `Cuda`/`Metal`/`Rocm` patterns in a
/// second, overlapping arm.
pub fn src(&self) -> &[String] {
    match self {
        Kernel::Cuda { src, .. }
        | Kernel::Metal { src, .. }
        | Kernel::Rocm { src, .. }
        | Kernel::Xpu { src, .. } => src,
    }
}
}
Expand All @@ -159,6 +175,7 @@ pub enum Backend {
Cuda,
Metal,
Rocm,
Xpu,
}

impl Display for Backend {
Expand All @@ -167,6 +184,7 @@ impl Display for Backend {
Backend::Cuda => write!(f, "cuda"),
Backend::Metal => write!(f, "metal"),
Backend::Rocm => write!(f, "rocm"),
Backend::Xpu => write!(f, "xpu"),
}
}
}
Expand All @@ -179,6 +197,7 @@ impl FromStr for Backend {
"cuda" => Ok(Backend::Cuda),
"metal" => Ok(Backend::Metal),
"rocm" => Ok(Backend::Rocm),
"xpu" => Ok(Backend::Xpu),
_ => Err(format!("Unknown backend: {s}")),
}
}
Expand Down
6 changes: 5 additions & 1 deletion build2cmake/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ use eyre::{bail, ensure, Context, Result};
use minijinja::Environment;

mod torch;
use torch::{write_torch_ext_cuda, write_torch_ext_metal, write_torch_ext_universal};
use torch::{
write_torch_ext_cuda, write_torch_ext_metal, write_torch_ext_universal, write_torch_ext_xpu,
};

mod config;
use config::{Backend, Build, BuildCompat};
Expand Down Expand Up @@ -180,6 +182,7 @@ fn generate_torch(
write_torch_ext_cuda(&env, backend, &build, target_dir.clone(), ops_id)?
}
Backend::Metal => write_torch_ext_metal(&env, &build, target_dir.clone(), ops_id)?,
Backend::Xpu => write_torch_ext_xpu(&env, &build, target_dir.clone(), ops_id)?,
};
file_set.write(&target_dir, force)?;

Expand Down Expand Up @@ -379,6 +382,7 @@ fn get_generated_files(
Backend::Metal => {
write_torch_ext_metal(env, build, target_dir.clone(), ops_id.clone())?
}
Backend::Xpu => write_torch_ext_xpu(env, build, target_dir.clone(), ops_id.clone())?,
};

all_set.extend(set);
Expand Down
48 changes: 48 additions & 0 deletions build2cmake/src/templates/xpu/kernel.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Per-kernel fragment for the XPU backend. It collects the kernel's sources in
# a kernel-specific <kernel_name>_SRC variable, attaches optional per-source
# include directories and compile options, and finally appends the sources to
# the shared SRC list.
set({{kernel_name}}_SRC
{{ sources }}
)

{% if includes %}
# Per-source include directories; only rendered when the kernel sets includes.
# TODO: check if CLion supports this:
# https://youtrack.jetbrains.com/issue/CPP-16510/CLion-does-not-handle-per-file-include-directories
set_source_files_properties(
{{'${' + kernel_name + '_SRC}'}}
PROPERTIES INCLUDE_DIRECTORIES "{{ includes }}")
{% endif %}

{% if cxx_flags %}
# Kernel-specific cxx_flags, appended to every source of this kernel and
# restricted to CXX compilation by the generator expression.
foreach(_KERNEL_SRC {{'${' + kernel_name + '_SRC}'}})
set_property(
SOURCE ${_KERNEL_SRC}
APPEND PROPERTY
COMPILE_OPTIONS "$<$<COMPILE_LANGUAGE:CXX>:{{ cxx_flags }}>"
)
endforeach()
{% endif %}

# Add SYCL-specific compilation flags for XPU sources
# Only sources whose names end in .cpp, .cxx or .cc receive these flags.
{% if sycl_flags %}
# Use kernel-specific SYCL flags
foreach(_KERNEL_SRC {{'${' + kernel_name + '_SRC}'}})
if(_KERNEL_SRC MATCHES ".*\\.(cpp|cxx|cc)$")
set_property(
SOURCE ${_KERNEL_SRC}
APPEND PROPERTY
COMPILE_OPTIONS "$<$<COMPILE_LANGUAGE:CXX>:{{ sycl_flags }}>"
)
endif()
endforeach()
{% else %}
# Use default SYCL flags
# Note: the reference below is the CMake variable sycl_flags (set by the XPU
# preamble template), not a value substituted by the template engine.
foreach(_KERNEL_SRC {{'${' + kernel_name + '_SRC}'}})
if(_KERNEL_SRC MATCHES ".*\\.(cpp|cxx|cc)$")
set_property(
SOURCE ${_KERNEL_SRC}
APPEND PROPERTY
COMPILE_OPTIONS "$<$<COMPILE_LANGUAGE:CXX>:${sycl_flags}>"
)
endif()
endforeach()
{% endif %}

# Make this kernel's sources part of the overall SRC list.
list(APPEND SRC {{'"${' + kernel_name + '_SRC}"'}})
47 changes: 47 additions & 0 deletions build2cmake/src/templates/xpu/preamble.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Preamble of the generated CMake project for the XPU backend: selects the
# icpx compiler, locates Python and Torch, checks XPU availability and defines
# the SYCL compile/link flag variables used by the other XPU templates.
cmake_minimum_required(VERSION 3.26)

# Set Intel SYCL compiler before project() call
# Configuration aborts with a fatal error when icpx cannot be found.
find_program(ICPX_COMPILER icpx)
if(ICPX_COMPILER)
set(CMAKE_CXX_COMPILER ${ICPX_COMPILER})
message(STATUS "Using Intel SYCL compiler: ${ICPX_COMPILER}")
else()
message(FATAL_ERROR "Intel SYCL compiler (icpx) not found. Please install Intel oneAPI toolkit.")
endif()

project({{ name }})

# NOTE(review): append_cmake_prefix_path and run_python used below are
# presumably defined in cmake/utils.cmake - confirm.
include("cmake/utils.cmake")

# Find Python with all necessary components for building extensions
find_package(Python REQUIRED COMPONENTS Interpreter Development.Module Development.SABIModule)

append_cmake_prefix_path("torch" "torch.utils.cmake_prefix_path")

find_package(Torch REQUIRED)

# Intel XPU backend detection and setup
# Query the Torch version from Python only if TORCH_VERSION is not already set.
if(NOT TORCH_VERSION)
run_python(TORCH_VERSION "import torch; print(torch.__version__)" "Failed to get Torch version")
endif()

# Check for Intel XPU support in PyTorch
# The Python snippet prints "true" only when torch has an xpu attribute and
# torch.xpu.is_available() is truthy at configure time.
run_python(XPU_AVAILABLE
"import torch; print('true' if hasattr(torch, 'xpu') and torch.xpu.is_available() else 'false')"
"Failed to check XPU availability")

# Without XPU support, warn and stop processing the rest of this file.
if(NOT XPU_AVAILABLE STREQUAL "true")
message(WARNING "Intel XPU is not available in this PyTorch installation. XPU kernels will be skipped.")
return()
endif()

# Set up XPU compilation flags
set(GPU_LANG "SYCL")
add_compile_definitions(XPU_KERNEL)
add_compile_definitions(USE_XPU)

# SYCL flag lists (semicolon-separated):
# - sycl_link_flags is passed to target_link_options by the XPU
#   torch-extension template.
# - sycl_flags is passed as COMPILE_FLAGS by the XPU torch-extension template
#   and is the default per-source flag set in the XPU kernel template.
set(sycl_link_flags "-fsycl;--offload-compress;-fsycl-targets=spir64_gen,spir64;-Xs;-device pvc,xe-lpg,ats-m150 -options ' -cl-intel-enable-auto-large-GRF-mode -cl-poison-unsupported-fp64-kernels -cl-intel-greater-than-4GB-buffer-required'")
set(sycl_flags "-fsycl;-fhonor-nans;-fhonor-infinities;-fno-associative-math;-fno-approx-func;-fno-sycl-instrument-device-code;--offload-compress;-fsycl-targets=spir64_gen,spir64;")
message(STATUS "Configuring for Intel XPU backend using SYCL")
123 changes: 123 additions & 0 deletions build2cmake/src/templates/xpu/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
import logging
import os
from shutil import which, move
import subprocess
import sys
from pathlib import Path

from setuptools import Extension, find_packages, setup
from setuptools.command.build_ext import build_ext

logger = logging.getLogger(__name__)


def is_sccache_available() -> bool:
    """Return ``True`` when ``shutil.which`` can locate an ``sccache`` executable."""
    sccache_path = which("sccache")
    return sccache_path is not None


def is_ccache_available() -> bool:
    """Return ``True`` when ``shutil.which`` can locate a ``ccache`` executable."""
    ccache_path = which("ccache")
    return ccache_path is not None


def is_ninja_available() -> bool:
    """Return ``True`` when ``shutil.which`` can locate a ``ninja`` executable."""
    ninja_path = which("ninja")
    return ninja_path is not None


class CMakeExtension(Extension):
    """Source-less extension entry that remembers where its CMake project lives.

    The extension is registered with an empty ``sources`` list and
    ``py_limited_api=True``; ``sourcedir`` is stored as a resolved path string.
    """

    def __init__(self, name: str, sourcedir: str = "") -> None:
        resolved_dir = Path(sourcedir).resolve()
        super().__init__(name, sources=[], py_limited_api=True)
        self.sourcedir = os.fspath(resolved_dir)


class CMakeBuild(build_ext):
    """``build_ext`` command that configures and builds an extension with CMake."""

    def build_extension(self, ext: CMakeExtension) -> None:
        """Run the CMake configure and build steps for ``ext``.

        Environment variables consulted:

        * ``DEBUG`` - selects the ``Debug`` build type when ``self.debug`` is
          ``None``; otherwise ``self.debug`` decides.
        * ``CMAKE_GENERATOR`` - when unset or ``"Ninja"``, the ``ninja`` Python
          package (if importable) is used as the build tool.
        * ``CMAKE_ARGS`` - extra space-separated arguments for the configure step.
        * ``MAX_JOBS`` - number of parallel build jobs.

        Raises:
            subprocess.CalledProcessError: if a ``cmake`` invocation fails.
            ValueError: if ``DEBUG`` (when consulted) or ``MAX_JOBS`` is not an
                integer.
        """
        ext_fullpath = Path.cwd() / self.get_ext_fullpath(ext.name)
        extdir = ext_fullpath.parent.resolve()

        debug = int(os.environ.get("DEBUG", 0)) if self.debug is None else self.debug
        cfg = "Debug" if debug else "Release"

        cmake_generator = os.environ.get("CMAKE_GENERATOR", "")

        cmake_args = [
            f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={extdir}{os.sep}",
            f"-DPython_EXECUTABLE={sys.executable}",
            f"-DCMAKE_BUILD_TYPE={cfg}",  # not used on MSVC, but no harm
        ]
        build_args = []
        if "CMAKE_ARGS" in os.environ:
            # Drop empty items produced by repeated spaces.
            cmake_args += [item for item in os.environ["CMAKE_ARGS"].split(" ") if item]

        if not cmake_generator or cmake_generator == "Ninja":
            try:
                import ninja

                ninja_executable_path = Path(ninja.BIN_DIR) / "ninja"
                cmake_args += [
                    "-GNinja",
                    f"-DCMAKE_MAKE_PROGRAM:FILEPATH={ninja_executable_path}",
                ]
            except ImportError:
                # Best effort: without the ninja package CMake picks its
                # default generator.
                pass

        # Prefer sccache over ccache when both are installed.
        if is_sccache_available():
            cmake_args += [
                "-DCMAKE_C_COMPILER_LAUNCHER=sccache",
                "-DCMAKE_CXX_COMPILER_LAUNCHER=sccache",
            ]
        elif is_ccache_available():
            cmake_args += [
                "-DCMAKE_C_COMPILER_LAUNCHER=ccache",
                "-DCMAKE_CXX_COMPILER_LAUNCHER=ccache",
            ]

        num_jobs = os.getenv("MAX_JOBS", None)
        if num_jobs is not None:
            num_jobs = int(num_jobs)
            logger.info("Using MAX_JOBS=%d as the number of jobs.", num_jobs)
        else:
            try:
                # os.sched_getaffinity() isn't universally available, so fall
                # back to os.cpu_count() if we get an error here.
                num_jobs = len(os.sched_getaffinity(0))
            except AttributeError:
                # os.cpu_count() may return None when the count cannot be
                # determined; use one job instead of passing "-jNone".
                num_jobs = os.cpu_count() or 1

        build_args += [f"-j{num_jobs}"]
        if sys.platform == "win32":
            build_args += ["--config", cfg]

        build_temp = Path(self.build_temp) / ext.name
        build_temp.mkdir(parents=True, exist_ok=True)

        subprocess.run(
            ["cmake", ext.sourcedir, *cmake_args], cwd=build_temp, check=True
        )
        subprocess.run(
            ["cmake", "--build", ".", *build_args], cwd=build_temp, check=True
        )
        if sys.platform == "win32":
            # Move the built files out of the per-configuration folder and
            # one level up for discovery.
            for filename in os.listdir(extdir / cfg):
                move(extdir / cfg / filename, extdir / filename)


# Package definition. This file is a template: the name, ops_name and
# data_globs placeholders are substituted before the file is used.
setup(
name="{{ name }}",
# The version is just a stub, it's not used by the final build artefact.
version="0.1.0",
# A single CMake-built extension module, built by the CMakeBuild command.
ext_modules=[CMakeExtension("{{ name }}.{{ ops_name }}")],
cmdclass={"build_ext": CMakeBuild},
# Python packages are discovered under the torch-ext directory.
packages=find_packages(where="torch-ext", include=["{{ name }}*"]),
package_dir={"": "torch-ext"},
{% if data_globs %}
package_data={"{{ name }}": [ {{ data_globs }} ]},
{% endif %}
zip_safe=False,
install_requires=["torch"],
python_requires=">=3.9",
)
13 changes: 13 additions & 0 deletions build2cmake/src/templates/xpu/torch-binding.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Torch binding fragment for the XPU backend: collects the binding sources in
# a TORCH_<name>_SRC variable, attaches optional per-source include
# directories and appends the sources to the shared SRC list.
set(TORCH_{{name}}_SRC
{{ src|join(' ') }}
)

{% if includes %}
# Per-source include directories; only rendered when includes are set.
# TODO: check if CLion supports this:
# https://youtrack.jetbrains.com/issue/CPP-16510/CLion-does-not-handle-per-file-include-directories
set_source_files_properties(
{{'${TORCH_' + name + '_SRC}'}}
PROPERTIES INCLUDE_DIRECTORIES "{{ includes }}")
{% endif %}

# Make the binding sources part of the overall SRC list.
list(APPEND SRC {{'"${TORCH_' + name + '_SRC}"'}})
11 changes: 11 additions & 0 deletions build2cmake/src/templates/xpu/torch-extension.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Defines the extension target for the XPU backend from the accumulated SRC
# list, using the GPU_LANG and sycl_flags variables set by the XPU preamble.
# NOTE(review): define_gpu_extension_target is presumably provided by
# cmake/utils.cmake - confirm.
define_gpu_extension_target(
{{ ops_name }}
DESTINATION {{ ops_name }}
LANGUAGE ${GPU_LANG}
SOURCES ${SRC}
COMPILE_FLAGS ${sycl_flags}
USE_SABI 3
WITH_SOABI)

# Add XPU/SYCL specific linker flags
target_link_options({{ ops_name }} PRIVATE ${sycl_link_flags})
6 changes: 5 additions & 1 deletion build2cmake/src/torch/metal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,11 @@ fn write_cmake(

render_binding(env, torch, name, cmake_writer)?;

for (kernel_name, kernel) in &build.kernels {
for (kernel_name, kernel) in build
.kernels
.iter()
.filter(|(_, kernel)| matches!(kernel, Kernel::Metal { .. }))
{
render_kernel(env, kernel_name, kernel, cmake_writer)?;
}

Expand Down
3 changes: 3 additions & 0 deletions build2cmake/src/torch/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,6 @@ pub(crate) use ops_identifier::kernel_ops_identifier;

mod universal;
pub use universal::write_torch_ext_universal;

mod xpu;
pub use xpu::write_torch_ext_xpu;
Loading
Loading