diff --git a/pytorch-cms-aotriton.patch b/pytorch-cms-aotriton.patch new file mode 100644 index 00000000000..4f2f4456677 --- /dev/null +++ b/pytorch-cms-aotriton.patch @@ -0,0 +1,15 @@ +diff --git a/cmake/External/aotriton.cmake b/cmake/External/aotriton.cmake +index de64370b37..986ad6c178 100644 +--- a/cmake/External/aotriton.cmake ++++ b/cmake/External/aotriton.cmake +@@ -5,8 +5,8 @@ if(NOT __AOTRITON_INCLUDED) + set(__AOTRITON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/aotriton/build") + set(__AOTRITON_INSTALL_DIR "${PROJECT_SOURCE_DIR}/torch") + ExternalProject_Add(aotriton_external +- GIT_REPOSITORY https://github.com/ROCm/aotriton.git +- GIT_TAG 24a3fe9cb57e5cda3c923df29743f9767194cc27 ++ GIT_REPOSITORY https://github.com/cms-externals/aotriton.git ++ GIT_TAG 763f12b4cfb4fcb1460a635dd76094589efb10e9 + SOURCE_DIR ${__AOTRITON_SOURCE_DIR} + BINARY_DIR ${__AOTRITON_BUILD_DIR} + PREFIX ${__AOTRITON_INSTALL_DIR} diff --git a/pytorch-hipcc-clang-flags.patch b/pytorch-hipcc-clang-flags.patch new file mode 100644 index 00000000000..bf68b612908 --- /dev/null +++ b/pytorch-hipcc-clang-flags.patch @@ -0,0 +1,14 @@ +diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake +index a96075245a..aa4e22391e 100644 +--- a/cmake/Dependencies.cmake ++++ b/cmake/Dependencies.cmake +@@ -1305,6 +1305,9 @@ if(USE_ROCM) + endif(CMAKE_BUILD_TYPE MATCHES Debug) + + set(HIP_CLANG_FLAGS ${HIP_CXX_FLAGS}) ++ # TODO: dynamic target ++ list(APPEND HIP_CLANG_FLAGS "--gcc-toolchain=$ENV{GCC_ROOT}") ++ list(APPEND HIP_CLANG_FLAGS "--target=x86_64-redhat-linux-gnu") + # Ask hcc to generate device code during compilation so we can use + # host linker to link.
+ list(APPEND HIP_CLANG_FLAGS -fno-gpu-rdc) diff --git a/pytorch-rocm-version.patch b/pytorch-rocm-version.patch new file mode 100644 index 00000000000..1937362667c --- /dev/null +++ b/pytorch-rocm-version.patch @@ -0,0 +1,14 @@ +diff --git a/aten/src/ATen/cuda/tunable/TunableGemm.h b/aten/src/ATen/cuda/tunable/TunableGemm.h +index 3ba0d76127..775a04b8e8 100644 +--- a/aten/src/ATen/cuda/tunable/TunableGemm.h ++++ b/aten/src/ATen/cuda/tunable/TunableGemm.h +@@ -22,7 +22,7 @@ + #include <c10/util/StringUtil.h> + + #ifdef USE_ROCM +-#include <rocm-core/rocm_version.h> ++#include <rocm_version.h> + #endif + + #define STRINGIFY(s) #s + diff --git a/pytorch.spec b/pytorch.spec index 02ef28f3f6c..27c7e453849 100644 --- a/pytorch.spec +++ b/pytorch.spec @@ -10,27 +10,43 @@ Source2: FindFMT.cmake Source99: scram-tools.file/tools/eigen/env Patch1: pytorch-missing-braces Patch2: pytorch-system-fmt +Patch3: pytorch-cms-aotriton +Patch4: pytorch-hipcc-clang-flags +Patch5: pytorch-rocm-version -BuildRequires: cmake ninja +BuildRequires: cmake ninja py3-pip Requires: eigen fxdiv numactl openmpi protobuf psimd python3 py3-PyYAML Requires: OpenBLAS zlib protobuf fmt py3-pybind11 py3-typing-extensions -%{!?without_cuda:Requires: cuda cudnn} +Requires: py3-filelock py3-iniconfig py3-packaging py3-pluggy py3-numpy py3-setuptools +%{!?without_rocm:Requires: rocm rocm-rocrand} %prep %setup -n %{n}-%{realversion} %patch1 -p1 %patch2 -p1 +%patch3 -p1 +%patch4 -p1 +%patch5 -p1 %build +%if 0%{!?without_rocm:1} +# Pregenerate some files (ROCm builds only) +python3 tools/amd_build/build_amd.py +%endif + cp %{_sourcedir}/FindEigen3.cmake %{_sourcedir}/FindFMT.cmake cmake/Modules/ rm -rf ../build && mkdir ../build && cd ../build source %{_sourcedir}/env -USE_CUDA=OFF -%if 0%{!?without_cuda:1} -if [ "%{cuda_gcc_support}" = "true" ] ; then -USE_CUDA=ON -fi +%if 0%{!?without_rocm:1} +# Notice: must be environment variables +export ROCM_PATH=${ROCM_ROOT} +export ROCM_SOURCE_DIR=${ROCM_ROOT} +export
PYTORCH_ROCM_ARCH=gfx900,gfx906,gfx908,gfx90a,gfx1030 +export PYBIND11_SYSPATH=${PY3_PYBIND11_ROOT} +export TRITON_CACHE_DIR=$WORKSPACE/.triton/cache +export GCC_ROOT=${GCC_ROOT} +hipcc -v %endif cmake ../%{n}-%{realversion} \ @@ -40,11 +56,9 @@ cmake ../%{n}-%{realversion} \ -DBUILD_TEST=OFF \ -DBUILD_BINARY=OFF \ -DBUILD_PYTHON=OFF \ -%if 0%{!?without_cuda:1} - -DUSE_CUDA=${USE_CUDA} \ - -DTORCH_CUDA_ARCH_LIST="%{cuda_arch_float}" \ - -DCUDNN_INCLUDE_DIR=${CUDNN_ROOT}/include \ - -DCUDNN_LIBRARY=${CUDNN_ROOT}/lib64/libcudnn.so \ + -DUSE_CUDA=OFF \ +%if 0%{!?without_rocm:1} + -DUSE_ROCM=ON \ %endif -DUSE_NCCL=OFF \ -DUSE_FBGEMM=OFF \ diff --git a/rocm.spec b/rocm.spec index d26ac00bcc7..725a07081d1 100644 --- a/rocm.spec +++ b/rocm.spec @@ -57,10 +57,34 @@ Source31: https://%{repository}/%{repoversion}/main/rocprofiler-systems-0.1.1.60 Source32: https://%{repository}/%{repoversion}/main/rocprofiler-systems-debuginfo-0.1.1.60302-66.el%{rhel}.%{_arch}.rpm Source33: https://%{repository}/%{repoversion}/main/rocthrust-devel-3.3.0.60302-66.el%{rhel}.%{_arch}.rpm - # sources for rocprofiler-register Source34: git+https://github.com/ROCm/rocprofiler-register.git?obj=%{rocprofiler_register_branch}/%{rocprofiler_register_tag}&export=%{rocprofiler_register_pkg}&submodules=1&output=/%{rocprofiler_register_pkg}.tgz +# extra sources for pytorch +Source36: https://%{repository}/%{repoversion}/main/hsa-rocr-devel-1.14.0.60302-66.el%{rhel}.%{_arch}.rpm +Source37: https://%{repository}/%{repoversion}/main/rocblas-devel-4.3.0.60302-66.el%{rhel}.%{_arch}.rpm +Source38: https://%{repository}/%{repoversion}/main/rocblas-4.3.0.60302-66.el%{rhel}.%{_arch}.rpm +Source39: https://%{repository}/%{repoversion}/main/hipblas-2.3.0.60302-66.el%{rhel}.%{_arch}.rpm +Source40: https://%{repository}/%{repoversion}/main/hipblas-devel-2.3.0.60302-66.el%{rhel}.%{_arch}.rpm +Source41:
https://%{repository}/%{repoversion}/main/hipblaslt-0.10.0.60302-66.el%{rhel}.%{_arch}.rpm +Source42: https://%{repository}/%{repoversion}/main/hipblaslt-devel-0.10.0.60302-66.el%{rhel}.%{_arch}.rpm +Source43: https://%{repository}/%{repoversion}/main/miopen-hip-3.3.0.60302-66.el%{rhel}.%{_arch}.rpm +Source44: https://%{repository}/%{repoversion}/main/miopen-hip-devel-3.3.0.60302-66.el%{rhel}.%{_arch}.rpm +Source45: https://%{repository}/%{repoversion}/main/rocfft-1.0.31.60302-66.el%{rhel}.%{_arch}.rpm +Source46: https://%{repository}/%{repoversion}/main/rocfft-devel-1.0.31.60302-66.el%{rhel}.%{_arch}.rpm +Source47: https://%{repository}/%{repoversion}/main/hipfft-1.0.17.60302-66.el%{rhel}.%{_arch}.rpm +Source48: https://%{repository}/%{repoversion}/main/hipfft-devel-1.0.17.60302-66.el%{rhel}.%{_arch}.rpm +Source49: https://%{repository}/%{repoversion}/main/hipsparse-3.1.2.60302-66.el%{rhel}.%{_arch}.rpm +Source50: https://%{repository}/%{repoversion}/main/hipsparse-devel-3.1.2.60302-66.el%{rhel}.%{_arch}.rpm +Source51: https://%{repository}/%{repoversion}/main/rccl-2.21.5.60302-66.el%{rhel}.%{_arch}.rpm +Source52: https://%{repository}/%{repoversion}/main/rccl-devel-2.21.5.60302-66.el%{rhel}.%{_arch}.rpm +Source53: https://%{repository}/%{repoversion}/main/rocprim-devel-3.3.0.60302-66.el%{rhel}.%{_arch}.rpm +Source54: https://%{repository}/%{repoversion}/main/hipcub-devel-3.3.0.60302-66.el%{rhel}.%{_arch}.rpm +Source56: https://%{repository}/%{repoversion}/main/hipsolver-2.3.0.60302-66.el%{rhel}.%{_arch}.rpm +Source57: https://%{repository}/%{repoversion}/main/hipsolver-devel-2.3.0.60302-66.el%{rhel}.%{_arch}.rpm +Source58: https://%{repository}/%{repoversion}/main/roctracer-4.1.60302.60302-66.el%{rhel}.%{_arch}.rpm +Source59: https://%{repository}/%{repoversion}/main/roctracer-devel-4.1.60302.60302-66.el%{rhel}.%{_arch}.rpm + BuildRequires: gmake cmake
Requires: numactl zstd fmt Requires: python3 @@ -107,6 +131,29 @@ rpm2cpio %{SOURCE30} | cpio -idmv rpm2cpio %{SOURCE31} | cpio -idmv rpm2cpio %{SOURCE32} | cpio -idmv rpm2cpio %{SOURCE33} | cpio -idmv +rpm2cpio %{SOURCE36} | cpio -idmv +rpm2cpio %{SOURCE37} | cpio -idmv +rpm2cpio %{SOURCE38} | cpio -idmv +rpm2cpio %{SOURCE39} | cpio -idmv +rpm2cpio %{SOURCE40} | cpio -idmv +rpm2cpio %{SOURCE41} | cpio -idmv +rpm2cpio %{SOURCE42} | cpio -idmv +rpm2cpio %{SOURCE43} | cpio -idmv +rpm2cpio %{SOURCE44} | cpio -idmv +rpm2cpio %{SOURCE45} | cpio -idmv +rpm2cpio %{SOURCE46} | cpio -idmv +rpm2cpio %{SOURCE47} | cpio -idmv +rpm2cpio %{SOURCE48} | cpio -idmv +rpm2cpio %{SOURCE49} | cpio -idmv +rpm2cpio %{SOURCE50} | cpio -idmv +rpm2cpio %{SOURCE51} | cpio -idmv +rpm2cpio %{SOURCE52} | cpio -idmv +rpm2cpio %{SOURCE53} | cpio -idmv +rpm2cpio %{SOURCE54} | cpio -idmv +rpm2cpio %{SOURCE56} | cpio -idmv +rpm2cpio %{SOURCE57} | cpio -idmv +rpm2cpio %{SOURCE58} | cpio -idmv +rpm2cpio %{SOURCE59} | cpio -idmv # build rocprofiler-register sed -i -e 's|add_subdirectory(external)|find_package(fmt REQUIRED)\nadd_subdirectory(external)|' src/%{rocprofiler_register_pkg}/CMakeLists.txt