Skip to content

Commit 587bfbe

Browse files
authored
Merge pull request #513 from daineAMD/master
merge staging 5b1d8ff into master on Conditional GO from CQE#515
2 parents 08619c9 + 98f192a commit 587bfbe

38 files changed

+9532
-5117
lines changed

.githooks/pre-commit

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
# are installed, and if so, uses the installed version to format
55
# the staged changes.
66

7-
export PATH=/opt/rocm/llvm/bin:/opt/rocm/hcc/bin:/usr/bin:/bin
7+
export PATH=/opt/rocm/llvm/bin:/usr/bin:/bin
88

99
# Redirect stdout to stderr.
1010
exec >&2

.github/CODEOWNERS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
* @amcamd @TorreZuk @mahmoodw @daineAMD @bragadeesh @NaveenElumalaiAMD @rkamd

CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ include( ROCMInstallSymlinks )
7979
include( ROCMClients )
8080
include( ROCMHeaderWrapper )
8181

82-
set ( VERSION_STRING "0.52.0" )
82+
set ( VERSION_STRING "0.53.0" )
8383
rocm_setup_version( VERSION ${VERSION_STRING} )
8484

8585
if( NOT DEFINED ENV{HIP_PATH})
@@ -193,7 +193,7 @@ endif( )
193193

194194
# Package specific CPACK vars
195195
if( NOT USE_CUDA )
196-
rocm_package_add_dependencies(DEPENDS "rocblas >= 2.45.0" "rocsolver >= 3.19.0")
196+
rocm_package_add_dependencies(DEPENDS "rocblas >= 2.46.0" "rocsolver >= 3.20.0")
197197
endif( )
198198

199199
set( CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.md" )

clients/benchmarks/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ if( NOT USE_CUDA )
125125
target_link_libraries( hipblas-bench PRIVATE hip::${CUSTOM_TARGET} )
126126
endif()
127127

128-
if( CMAKE_CXX_COMPILER MATCHES ".*/hcc$|.*/hipcc$" )
128+
if( CMAKE_CXX_COMPILER MATCHES ".*/hipcc$" )
129129
# hip-clang needs specific flag to turn on pthread and m
130130
target_link_libraries( hipblas-bench PRIVATE -lpthread -lm )
131131

clients/benchmarks/client.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,9 @@
219219
#include "testing_trtri_strided_batched.hpp"
220220
// solver functions
221221
#ifdef __HIP_PLATFORM_SOLVER__
222+
#include "testing_gels.hpp"
223+
#include "testing_gels_batched.hpp"
224+
#include "testing_gels_strided_batched.hpp"
222225
#include "testing_geqrf.hpp"
223226
#include "testing_geqrf_batched.hpp"
224227
#include "testing_geqrf_strided_batched.hpp"
@@ -512,6 +515,9 @@ struct perf_blas<T, U, std::enable_if_t<std::is_same<T, float>{} || std::is_same
512515
{"getrs", testing_getrs<T>},
513516
{"getrs_batched", testing_getrs_batched<T>},
514517
{"getrs_strided_batched", testing_getrs_strided_batched<T>},
518+
{"gels", testing_gels<T>},
519+
{"gels_batched", testing_gels_batched<T>},
520+
{"gels_strided_batched", testing_gels_strided_batched<T>},
515521
#endif
516522

517523
// Aux
@@ -732,6 +738,9 @@ struct perf_blas<
732738
{"getrs", testing_getrs<T>},
733739
{"getrs_batched", testing_getrs_batched<T>},
734740
{"getrs_strided_batched", testing_getrs_strided_batched<T>},
741+
{"gels", testing_gels<T>},
742+
{"gels_batched", testing_gels_batched<T>},
743+
{"gels_strided_batched", testing_gels_strided_batched<T>},
735744
#endif
736745
};
737746
run_function(map, arg);

clients/common/cblas_interface.cpp

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,51 @@ void zgeqrf_(int* m,
103103
int* lwork,
104104
int* info);
105105

106+
// Prototype for the externally defined float gels routine (trailing-underscore symbol).
// Every argument, including the scalar sizes and the trans flag, is taken by pointer.
// NOTE(review): presumably the Fortran LAPACK SGELS entry point — confirm against the linked LAPACK.
void sgels_(char* trans,
107+
int* m,
108+
int* n,
109+
int* nrhs,
110+
float* A,
111+
int* lda,
112+
float* B,
113+
int* ldb,
114+
float* work,
115+
int* lwork,
116+
int* info);
117+
// Prototype for the externally defined double gels routine (trailing-underscore symbol).
// Every argument, including the scalar sizes and the trans flag, is taken by pointer.
// NOTE(review): presumably the Fortran LAPACK DGELS entry point — confirm against the linked LAPACK.
void dgels_(char* trans,
118+
int* m,
119+
int* n,
120+
int* nrhs,
121+
double* A,
122+
int* lda,
123+
double* B,
124+
int* ldb,
125+
double* work,
126+
int* lwork,
127+
int* info);
128+
// Prototype for the externally defined hipblasComplex gels routine (trailing-underscore symbol).
// Every argument, including the scalar sizes and the trans flag, is taken by pointer.
// NOTE(review): presumably the Fortran LAPACK CGELS entry point; assumes hipblasComplex is
// layout-compatible with the callee's complex type — confirm.
void cgels_(char* trans,
129+
int* m,
130+
int* n,
131+
int* nrhs,
132+
hipblasComplex* A,
133+
int* lda,
134+
hipblasComplex* B,
135+
int* ldb,
136+
hipblasComplex* work,
137+
int* lwork,
138+
int* info);
139+
// Prototype for the externally defined hipblasDoubleComplex gels routine (trailing-underscore symbol).
// Every argument, including the scalar sizes and the trans flag, is taken by pointer.
// NOTE(review): presumably the Fortran LAPACK ZGELS entry point; assumes hipblasDoubleComplex is
// layout-compatible with the callee's complex type — confirm.
void zgels_(char* trans,
140+
int* m,
141+
int* n,
142+
int* nrhs,
143+
hipblasDoubleComplex* A,
144+
int* lda,
145+
hipblasDoubleComplex* B,
146+
int* ldb,
147+
hipblasDoubleComplex* work,
148+
int* lwork,
149+
int* info);
150+
106151
void spotrf_(char* uplo, int* m, float* A, int* lda, int* info);
107152
void dpotrf_(char* uplo, int* m, double* A, int* lda, int* info);
108153
void cpotrf_(char* uplo, int* m, hipblasComplex* A, int* lda, int* info);
@@ -3699,3 +3744,72 @@ int cblas_geqrf<hipblasDoubleComplex>(int m,
36993744
zgeqrf_(&m, &n, A, &lda, tau, work, &lwork, &info);
37003745
return info;
37013746
}
3747+
3748+
// gels
// float specialization of cblas_gels: forwards the by-value arguments to sgels_ by address
// (A, B and work are passed through as the pointers received) and returns the local `info`.
3749+
template <>
3750+
int cblas_gels<float>(char trans,
3751+
int m,
3752+
int n,
3753+
int nrhs,
3754+
float* A,
3755+
int lda,
3756+
float* B,
3757+
int ldb,
3758+
float* work,
3759+
int lwork)
3760+
{
3761+
int info; // not initialized here; presumably written by sgels_ through &info — confirm
3762+
sgels_(&trans, &m, &n, &nrhs, A, &lda, B, &ldb, work, &lwork, &info);
3763+
return info;
3764+
}
3765+
3766+
// double specialization of cblas_gels: forwards the by-value arguments to dgels_ by address
// (A, B and work are passed through as the pointers received) and returns the local `info`.
template <>
3767+
int cblas_gels<double>(char trans,
3768+
int m,
3769+
int n,
3770+
int nrhs,
3771+
double* A,
3772+
int lda,
3773+
double* B,
3774+
int ldb,
3775+
double* work,
3776+
int lwork)
3777+
{
3778+
int info; // not initialized here; presumably written by dgels_ through &info — confirm
3779+
dgels_(&trans, &m, &n, &nrhs, A, &lda, B, &ldb, work, &lwork, &info);
3780+
return info;
3781+
}
3782+
3783+
// hipblasComplex specialization of cblas_gels: forwards the by-value arguments to cgels_ by
// address (A, B and work are passed through as the pointers received) and returns the local `info`.
template <>
3784+
int cblas_gels<hipblasComplex>(char trans,
3785+
int m,
3786+
int n,
3787+
int nrhs,
3788+
hipblasComplex* A,
3789+
int lda,
3790+
hipblasComplex* B,
3791+
int ldb,
3792+
hipblasComplex* work,
3793+
int lwork)
3794+
{
3795+
int info; // not initialized here; presumably written by cgels_ through &info — confirm
3796+
cgels_(&trans, &m, &n, &nrhs, A, &lda, B, &ldb, work, &lwork, &info);
3797+
return info;
3798+
}
3799+
3800+
// hipblasDoubleComplex specialization of cblas_gels: forwards the by-value arguments to zgels_ by
// address (A, B and work are passed through as the pointers received) and returns the local `info`.
template <>
3801+
int cblas_gels<hipblasDoubleComplex>(char trans,
3802+
int m,
3803+
int n,
3804+
int nrhs,
3805+
hipblasDoubleComplex* A,
3806+
int lda,
3807+
hipblasDoubleComplex* B,
3808+
int ldb,
3809+
hipblasDoubleComplex* work,
3810+
int lwork)
3811+
{
3812+
int info; // not initialized here; presumably written by zgels_ through &info — confirm
3813+
zgels_(&trans, &m, &n, &nrhs, A, &lda, B, &ldb, work, &lwork, &info);
3814+
return info;
3815+
}

0 commit comments

Comments
 (0)