Skip to content

Commit 00b0358

Browse files
authored
Merge pull request #320 from ROCmSoftwarePlatform/revert-319-master
Revert "merge staging into master after CQE GO for c7a7b1c 2021-03-10 16:24:14 -0700"
2 parents f039925 + f0e62e9 commit 00b0358

32 files changed

+588
-1417
lines changed

CHANGELOG.md

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,5 @@
11
# Change Log for hipBLAS
22

3-
## [hipBLAS 0.45.0 for ROCm 4.3.0]
4-
### Added
5-
- Added hipblasStatusToString
6-
73
## [hipBLAS 0.44.0 for ROCm 4.2.0]
84
### Added
95
- Made necessary changes to work with rocBLAS' gemm_ex changes. When using rocBLAS backend, hipBLAS will query the preferable

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ include( ROCMInstallTargets )
4949
include( ROCMPackageConfigHelpers )
5050
include( ROCMInstallSymlinks )
5151

52-
set ( VERSION_STRING "0.45.0" )
52+
set ( VERSION_STRING "0.44.0" )
5353
rocm_setup_version( VERSION ${VERSION_STRING} )
5454

5555
if( NOT DEFINED $ENV{HIP_PATH})

clients/benchmarks/client.cpp

Lines changed: 47 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -62,10 +62,10 @@
6262
#include "testing_rotmg_strided_batched.hpp"
6363
#include "testing_scal.hpp"
6464
#include "testing_scal_batched.hpp"
65-
#include "testing_scal_batched_ex.hpp"
66-
#include "testing_scal_ex.hpp"
65+
// #include "testing_scal_batched_ex.hpp"
66+
// #include "testing_scal_ex.hpp"
6767
#include "testing_scal_strided_batched.hpp"
68-
#include "testing_scal_strided_batched_ex.hpp"
68+
// #include "testing_scal_strided_batched_ex.hpp"
6969
#include "testing_swap.hpp"
7070
#include "testing_swap_batched.hpp"
7171
#include "testing_swap_strided_batched.hpp"
@@ -318,18 +318,9 @@ struct perf_blas<T, U, std::enable_if_t<std::is_same<T, float>{} || std::is_same
318318
void operator()(const Arguments& arg)
319319
{
320320
static const func_map fmap = {
321-
{"asum", testing_asum<T>},
322-
{"asum_batched", testing_asum_batched<T>},
323-
{"asum_strided_batched", testing_asum_strided_batched<T>},
324-
{"axpy", testing_axpy<T>},
325-
{"axpy_batched", testing_axpy_batched<T>},
326-
{"axpy_strided_batched", testing_axpy_strided_batched<T>},
327321
{"copy", testing_copy<T>},
328322
{"copy_batched", testing_copy_batched<T>},
329323
{"copy_strided_batched", testing_copy_strided_batched<T>},
330-
{"dot", testing_dot<T>},
331-
{"dot_batched", testing_dot_batched<T>},
332-
{"dot_strided_batched", testing_dot_strided_batched<T>},
333324
{"swap", testing_swap<T>},
334325
{"swap_batched", testing_swap_batched<T>},
335326
{"swap_strided_batched", testing_swap_strided_batched<T>},
@@ -340,6 +331,15 @@ struct perf_blas<T, U, std::enable_if_t<std::is_same<T, float>{} || std::is_same
340331
{"set_get_matrix", testing_set_get_matrix<T>},
341332
{"set_get_matrix_async", testing_set_get_matrix_async<T>},
342333
// L1
334+
{"asum", testing_asum<T>},
335+
{"asum_batched", testing_asum_batched<T>},
336+
{"asum_strided_batched", testing_asum_strided_batched<T>},
337+
{"axpy", testing_axpy<T>},
338+
{"axpy_batched", testing_axpy_batched<T>},
339+
{"axpy_strided_batched", testing_axpy_strided_batched<T>},
340+
{"dot", testing_dot<T>},
341+
{"dot_batched", testing_dot_batched<T>},
342+
{"dot_strided_batched", testing_dot_strided_batched<T>},
343343
{"iamax", testing_iamax<T>},
344344
{"iamax_batched", testing_iamax_batched<T>},
345345
{"iamax_strided_batched", testing_iamax_strided_batched<T>},
@@ -416,14 +416,13 @@ struct perf_blas<T, U, std::enable_if_t<std::is_same<T, float>{} || std::is_same
416416
{"syrkx", testing_syr2k<T, false>},
417417
{"syrkx_batched", testing_syr2k_batched<T, false>},
418418
{"syrkx_strided_batched", testing_syr2k_strided_batched<T, false>},
419-
419+
{"trmm", testing_trmm<T>},
420+
{"trmm_batched", testing_trmm_batched<T>},
421+
{"trmm_strided_batched", testing_trmm_strided_batched<T>},
420422
{"trtri", testing_trtri<T>},
421423
{"trtri_batched", testing_trtri_batched<T>},
422424
{"trtri_strided_batched", testing_trtri_strided_batched<T>},
423425
*/
424-
{"trmm", testing_trmm<T>},
425-
{"trmm_batched", testing_trmm_batched<T>},
426-
{"trmm_strided_batched", testing_trmm_strided_batched<T>},
427426
{"gemm", testing_gemm<T>},
428427
{"gemm_batched", testing_gemm_batched<T>},
429428
{"gemm_strided_batched", testing_gemm_strided_batched<T>},
@@ -454,9 +453,11 @@ struct perf_blas<T, U, std::enable_if_t<std::is_same<T, hipblasBfloat16>{}>> : h
454453
void operator()(const Arguments& arg)
455454
{
456455
static const func_map map = {
456+
/*
457457
{"dot", testing_dot<T>},
458458
{"dot_batched", testing_dot_batched<T>},
459459
{"dot_strided_batched", testing_dot_strided_batched<T>},
460+
*/
460461
};
461462
run_function(map, arg);
462463
}
@@ -468,12 +469,13 @@ struct perf_blas<T, U, std::enable_if_t<std::is_same<T, hipblasHalf>{}>> : hipbl
468469
void operator()(const Arguments& arg)
469470
{
470471
static const func_map map = {
471-
{"axpy", testing_axpy<T>},
472-
{"axpy_batched", testing_axpy_batched<T>},
473-
{"axpy_strided_batched", testing_axpy_strided_batched<T>},
474-
{"dot", testing_dot<T>},
475-
{"dot_batched", testing_dot_batched<T>},
476-
{"dot_strided_batched", testing_dot_strided_batched<T>},
472+
/*{"axpy", testing_axpy<T>},
473+
{"axpy_batched", testing_axpy_batched<T>},
474+
{"axpy_strided_batched", testing_axpy_strided_batched<T>},
475+
{"dot", testing_dot<T>},
476+
{"dot_batched", testing_dot_batched<T>},
477+
{"dot_strided_batched", testing_dot_strided_batched<T>},
478+
*/
477479
{"gemm", testing_gemm<T>},
478480
{"gemm_batched", testing_gemm_batched<T>},
479481
{"gemm_strided_batched", testing_gemm_strided_batched<T>},
@@ -493,28 +495,27 @@ struct perf_blas<
493495
void operator()(const Arguments& arg)
494496
{
495497
static const func_map map = {
496-
{"asum", testing_asum<T>},
497-
{"asum_batched", testing_asum_batched<T>},
498-
{"asum_strided_batched", testing_asum_strided_batched<T>},
499-
{"axpy", testing_axpy<T>},
500-
{"axpy_batched", testing_axpy_batched<T>},
501-
{"axpy_strided_batched", testing_axpy_strided_batched<T>},
502498
{"copy", testing_copy<T>},
503499
{"copy_batched", testing_copy_batched<T>},
504500
{"copy_strided_batched", testing_copy_strided_batched<T>},
505-
{"dot", testing_dot<T>},
506-
{"dot_batched", testing_dot_batched<T>},
507-
{"dot_strided_batched", testing_dot_strided_batched<T>},
508-
{"dotc", testing_dotc<T>},
509-
{"dotc_batched", testing_dotc_batched<T>},
510-
{"dotc_strided_batched", testing_dotc_strided_batched<T>},
511501
{"swap", testing_swap<T>},
512502
{"swap_batched", testing_swap_batched<T>},
513503
{"swap_strided_batched", testing_swap_strided_batched<T>},
514504
{"scal", testing_scal<T>},
515505
{"scal_batched", testing_scal_batched<T>},
516506
{"scal_strided_batched", testing_scal_strided_batched<T>},
517-
/*
507+
/* {"asum", testing_asum<T>},
508+
{"asum_batched", testing_asum_batched<T>},
509+
{"asum_strided_batched", testing_asum_strided_batched<T>},
510+
{"axpy", testing_axpy<T>},
511+
{"axpy_batched", testing_axpy_batched<T>},
512+
{"axpy_strided_batched", testing_axpy_strided_batched<T>},
513+
{"dot", testing_dot<T>},
514+
{"dot_batched", testing_dot_batched<T>},
515+
{"dot_strided_batched", testing_dot_strided_batched<T>},
516+
{"dotc", testing_dotc<T>},
517+
{"dotc_batched", testing_dotc_batched<T>},
518+
{"dotc_strided_batched", testing_dotc_strided_batched<T>},
518519
{"iamax", testing_iamax<T>},
519520
{"iamax_batched", testing_iamax_batched<T>},
520521
{"iamax_strided_batched", testing_iamax_strided_batched<T>},
@@ -633,9 +634,10 @@ struct perf_blas<
633634
{"trsv", testing_trsv<T>},
634635
{"trsv_batched", testing_trsv_batched<T>},
635636
{"trsv_strided_batched", testing_trsv_strided_batched<T>},
636-
{"trmm", testing_trmm<T>},
637+
/*{"trmm", testing_trmm<T>},
637638
{"trmm_batched", testing_trmm_batched<T>},
638639
{"trmm_strided_batched", testing_trmm_strided_batched<T>},
640+
*/
639641
};
640642
run_function(map, arg);
641643
}
@@ -756,7 +758,6 @@ struct perf_blas_scal_ex<
756758
|| (std::is_same<Ta, hipblasDoubleComplex>{} && std::is_same<Ta, Tx>{}
757759
&& std::is_same<Tx, Tex>{})
758760
|| (std::is_same<Ta, hipblasHalf>{} && std::is_same<Ta, Tx>{} && std::is_same<Tex, float>{})
759-
|| (std::is_same<Ta, float>{} && std::is_same<Tx, hipblasHalf>{} && std::is_same<Ta, Tex>{})
760761
|| (std::is_same<Ta, float>{} && std::is_same<Tx, hipblasComplex>{}
761762
&& std::is_same<Tx, Tex>{})
762763
|| (std::is_same<Ta, double>{} && std::is_same<Tx, hipblasDoubleComplex>{}
@@ -765,9 +766,9 @@ struct perf_blas_scal_ex<
765766
void operator()(const Arguments& arg)
766767
{
767768
static const func_map map = {
768-
{"scal_ex", testing_scal_ex_template<Ta, Tx, Tex>},
769-
{"scal_batched_ex", testing_scal_batched_ex_template<Ta, Tx, Tex>},
770-
{"scal_strided_batched_ex", testing_scal_strided_batched_ex_template<Ta, Tx, Tex>},
769+
// {"scal_ex", testing_scal_ex<Ta, Tx, Tex>},
770+
// {"scal_batched_ex", testing_scal_batched_ex<Ta, Tx, Tex>},
771+
// {"scal_strided_batched_ex", testing_scal_strided_batched_ex<Ta, Tx, Tex>},
771772
};
772773
run_function(map, arg);
773774
}
@@ -961,9 +962,6 @@ int run_bench_test(Arguments& arg)
961962
}
962963
else
963964
{
964-
if(!strcmp(function, "scal_ex") || !strcmp(function, "scal_batched_ex")
965-
|| !strcmp(function, "scal_strided_batched_ex"))
966-
hipblas_blas1_ex_dispatch<perf_blas_scal_ex>(arg);
967965
/*
968966
if(!strcmp(function, "scal") || !strcmp(function, "scal_batched")
969967
|| !strcmp(function, "scal_strided_batched"))
@@ -976,10 +974,13 @@ int run_bench_test(Arguments& arg)
976974
hipblas_blas1_dispatch<perf_blas_rot>(arg);
977975
else if(!strcmp(function, "axpy_ex") || !strcmp(function, "axpy_batched_ex")
978976
|| !strcmp(function, "axpy_strided_batched_ex"))
979-
hipblas_blas1_ex_dispatch<perf_blas_axpy_ex>(arg);*/
980-
977+
hipblas_blas1_ex_dispatch<perf_blas_axpy_ex>(arg);
978+
else if(!strcmp(function, "scal_ex") || !strcmp(function, "scal_batched_ex")
979+
|| !strcmp(function, "scal_strided_batched_ex"))
980+
hipblas_blas1_ex_dispatch<perf_blas_scal_ex>(arg);
981981
else
982-
hipblas_simple_dispatch<perf_blas>(arg);
982+
*/
983+
hipblas_simple_dispatch<perf_blas>(arg);
983984
}
984985
return 0;
985986
}

clients/common/utility.cpp

Lines changed: 0 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -75,48 +75,6 @@ std::string hipblas_exepath()
7575
return pathstr;
7676
}
7777

78-
/*****************
79-
* local handles *
80-
*****************/
81-
82-
hipblasLocalHandle::hipblasLocalHandle()
83-
{
84-
auto status = hipblasCreate(&m_handle);
85-
if(status != HIPBLAS_STATUS_SUCCESS)
86-
throw std::runtime_error(hipblasStatusToString(status));
87-
}
88-
89-
hipblasLocalHandle::hipblasLocalHandle(const Arguments& arg)
90-
: hipblasLocalHandle()
91-
{
92-
// for future customization of handle based on arguments, example from rocblas below
93-
94-
/*
95-
auto status = rocblas_set_atomics_mode(m_handle, arg.atomics_mode);
96-
97-
if(status == rocblas_status_success)
98-
{
99-
// If the test specifies user allocated workspace, allocate and use it
100-
if(arg.user_allocated_workspace)
101-
{
102-
if((hipMalloc)(&m_memory, arg.user_allocated_workspace) != hipSuccess)
103-
throw std::bad_alloc();
104-
status = rocblas_set_workspace(m_handle, m_memory, arg.user_allocated_workspace);
105-
}
106-
}
107-
108-
if(status != rocblas_status_success)
109-
throw std::runtime_error(rocblas_status_to_string(status));
110-
*/
111-
}
112-
113-
hipblasLocalHandle::~hipblasLocalHandle()
114-
{
115-
if(m_memory)
116-
(hipFree)(m_memory);
117-
hipblasDestroy(m_handle);
118-
}
119-
12078
#ifdef __cplusplus
12179
extern "C" {
12280
#endif

clients/gtest/blas1_gtest.cpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1934,7 +1934,7 @@ TEST_P(blas1_gtest, asum_float)
19341934
// The Arguments data structure has physical meaning associated with its fields,
19351935
// while the tuple is non-intuitive.
19361936
Arguments arg = setup_blas1_arguments(GetParam());
1937-
hipblasStatus_t status = testing_asum<float>(arg);
1937+
hipblasStatus_t status = testing_asum<float, float>(arg);
19381938
// if not success, then the input argument is problematic, so detect the error message
19391939
if(status != HIPBLAS_STATUS_SUCCESS)
19401940
{
@@ -1956,7 +1956,7 @@ TEST_P(blas1_gtest, asum_float)
19561956
TEST_P(blas1_gtest, asum_float_complex)
19571957
{
19581958
Arguments arg = setup_blas1_arguments(GetParam());
1959-
hipblasStatus_t status = testing_asum<hipblasComplex>(arg);
1959+
hipblasStatus_t status = testing_asum<hipblasComplex, float>(arg);
19601960
// if not success, then the input argument is problematic, so detect the error message
19611961
if(status != HIPBLAS_STATUS_SUCCESS)
19621962
{
@@ -1978,7 +1978,7 @@ TEST_P(blas1_gtest, asum_float_complex)
19781978
TEST_P(blas1_gtest, asum_double_complex)
19791979
{
19801980
Arguments arg = setup_blas1_arguments(GetParam());
1981-
hipblasStatus_t status = testing_asum<hipblasDoubleComplex>(arg);
1981+
hipblasStatus_t status = testing_asum<hipblasDoubleComplex, double>(arg);
19821982
// if not success, then the input argument is problematic, so detect the error message
19831983
if(status != HIPBLAS_STATUS_SUCCESS)
19841984
{
@@ -2001,7 +2001,7 @@ TEST_P(blas1_gtest, asum_double_complex)
20012001
TEST_P(blas1_gtest, asum_batched_float)
20022002
{
20032003
Arguments arg = setup_blas1_arguments(GetParam());
2004-
hipblasStatus_t status = testing_asum_batched<float>(arg);
2004+
hipblasStatus_t status = testing_asum_batched<float, float>(arg);
20052005
// if not success, then the input argument is problematic, so detect the error message
20062006
if(status != HIPBLAS_STATUS_SUCCESS)
20072007
{
@@ -2027,7 +2027,7 @@ TEST_P(blas1_gtest, asum_batched_float)
20272027
TEST_P(blas1_gtest, asum_batched_float_complex)
20282028
{
20292029
Arguments arg = setup_blas1_arguments(GetParam());
2030-
hipblasStatus_t status = testing_asum_batched<hipblasComplex>(arg);
2030+
hipblasStatus_t status = testing_asum_batched<hipblasComplex, float>(arg);
20312031
// if not success, then the input argument is problematic, so detect the error message
20322032
if(status != HIPBLAS_STATUS_SUCCESS)
20332033
{
@@ -2053,7 +2053,7 @@ TEST_P(blas1_gtest, asum_batched_float_complex)
20532053
TEST_P(blas1_gtest, asum_batched_double_complex)
20542054
{
20552055
Arguments arg = setup_blas1_arguments(GetParam());
2056-
hipblasStatus_t status = testing_asum_batched<hipblasDoubleComplex>(arg);
2056+
hipblasStatus_t status = testing_asum_batched<hipblasDoubleComplex, double>(arg);
20572057
// if not success, then the input argument is problematic, so detect the error message
20582058
if(status != HIPBLAS_STATUS_SUCCESS)
20592059
{
@@ -2080,7 +2080,7 @@ TEST_P(blas1_gtest, asum_batched_double_complex)
20802080
TEST_P(blas1_gtest, asum_strided_batched_float)
20812081
{
20822082
Arguments arg = setup_blas1_arguments(GetParam());
2083-
hipblasStatus_t status = testing_asum_strided_batched<float>(arg);
2083+
hipblasStatus_t status = testing_asum_strided_batched<float, float>(arg);
20842084
// if not success, then the input argument is problematic, so detect the error message
20852085
if(status != HIPBLAS_STATUS_SUCCESS)
20862086
{
@@ -2106,7 +2106,7 @@ TEST_P(blas1_gtest, asum_strided_batched_float)
21062106
TEST_P(blas1_gtest, asum_strided_batched_float_complex)
21072107
{
21082108
Arguments arg = setup_blas1_arguments(GetParam());
2109-
hipblasStatus_t status = testing_asum_strided_batched<hipblasComplex>(arg);
2109+
hipblasStatus_t status = testing_asum_strided_batched<hipblasComplex, float>(arg);
21102110
// if not success, then the input argument is problematic, so detect the error message
21112111
if(status != HIPBLAS_STATUS_SUCCESS)
21122112
{
@@ -2132,7 +2132,7 @@ TEST_P(blas1_gtest, asum_strided_batched_float_complex)
21322132
TEST_P(blas1_gtest, asum_strided_batched_double_complex)
21332133
{
21342134
Arguments arg = setup_blas1_arguments(GetParam());
2135-
hipblasStatus_t status = testing_asum_strided_batched<hipblasDoubleComplex>(arg);
2135+
hipblasStatus_t status = testing_asum_strided_batched<hipblasDoubleComplex, double>(arg);
21362136
// if not success, then the input argument is problematic, so detect the error message
21372137
if(status != HIPBLAS_STATUS_SUCCESS)
21382138
{

clients/gtest/trmm_gtest.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/* ************************************************************************
2-
* Copyright 2016-2021 Advanced Micro Devices, Inc.
2+
* Copyright 2016-2020 Advanced Micro Devices, Inc.
33
*
44
* ************************************************************************ */
55

@@ -140,7 +140,7 @@ Arguments setup_trmm_arguments(trmm_tuple tup)
140140
arg.transA_option = side_uplo_transA_diag[2];
141141
arg.diag_option = side_uplo_transA_diag[3];
142142

143-
arg.timing = 0;
143+
arg.timing = 1;
144144

145145
arg.stride_scale = stride_scale;
146146
arg.batch_count = batch_count;

clients/include/bytes.hpp

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -199,13 +199,6 @@ constexpr double gemm_gbyte_count(int m, int n, int k)
199199
return (sizeof(T) * (m * k + n * k + m * n)) / 1e9;
200200
}
201201

202-
/* \brief byte counts of TRMM */
203-
template <typename T>
204-
constexpr double trmm_gbyte_count(int m, int n, int k)
205-
{
206-
return (sizeof(T) * (m * n * 2 + k * k / 2)) / 1e9;
207-
}
208-
209202
/* \brief byte counts of TRSM */
210203
template <typename T>
211204
constexpr double trsm_gbyte_count(int m, int n, int k)

0 commit comments

Comments
 (0)