Skip to content

Commit b80975b

Browse files
authored
Merge pull request #520 from daineAMD/master
Merge staging into master for ROCm 5.4
2 parents 587bfbe + 5f9a3b3 commit b80975b

File tree

11 files changed

+209
-26
lines changed

11 files changed

+209
-26
lines changed

.github/workflows/docs.yaml

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
name: Upload to the upload server
2+
3+
# Controls when the workflow will run
4+
on:
5+
push:
6+
branches: [develop, master]
7+
tags:
8+
- rocm-5.*
9+
release:
10+
types: [published]
11+
12+
# Allows you to run this workflow manually from the Actions tab
13+
workflow_dispatch:
14+
15+
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
16+
jobs:
17+
# This workflow contains a single job called "build"
18+
build:
19+
# The type of runner that the job will run on
20+
runs-on: ubuntu-latest
21+
22+
# Steps represent a sequence of tasks that will be executed as part of the job
23+
steps:
24+
# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
25+
- uses: actions/checkout@v2
26+
27+
- name: getting branch name
28+
shell: bash
29+
run: echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})"
30+
id: branch_name
31+
- name: getting tag name
32+
shell: bash
33+
run: echo "##[set-output name=tag;]$(echo ${GITHUB_REF_NAME})"
34+
id: tag_name
35+
- name: zipping files
36+
run: zip -r ${{ github.event.repository.name }}_${{ steps.tag_name.outputs.tag }}.zip . -x '*.git*' '*.idea*'
37+
- name: echo-step
38+
run: echo "${{ github.event.release.target_commitish }}"
39+
- name: uploading archive to prod
40+
if: ${{ steps.branch_name.outputs.branch == 'master' || github.event.release.target_commitish == 'master'}}
41+
uses: wlixcc/SFTP-Deploy-Action@v1.2.1
42+
with:
43+
username: ${{ secrets.USERNAME }}
44+
server: ${{ secrets.SERVER }}
45+
ssh_private_key: ${{ secrets.SSH_PRIVATE_KEY }}
46+
local_path: ${{ github.event.repository.name }}_${{ steps.tag_name.outputs.tag }}.zip
47+
remote_path: '${{ secrets.PROD_UPLOAD_URL }}'
48+
args: '-o ConnectTimeout=5'
49+
- name: uploading archive to staging
50+
if: ${{ steps.branch_name.outputs.branch == 'develop' || github.event.release.target_commitish == 'develop' }}
51+
uses: wlixcc/SFTP-Deploy-Action@v1.2.1
52+
with:
53+
username: ${{ secrets.USERNAME }}
54+
server: ${{ secrets.SERVER }}
55+
ssh_private_key: ${{ secrets.SSH_PRIVATE_KEY }}
56+
local_path: ${{ github.event.repository.name }}_${{ steps.tag_name.outputs.tag }}.zip
57+
remote_path: '${{ secrets.STG_UPLOAD_URL }}'
58+
args: '-o ConnectTimeout=5'

.jenkins/staticanalysis.groovy

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,6 @@ ci: {
2929

3030
properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * 2')])]))
3131
stage(urlJobName) {
32-
runCI([ubuntu18:['any']], urlJobName)
32+
runCI([ubuntu20:['any']], urlJobName)
3333
}
3434
}

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
# Change Log for hipBLAS
22

3+
## (Unreleased) hipBLAS 0.53.0
4+
### Added
5+
- Allow for selection of int8 datatype
6+
- Added support for hipblasXgels and hipblasXgelsStridedBatched operations (with s,d,c,z precisions),
7+
only supported with rocBLAS backend
8+
- Added support for hipblasXgelsBatched operations (with s,d,c,z precisions)
9+
310
## (Unreleased) hipBLAS 0.52.0
411
### Added
512
- Added --cudapath option to install.sh to allow users to specify which CUDA build they would like to use.

CMakeLists.txt

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -158,11 +158,15 @@ if( BUILD_CLIENTS_SAMPLES OR BUILD_CLIENTS_TESTS OR BUILD_CLIENTS_BENCHMARKS )
158158
string(TOLOWER "${CLIENTS_OS}" CLIENTS_OS)
159159
rocm_read_os_release(CLIENTS_OS_VERSION VERSION_ID)
160160
endif()
161-
set(GFORTRAN_PKG "gcc-gfortran")
162-
if(CLIENTS_OS STREQUAL "sles")
163-
set(GFORTRAN_PKG "gcc-fortran")
164-
elseif(CLIENTS_OS STREQUAL "centos" AND CLIENTS_OS_VERSION EQUAL 7)
165-
set(GFORTRAN_PKG "devtoolset-7-gcc-gfortran")
161+
message(STATUS "OS: ${CLIENTS_OS} ${CLIENTS_OS_VERSION}")
162+
set(GFORTRAN_RPM "libgfortran4")
163+
set(GFORTRAN_DEB "libgfortran4")
164+
if(CLIENTS_OS STREQUAL "centos" OR CLIENTS_OS STREQUAL "rhel")
165+
if(CLIENTS_OS_VERSION VERSION_GREATER_EQUAL "8")
166+
set(GFORTRAN_RPM "libgfortran")
167+
endif()
168+
elseif(CLIENTS_OS STREQUAL "ubuntu" AND CLIENTS_OS_VERSION VERSION_GREATER_EQUAL "20.04")
169+
set(GFORTRAN_DEB "libgfortran5")
166170
endif()
167171
rocm_package_setup_component(clients)
168172
rocm_package_setup_client_component(clients-common)
@@ -171,16 +175,16 @@ if( BUILD_CLIENTS_SAMPLES OR BUILD_CLIENTS_TESTS OR BUILD_CLIENTS_BENCHMARKS )
171175
tests
172176
DEPENDS
173177
COMPONENT clients-common
174-
DEB "gfortran"
175-
RPM "${GFORTRAN_PKG}")
178+
DEB "${GFORTRAN_DEB}"
179+
RPM "${GFORTRAN_RPM}")
176180
endif()
177181
if(BUILD_CLIENTS_BENCHMARKS)
178182
rocm_package_setup_client_component(
179183
benchmarks
180184
DEPENDS
181185
COMPONENT clients-common
182-
DEB "gfortran"
183-
RPM "${GFORTRAN_PKG}")
186+
DEB "${GFORTRAN_DEB}"
187+
RPM "${GFORTRAN_RPM}")
184188
endif()
185189
add_subdirectory( clients )
186190
endif( )

bump_staging_version.sh

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,14 @@
55
# - run this script in master branch
66
# - after running this script merge master into develop
77

8-
OLD_HIPBLAS_VERSION="0.52.0"
9-
NEW_HIPBLAS_VERSION="0.53.0"
8+
OLD_HIPBLAS_VERSION="0.53.0"
9+
NEW_HIPBLAS_VERSION="0.54.0"
1010

11-
OLD_MINIMUM_ROCBLAS_VERSION="2.45.0"
12-
NEW_MINIMUM_ROCBLAS_VERSION="2.46.0"
11+
OLD_MINIMUM_ROCBLAS_VERSION="2.46.0"
12+
NEW_MINIMUM_ROCBLAS_VERSION="2.47.0"
1313

14-
OLD_MINIMUM_ROCSOLVER_VERSION="3.19.0"
15-
NEW_MINIMUM_ROCSOLVER_VERSION="3.20.0"
14+
OLD_MINIMUM_ROCSOLVER_VERSION="3.20.0"
15+
NEW_MINIMUM_ROCSOLVER_VERSION="3.21.0"
1616

1717
sed -i "s/${OLD_HIPBLAS_VERSION}/${NEW_HIPBLAS_VERSION}/g" CMakeLists.txt
1818
sed -i "s/${OLD_MINIMUM_ROCBLAS_VERSION}/${NEW_MINIMUM_ROCBLAS_VERSION}/g" CMakeLists.txt

clients/common/near.cpp

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,8 @@
7676

7777
#endif
7878

79-
#define NEAR_ASSERT_HALF(a, b, err) ASSERT_NEAR(float(a), float(b), err)
79+
#define NEAR_ASSERT_HALF(a, b, err) ASSERT_NEAR(half_to_float(a), half_to_float(b), err)
80+
#define NEAR_ASSERT_BF16(a, b, err) ASSERT_NEAR(bfloat16_to_float(a), bfloat16_to_float(b), err)
8081

8182
#define NEAR_ASSERT_COMPLEX(a, b, err) \
8283
do \
@@ -105,6 +106,13 @@ void near_check_general(
105106
NEAR_CHECK(M, N, 1, lda, 0, hCPU, hGPU, abs_error, NEAR_ASSERT_HALF);
106107
}
107108

109+
template <>
110+
void near_check_general(
111+
int M, int N, int lda, hipblasBfloat16* hCPU, hipblasBfloat16* hGPU, double abs_error)
112+
{
113+
NEAR_CHECK(M, N, 1, lda, 0, hCPU, hGPU, abs_error, NEAR_ASSERT_BF16);
114+
}
115+
108116
template <>
109117
void near_check_general(
110118
int M, int N, int lda, hipblasComplex* hCPU, hipblasComplex* hGPU, double abs_error)
@@ -160,6 +168,19 @@ void near_check_general(int M,
160168
NEAR_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, abs_error, NEAR_ASSERT_HALF);
161169
}
162170

171+
template <>
172+
void near_check_general(int M,
173+
int N,
174+
int batch_count,
175+
int lda,
176+
hipblasStride strideA,
177+
hipblasBfloat16* hCPU,
178+
hipblasBfloat16* hGPU,
179+
double abs_error)
180+
{
181+
NEAR_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, abs_error, NEAR_ASSERT_BF16);
182+
}
183+
163184
template <>
164185
void near_check_general(int M,
165186
int N,
@@ -200,6 +221,18 @@ void near_check_general(int M,
200221
NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, abs_error, NEAR_ASSERT_HALF);
201222
}
202223

224+
template <>
225+
void near_check_general(int M,
226+
int N,
227+
int batch_count,
228+
int lda,
229+
host_vector<hipblasBfloat16> hCPU[],
230+
host_vector<hipblasBfloat16> hGPU[],
231+
double abs_error)
232+
{
233+
NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, abs_error, NEAR_ASSERT_BF16);
234+
}
235+
203236
template <>
204237
void near_check_general(int M,
205238
int N,
@@ -262,6 +295,18 @@ void near_check_general(int M,
262295
NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, abs_error, NEAR_ASSERT_HALF);
263296
}
264297

298+
template <>
299+
void near_check_general(int M,
300+
int N,
301+
int batch_count,
302+
int lda,
303+
hipblasBfloat16* hCPU[],
304+
hipblasBfloat16* hGPU[],
305+
double abs_error)
306+
{
307+
NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, abs_error, NEAR_ASSERT_BF16);
308+
}
309+
265310
template <>
266311
void near_check_general(
267312
int M, int N, int batch_count, int lda, float* hCPU[], float* hGPU[], double abs_error)

clients/include/near.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,4 +79,12 @@ void near_check_general(int M,
7979
host_vector<T> hGPU[],
8080
double abs_error);
8181

82+
// currently only used for half-precision comparisons in dot_ex tests
83+
template <class T>
84+
HIPBLAS_CLANG_STATIC constexpr double error_tolerance = 0.0;
85+
86+
// 2^-14 (approximately 6.1035e-05), the smallest positive normal number in IEEE 754 half precision (binary16)
87+
template <>
88+
HIPBLAS_CLANG_STATIC constexpr double error_tolerance<hipblasHalf> = 0.000061035;
89+
8290
#endif

clients/include/testing_dot_batched_ex.hpp

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ hipblasStatus_t testing_dot_batched_ex_template(const Arguments& argus)
105105
double gpu_time_used, hipblas_error_host, hipblas_error_device;
106106

107107
// Initial Data on CPU
108-
hipblas_init(hy, true, true);
108+
hipblas_init(hy, true, false);
109109
hipblas_init_alternating_sign(hx);
110110
CHECK_HIP_ERROR(dx.transfer_from(hx));
111111
CHECK_HIP_ERROR(dy.transfer_from(hy));
@@ -159,8 +159,31 @@ hipblasStatus_t testing_dot_batched_ex_template(const Arguments& argus)
159159

160160
if(argus.unit_check)
161161
{
162-
unit_check_general<Tr>(1, batch_count, 1, h_cpu_result, h_hipblas_result_host);
163-
unit_check_general<Tr>(1, batch_count, 1, h_cpu_result, h_hipblas_result_device);
162+
if(std::is_same<Tr, hipblasHalf>{})
163+
{
164+
double tol = error_tolerance<Tr> * N;
165+
near_check_general(1,
166+
1,
167+
batch_count,
168+
1,
169+
1,
170+
h_cpu_result.data(),
171+
h_hipblas_result_host.data(),
172+
tol);
173+
near_check_general(1,
174+
1,
175+
batch_count,
176+
1,
177+
1,
178+
h_cpu_result.data(),
179+
h_hipblas_result_device.data(),
180+
tol);
181+
}
182+
else
183+
{
184+
unit_check_general<Tr>(1, batch_count, 1, h_cpu_result, h_hipblas_result_host);
185+
unit_check_general<Tr>(1, batch_count, 1, h_cpu_result, h_hipblas_result_device);
186+
}
164187
}
165188
if(argus.norm_check)
166189
{

clients/include/testing_dot_ex.hpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -147,8 +147,17 @@ hipblasStatus_t testing_dot_ex_template(const Arguments& argus)
147147

148148
if(argus.unit_check)
149149
{
150-
unit_check_general<Tr>(1, 1, 1, &cpu_result, &hipblas_result_host);
151-
unit_check_general<Tr>(1, 1, 1, &cpu_result, &hipblas_result_device);
150+
if(std::is_same<Tr, hipblasHalf>{})
151+
{
152+
double tol = error_tolerance<Tr> * N;
153+
near_check_general(1, 1, 1, &cpu_result, &hipblas_result_host, tol);
154+
near_check_general(1, 1, 1, &cpu_result, &hipblas_result_device, tol);
155+
}
156+
else
157+
{
158+
unit_check_general<Tr>(1, 1, 1, &cpu_result, &hipblas_result_host);
159+
unit_check_general<Tr>(1, 1, 1, &cpu_result, &hipblas_result_device);
160+
}
152161
}
153162
if(argus.norm_check)
154163
{

clients/include/testing_dot_strided_batched_ex.hpp

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -180,8 +180,31 @@ hipblasStatus_t testing_dot_strided_batched_ex_template(const Arguments& argus)
180180

181181
if(argus.unit_check)
182182
{
183-
unit_check_general<Tr>(1, batch_count, 1, h_cpu_result, h_hipblas_result_host);
184-
unit_check_general<Tr>(1, batch_count, 1, h_cpu_result, h_hipblas_result_device);
183+
if(std::is_same<Tr, hipblasHalf>{})
184+
{
185+
double tol = error_tolerance<Tr> * N;
186+
near_check_general(1,
187+
1,
188+
batch_count,
189+
1,
190+
1,
191+
h_cpu_result.data(),
192+
h_hipblas_result_host.data(),
193+
tol);
194+
near_check_general(1,
195+
1,
196+
batch_count,
197+
1,
198+
1,
199+
h_cpu_result.data(),
200+
h_hipblas_result_device.data(),
201+
tol);
202+
}
203+
else
204+
{
205+
unit_check_general<Tr>(1, batch_count, 1, h_cpu_result, h_hipblas_result_host);
206+
unit_check_general<Tr>(1, batch_count, 1, h_cpu_result, h_hipblas_result_device);
207+
}
185208
}
186209
if(argus.norm_check)
187210
{

0 commit comments

Comments
 (0)