-
Notifications
You must be signed in to change notification settings - Fork 28
add half gemm support #68
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Draft
cayrols
wants to merge
25
commits into
icl-utk-edu:master
Choose a base branch
from
cayrols:seb/gemm.add_hgemm_support
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Draft
Changes from all commits
Commits
Show all changes
25 commits
Select commit
Hold shift + click to select a range
3998ebd
hgemm: add half definition and interfaces
cayrols 60afc47
hgemm: add hgemm tester; conversion routine from and to half; Nvidia …
cayrols f51f0c2
utils: move utils into test/cuda; Add missing NVCC flag; Clean utils …
cayrols 98e452b
utils: move utils into test/cuda; Add missing NVCC flag; Clean utils …
cayrols 64e6697
half: Add hipify from the cuda utils; update the compilation chain to…
cayrols c7061d0
Replace the definition of half from __half to _float16; rename blas::…
cayrols 68cb36d
Rename test/utils.cuh into test/utils.hh
cayrols 1c99c49
Add hip files that got generated from cuda files.
cayrols 7edd364
hgemm: Add hip support for hgemm
cayrols ffac1ae
hgemm: fix compilation after cleaning.
cayrols bd9511d
test: gemm change the bound by removing sqrt.
cayrols 44a9e73
test: fix scalar type used to get the flop count in half gemm.
cayrols 82752bf
hgemm: Use a class float16 instead of an alias.
cayrols b3c836c
config: fix gpu_backend name issue.
cayrols 6a181e8
hgemm: fix compilation issue.
cayrols 3ced0ce
TMP: Add explicit compilation flag for reproducer purpose.
cayrols d1fd85c
hgemm: Search _Float16 support from compiler; If so, the macro BLAS_U…
cayrols 8b8320b
hgemm: add CPU support through MKL.
cayrols 30cd556
hgemm: add CPU test with cblas wrapper; add cast_onto_device util tha…
cayrols d686451
float16: update configure search and macro definition.
cayrols 5a30ad8
hgemm: Fake casting in cublas_wrapper through pointer casting.
cayrols f4fed1a
test: in hgemm, add cpu casting support, remove cast_onto_device rout…
cayrols d0d2867
float16: add casting routines from/to fp16 to/from fp32.
cayrols 70d893b
float16: add missing config file.
cayrols 8678c82
hgemm: enable CPU hgemm only when MKL is provided.
cayrols File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -37,6 +37,14 @@ make.inc: | |
RANLIB ?= ranlib | ||
prefix ?= /opt/slate | ||
|
||
NVCC ?= nvcc | ||
HIPCC ?= hipcc | ||
hipify ?= hipify-perl | ||
md5sum ?= tools/md5sum.pl | ||
|
||
NVCCFLAGS += -O3 -std=c++11 --compiler-options '-Wall -Wno-unused-function' | ||
HIPCCFLAGS += -std=c++11 -DTCE_HIP -fno-gpu-rdc | ||
|
||
abs_prefix := ${abspath ${prefix}} | ||
|
||
# Default LD=ld won't work; use CXX. Can override in make.inc or environment. | ||
|
@@ -52,11 +60,25 @@ ifneq ($(findstring darwin, $(ostype)),) | |
macos = 1 | ||
endif | ||
|
||
#------------------------------------------------------------------------------- | ||
# Detect which gpu_backend used | ||
cuda = 0 | ||
hip = 0 | ||
sycl = 0 | ||
|
||
ifeq ($(gpu_backend),cuda) | ||
cuda = 1 | ||
else ifeq ($(gpu_backend),hip) | ||
hip = 1 | ||
endif | ||
|
||
#------------------------------------------------------------------------------- | ||
# if shared | ||
ifneq ($(static),1) | ||
CXXFLAGS += -fPIC | ||
LDFLAGS += -fPIC | ||
NVCCFLAGS += --compiler-options '-fPIC' | ||
HIPCCFLAGS += -fPIC | ||
lib_ext = so | ||
else | ||
lib_ext = a | ||
|
@@ -77,7 +99,19 @@ lib_src = $(wildcard src/*.cc) | |
lib_obj = $(addsuffix .o, $(basename $(lib_src))) | ||
dep += $(addsuffix .d, $(basename $(lib_src))) | ||
|
||
cuda_src = $(wildcard test/cuda/*.cu) | ||
hip_src = $(patsubst test/cuda/%.cu,test/hip/%.hip.cc,$(cuda_src)) | ||
|
||
tester_src = $(wildcard test/*.cc) | ||
|
||
ifeq ($(cuda),1) | ||
tester_src += $(cuda_src) | ||
endif | ||
|
||
ifeq ($(hip),1) | ||
tester_src += $(hip_src) | ||
endif | ||
|
||
tester_obj = $(addsuffix .o, $(basename $(tester_src))) | ||
dep += $(addsuffix .d, $(basename $(tester_src))) | ||
|
||
|
@@ -123,6 +157,8 @@ src/version.o: .id | |
#------------------------------------------------------------------------------- | ||
# BLAS++ specific flags and libraries | ||
CXXFLAGS += -I./include | ||
NVCCFLAGS += -I./include | ||
HIPCCFLAGS += -I./include | ||
|
||
# additional flags and libraries for testers | ||
$(tester_obj): CXXFLAGS += -I$(testsweeper_dir) | ||
|
@@ -158,6 +194,59 @@ uninstall: | |
$(RM) $(DESTDIR)$(abs_prefix)/lib$(LIB_SUFFIX)/libblaspp.* | ||
$(RM) $(DESTDIR)$(abs_prefix)/lib$(LIB_SUFFIX)/pkgconfig/blaspp.pc | ||
|
||
#------------------------------------------------------------------------------- | ||
# HIP sources converted from CUDA sources. | ||
|
||
# if_md5_outdated runs the given build rule ($1) only if the md5 sum of
# the target's dependency ($<) does not match the one stored in the
# target's dep file ($@.dep).
#
# - If the target ($@) exists and $< is identical to $@.dep, the target
#   is considered up-to-date: only its timestamp is refreshed (touch) so
#   that make also recognizes it as up-to-date.
# - Otherwise the build rule $1 is executed and $< is copied to $@.dep,
#   recording the md5 sum the target was built from.
#
# Instead of depending on the src file, the target depends on the md5
# file of the src file. This can be adapted for multiple dependencies.
# Example usage:
#
#     %: %.c.md5
#         ${call if_md5_outdated,\
#             gcc -o $@ ${basename $<}}
#
define if_md5_outdated
	if [ -e $@ ] && diff $< $@.dep > /dev/null 2>&1; then \
		echo " make: '$@' is up-to-date based on md5sum."; \
		echo " touch $@"; \
		touch $@; \
	else \
		echo " make: '$@' is out-of-date based on md5sum."; \
		echo " ${strip $1}"; \
		$1; \
		cp $< $@.dep; \
	fi
endef
|
||
# From GNU manual: Commas ... cannot appear in an argument as written. | ||
# The[y] can be put into the argument value by variable substitution. | ||
comma := , | ||
|
||
# Convert CUDA => HIP code. | ||
# Explicitly mention ${hip_src}, ${hip_hdr}, ${md5_files} | ||
# to prevent them from being intermediate files, | ||
# so they are _always_ generated and never removed. | ||
# Perl updates includes and removes excess spaces that fail style hook. | ||
${hip_src}: test/hip/%.hip.cc: test/cuda/%.cu.md5 | test/hip | ||
@${call if_md5_outdated, \ | ||
${hipify} ${basename $<} > $@; \ | ||
perl -pi -e 's/\.cuh/.hip.hh/g; s/ +(${comma}|;|$$)/$$1/g;' $@} | ||
|
||
hipify: ${hip_src} | ||
|
||
md5_files := ${addsuffix .md5, ${cuda_src}} | ||
|
||
${md5_files}: %.md5: % | ||
${md5sum} $< > $@ | ||
|
||
test/hip: | ||
mkdir -p $@ | ||
|
||
#------------------------------------------------------------------------------- | ||
# if re-configured, recompile everything | ||
$(lib_obj) $(tester_obj): make.inc | ||
|
@@ -286,9 +375,16 @@ hooks: ${hooks} | |
cp $< $@ ; \ | ||
fi | ||
|
||
# .hip.cc rule before .cc rule. | ||
%.hip.o: %.hip.cc | ||
$(HIPCC) $(HIPCCFLAGS) -c $< -o $@ | ||
|
||
%.o: %.cc | ||
$(CXX) $(CXXFLAGS) -c $< -o $@ | ||
|
||
%.o: %.cu | ||
$(NVCC) $(NVCCFLAGS) -c $< -o $@ | ||
|
||
# preprocess source | ||
%.i: %.cc | ||
$(CXX) $(CXXFLAGS) -I$(testsweeper_dir) -E $< -o $@ | ||
|
@@ -333,6 +429,24 @@ echo: | |
@echo | ||
@echo "dep = $(dep)" | ||
@echo | ||
@echo "---------- CUDA options" | ||
@echo "cuda = '$(cuda)'" | ||
@echo "NVCC = $(NVCC)" | ||
@echo "NVCC_which = $(NVCC_which)" | ||
@echo "CUDA_PATH = $(CUDA_PATH)" | ||
@echo "NVCCFLAGS = $(NVCCFLAGS)" | ||
@echo | ||
@echo "---------- HIP options" | ||
@echo "hip = '$(hip)'" | ||
@echo "HIPCC = $(HIPCC)" | ||
@echo "HIPCC_which = $(HIPCC_which)" | ||
@echo "ROCM_PATH = $(ROCM_PATH)" | ||
@echo "HIPCCFLAGS = $(HIPCCFLAGS)" | ||
@echo "hipify = ${hipify}" | ||
@echo "cuda_src = ${cuda_src}" | ||
@echo "hip_src = ${hip_src}" | ||
@echo "md5_files = $(md5_files)" | ||
@echo | ||
@echo "testsweeper_dir = $(testsweeper_dir)" | ||
@echo "testsweeper_src = $(testsweeper_src)" | ||
@echo "testsweeper = $(testsweeper)" | ||
|
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
// Copyright (c) 2017-2022, University of Tennessee. All rights reserved. | ||
cayrols marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// SPDX-License-Identifier: BSD-3-Clause | ||
// This program is free software: you can redistribute it and/or modify it under | ||
// the terms of the BSD 3-Clause license. See the accompanying LICENSE file. | ||
|
||
#include <stdio.h> | ||
|
||
#include "config.h" | ||
|
||
//------------------------------------------------------------------------------ | ||
int main() | ||
{ | ||
_Float16 a = 0.1; | ||
_Float16 b = 0.2; | ||
_Float16 c = a + b; | ||
|
||
printf( "%f + %f = %f -- expected 0.3\n", (float)a, (float)b, (float)c ); | ||
return 0; | ||
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.