Skip to content

Commit 66eff05

Browse files
committed
Merge branch 'main' of https://github.com/KhronosGroup/SPIRV-LLVM-Translator into amd-develop
2 parents 8cb74e2 + a726c62 commit 66eff05

File tree

85 files changed

+1916
-416
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

85 files changed

+1916
-416
lines changed

include/LLVMSPIRVExtensions.inc

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,9 @@ EXT(SPV_INTEL_bfloat16_arithmetic)
8181
EXT(SPV_INTEL_ternary_bitwise_function)
8282
EXT(SPV_INTEL_int4)
8383
EXT(SPV_INTEL_function_variants)
84-
EXT(SPV_INTEL_shader_atomic_bfloat16)
84+
EXT(SPV_INTEL_16bit_atomics)
8585
EXT(SPV_EXT_float8)
8686
EXT(SPV_INTEL_predicated_io)
87+
EXT(SPV_INTEL_sigmoid)
88+
EXT(SPV_INTEL_float4)
89+
EXT(SPV_INTEL_fp_conversions)

lib/SPIRV/LLVMToSPIRVDbgTran.cpp

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1286,17 +1286,8 @@ SPIRVEntry *LLVMToSPIRVDbgTran::transDbgFunction(const DISubprogram *Func) {
12861286
Ops[FunctionIdIdx] = getDebugInfoNoneId();
12871287
for (const llvm::Function &F : M->functions()) {
12881288
if (Func->describes(&F)) {
1289-
// Function definition of spir_kernel can have no "spir_kernel" calling
1290-
// convention because SPIRVRegularizeLLVMBase::addKernelEntryPoint pass
1291-
// could have turned it to spir_func. The "true" entry point is a
1292-
// wrapper kernel function, which can be found further in the module.
1293-
if (FuncDef) {
1294-
if (F.getCallingConv() == CallingConv::SPIR_KERNEL) {
1295-
IsEntryPointKernel = true;
1296-
break;
1297-
}
1289+
if (FuncDef)
12981290
continue;
1299-
}
13001291

13011292
SPIRVValue *SPIRVFunc = SPIRVWriter->getTranslatedValue(&F);
13021293
assert(SPIRVFunc && "All function must be already translated");
@@ -1305,7 +1296,6 @@ SPIRVEntry *LLVMToSPIRVDbgTran::transDbgFunction(const DISubprogram *Func) {
13051296
if (!isNonSemanticDebugInfo())
13061297
break;
13071298

1308-
// Most likely unreachable because of Regularise LLVM pass
13091299
if (F.getCallingConv() == CallingConv::SPIR_KERNEL) {
13101300
IsEntryPointKernel = true;
13111301
break;

lib/SPIRV/OCLToSPIRV.cpp

Lines changed: 107 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,15 +42,21 @@
4242
#include "SPIRVInternal.h"
4343
#include "libSPIRV/SPIRVDebug.h"
4444

45+
#include "llvm/ADT/SmallPtrSet.h"
46+
#include "llvm/ADT/SmallVector.h"
4547
#include "llvm/ADT/StringSwitch.h"
4648
#include "llvm/Analysis/ValueTracking.h"
49+
#include "llvm/IR/Constants.h"
4750
#include "llvm/IR/IRBuilder.h"
4851
#include "llvm/IR/Instruction.h"
4952
#include "llvm/IR/Instructions.h"
53+
#include "llvm/IR/Operator.h"
5054
#include "llvm/IR/PatternMatch.h"
55+
#include "llvm/IR/TypedPointerType.h"
5156
#include "llvm/Support/Debug.h"
5257

5358
#include <algorithm>
59+
#include <optional>
5460
#include <regex>
5561
#include <set>
5662

@@ -62,6 +68,88 @@ using namespace SPIRV;
6268
using namespace OCLUtil;
6369

6470
namespace SPIRV {
71+
72+
// Returns the address space recorded on the pointer type \p Ty.
// Both TypedPointerType and PointerType are handled. \p Ty must be non-null
// (asserted); any other kind of type reaches llvm_unreachable.
static unsigned getAddressSpaceFromType(const Type *Ty) {
73+
assert(Ty && "Can't deduce pointer AS");
74+
// TypedPointerType is tested first, then PointerType.
if (auto *TypedPtr = dyn_cast<TypedPointerType>(Ty))
75+
return TypedPtr->getAddressSpace();
76+
if (auto *Ptr = dyn_cast<PointerType>(Ty))
77+
return Ptr->getAddressSpace();
78+
// Neither pointer representation matched: callers must pass pointer types.
llvm_unreachable("Can't deduce pointer AS");
79+
}
80+
81+
// Performs an address space inference analysis.
// Starting from \p Ptr (must be non-null, asserted), walks backwards through
// the values it is derived from (addrspacecast, bitcast, getelementptr,
// select and phi) and returns the first address space encountered that is
// not SPIRAS_Generic. If no such value is found, SPIRAS_Generic is returned.
// NOTE(review): the first non-generic address space found wins. For a select
// or phi whose inputs originate from different address spaces the result
// depends on the traversal order - confirm this is intended.
82+
static unsigned getAddressSpaceFromValue(const Value *Ptr) {
83+
assert(Ptr && "Can't deduce pointer AS");
84+
85+
// Visited guards against revisiting a value (e.g. through phi cycles).
SmallPtrSet<const Value *, 8> Visited;
86+
SmallVector<const Value *, 8> Worklist;
87+
Worklist.push_back(Ptr);
88+
unsigned AS = SPIRAS_Generic;
89+
90+
while (!Worklist.empty()) {
91+
// The worklist is consumed in LIFO order.
const Value *Current = Worklist.pop_back_val();
92+
if (!Visited.insert(Current).second)
93+
continue;
94+
95+
unsigned DeducedAS = getAddressSpaceFromType(Current->getType());
96+
// Any concrete (non-generic) address space ends the search immediately.
if (DeducedAS != SPIRAS_Generic)
97+
return DeducedAS;
98+
// Because of the early return above, DeducedAS is SPIRAS_Generic here, so
// AS never holds any other value.
AS = DeducedAS;
99+
100+
// Find origins of the pointer and add to the worklist.
101+
if (auto *Op = dyn_cast<Operator>(Current)) {
102+
switch (Op->getOpcode()) {
103+
// Casts and GEPs derive from a single pointer: operand 0.
case Instruction::AddrSpaceCast:
104+
case Instruction::BitCast:
105+
case Instruction::GetElementPtr:
106+
Worklist.push_back(Op->getOperand(0));
107+
break;
108+
// Operand 0 of a select is the condition; operands 1 and 2 are the
// candidate pointers.
case Instruction::Select:
109+
Worklist.push_back(Op->getOperand(1));
110+
Worklist.push_back(Op->getOperand(2));
111+
break;
112+
// Every incoming value of a phi is a possible origin.
case Instruction::PHI: {
113+
auto *Phi = cast<PHINode>(Op);
114+
for (Value *Incoming : Phi->incoming_values())
115+
Worklist.push_back(Incoming);
116+
break;
117+
}
118+
// Any other operation is treated as a leaf; the walk does not look
// through it.
default:
119+
break;
120+
}
121+
}
122+
}
123+
124+
// Reached only when every visited value had the generic address space.
return AS;
125+
}
126+
127+
// Returns the memory semantics mask bits for an atomic, chosen from the
// address space of its pointer argument. Nothing is modified; the caller
// combines the returned mask with the memory-order semantics.
128+
// The address space is taken from \p RecordedType first; only when that is
// SPIRAS_Generic is it inferred from the value \p Ptr. Both arguments must
// be non-null (asserted).
129+
static unsigned
130+
getAtomicPointerMemorySemanticsMemoryMask(const Value *Ptr,
131+
const Type *RecordedType) {
132+
assert((Ptr && RecordedType) &&
133+
"Can't evaluate atomic builtin's memory semantic");
134+
unsigned AddrSpace = getAddressSpaceFromType(RecordedType);
135+
// Fall back to value-based inference when the recorded type is generic.
if (AddrSpace == SPIRAS_Generic)
136+
AddrSpace = getAddressSpaceFromValue(Ptr);
137+
138+
switch (AddrSpace) {
139+
// All three global address spaces map to the CrossWorkgroup memory bit.
case SPIRAS_Global:
140+
case SPIRAS_GlobalDevice:
141+
case SPIRAS_GlobalHost:
142+
return MemorySemanticsCrossWorkgroupMemoryMask;
143+
case SPIRAS_Local:
144+
return MemorySemanticsWorkgroupMemoryMask;
145+
// Still generic after inference: return both the CrossWorkgroup and the
// Workgroup bits.
case SPIRAS_Generic:
146+
return MemorySemanticsCrossWorkgroupMemoryMask |
147+
MemorySemanticsWorkgroupMemoryMask;
148+
// Every other address space contributes no memory bits.
default:
149+
return MemorySemanticsMaskNone;
150+
}
151+
}
152+
65153
static size_t getOCLCpp11AtomicMaxNumOps(StringRef Name) {
66154
return StringSwitch<size_t>(Name)
67155
.Cases({"load", "flag_test_and_set", "flag_clear"}, 3)
@@ -704,6 +792,11 @@ void OCLToSPIRVBase::transAtomicBuiltin(CallInst *CI,
704792
const size_t ScopeIdx = ArgsCount - 1;
705793
const size_t OrderIdx = ScopeIdx - NumOrder;
706794

795+
unsigned PtrMemSemantics = MemorySemanticsMaskNone;
796+
if (Mutator.arg_size() > 0)
797+
PtrMemSemantics = getAtomicPointerMemorySemanticsMemoryMask(
798+
Mutator.getArg(0), Mutator.getType(0));
799+
707800
if (NeedsNegate) {
708801
Mutator.mapArg(1, [=](Value *V) {
709802
IRBuilder<> IRB(CI);
@@ -714,9 +807,20 @@ void OCLToSPIRVBase::transAtomicBuiltin(CallInst *CI,
714807
return transOCLMemScopeIntoSPIRVScope(V, OCLMS_device, CI);
715808
});
716809
for (size_t I = 0; I < NumOrder; ++I) {
717-
Mutator.mapArg(OrderIdx + I, [=](Value *V) {
718-
return transOCLMemOrderIntoSPIRVMemorySemantics(V, OCLMO_seq_cst, CI);
719-
});
810+
Mutator.mapArg(
811+
OrderIdx + I, [=](IRBuilder<> &Builder, Value *V) -> Value * {
812+
Value *MemSem =
813+
transOCLMemOrderIntoSPIRVMemorySemantics(V, OCLMO_seq_cst, CI);
814+
if (PtrMemSemantics == MemorySemanticsMaskNone)
815+
return MemSem;
816+
817+
auto *MemSemTy = cast<IntegerType>(MemSem->getType());
818+
auto *Mask = ConstantInt::get(MemSemTy, PtrMemSemantics);
819+
if (auto *Const = dyn_cast<ConstantInt>(MemSem))
820+
return static_cast<Value *>(ConstantInt::get(
821+
MemSemTy, Const->getZExtValue() | PtrMemSemantics));
822+
return Builder.CreateOr(MemSem, Mask);
823+
});
720824
}
721825

722826
// Order of args in SPIR-V:

lib/SPIRV/SPIRVInternal.h

Lines changed: 112 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1050,6 +1050,7 @@ enum FPEncodingWrap {
10501050
BF16 = FPEncoding::FPEncodingBFloat16KHR,
10511051
E4M3 = FPEncoding::FPEncodingFloat8E4M3EXT,
10521052
E5M2 = FPEncoding::FPEncodingFloat8E5M2EXT,
1053+
E2M1 = internal::FPEncodingFloat4E2M1INTEL,
10531054
};
10541055

10551056
// Structure describing non-trivial conversions (FP8 and int4)
@@ -1078,36 +1079,117 @@ typedef SPIRVMap<llvm::StringRef, FPConversionDesc> FPConvertToEncodingMap;
10781079

10791080
// clang-format off
10801081
template <> inline void FPConvertToEncodingMap::init() {
1081-
// 8-bit conversions
1082-
add("ConvertE4M3ToFP16EXT",
1083-
{FPEncodingWrap::E4M3, FPEncodingWrap::IEEE754, OpFConvert});
1084-
add("ConvertE5M2ToFP16EXT",
1085-
{FPEncodingWrap::E5M2, FPEncodingWrap::IEEE754, OpFConvert});
1086-
add("ConvertE4M3ToBF16EXT",
1087-
{FPEncodingWrap::E4M3, FPEncodingWrap::BF16, OpFConvert});
1088-
add("ConvertE5M2ToBF16EXT",
1089-
{FPEncodingWrap::E5M2, FPEncodingWrap::BF16, OpFConvert});
1090-
add("ConvertFP16ToE4M3EXT",
1091-
{FPEncodingWrap::IEEE754, FPEncodingWrap::E4M3, OpFConvert});
1092-
add("ConvertFP16ToE5M2EXT",
1093-
{FPEncodingWrap::IEEE754, FPEncodingWrap::E5M2, OpFConvert});
1094-
add("ConvertBF16ToE4M3EXT",
1095-
{FPEncodingWrap::BF16, FPEncodingWrap::E4M3, OpFConvert});
1096-
add("ConvertBF16ToE5M2EXT",
1097-
{FPEncodingWrap::BF16, FPEncodingWrap::E5M2, OpFConvert});
1098-
1099-
add("ConvertInt4ToE4M3INTEL",
1100-
{FPEncodingWrap::Integer, FPEncodingWrap::E4M3, OpConvertSToF});
1101-
add("ConvertInt4ToE5M2INTEL",
1102-
{FPEncodingWrap::Integer, FPEncodingWrap::E5M2, OpConvertSToF});
1103-
add("ConvertInt4ToFP16INTEL",
1104-
{FPEncodingWrap::Integer, FPEncodingWrap::IEEE754, OpConvertSToF});
1105-
add("ConvertInt4ToBF16INTEL",
1106-
{FPEncodingWrap::Integer, FPEncodingWrap::BF16, OpConvertSToF});
1107-
add("ConvertFP16ToInt4INTEL",
1108-
{FPEncodingWrap::IEEE754, FPEncodingWrap::Integer, OpConvertFToS});
1109-
add("ConvertBF16ToInt4INTEL",
1110-
{FPEncodingWrap::BF16, FPEncodingWrap::Integer, OpConvertFToS});
1082+
// 4-bit conversions
1083+
add("ConvertE2M1ToE4M3INTEL",
1084+
{FPEncodingWrap::E2M1, FPEncodingWrap::E4M3, OpFConvert});
1085+
add("ConvertE2M1ToE5M2INTEL",
1086+
{FPEncodingWrap::E2M1, FPEncodingWrap::E5M2, OpFConvert});
1087+
add("ConvertE2M1ToFP16INTEL",
1088+
{FPEncodingWrap::E2M1, FPEncodingWrap::IEEE754, OpFConvert});
1089+
add("ConvertE2M1ToBF16INTEL",
1090+
{FPEncodingWrap::E2M1, FPEncodingWrap::BF16, OpFConvert});
1091+
1092+
add("ConvertInt4ToE4M3INTEL",
1093+
{FPEncodingWrap::Integer, FPEncodingWrap::E4M3, OpConvertSToF});
1094+
add("ConvertInt4ToE5M2INTEL",
1095+
{FPEncodingWrap::Integer, FPEncodingWrap::E5M2, OpConvertSToF});
1096+
add("ConvertInt4ToFP16INTEL",
1097+
{FPEncodingWrap::Integer, FPEncodingWrap::IEEE754, OpConvertSToF});
1098+
add("ConvertInt4ToBF16INTEL",
1099+
{FPEncodingWrap::Integer, FPEncodingWrap::BF16, OpConvertSToF});
1100+
add("ConvertInt4ToInt8INTEL",
1101+
{FPEncodingWrap::Integer, FPEncodingWrap::Integer, OpSConvert});
1102+
1103+
add("ConvertFP16ToE2M1INTEL",
1104+
{FPEncodingWrap::IEEE754, FPEncodingWrap::E2M1, OpFConvert});
1105+
add("ConvertBF16ToE2M1INTEL",
1106+
{FPEncodingWrap::BF16, FPEncodingWrap::E2M1, OpFConvert});
1107+
add("ConvertFP16ToInt4INTEL",
1108+
{FPEncodingWrap::IEEE754, FPEncodingWrap::Integer, OpConvertFToS});
1109+
add("ConvertBF16ToInt4INTEL",
1110+
{FPEncodingWrap::BF16, FPEncodingWrap::Integer, OpConvertFToS});
1111+
1112+
// 8-bit conversions
1113+
add("ConvertE4M3ToFP16EXT",
1114+
{FPEncodingWrap::E4M3, FPEncodingWrap::IEEE754, OpFConvert});
1115+
add("ConvertE5M2ToFP16EXT",
1116+
{FPEncodingWrap::E5M2, FPEncodingWrap::IEEE754, OpFConvert});
1117+
add("ConvertE4M3ToBF16EXT",
1118+
{FPEncodingWrap::E4M3, FPEncodingWrap::BF16, OpFConvert});
1119+
add("ConvertE5M2ToBF16EXT",
1120+
{FPEncodingWrap::E5M2, FPEncodingWrap::BF16, OpFConvert});
1121+
add("ConvertFP16ToE4M3EXT",
1122+
{FPEncodingWrap::IEEE754, FPEncodingWrap::E4M3, OpFConvert});
1123+
add("ConvertFP16ToE5M2EXT",
1124+
{FPEncodingWrap::IEEE754, FPEncodingWrap::E5M2, OpFConvert});
1125+
add("ConvertBF16ToE4M3EXT",
1126+
{FPEncodingWrap::BF16, FPEncodingWrap::E4M3, OpFConvert});
1127+
add("ConvertBF16ToE5M2EXT",
1128+
{FPEncodingWrap::BF16, FPEncodingWrap::E5M2, OpFConvert});
1129+
1130+
// SPV_INTEL_fp_conversions
1131+
add("ClampConvertFP16ToE2M1INTEL",
1132+
{FPEncodingWrap::IEEE754, FPEncodingWrap::E2M1,
1133+
internal::OpClampConvertFToFINTEL});
1134+
add("ClampConvertBF16ToE2M1INTEL",
1135+
{FPEncodingWrap::BF16, FPEncodingWrap::E2M1,
1136+
internal::OpClampConvertFToFINTEL});
1137+
add("ClampConvertFP16ToE4M3INTEL",
1138+
{FPEncodingWrap::IEEE754, FPEncodingWrap::E4M3,
1139+
internal::OpClampConvertFToFINTEL});
1140+
add("ClampConvertBF16ToE4M3INTEL",
1141+
{FPEncodingWrap::BF16, FPEncodingWrap::E4M3,
1142+
internal::OpClampConvertFToFINTEL});
1143+
add("ClampConvertFP16ToE5M2INTEL",
1144+
{FPEncodingWrap::IEEE754, FPEncodingWrap::E5M2,
1145+
internal::OpClampConvertFToFINTEL});
1146+
add("ClampConvertBF16ToE5M2INTEL",
1147+
{FPEncodingWrap::BF16, FPEncodingWrap::E5M2,
1148+
internal::OpClampConvertFToFINTEL});
1149+
add("ClampConvertFP16ToInt4INTEL",
1150+
{FPEncodingWrap::IEEE754, FPEncodingWrap::Integer,
1151+
internal::OpClampConvertFToSINTEL});
1152+
add("ClampConvertBF16ToInt4INTEL",
1153+
{FPEncodingWrap::BF16, FPEncodingWrap::Integer,
1154+
internal::OpClampConvertFToSINTEL});
1155+
1156+
add("StochasticRoundFP16ToE5M2INTEL",
1157+
{FPEncodingWrap::IEEE754, FPEncodingWrap::E5M2,
1158+
internal::OpStochasticRoundFToFINTEL});
1159+
add("StochasticRoundFP16ToE4M3INTEL",
1160+
{FPEncodingWrap::IEEE754, FPEncodingWrap::E4M3,
1161+
internal::OpStochasticRoundFToFINTEL});
1162+
add("StochasticRoundBF16ToE5M2INTEL",
1163+
{FPEncodingWrap::BF16, FPEncodingWrap::E5M2,
1164+
internal::OpStochasticRoundFToFINTEL});
1165+
add("StochasticRoundBF16ToE4M3INTEL",
1166+
{FPEncodingWrap::BF16, FPEncodingWrap::E4M3,
1167+
internal::OpStochasticRoundFToFINTEL});
1168+
add("StochasticRoundFP16ToE2M1INTEL",
1169+
{FPEncodingWrap::IEEE754, FPEncodingWrap::E2M1,
1170+
internal::OpStochasticRoundFToFINTEL});
1171+
add("StochasticRoundBF16ToE2M1INTEL",
1172+
{FPEncodingWrap::BF16, FPEncodingWrap::E2M1,
1173+
internal::OpStochasticRoundFToFINTEL});
1174+
add("ClampStochasticRoundFP16ToInt4INTEL",
1175+
{FPEncodingWrap::IEEE754, FPEncodingWrap::Integer,
1176+
internal::OpClampStochasticRoundFToSINTEL});
1177+
add("ClampStochasticRoundBF16ToInt4INTEL",
1178+
{FPEncodingWrap::BF16, FPEncodingWrap::Integer,
1179+
internal::OpClampStochasticRoundFToSINTEL});
1180+
1181+
add("ClampStochasticRoundFP16ToE5M2INTEL",
1182+
{FPEncodingWrap::IEEE754, FPEncodingWrap::E5M2,
1183+
internal::OpClampStochasticRoundFToFINTEL});
1184+
add("ClampStochasticRoundFP16ToE4M3INTEL",
1185+
{FPEncodingWrap::IEEE754, FPEncodingWrap::E4M3,
1186+
internal::OpClampStochasticRoundFToFINTEL});
1187+
add("ClampStochasticRoundBF16ToE5M2INTEL",
1188+
{FPEncodingWrap::BF16, FPEncodingWrap::E5M2,
1189+
internal::OpClampStochasticRoundFToFINTEL});
1190+
add("ClampStochasticRoundBF16ToE4M3INTEL",
1191+
{FPEncodingWrap::BF16, FPEncodingWrap::E4M3,
1192+
internal::OpClampStochasticRoundFToFINTEL});
11111193
}
11121194

11131195
// clang-format on

0 commit comments

Comments
 (0)