Skip to content

Commit 607995e

Browse files
committed
[CHERIOT] Use capability registers to store f64 values.
This enables each f64 to be passed by value in a single capability register, rather than in a pair of integer registers. This required adding explicit type annotations in various places in the XCheri tblgen files, as the GPCR class can now hold values of type c64 or f64, which breaks type inference.
1 parent 5d99e3d commit 607995e

File tree

6 files changed

+459
-91
lines changed

6 files changed

+459
-91
lines changed

llvm/lib/Target/RISCV/RISCVCallingConv.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -486,6 +486,14 @@ bool llvm::CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT,
486486
}
487487
}
488488

489+
// Cheriot uses GPCR without a bitcast when possible.
490+
if (LocVT == MVT::f64 && Subtarget.hasVendorXCheriot() && !IsPureCapVarArgs) {
491+
if (MCRegister Reg = State.AllocateReg(ArgGPCRs)) {
492+
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
493+
return false;
494+
}
495+
}
496+
489497
// FP smaller than XLen, uses custom GPR.
490498
if (LocVT == MVT::f16 || LocVT == MVT::bf16 ||
491499
(LocVT == MVT::f32 && XLen == 64)) {

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 55 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
170170
addRegisterClass(CapType, &RISCV::GPCRRegClass);
171171
}
172172

173+
if (Subtarget.hasVendorXCheriot()) {
174+
// Cheriot holds f64 values in capability registers.
175+
addRegisterClass(MVT::f64, &RISCV::GPCRRegClass);
176+
}
177+
173178
static const MVT::SimpleValueType BoolVecVTs[] = {
174179
MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
175180
MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
@@ -680,6 +685,20 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
680685
setLibcallName(RTLIB::MEMSET, "memset");
681686
}
682687

688+
if (Subtarget.hasVendorXCheriot()) {
689+
setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
690+
691+
static const unsigned CheriotF64ExpandOps[] = {
692+
ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB,
693+
ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,
694+
ISD::FCEIL, ISD::FTRUNC, ISD::FFLOOR, ISD::FROUND,
695+
ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT, ISD::IS_FPCLASS,
696+
ISD::SETCC, ISD::FMAXIMUM, ISD::FMINIMUM, ISD::STRICT_FADD,
697+
ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV, ISD::STRICT_FSQRT,
698+
ISD::STRICT_FMA};
699+
setOperationAction(CheriotF64ExpandOps, MVT::f64, Expand);
700+
}
701+
683702
// TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
684703
// Unfortunately this can't be determined just from the ISA naming string.
685704
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
@@ -6145,11 +6164,44 @@ static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
61456164
return SDValue();
61466165
}
61476166

6148-
SDValue RISCVTargetLowering::lowerConstantFP(SDValue Op,
6149-
SelectionDAG &DAG) const {
6167+
SDValue
6168+
RISCVTargetLowering::lowerConstantFP(SDValue Op, SelectionDAG &DAG,
6169+
const RISCVSubtarget &Subtarget) const {
61506170
MVT VT = Op.getSimpleValueType();
61516171
const APFloat &Imm = cast<ConstantFPSDNode>(Op)->getValueAPF();
61526172

6173+
if (Subtarget.hasVendorXCheriot()) {
6174+
// Cheriot needs to custom lower f64 immediates using csethigh
6175+
if (VT != MVT::f64)
6176+
return Op;
6177+
6178+
SDLoc DL(Op);
6179+
uint64_t Val = Imm.bitcastToAPInt().getLimitedValue();
6180+
6181+
// Materialize 0.0 as cnull
6182+
if (Val == 0)
6183+
return DAG.getRegister(getNullCapabilityRegister(), MVT::f64);
6184+
6185+
// Otherwise, materialize the low part into a 32-bit register.
6186+
auto Lo = DAG.getConstant(Val & 0xFFFFFFFF, DL, MVT::i32);
6187+
auto LoAsCap = DAG.getTargetInsertSubreg(RISCV::sub_cap_addr, DL, MVT::c64,
6188+
DAG.getUNDEF(MVT::f64), Lo);
6189+
6190+
// The high half of a capability register is zeroed by integer ops,
6191+
// so if we wanted a zero high half then we are done.
6192+
if (Val >> 32 == 0)
6193+
return DAG.getBitcast(MVT::f64, LoAsCap);
6194+
6195+
// Otherwise, materialize the high half and use csethigh to combine the two
6196+
// halves.
6197+
auto Hi = DAG.getConstant(Val >> 32, DL, MVT::i32);
6198+
auto Cap = DAG.getNode(
6199+
ISD::INTRINSIC_WO_CHAIN, DL, MVT::c64,
6200+
DAG.getTargetConstant(Intrinsic::cheri_cap_high_set, DL, MVT::i32),
6201+
LoAsCap, Hi);
6202+
return DAG.getBitcast(MVT::f64, Cap);
6203+
}
6204+
61536205
// Can this constant be selected by a Zfa FLI instruction?
61546206
bool Negate = false;
61556207
int Index = getLegalZfaFPImm(Imm, VT);
@@ -6799,7 +6851,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
67996851
case ISD::Constant:
68006852
return lowerConstant(Op, DAG, Subtarget);
68016853
case ISD::ConstantFP:
6802-
return lowerConstantFP(Op, DAG);
6854+
return lowerConstantFP(Op, DAG, Subtarget);
68036855
case ISD::SELECT:
68046856
return lowerSELECT(Op, DAG);
68056857
case ISD::BRCOND:

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -981,7 +981,8 @@ class RISCVTargetLowering : public TargetLowering {
981981
SelectionDAG &DAG) const;
982982
SDValue getTLSDescAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;
983983

984-
SDValue lowerConstantFP(SDValue Op, SelectionDAG &DAG) const;
984+
SDValue lowerConstantFP(SDValue Op, SelectionDAG &DAG,
985+
const RISCVSubtarget &Subtarget) const;
985986
SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
986987
SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
987988
SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;

llvm/lib/Target/RISCV/RISCVInstrInfoXCheri.td

Lines changed: 69 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1451,7 +1451,7 @@ def : PatGpcrUimm12<int_cheri_bounded_stack_cap, CSetBoundsImm>;
14511451
def : PatGpcrGpr<int_cheri_bounded_stack_cap_dynamic, CSetBounds>;
14521452
def : PatGpcrUimm12<int_cheri_bounded_stack_cap_dynamic, CSetBoundsImm>;
14531453

1454-
def : Pat<(CapFrameAddrRegImm GPCR:$rs1, simm12:$imm12),
1454+
def : Pat<(CapFrameAddrRegImm(cPTR GPCR:$rs1), simm12:$imm12),
14551455
(CIncOffsetImm GPCR:$rs1, simm12:$imm12)>;
14561456

14571457
/// Pointer-Arithmetic Instructions
@@ -1463,14 +1463,15 @@ def : Pat<(XLenVT (int_cheri_cap_diff GPCR:$cs1, GPCR:$cs2)),
14631463
(XLenVT (EXTRACT_SUBREG GPCR:$cs2, sub_cap_addr)))>;
14641464

14651465
let Predicates = [IsPureCapABI] in {
1466-
def : Pat<(inttoptr (XLenVT GPR:$rs2)), (CIncOffset C0, GPR:$rs2)>;
1467-
def : Pat<(inttoptr simm12:$imm12), (CIncOffsetImm C0, simm12:$imm12)>;
1468-
def : Pat<(XLenVT (ptrtoint GPCR:$rs1)), (PseudoCGetAddr GPCR:$rs1)>;
1466+
def : Pat<(inttoptr(XLenVT GPR:$rs2)), (cPTR(CIncOffset(cPTR C0), GPR:$rs2))>;
1467+
def : Pat<(inttoptr simm12:$imm12), (cPTR(CIncOffsetImm(cPTR C0),
1468+
simm12:$imm12))>;
1469+
def : Pat<(XLenVT(ptrtoint(cPTR GPCR:$rs1))), (PseudoCGetAddr GPCR:$rs1)>;
14691470
}
14701471

14711472
/// Null Capability Patterns
14721473

1473-
def : Pat<(inttoptr (XLenVT 0)), (CLenVT (COPY C0))>;
1474+
def : Pat<(inttoptr(XLenVT 0)), (CLenVT(COPY(cPTR C0)))>;
14741475
def : Pat<(ptrtoint (CLenVT (inttoptr (XLenVT 0)))),
14751476
(XLenVT (COPY (XLenVT X0)))>;
14761477

@@ -1481,26 +1482,31 @@ def : Pat<(ptrtoint (CLenVT (inttoptr (XLenVT 0)))),
14811482
// * Break untagged < tagged semantics
14821483
// * Can't implement exact equality
14831484
class CheriSetCCPatGpcrGpcr<PatFrag CondOp, dag GprGprDag>
1484-
: Pat<(XLenVT (CondOp GPCR:$cs1, GPCR:$cs2)),
1485-
(OutPatFrag<(ops node:$rs1, node:$rs2), GprGprDag>
1486-
(XLenVT (EXTRACT_SUBREG GPCR:$cs1, sub_cap_addr)),
1487-
(XLenVT (EXTRACT_SUBREG GPCR:$cs2, sub_cap_addr)))>;
1485+
: Pat<(XLenVT(CondOp(cPTR GPCR:$cs1), (cPTR GPCR:$cs2))),
1486+
(OutPatFrag<(ops node:$rs1, node:$rs2), GprGprDag>(XLenVT
1487+
(EXTRACT_SUBREG GPCR:$cs1, sub_cap_addr)),
1488+
(XLenVT(EXTRACT_SUBREG GPCR:$cs2, sub_cap_addr)))>;
14881489

14891490
multiclass CheriSetCCPatGpcrImm<PatFrag CondOp, Operand ImmTy, dag GprImmDag> {
1490-
def : Pat<(XLenVT (CondOp GPCR:$cs1, (inttoptr ImmTy:$imm12))),
1491-
(OutPatFrag<(ops node:$rs1, node:$imm12), GprImmDag>
1492-
(XLenVT (EXTRACT_SUBREG GPCR:$cs1, sub_cap_addr)), ImmTy:$imm12)>;
1493-
def : Pat<(XLenVT (CondOp GPCR:$cs1, (cptradd (inttoptr (XLenVT 0)), ImmTy:$imm12))),
1494-
(OutPatFrag<(ops node:$rs1, node:$imm12), GprImmDag>
1495-
(XLenVT (EXTRACT_SUBREG GPCR:$cs1, sub_cap_addr)), ImmTy:$imm12)>;
1496-
def : Pat<(XLenVT (CondOp GPCR:$cs1,
1497-
(int_cheri_cap_offset_set (inttoptr (XLenVT 0)), ImmTy:$imm12))),
1498-
(OutPatFrag<(ops node:$rs1, node:$imm12), GprImmDag>
1499-
(XLenVT (EXTRACT_SUBREG GPCR:$cs1, sub_cap_addr)), ImmTy:$imm12)>;
1500-
def : Pat<(XLenVT (CondOp GPCR:$cs1,
1501-
(int_cheri_cap_address_set (inttoptr (XLenVT 0)), ImmTy:$imm12))),
1502-
(OutPatFrag<(ops node:$rs1, node:$imm12), GprImmDag>
1503-
(XLenVT (EXTRACT_SUBREG GPCR:$cs1, sub_cap_addr)), ImmTy:$imm12)>;
1491+
def : Pat<(XLenVT(CondOp(cPTR GPCR:$cs1), (inttoptr ImmTy:$imm12))),
1492+
(OutPatFrag<(ops node:$rs1, node:$imm12), GprImmDag>(XLenVT
1493+
(EXTRACT_SUBREG GPCR:$cs1, sub_cap_addr)),
1494+
ImmTy:$imm12)>;
1495+
def : Pat<(XLenVT(CondOp(cPTR GPCR:$cs1), (cptradd(inttoptr(XLenVT 0)),
1496+
ImmTy:$imm12))),
1497+
(OutPatFrag<(ops node:$rs1, node:$imm12), GprImmDag>(XLenVT
1498+
(EXTRACT_SUBREG GPCR:$cs1, sub_cap_addr)),
1499+
ImmTy:$imm12)>;
1500+
def : Pat<(XLenVT(CondOp(cPTR GPCR:$cs1),
1501+
(int_cheri_cap_offset_set(inttoptr(XLenVT 0)), ImmTy:$imm12))),
1502+
(OutPatFrag<(ops node:$rs1, node:$imm12), GprImmDag>(XLenVT
1503+
(EXTRACT_SUBREG GPCR:$cs1, sub_cap_addr)),
1504+
ImmTy:$imm12)>;
1505+
def : Pat<(XLenVT(CondOp(cPTR GPCR:$cs1),
1506+
(int_cheri_cap_address_set(inttoptr(XLenVT 0)), ImmTy:$imm12))),
1507+
(OutPatFrag<(ops node:$rs1, node:$imm12), GprImmDag>(XLenVT
1508+
(EXTRACT_SUBREG GPCR:$cs1, sub_cap_addr)),
1509+
ImmTy:$imm12)>;
15041510
}
15051511

15061512
multiclass CheriSetCCPatGpcrSimm12<PatFrag CondOp, dag GprImmDag>
@@ -1510,9 +1516,9 @@ multiclass CheriSetCCPatGpcrSimm12Plus1<PatFrag CondOp, dag GprImmDag>
15101516
: CheriSetCCPatGpcrImm<CondOp, simm12_plus1, GprImmDag>;
15111517

15121518
class CheriSetCCPatGpcrNull<PatFrag CondOp, dag GprDag>
1513-
: Pat<(XLenVT (CondOp GPCR:$cs1, (inttoptr (XLenVT 0)))),
1514-
(OutPatFrag<(ops node:$rs1), GprDag>
1515-
(XLenVT (EXTRACT_SUBREG GPCR:$cs1, sub_cap_addr)))>;
1519+
: Pat<(XLenVT(CondOp(cPTR GPCR:$cs1), (inttoptr(XLenVT 0)))),
1520+
(OutPatFrag<(ops node:$rs1), GprDag>(XLenVT(EXTRACT_SUBREG GPCR:$cs1,
1521+
sub_cap_addr)))>;
15161522

15171523
class Swap<PatFrag BinFrag>
15181524
: PatFrag<(ops node:$a, node:$b), (BinFrag $b, $a)>;
@@ -1555,11 +1561,10 @@ defm Select_GPCR : SelectCC_GPR_rrirr<GPCR, CLenVT>;
15551561
// No dedicated instructions; see above
15561562

15571563
class CheriBccPat<PatFrag CondOp, RVInstB Inst>
1558-
: Pat<(brcond (XLenVT (CondOp GPCR:$rs1, GPCR:$rs2)), bb:$imm12),
1559-
(Inst
1560-
(XLenVT (EXTRACT_SUBREG GPCR:$rs1, sub_cap_addr)),
1561-
(XLenVT (EXTRACT_SUBREG GPCR:$rs2, sub_cap_addr)),
1562-
simm13_lsb0:$imm12)>;
1564+
: Pat<(brcond(XLenVT(CondOp(cPTR GPCR:$rs1), (cPTR GPCR:$rs2))), bb:$imm12),
1565+
(Inst(XLenVT(EXTRACT_SUBREG GPCR:$rs1, sub_cap_addr)),
1566+
(XLenVT(EXTRACT_SUBREG GPCR:$rs2, sub_cap_addr)),
1567+
simm13_lsb0:$imm12)>;
15631568

15641569
def : CheriBccPat<seteq, BEQ>;
15651570
def : CheriBccPat<setne, BNE>;
@@ -1569,11 +1574,10 @@ def : CheriBccPat<setult, BLTU>;
15691574
def : CheriBccPat<setuge, BGEU>;
15701575

15711576
class CheriBccSwapPat<PatFrag CondOp, RVInst InstBcc>
1572-
: Pat<(brcond (XLenVT (CondOp GPCR:$rs1, GPCR:$rs2)), bb:$imm12),
1573-
(InstBcc
1574-
(XLenVT (EXTRACT_SUBREG GPCR:$rs2, sub_cap_addr)),
1575-
(XLenVT (EXTRACT_SUBREG GPCR:$rs1, sub_cap_addr)),
1576-
simm13_lsb0:$imm12)>;
1577+
: Pat<(brcond(XLenVT(CondOp(cPTR GPCR:$rs1), (cPTR GPCR:$rs2))), bb:$imm12),
1578+
(InstBcc(XLenVT(EXTRACT_SUBREG GPCR:$rs2, sub_cap_addr)),
1579+
(XLenVT(EXTRACT_SUBREG GPCR:$rs1, sub_cap_addr)),
1580+
simm13_lsb0:$imm12)>;
15771581

15781582
def : CheriBccSwapPat<setgt, BLT>;
15791583
def : CheriBccSwapPat<setle, BGE>;
@@ -1587,15 +1591,18 @@ def : PatGpcrGpcr<riscv_cap_equal_exact, CSEQX, XLenVT>;
15871591

15881592
/// Special Capability Register Access Instructions
15891593

1590-
def : Pat<(int_cheri_ddc_get), (CSpecialRW SCR_DDC.Encoding, C0)>;
1591-
let Predicates = [HasCheri, IsPureCapABI] in
1592-
def : Pat<(int_cheri_stack_cap_get), (CLenVT (COPY C2))>;
1594+
def : Pat<(int_cheri_ddc_get), (CSpecialRW SCR_DDC.Encoding, (cPTR C0))>;
1595+
let Predicates = [HasCheri,
1596+
IsPureCapABI] in def : Pat<(int_cheri_stack_cap_get),
1597+
(CLenVT(COPY(cPTR C2)))>;
15931598

15941599
let Predicates = [HasCheri, IsCapMode] in
15951600
def : Pat<(int_cheri_pcc_get), (AUIPCC 0)>;
15961601

1597-
let Predicates = [HasCheri, NotCapMode] in
1598-
def : Pat<(int_cheri_pcc_get), (CSpecialRW SCR_PCC.Encoding, C0)>;
1602+
let Predicates = [HasCheri,
1603+
NotCapMode] in def : Pat<(int_cheri_pcc_get),
1604+
(CSpecialRW SCR_PCC.Encoding,
1605+
(cPTR C0))>;
15991606

16001607
/// Fast Register-Clearing Instructions
16011608

@@ -1824,13 +1831,14 @@ defm : PseudoCmpXchgPat<"atomic_cmp_swap_cap", PseudoCmpXchgCap, CLenVT, GPCR>;
18241831
/// Capability Mode Instructions
18251832

18261833
multiclass CheriLdPat<PatFrag LoadOp, RVInst Inst, ValueType ReturnVt = XLenVT> {
1827-
def : Pat<(ReturnVt (LoadOp (CapRegImm GPCR:$rs1, simm12:$imm12))),
1834+
def : Pat<(ReturnVt(LoadOp(CapRegImm(cPTR GPCR:$rs1), simm12:$imm12))),
18281835
(Inst GPCR:$rs1, simm12:$imm12)>;
18291836
}
18301837

18311838
multiclass CheriStPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy, ValueType StoreVt = XLenVT> {
1832-
def : Pat<(StoreOp (StoreVt StTy:$rs2), (CapRegImm GPCR:$rs1, simm12:$imm12)),
1833-
(Inst (StoreVt StTy:$rs2), GPCR:$rs1, simm12:$imm12)>;
1839+
def : Pat<(StoreOp(StoreVt StTy:$rs2), (CapRegImm(cPTR GPCR:$rs1),
1840+
simm12:$imm12)),
1841+
(Inst(StoreVt StTy:$rs2), GPCR:$rs1, simm12:$imm12)>;
18341842
}
18351843

18361844
multiclass CheriAtomicStPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy, ValueType StoreVt>
@@ -2287,6 +2295,11 @@ defm : CheriLdPat<load, CLC_128, CLenVT>;
22872295
defm : CheriStPat<store, CSC_128, GPCR, CLenVT>;
22882296
} // Predicates = [HasCheri, IsRV64, IsCapMode]
22892297

2298+
let Predicates = [HasCheri, HasCheriot, IsRV32, IsCapMode] in {
2299+
defm : CheriLdPat<load, CLC_64, f64>;
2300+
defm : CheriStPat<store, CSC_64, GPCR, f64>;
2301+
} // Predicates = [HasCheri, HasCheriot, IsRV32, IsCapMode]
2302+
22902303
//===----------------------------------------------------------------------===//
22912304
// Compress Instruction tablegen backend.
22922305
//===----------------------------------------------------------------------===//
@@ -2429,7 +2442,17 @@ let Predicates = [HasCheri, IsRV32, IsCapMode, IsRVE] in {
24292442
let mayLoad = true, mayStore = false, hasSideEffects = false in
24302443
def PseudoCLLW : Pseudo<(outs GPCR:$dst), (ins bare_symbol:$src), [],
24312444
"cllc", "$dst, $src">;
2432-
def : Pat<(load (cPTR (load (iPTR globaladdr:$src)))),
2445+
def : Pat<(c64(load(cPTR(load(iPTR globaladdr:$src))))),
2446+
(PseudoCLLW bare_symbol:$src)>;
2447+
def : Pat<(f64(load(cPTR(load(iPTR globaladdr:$src))))),
24332448
(PseudoCLLW bare_symbol:$src)>;
24342449
} // Predicates = [HasCheri, IsRV32, IsCapMode, IsRVE]
2435-
2450+
2451+
// Cheriot stores f64 in cap registers, so bitcasting between f64 and c64
2452+
// is a no-op.
2453+
multiclass NopCapRegCast<ValueType Ty1, ValueType Ty2> {
2454+
def : Pat<(Ty1(bitconvert(Ty2 GPCR:$Val))), (Ty1 GPCR:$Val)>;
2455+
def : Pat<(Ty2(bitconvert(Ty1 GPCR:$Val))), (Ty2 GPCR:$Val)>;
2456+
}
2457+
2458+
let Predicates = [HasCheri, HasCheriot] in { defm : NopCapRegCast<c64, f64>; }

0 commit comments

Comments
 (0)