llvm
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
Lines changed: 8 additions & 3 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Lines changed: 4 additions & 8 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
Lines changed: 18 additions & 26 deletions
@@ -5066,15 +5066,20 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
   }
   case ISD::SELECT: {
     // fneg (select c, a, b) -> select c, (fneg a), (fneg b)
-    // This combine became necessary recently to prevent a regression after v2i32 xor was made legal.
-    // When adding this combine a case was added to performFNEGCombine to prevent this combine from
-    // being undone under certain conditions.
+    // This combine is needed to prevent a regression in
+    // fneg-modifier-casting.ll caused by legalising v2i32 xor: additional
+    // instructions were added to the final codegen.
+    // When adding this combine, a case was added to performFNegCombine to
+    // prevent this combine from being undone under certain conditions.
     // TODO: Invert conditions of foldFreeOpFromSelect
     SDValue Cond = N0.getOperand(0);
     SDValue LHS = N0.getOperand(1);
     SDValue RHS = N0.getOperand(2);
     EVT LHVT = LHS.getValueType();
     EVT RHVT = RHS.getValueType();
+    // The regression was limited to i32/v2i32.
+    if (RHVT != MVT::i32 && LHVT != MVT::i32)
+      return SDValue();
 
     SDValue LFNeg = DAG.getNode(ISD::FNEG, SL, LHVT, LHS);
     SDValue RFNeg = DAG.getNode(ISD::FNEG, SL, RHVT, RHS);
 
@@ -5938,10 +5938,10 @@ SDValue SITargetLowering::splitUnaryVectorOp(SDValue Op,
 }
 
 // Enable lowering of ROTR for vxi32 types. This is a workaround for a
-// regression caused by legalising v2i32 or.
+// regression in rotr.ll caused by legalising v2i32 or, whereby extra
+// unnecessary instructions were added to the final codegen.
 SDValue SITargetLowering::lowerROTR(SDValue Op, SelectionDAG &DAG) const {
-  unsigned Opc = Op.getOpcode();
-  EVT VT = Op.getValueType();
+  [[maybe_unused]] EVT VT = Op.getValueType();
 
   assert((VT == MVT::v2i32 || VT == MVT::v4i32 || VT == MVT::v8i32 ||
           VT == MVT::v16i32) &&
@@ -12998,7 +12998,7 @@ SDValue SITargetLowering::performXorCombine(SDNode *N,
     SDValue LHS_0 = LHS.getOperand(0);
     SDValue LHS_1 = LHS.getOperand(1);
 
-    if (LHS.getOpcode() == ISD::VSELECT && VT == MVT::v2i32) {
+    if (LHS.getOpcode() == ISD::VSELECT) {
       if (CRHS_0 && CRHS_0->getAPIntValue().isSignMask() &&
           shouldFoldFNegIntoSrc(N, LHS_0))
         if (CRHS_1 && CRHS_1->getAPIntValue().isSignMask() &&
@@ -13015,12 +13015,8 @@ SDValue SITargetLowering::performXorCombine(SDNode *N,
           return DAG.getNode(ISD::BITCAST, DL, VT, NewSelect);
         }
     }
-    // Possibly split vector here if one side does have a constant RHS.
   }
 
-  // Add test for when only one of the RHS vector elements is a const. Might be
-  // possible to optimise this case.
-
   const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS);
 
   if (CRHS && VT == MVT::i64) {
 
@@ -1793,7 +1793,6 @@ def : GCNPat <
 >;
 }
 
-
 /********** ================================ **********/
 /********** Floating point absolute/negative **********/
 /********** ================================ **********/
@@ -2389,31 +2388,25 @@ def : GCNPat<(i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))),
 } // end True16Predicate = NotHasTrue16BitInsts
 
 let True16Predicate = UseRealTrue16Insts in {
+  def : GCNPat<(rotr i32:$src0, i32:$src1),
+               (V_ALIGNBIT_B32_t16_e64 /* src0_modifiers */ 0, $src0,
+                   /* src1_modifiers */ 0, $src0,
+                   /* src2_modifiers */ 0, (EXTRACT_SUBREG $src1, lo16),
+                   /* clamp */ 0, /* op_sel */ 0)>;
 
-def : GCNPat <
-  (rotr i32:$src0, i32:$src1),
-  (V_ALIGNBIT_B32_t16_e64 /* src0_modifiers */ 0, $src0,
-                          /* src1_modifiers */ 0, $src0,
-                          /* src2_modifiers */ 0,
-                          (EXTRACT_SUBREG $src1, lo16),
-                          /* clamp */ 0, /* op_sel */ 0)
->;
-
-def : GCNPat<(i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))),
-          (V_ALIGNBIT_B32_t16_e64 0, /* src0_modifiers */
-                          (i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
-                          0, /* src1_modifiers */
-                          (i32 (EXTRACT_SUBREG (i64 $src0), sub0)),
-                          0, /* src2_modifiers */
-                          (i16 (EXTRACT_SUBREG VGPR_32:$src1, lo16)),
-                          /* clamp */ 0, /* op_sel */ 0)>;
-
-def : GCNPat<(fshr i32:$src0, i32:$src1, i32:$src2),
-          (V_ALIGNBIT_B32_t16_e64 /* src0_modifiers */ 0, $src0,
-                          /* src1_modifiers */ 0, $src1,
-                          /* src2_modifiers */ 0,
-                          (EXTRACT_SUBREG VGPR_32:$src2, lo16),
-                          /* clamp */ 0, /* op_sel */ 0)>;
+  def : GCNPat<
+            (i32(trunc(srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))),
+            (V_ALIGNBIT_B32_t16_e64 /* src0_modifiers */ 0,
+                (i32(EXTRACT_SUBREG(i64 $src0), sub1)), /* src1_modifiers */ 0,
+                (i32(EXTRACT_SUBREG(i64 $src0), sub0)), /* src2_modifiers */ 0,
+                (i16(EXTRACT_SUBREG VGPR_32:$src1, lo16)),
+                /* clamp */ 0, /* op_sel */ 0)>;
+
+  def : GCNPat<(fshr i32:$src0, i32:$src1, i32:$src2),
+               (V_ALIGNBIT_B32_t16_e64 /* src0_modifiers */ 0, $src0,
+                   /* src1_modifiers */ 0, $src1,
+                   /* src2_modifiers */ 0, (EXTRACT_SUBREG VGPR_32:$src2, lo16),
+                   /* clamp */ 0, /* op_sel */ 0)>;
 } // end True16Predicate = UseRealTrue16Insts
 
 let True16Predicate = UseFakeTrue16Insts in {
@@ -2451,7 +2444,6 @@ def : GCNPat<(fshr i32:$src0, i32:$src1, i32:$src2),
 >;
 } // end True16Predicate = UseFakeTrue16Insts
 
-
 /********** ====================== **********/
 /**********   Indirect addressing  **********/
 /********** ====================== **********/