[GlobalIsel] Visit ICmp #105991
Conversation
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-llvm-globalisel

Author: Thorsten Schütt (tschuett)

Changes: inspired by simplifyICmpInst and simplifyICmpWithZero.

Patch is 97.96 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/105991.diff

15 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 9b62d6067be39c..da9c7fdbd2a093 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -20,6 +20,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGenTypes/LowLevelType.h"
#include "llvm/IR/InstrTypes.h"
@@ -299,6 +300,12 @@ class CombinerHelper {
/// $whatever = COPY $addr
bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen = 0);
+ bool visitICmp(const MachineInstr &MI, BuildFnTy &MatchInfo);
+ bool matchSextOfICmp(const MachineInstr &MI, BuildFnTy &MatchInfo);
+ bool matchZextOfICmp(const MachineInstr &MI, BuildFnTy &MatchInfo);
+ /// Try hard to fold icmp with zero RHS because this is a common case.
+ bool matchCmpOfZero(const MachineInstr &MI, BuildFnTy &MatchInfo);
+
bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo);
void applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo);
@@ -1017,6 +1024,9 @@ class CombinerHelper {
bool tryFoldLogicOfFCmps(GLogicalBinOp *Logic, BuildFnTy &MatchInfo);
bool isCastFree(unsigned Opcode, LLT ToTy, LLT FromTy) const;
+
+ bool constantFoldICmp(const GICmp &ICmp, const GIConstant &LHS,
+ const GIConstant &RHS, BuildFnTy &MatchInfo);
};
} // namespace llvm
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index ef1171d9f1f64d..427b5a86b6e0c4 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -950,6 +950,30 @@ class GExtOrTruncOp : public GCastOp {
};
};
+/// Represents a splat vector.
+class GSplatVector : public GenericMachineInstr {
+public:
+ Register getValueReg() const { return getOperand(1).getReg(); }
+
+ static bool classof(const MachineInstr *MI) {
+ return MI->getOpcode() == TargetOpcode::G_SPLAT_VECTOR;
+ };
+};
+
+/// Represents an integer-like extending operation.
+class GZextOrSextOp : public GCastOp {
+public:
+ static bool classof(const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_ZEXT:
+ return true;
+ default:
+ return false;
+ }
+ };
+};
+
} // namespace llvm
#endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index cf5fd6d6f288bd..a8bf2e722881ac 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -593,5 +593,31 @@ bool isGuaranteedNotToBeUndef(Register Reg, const MachineRegisterInfo &MRI,
/// estimate of the type.
Type *getTypeForLLT(LLT Ty, LLVMContext &C);
+enum class GIConstantKind { Scalar, FixedVector, ScalableVector };
+
+/// An integer-like constant.
+class GIConstant {
+ GIConstantKind Kind;
+ SmallVector<APInt> Values;
+ APInt Value;
+
+public:
+ GIConstant(ArrayRef<APInt> Values)
+ : Kind(GIConstantKind::FixedVector), Values(Values) {};
+ GIConstant(const APInt &Value, GIConstantKind Kind)
+ : Kind(Kind), Value(Value) {};
+
+ GIConstantKind getKind() const { return Kind; }
+
+ APInt getScalarValue() const;
+
+ static std::optional<GIConstant> getConstant(Register Const,
+ const MachineRegisterInfo &MRI);
+};
+
+/// Return true if the given value is known to be non-zero when defined.
+bool isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI,
+ GISelKnownBits *KB, unsigned Depth = 0);
+
} // End namespace llvm.
#endif
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 525cc815e73cef..175a8ed57b2669 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1007,9 +1007,6 @@ def double_icmp_zero_or_combine: GICombineRule<
(G_ICMP $root, $p, $ordst, 0))
>;
-def double_icmp_zero_and_or_combine : GICombineGroup<[double_icmp_zero_and_combine,
- double_icmp_zero_or_combine]>;
-
def and_or_disjoint_mask : GICombineRule<
(defs root:$root, build_fn_matchinfo:$info),
(match (wip_match_opcode G_AND):$root,
@@ -1884,6 +1881,46 @@ def cast_combines: GICombineGroup<[
buildvector_of_truncate
]>;
+def visit_icmp : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_ICMP $root, $pred, $lhs, $rhs):$cmp,
+ [{ return Helper.visitICmp(*${cmp}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${cmp}, ${matchinfo}); }])>;
+
+def sext_icmp : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_SEXT $rhs, $inputR),
+ (G_SEXT $lhs, $inputL),
+ (G_ICMP $root, $pred, $lhs, $rhs):$cmp,
+ [{ return Helper.matchSextOfICmp(*${cmp}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${cmp}, ${matchinfo}); }])>;
+
+def zext_icmp : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_ZEXT $rhs, $inputR),
+ (G_ZEXT $lhs, $inputL),
+ (G_ICMP $root, $pred, $lhs, $rhs):$cmp,
+ [{ return Helper.matchZextOfICmp(*${cmp}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${cmp}, ${matchinfo}); }])>;
+
+def icmp_of_zero : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_CONSTANT $zero, 0),
+ (G_ICMP $root, $pred, $lhs, $zero):$cmp,
+ [{ return Helper.matchCmpOfZero(*${cmp}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${cmp}, ${matchinfo}); }])>;
+
+def icmp_combines: GICombineGroup<[
+ visit_icmp,
+ sext_icmp,
+ zext_icmp,
+ icmp_of_zero,
+ icmp_to_true_false_known_bits,
+ icmp_to_lhs_known_bits,
+ double_icmp_zero_and_combine,
+ double_icmp_zero_or_combine,
+ redundant_binop_in_equality
+]>;
// FIXME: These should use the custom predicate feature once it lands.
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
@@ -1917,7 +1954,7 @@ def const_combines : GICombineGroup<[constant_fold_fp_ops, const_ptradd_to_i2p,
def known_bits_simplifications : GICombineGroup<[
redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask,
- zext_trunc_fold, icmp_to_true_false_known_bits, icmp_to_lhs_known_bits,
+ zext_trunc_fold,
sext_inreg_to_zext_inreg]>;
def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend,
@@ -1944,7 +1981,7 @@ def constant_fold_binops : GICombineGroup<[constant_fold_binop,
def prefer_sign_combines : GICombineGroup<[nneg_zext]>;
-def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
+def all_combines : GICombineGroup<[icmp_combines, integer_reassoc_combines, trivial_combines,
vector_ops_combines, freeze_combines, cast_combines,
insert_vec_elt_combines, extract_vec_elt_combines, combines_for_extload,
combine_extracted_vector_load,
@@ -1964,9 +2001,9 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
constant_fold_cast_op, fabs_fneg_fold,
intdiv_combines, mulh_combines, redundant_neg_operands,
and_or_disjoint_mask, fma_combines, fold_binop_into_select,
- sub_add_reg, select_to_minmax, redundant_binop_in_equality,
+ sub_add_reg, select_to_minmax,
fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
- combine_concat_vector, double_icmp_zero_and_or_combine, match_addos,
+ combine_concat_vector, match_addos,
sext_trunc, zext_trunc, prefer_sign_combines, combine_shuffle_concat]>;
// A combine group used to for prelegalizer combiners at -O0. The combines in
diff --git a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
index a15b76440364b1..af1717dbf76f39 100644
--- a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
+++ b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
@@ -7,6 +7,7 @@ add_llvm_component_library(LLVMGlobalISel
Combiner.cpp
CombinerHelper.cpp
CombinerHelperCasts.cpp
+ CombinerHelperCompares.cpp
CombinerHelperVectorOps.cpp
GIMatchTableExecutor.cpp
GISelChangeObserver.cpp
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
new file mode 100644
index 00000000000000..415768fb07e59f
--- /dev/null
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
@@ -0,0 +1,305 @@
+//===- CombinerHelperCompares.cpp -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements CombinerHelper for G_ICMP
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cstdlib>
+
+#define DEBUG_TYPE "gi-combiner"
+
+using namespace llvm;
+
+bool CombinerHelper::constantFoldICmp(const GICmp &ICmp,
+ const GIConstant &LHSCst,
+ const GIConstant &RHSCst,
+ BuildFnTy &MatchInfo) {
+ if (LHSCst.getKind() != GIConstantKind::Scalar)
+ return false;
+
+ Register Dst = ICmp.getReg(0);
+ LLT DstTy = MRI.getType(Dst);
+
+ if (!isConstantLegalOrBeforeLegalizer(DstTy))
+ return false;
+
+ CmpInst::Predicate Pred = ICmp.getCond();
+ APInt LHS = LHSCst.getScalarValue();
+ APInt RHS = RHSCst.getScalarValue();
+
+ bool Result;
+
+ switch (Pred) {
+ case CmpInst::Predicate::ICMP_EQ:
+ Result = LHS.eq(RHS);
+ break;
+ case CmpInst::Predicate::ICMP_NE:
+ Result = LHS.ne(RHS);
+ break;
+ case CmpInst::Predicate::ICMP_UGT:
+ Result = LHS.ugt(RHS);
+ break;
+ case CmpInst::Predicate::ICMP_UGE:
+ Result = LHS.uge(RHS);
+ break;
+ case CmpInst::Predicate::ICMP_ULT:
+ Result = LHS.ult(RHS);
+ break;
+ case CmpInst::Predicate::ICMP_ULE:
+ Result = LHS.ule(RHS);
+ break;
+ case CmpInst::Predicate::ICMP_SGT:
+ Result = LHS.sgt(RHS);
+ break;
+ case CmpInst::Predicate::ICMP_SGE:
+ Result = LHS.sge(RHS);
+ break;
+ case CmpInst::Predicate::ICMP_SLT:
+ Result = LHS.slt(RHS);
+ break;
+ case CmpInst::Predicate::ICMP_SLE:
+ Result = LHS.sle(RHS);
+ break;
+ default:
+ llvm_unreachable("Unexpected predicate");
+ }
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ if (Result)
+ B.buildConstant(Dst, getICmpTrueVal(getTargetLowering(),
+ /*IsVector=*/DstTy.isVector(),
+ /*IsFP=*/false));
+ else
+ B.buildConstant(Dst, 0);
+ };
+
+ return true;
+}
+
+bool CombinerHelper::visitICmp(const MachineInstr &MI, BuildFnTy &MatchInfo) {
+ const GICmp *Cmp = cast<GICmp>(&MI);
+
+ Register Dst = Cmp->getReg(0);
+ LLT DstTy = MRI.getType(Dst);
+ Register LHS = Cmp->getLHSReg();
+ Register RHS = Cmp->getRHSReg();
+
+ CmpInst::Predicate Pred = Cmp->getCond();
+ assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!");
+ if (auto CLHS = GIConstant::getConstant(LHS, MRI)) {
+ if (auto CRHS = GIConstant::getConstant(RHS, MRI))
+ return constantFoldICmp(*Cmp, *CLHS, *CRHS, MatchInfo);
+
+ // If we have a constant, make sure it is on the RHS.
+ std::swap(LHS, RHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+
+ MatchInfo = [=](MachineIRBuilder &B) { B.buildICmp(Pred, Dst, LHS, RHS); };
+ return true;
+ }
+
+ [[maybe_unused]] MachineInstr *MILHS = MRI.getVRegDef(LHS);
+ MachineInstr *MIRHS = MRI.getVRegDef(RHS);
+
+ // For EQ and NE, we can always pick a value for the undef to make the
+ // predicate pass or fail, so we can return undef.
+ // Matches behavior in llvm::ConstantFoldCompareInstruction.
+ if (isa<GImplicitDef>(MIRHS) && ICmpInst::isEquality(Pred) &&
+ isLegalOrBeforeLegalizer({TargetOpcode::G_IMPLICIT_DEF, {DstTy}})) {
+ MatchInfo = [=](MachineIRBuilder &B) { B.buildUndef(Dst); };
+ return true;
+ }
+
+ // icmp X, X -> true/false
+ // icmp X, undef -> true/false because undef could be X.
+ if ((LHS == RHS || isa<GImplicitDef>(MIRHS)) &&
+ isConstantLegalOrBeforeLegalizer(DstTy)) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ if (CmpInst::isTrueWhenEqual(Pred))
+ B.buildConstant(Dst, getICmpTrueVal(getTargetLowering(),
+ /*IsVector=*/DstTy.isVector(),
+ /*IsFP=*/false));
+ else
+ B.buildConstant(Dst, 0);
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchSextOfICmp(const MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ const GICmp *Cmp = cast<GICmp>(&MI);
+
+ Register Dst = Cmp->getReg(0);
+ LLT DstTy = MRI.getType(Dst);
+ Register LHS = Cmp->getLHSReg();
+ Register RHS = Cmp->getRHSReg();
+ CmpInst::Predicate Pred = Cmp->getCond();
+
+ GSext *SL = cast<GSext>(MRI.getVRegDef(LHS));
+ GSext *SR = cast<GSext>(MRI.getVRegDef(RHS));
+
+ LLT SLTy = MRI.getType(SL->getSrcReg());
+ LLT SRTy = MRI.getType(SR->getSrcReg());
+
+ // Turn icmp (sext X), (sext Y) into a compare of X and Y if they have the
+ // same type.
+ if (SLTy != SRTy)
+ return false;
+
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ICMP, {DstTy, SLTy}}))
+ return false;
+
+ // Compare X and Y. Note that the predicate does not change.
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildICmp(Pred, Dst, SL->getSrcReg(), SR->getSrcReg());
+ };
+ return true;
+}
+
+bool CombinerHelper::matchZextOfICmp(const MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ const GICmp *Cmp = cast<GICmp>(&MI);
+
+ Register Dst = Cmp->getReg(0);
+ LLT DstTy = MRI.getType(Dst);
+ Register LHS = Cmp->getLHSReg();
+ Register RHS = Cmp->getRHSReg();
+ CmpInst::Predicate Pred = Cmp->getCond();
+
+ /*
+ %x:_(p0) = COPY $x0
+ %y:_(p0) = COPY $x1
+ %zero:_(p0) = G_CONSTANT i64 0
+ %cmp1:_(s1) = G_ICMP intpred(eq), %x:_(p0), %zero:_
+ */
+
+ if (MRI.getType(LHS).isPointer() || MRI.getType(RHS).isPointer())
+ return false;
+
+ if (!MRI.getType(LHS).isScalar() || !MRI.getType(RHS).isScalar())
+ return false;
+
+ GZext *ZL = cast<GZext>(MRI.getVRegDef(LHS));
+ GZext *ZR = cast<GZext>(MRI.getVRegDef(RHS));
+
+ LLT ZLTy = MRI.getType(ZL->getSrcReg());
+ LLT ZRTy = MRI.getType(ZR->getSrcReg());
+
+ // Turn icmp (zext X), (zext Y) into a compare of X and Y if they have
+ // the same type.
+ if (ZLTy != ZRTy)
+ return false;
+
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ICMP, {DstTy, ZLTy}}))
+ return false;
+
+ // Compare X and Y. Note that signed predicates become unsigned.
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildICmp(ICmpInst::getUnsignedPredicate(Pred), Dst, ZL->getSrcReg(),
+ ZR->getSrcReg());
+ };
+ return true;
+}
+
+bool CombinerHelper::matchCmpOfZero(const MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ const GICmp *Cmp = cast<GICmp>(&MI);
+
+ Register Dst = Cmp->getReg(0);
+ LLT DstTy = MRI.getType(Dst);
+ Register LHS = Cmp->getLHSReg();
+ CmpInst::Predicate Pred = Cmp->getCond();
+
+ if (!isConstantLegalOrBeforeLegalizer(DstTy))
+ return false;
+
+ std::optional<bool> Result;
+
+ switch (Pred) {
+ default:
+ llvm_unreachable("Unkonwn ICmp predicate!");
+ case ICmpInst::ICMP_ULT:
+ Result = false;
+ break;
+ case ICmpInst::ICMP_UGE:
+ Result = true;
+ break;
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_ULE:
+ if (isKnownNonZero(LHS, MRI, KB))
+ Result = false;
+ break;
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_UGT:
+ if (isKnownNonZero(LHS, MRI, KB))
+ Result = true;
+ break;
+ case ICmpInst::ICMP_SLT: {
+ KnownBits LHSKnown = KB->getKnownBits(LHS);
+ if (LHSKnown.isNegative())
+ Result = true;
+ if (LHSKnown.isNonNegative())
+ Result = false;
+ break;
+ }
+ case ICmpInst::ICMP_SLE: {
+ KnownBits LHSKnown = KB->getKnownBits(LHS);
+ if (LHSKnown.isNegative())
+ Result = true;
+ if (LHSKnown.isNonNegative() && isKnownNonZero(LHS, MRI, KB))
+ Result = false;
+ break;
+ }
+ case ICmpInst::ICMP_SGE: {
+ KnownBits LHSKnown = KB->getKnownBits(LHS);
+ if (LHSKnown.isNegative())
+ Result = false;
+ if (LHSKnown.isNonNegative())
+ Result = true;
+ break;
+ }
+ case ICmpInst::ICMP_SGT: {
+ KnownBits LHSKnown = KB->getKnownBits(LHS);
+ if (LHSKnown.isNegative())
+ Result = false;
+ if (LHSKnown.isNonNegative() && isKnownNonZero(LHS, MRI, KB))
+ Result = true;
+ break;
+ }
+ }
+
+ if (!Result)
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ if (*Result)
+ B.buildConstant(Dst, getICmpTrueVal(getTargetLowering(),
+ /*IsVector=*/DstTy.isVector(),
+ /*IsFP=*/false));
+ else
+ B.buildConstant(Dst, 0);
+ };
+
+ return true;
+}
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index cfdd9905c16fa6..e8b9d995a22768 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1984,3 +1984,326 @@ Type *llvm::getTypeForLLT(LLT Ty, LLVMContext &C) {
Ty.getElementCount());
return IntegerType::get(C, Ty.getSizeInBits());
}
+
+APInt llvm::GIConstant::getScalarValue() const {
+ assert(Kind == GIConstantKind::Scalar && "Expected scalar constant");
+
+ return Value;
+}
+
+std::optional<GIConstant>
+llvm::GIConstant::getConstant(Register Const, const MachineRegisterInfo &MRI) {
+ MachineInstr *Constant = getDefIgnoringCopies(Const, MRI);
+
+ if (GSplatVector *Splat = dyn_cast<GSplatVector>(Constant)) {
+ std::optional<ValueAndVReg> MayBeConstant =
+ getIConstantVRegValWithLookThrough(Splat->getValueReg(), MRI);
+ if (!MayBeConstant)
+ return std::nullopt;
+ return GIConstant(MayBeConstant->Value, GIConstantKind::ScalableVector);
+ }
+
+ if (GBuildVector *Build = dyn_cast<GBuildVector>(Constant)) {
+ SmallVector<APInt> Values;
+ unsigned NumSources = Build->getNumSources();
+ for (unsigned I = 0; I < NumSources; ++I) {
+ Register SrcReg = Build->getSourceReg(I);
+ std::optional<ValueAndVReg> MayBeConstant =
+ getIConstantVRegValWithLookThrough(SrcReg, MRI);
+ if (!MayBeConstant)
+ return std::nullopt;
+ Values.push_back(MayBeConstant->Value);
+ }
+ return GIConstant(Values);
+ }
+
+ std::optional<ValueAndVReg> MayBeConstant =
+ getIConstantVRegValWithLookThrough(Const, MRI);
+ if (!MayBeConstant)
+ return std::nullopt;
+
+ return GIConstant(MayBeConstant->Value, GIConstantKind::Scalar);
+}
+
+static bool isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI,
+ GISelKnownBits *KB, unsigned Depth);
+
+bool llvm::isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI,
+ GISelKnownBits *KB, unsigned Depth) {
+ if (!Reg.isVirtual())
+ return false;
+
+ LLT Ty = MRI.getType(Reg);
+ if (!Ty.isValid())
+ return false;
+
+ if (Ty.isPointer())
+ return false;
+
+ if (!Ty.isScalar())
+ errs() << "type: " << Ty << '\n';
+
+ assert(...
[truncated]
@@ -212,13 +212,10 @@ define float @sitofp_i128_to_f32(i128 %x) {
; GISEL-NEXT: v_or_b32_e32 v16, v10, v12
; GISEL-NEXT: v_lshrrev_b64 v[11:12], v14, -1
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v5
; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v15, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5
canonicalize constant to the right + matchCmpOfZero
I don't have time to review GlobalISel changes, but I'll say this: Transforms from the middle-end optimizer should only be reimplemented in GlobalISel (or SDAG) if the patterns can plausibly be introduced as a result of legalization. The input to ISel is always assumed to be canonical IR, so anything taken care of by middle-end canonicalization does not need to be handled by GlobalISel (unless GlobalISel itself can introduce the pattern, e.g. via legalization).
It is an open discussion whether the backend combiners purely clean up legalization artefacts or go beyond that: the move-constants-to-the-right combine also hits.

AArch64 runs a combiner pre-legalization. How could it clean up legalization artefacts?
Okay, maybe my comment didn't spell it out enough. I'm referring to combines that already exist as canonicalizations in the target-independent optimizer. Of course there are also combines during isel that are target-dependent, undoing middle-end canonicalizations, or similar. Those are not targeted at legalization artifacts and can be useful pre-legalization. What I want to push back against here is blindly copying over optimizations from InstSimplify or InstCombine, without any justification for why these optimizations need to be replicated inside GlobalISel. I'm not necessarily saying that the combines proposed here are bad, but they need more justification than "inspired by simplifyICmpInst and simplifyICmpWithZero". You should understand, and be able to articulate to reviewers, why it is beneficial to have these combines replicated in GlobalISel.

See my first comment. They reduce code size.

visitFreeze is ported almost exactly in both combiners. https://github.com/llvm/llvm-project/blob/main/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp creates freeze instructions while legalizing illegal freeze instructions, but I could not find other places. Both combiners create freeze instructions while combining, e.g., selects. It is hard to argue that we ported visitFreeze to combine legalization artefacts.

I removed the ext of icmp combines. Please search again for regressions.
@nikic Thanks for pointing this out. This is something I've been asking for a while too but hadn't expressed right. What we need to see, in the PR description, is a justification for why a combine is good or necessary. If we don't see that, then unless it happens to improve some already existing tests, I have to take time out of my day to build the PR and manually benchmark it for changes. So if we can avoid using "do the same thing as X in InstCombine" as a justification then it's clearer why it's a good thing.

IR is in perfect canonical state. Codegen may do some target-specific optimizations, but I would be surprised if they move constants to the left. The IRTranslator faithfully translates IR into gMIR. Thus porting middle-end optimizations to the backend is a waste of time because they cannot give any size improvements. I mentioned visitFreeze above. It is not dead code. It actually gives code size improvements, which is at odds with my previous statement. The same applies to the canonicalization of constants to the right for icmps. You can see hits in the AMDGPU code. It also gives size improvements in AArch64 code. It is not dead code.
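For illustration, the canonicalization under discussion is the small operand swap in visitICmp quoted in the diff above; a minimal sketch, assuming the CombinerHelper API from this patch:

  // If only the LHS is a constant, move it to the RHS and swap (not
  // negate) the predicate: icmp slt 42, %x --> icmp sgt %x, 42.
  if (auto CLHS = GIConstant::getConstant(LHS, MRI)) {
    std::swap(LHS, RHS);
    Pred = CmpInst::getSwappedPredicate(Pred);
    MatchInfo = [=](MachineIRBuilder &B) { B.buildICmp(Pred, Dst, LHS, RHS); };
    return true;
  }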
Regarding optimizations that exist in the middle-end: if that becomes a big issue, we could move it one floor up and discuss on Discourse the relationship between optimizations in the middle-end and the two backends.

Another waste of energy: #77855

Some more comments. Frankly I'm having trouble following some of your comments, like the one above. Are you agreeing or disagreeing with your current patch? At this point I have no idea.
/// An integer-like constant.
class GIConstant {
  GIConstantKind Kind;
  SmallVector<APInt> Values;
  APInt Value;

public:
  GIConstant(ArrayRef<APInt> Values)
      : Kind(GIConstantKind::FixedVector), Values(Values) {};
  GIConstant(const APInt &Value, GIConstantKind Kind)
      : Kind(Kind), Value(Value) {};

  GIConstantKind getKind() const { return Kind; }

  APInt getScalarValue() const;

  static std::optional<GIConstant> getConstant(Register Const,
                                               const MachineRegisterInfo &MRI);
};

/// Return true if the given value is known to be non-zero when defined.
bool isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI,
                    GISelKnownBits *KB, unsigned Depth = 0);
We have similar interfaces like isConstantOrConstantVector and friends. Can we extend those or unify in some more consistent way rather than have 2 different ways of querying constant values?
Note that isConstantOrConstantVector starts with "is". It is a test. GIConstant::getConstant gives you a constant. It is a vector of APInts. We can add arbitrary arithmetic: GIConstant + GIConstant.
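For illustration, a minimal sketch of how a combine could consume GIConstant; the wrapper function is hypothetical, and it assumes constantFoldICmp (from the diff above) is reachable from the call site:

  // Constant-fold an integer compare once both operands materialize as
  // GIConstants; otherwise bail out so other combines can run.
  bool tryConstantFoldICmp(CombinerHelper &Helper, const GICmp &Cmp,
                           const MachineRegisterInfo &MRI,
                           BuildFnTy &MatchInfo) {
    auto LHS = GIConstant::getConstant(Cmp.getLHSReg(), MRI);
    auto RHS = GIConstant::getConstant(Cmp.getRHSReg(), MRI);
    if (!LHS || !RHS)
      return false;
    return Helper.constantFoldICmp(Cmp, *LHS, *RHS, MatchInfo);
  }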
Yes I'm aware, I said isConstantOrConstantVector and friends. One of those friends is std::optional<APInt> llvm::isConstantOrConstantSplatVector(...), which returns an APInt. So if we add GIConstant now we have even more ways to do similar things. I'm not saying your approach is necessarily wrong here, but taking a step back, maybe this change should be a separate effort to unify our utilities better.
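For reference, a sketch contrasting the two query styles; signatures follow GlobalISel/Utils.h and the diff above, and Reg/MRI are assumed to be in scope:

  // Existing helper: a single APInt, and only for uniform (splat) values.
  std::optional<APInt> Splat =
      isConstantOrConstantSplatVector(*MRI.getVRegDef(Reg), MRI);
  // Proposed GIConstant: covers scalar, fixed-vector, and scalable-vector
  // constants behind one interface.
  std::optional<GIConstant> Cst = GIConstant::getConstant(Reg, MRI);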
We were limited to splat vectors before. GIConstant does not have this restriction.
I am fine with a family of tests for various kinds of constness, but I would argue that GIConstant is powerful enough to become the constant. It is a class. We can add arbitrary member functions for tests and arithmetic. Constants can be scalars, fixed vectors, and scalable vectors.
Perhaps you're right, all the more reason to do it in a separate change where we can think about the design with more care.
The design is minimal, it has uses, and getting rid of the splat vector variants will take a lot of energy. I would prefer to continue with this PR.
In a separate change, it would be dead code.
I am supportive of this patch. I was only stating the common assumption. But the cmp x, 0 combine is also at odds with this statement.

The general assumption is that IR is in canonical form and middle-end optimizations should not be ported to the backends. But reality is at odds with that statement. There are size improvements in this PR.

I added two combines in this PR. I have no interest in extending the first one. I would rather see more combines with disjoint and precise patterns, e.g., icmp_of_zero.

I strongly disagree with the change request. Compares, and only compares, deserve special treatment. Placing several optimizations into one function is beneficial: they share resources, and order matters. Spreading them over several combines would be less efficient. In most cases, the pattern would be just G_ICMP, which I am not interested in.
Could you point me to evidence of that? Thanks.
I see at least another unrelated undef combine in here, but I was more talking about the entire infrastructure being added here. You may technically be adding one "combine", but the underlying analysis has lots of cases, most of which do not appear to be tested, and where I seriously doubt the need to replicate them in GlobalISel. This PR has some very exotic stuff in it. I'm sure that there is some subset of the simplifications proposed here that is indeed useful, but I don't think that the PR as a whole, as you have presented it, is appropriate. The general approach of "take some methods from InstSimplify and copy them to GlobalISel" is something I very much want to discourage. If you want to copy simplifications from somewhere, at least look at DAGCombine instead, where there is at least a chance that somebody has exercised some degree of due diligence when adding things there. But in any case, any handling you add needs to have test coverage, including any cases in underlying analyses.

That didn't answer my question! The isKnownNonZero analysis is in Utils.cpp and test coverage is in combine-visit-icmp.mir and icmp2.ll.
I hadn't even closely looked at that combine yet, but from what I can tell you're using KB a lot in that code.
CombinerHelper::matchCmpOfZero uses KB directly at most once per case in the switch over the icmp predicate; see the switch in the diff above for reference.

A different question is the isKnownNonZero analysis, but it is depth-limited; see isKnownNonZero in Utils.cpp above.
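For illustration, a minimal sketch of the depth-limiting pattern at issue; it is simplified (the real recursion in Utils.cpp handles many more opcodes), and MaxAnalysisRecursionDepth = 6 comes from llvm/Analysis/ValueTracking.h:

  static bool isKnownNonZeroSketch(Register Reg,
                                   const MachineRegisterInfo &MRI,
                                   GISelKnownBits *KB, unsigned Depth) {
    // Give up instead of recursing without bound.
    if (Depth >= MaxAnalysisRecursionDepth)
      return false;
    MachineInstr *Def = MRI.getVRegDef(Reg);
    // A zero-extend preserves non-zero-ness; look one level deeper.
    if (Def->getOpcode() == TargetOpcode::G_ZEXT)
      return isKnownNonZeroSketch(Def->getOperand(1).getReg(), MRI, KB,
                                  Depth + 1);
    // Otherwise fall back to a single KnownBits query at this node.
    return KB->getKnownBits(Reg).isNonZero();
  }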
@nikic requested changes with the statement that this PR adds "a lot of expensive combines". I am still looking for evidence of that claim.
MaxAnalysisRecursionDepth is defaulted to 6. We're going to potentially do a 6-deep KB analysis on every icmp. Maybe we don't need to start with that and only use it if the data supports that it's worth it from code size or performance metrics? Have you run those benchmarks? Or do I need to do it myself again? I'm going to level with you here: at this point in the conversation, any time you bring up an IR combine for reference, it is not helping your case in the way you think. I really suggest you take a step back from this and consider why we have such resistance to this change. You basically have a lot of code, and you're resisting splitting up the visitICmp() function on the basis of efficiency, but you're also ok with adding a KB-dependent combine without having data about its cost/benefit. We need a fundamental rethink in how we add changes to GlobalISel. Individual patches like these are not important. They really aren't. Setting good examples and precedents, along with pragmatic choices backed up by data, is what's important.

I said above that visitICmp and matchCmpOfZero cooperate. I have said several times that llvm/test/CodeGen/AMDGPU/itofp.i128.ll shows evidence of the effects of this PR. Your resistance was based on naming and not on the combines or code.

I don't feel you're making a good-faith effort to listen to our concerns. I'm no longer going to participate in this PR review.
MaxAnalysisRecursionDepth comes from LLVM IR. The constant was not invented by GlobalISel. We already use it in production.

I have never heard, in a GlobalISel combiner review, discussion of data-driven decisions about, e.g., MaxAnalysisRecursionDepth. Is 4, 5, 6, or 7 optimal for GlobalISel? My feeling so far has been that it is sufficient for new combines.

If these rules have changed, we should move the discussion to #92309.

Sorry for my bad communication.

The problem is neither the numeric value nor where it comes from. Do also consider that GlobalISel is an instruction selector, not a middle-end pass like InstCombine. We don't need to redo everything the middle-end does, because the middle-end is supposed to do that for us :)

This combiner combines opcodes that are not coming out of the legalizer as legalization artefacts. It is a generic optimizing combiner. We use the combiner to reduce code size and make the code more amenable to execution (G_MUL -> G_ADD). It is not the first time that we benefit from the expertise of the middle-end in analysis and optimizations. I stated that if we change the order of the two undef optimizations, the results will change. I am asked to hoist these optimizations into separate combines where I lose control of the order. #90618 blindly copied code from the middle-end with almost unnoticeable code size improvement and without cost-benefit analysis. It wasn't run on a set of benchmarks and there was no compile-time analysis. I mentioned above that I am asked to separate canonicalization and the icmp x, 0 combine into two separate PRs, neither of which is profitable in isolation. I am stuck again. I am asked to do tasks that I cannot do?!?
" at least look at DAGCombine instead, " |
Alex Bradbury: IMHO use of MachineIR is more of a defining feature than “global” scope. We test the correctness of combines in MIR. For the DAGCombiner noise in end-to-end tests seems to be sufficient for acceptance. |
I tried a new version:
I removed the undef optimizations. There was too much discussion. Canonicalization is not an optimization. We do it blindly for as many opcodes as possible.
Please split this up into multiple patches as @nikic requested. Start with the canonicalization, perhaps under a clearer name. I'm not saying that the rest of the changes will be accepted after splitting; we still need to come to an agreement about the known bits issue.
As a reminder: I don't care one iota about how precise the analysis is in the combiner if it doesn't pay for its compile-time and complexity cost with real-world improvements. Not synthetic MIR tests, but real-world improvements in code quality. This could be systemic improvements across the LLVM test suite at -Os/O3, or optimizing a key loop in a common benchmark. Nothing is landing in the tree without test cases that exercise the code without synthetic input.
I still have an unanswered question.

Which question?

@nikic requested changes with the statement that this PR adds "a lot of expensive combines". I am still looking for evidence of that claim.

Sorry, my phrasing there was bad. It's not "a lot of expensive combines", it's one combine based on the isKnownNonZero() analysis being introduced here, which in turn has a lot of recursive KnownBits calls. FWIW, from IR experience we know that simplifyICmpInst and KnownBits-based icmp simplification take up a large fraction of InstCombine time for many programs, so the cost/benefit question here is not an idle concern.

That makes sense. Thanks.
/// Represents a splat vector.
class GSplatVector : public GenericMachineInstr {
public:
  Register getValueReg() const { return getOperand(1).getReg(); }

  static bool classof(const MachineInstr *MI) {
    return MI->getOpcode() == TargetOpcode::G_SPLAT_VECTOR;
  };
};

/// Represents an integer-like extending operation.
class GZextOrSextOp : public GCastOp {
public:
  static bool classof(const MachineInstr *MI) {
    switch (MI->getOpcode()) {
    case TargetOpcode::G_SEXT:
    case TargetOpcode::G_ZEXT:
      return true;
    default:
      return false;
    }
  };
};
Should extract the infrastructure bits out of this
GSplatVector is already upstream. GZextOrSextOp probably has a lower market share.
static bool isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI,
                           GISelKnownBits *KB, unsigned Depth);

bool llvm::isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI,
We do want such a helper, but that doesn't mean we should use it aggressively.
This was one of the most controversial questions of this PR. I give up on this one.
if (!Reg.isVirtual())
  return false;

LLT Ty = MRI.getType(Reg);
if (!Ty.isValid())
  return false;
These cases are forbidden and shouldn't require defending against