
[GlobalIsel] Visit ICmp #105991

Closed
wants to merge 7 commits into from

Conversation

@tschuett (Author) commented Aug 25, 2024

Inspired by simplifyICmpInst and simplifyICmpWithZero:

  • constant folding
  • canonicalize constants to the right on icmps (see the sketch below)
  • icmp x, 0 combine
    • isKnownNonZero analysis
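To illustrate the canonicalization bullet, a minimal sketch of the relevant part of visitICmp (the full implementation is in the patch below; the variable names follow the diff):

    // If only the LHS is a constant, swap the operands and the predicate so
    // that the constant ends up on the RHS, e.g. G_ICMP sgt, 0, %x becomes
    // G_ICMP slt, %x, 0. The icmp x, 0 combine can then match the result.
    std::swap(LHS, RHS);
    Pred = CmpInst::getSwappedPredicate(Pred);
    MatchInfo = [=](MachineIRBuilder &B) { B.buildICmp(Pred, Dst, LHS, RHS); };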

@llvmbot (Member) commented Aug 25, 2024

@llvm/pr-subscribers-backend-amdgpu
@llvm/pr-subscribers-backend-aarch64

@llvm/pr-subscribers-llvm-globalisel

Author: Thorsten Schütt (tschuett)

Changes

inspired by simplifyICmpInst and simplifyICmpWithZero


Patch is 97.96 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/105991.diff

15 Files Affected:

  • (modified) llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h (+10)
  • (modified) llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h (+24)
  • (modified) llvm/include/llvm/CodeGen/GlobalISel/Utils.h (+26)
  • (modified) llvm/include/llvm/Target/GlobalISel/Combine.td (+44-7)
  • (modified) llvm/lib/CodeGen/GlobalISel/CMakeLists.txt (+1)
  • (added) llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp (+305)
  • (modified) llvm/lib/CodeGen/GlobalISel/Utils.cpp (+323)
  • (modified) llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll (+56-40)
  • (modified) llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll (+32-24)
  • (added) llvm/test/CodeGen/AArch64/GlobalISel/combine-visit-icmp.mir (+167)
  • (modified) llvm/test/CodeGen/AArch64/arm64-ccmp.ll (+18-42)
  • (added) llvm/test/CodeGen/AArch64/icmp2.ll (+295)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll (+9-9)
  • (modified) llvm/test/CodeGen/AMDGPU/itofp.i128.ll (+68-86)
  • (modified) llvm/test/CodeGen/AMDGPU/rsq.f64.ll (+23-23)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 9b62d6067be39c..da9c7fdbd2a093 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -20,6 +20,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/Register.h"
 #include "llvm/CodeGenTypes/LowLevelType.h"
 #include "llvm/IR/InstrTypes.h"
@@ -299,6 +300,12 @@ class CombinerHelper {
   ///     $whatever = COPY $addr
   bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen = 0);
 
+  bool visitICmp(const MachineInstr &MI, BuildFnTy &MatchInfo);
+  bool matchSextOfICmp(const MachineInstr &MI, BuildFnTy &MatchInfo);
+  bool matchZextOfICmp(const MachineInstr &MI, BuildFnTy &MatchInfo);
+  /// Try hard to fold icmp with zero RHS because this is a common case.
+  bool matchCmpOfZero(const MachineInstr &MI, BuildFnTy &MatchInfo);
+
   bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo);
   void applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo);
 
@@ -1017,6 +1024,9 @@ class CombinerHelper {
   bool tryFoldLogicOfFCmps(GLogicalBinOp *Logic, BuildFnTy &MatchInfo);
 
   bool isCastFree(unsigned Opcode, LLT ToTy, LLT FromTy) const;
+
+  bool constantFoldICmp(const GICmp &ICmp, const GIConstant &LHS,
+                        const GIConstant &RHS, BuildFnTy &MatchInfo);
 };
 } // namespace llvm
 
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index ef1171d9f1f64d..427b5a86b6e0c4 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -950,6 +950,30 @@ class GExtOrTruncOp : public GCastOp {
   };
 };
 
+/// Represents a splat vector.
+class GSplatVector : public GenericMachineInstr {
+public:
+  Register getValueReg() const { return getOperand(1).getReg(); }
+
+  static bool classof(const MachineInstr *MI) {
+    return MI->getOpcode() == TargetOpcode::G_SPLAT_VECTOR;
+  };
+};
+
+/// Represents an integer-like extending operation.
+class GZextOrSextOp : public GCastOp {
+public:
+  static bool classof(const MachineInstr *MI) {
+    switch (MI->getOpcode()) {
+    case TargetOpcode::G_SEXT:
+    case TargetOpcode::G_ZEXT:
+      return true;
+    default:
+      return false;
+    }
+  };
+};
+
 } // namespace llvm
 
 #endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index cf5fd6d6f288bd..a8bf2e722881ac 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -593,5 +593,31 @@ bool isGuaranteedNotToBeUndef(Register Reg, const MachineRegisterInfo &MRI,
 /// estimate of the type.
 Type *getTypeForLLT(LLT Ty, LLVMContext &C);
 
+enum class GIConstantKind { Scalar, FixedVector, ScalableVector };
+
+/// An integer-like constant.
+class GIConstant {
+  GIConstantKind Kind;
+  SmallVector<APInt> Values;
+  APInt Value;
+
+public:
+  GIConstant(ArrayRef<APInt> Values)
+      : Kind(GIConstantKind::FixedVector), Values(Values) {};
+  GIConstant(const APInt &Value, GIConstantKind Kind)
+      : Kind(Kind), Value(Value) {};
+
+  GIConstantKind getKind() const { return Kind; }
+
+  APInt getScalarValue() const;
+
+  static std::optional<GIConstant> getConstant(Register Const,
+                                               const MachineRegisterInfo &MRI);
+};
+
+/// Return true if the given value is known to be non-zero when defined.
+bool isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI,
+                    GISelKnownBits *KB, unsigned Depth = 0);
+
 } // End namespace llvm.
 #endif
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 525cc815e73cef..175a8ed57b2669 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1007,9 +1007,6 @@ def double_icmp_zero_or_combine: GICombineRule<
          (G_ICMP $root, $p, $ordst, 0))
 >;
 
-def double_icmp_zero_and_or_combine : GICombineGroup<[double_icmp_zero_and_combine,
-                                                      double_icmp_zero_or_combine]>;
-
 def and_or_disjoint_mask : GICombineRule<
   (defs root:$root, build_fn_matchinfo:$info),
   (match (wip_match_opcode G_AND):$root,
@@ -1884,6 +1881,46 @@ def cast_combines: GICombineGroup<[
   buildvector_of_truncate
 ]>;
 
+def visit_icmp : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (G_ICMP $root, $pred, $lhs, $rhs):$cmp,
+         [{ return Helper.visitICmp(*${cmp}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFn(*${cmp}, ${matchinfo}); }])>;
+
+def sext_icmp : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (G_SEXT $rhs, $inputR),
+         (G_SEXT $lhs, $inputL),
+         (G_ICMP $root, $pred, $lhs, $rhs):$cmp,
+         [{ return Helper.matchSextOfICmp(*${cmp}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFn(*${cmp}, ${matchinfo}); }])>;
+
+def zext_icmp : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (G_ZEXT $rhs, $inputR),
+         (G_ZEXT $lhs, $inputL),
+         (G_ICMP $root, $pred, $lhs, $rhs):$cmp,
+         [{ return Helper.matchZextOfICmp(*${cmp}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFn(*${cmp}, ${matchinfo}); }])>;
+
+def icmp_of_zero : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (G_CONSTANT $zero, 0),
+         (G_ICMP $root, $pred, $lhs, $zero):$cmp,
+         [{ return Helper.matchCmpOfZero(*${cmp}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFn(*${cmp}, ${matchinfo}); }])>;
+
+def icmp_combines: GICombineGroup<[
+  visit_icmp,
+  sext_icmp,
+  zext_icmp,
+  icmp_of_zero,
+  icmp_to_true_false_known_bits,
+  icmp_to_lhs_known_bits,
+  double_icmp_zero_and_combine,
+  double_icmp_zero_or_combine,
+  redundant_binop_in_equality
+]>;
 
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
@@ -1917,7 +1954,7 @@ def const_combines : GICombineGroup<[constant_fold_fp_ops, const_ptradd_to_i2p,
 
 def known_bits_simplifications : GICombineGroup<[
   redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask,
-  zext_trunc_fold, icmp_to_true_false_known_bits, icmp_to_lhs_known_bits,
+  zext_trunc_fold,
   sext_inreg_to_zext_inreg]>;
 
 def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend,
@@ -1944,7 +1981,7 @@ def constant_fold_binops : GICombineGroup<[constant_fold_binop,
 
 def prefer_sign_combines : GICombineGroup<[nneg_zext]>;
 
-def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
+def all_combines : GICombineGroup<[icmp_combines, integer_reassoc_combines, trivial_combines,
     vector_ops_combines, freeze_combines, cast_combines,
     insert_vec_elt_combines, extract_vec_elt_combines, combines_for_extload,
     combine_extracted_vector_load,
@@ -1964,9 +2001,9 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
     constant_fold_cast_op, fabs_fneg_fold,
     intdiv_combines, mulh_combines, redundant_neg_operands,
     and_or_disjoint_mask, fma_combines, fold_binop_into_select,
-    sub_add_reg, select_to_minmax, redundant_binop_in_equality,
+    sub_add_reg, select_to_minmax,
     fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
-    combine_concat_vector, double_icmp_zero_and_or_combine, match_addos,
+    combine_concat_vector, match_addos,
     sext_trunc, zext_trunc, prefer_sign_combines, combine_shuffle_concat]>;
 
 // A combine group used to for prelegalizer combiners at -O0. The combines in
diff --git a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
index a15b76440364b1..af1717dbf76f39 100644
--- a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
+++ b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
@@ -7,6 +7,7 @@ add_llvm_component_library(LLVMGlobalISel
   Combiner.cpp
   CombinerHelper.cpp
   CombinerHelperCasts.cpp
+  CombinerHelperCompares.cpp
   CombinerHelperVectorOps.cpp
   GIMatchTableExecutor.cpp
   GISelChangeObserver.cpp
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
new file mode 100644
index 00000000000000..415768fb07e59f
--- /dev/null
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
@@ -0,0 +1,305 @@
+//===- CombinerHelperCompares.cpp------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements CombinerHelper for G_ICMP
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cstdlib>
+
+#define DEBUG_TYPE "gi-combiner"
+
+using namespace llvm;
+
+bool CombinerHelper::constantFoldICmp(const GICmp &ICmp,
+                                      const GIConstant &LHSCst,
+                                      const GIConstant &RHSCst,
+                                      BuildFnTy &MatchInfo) {
+  if (LHSCst.getKind() != GIConstantKind::Scalar)
+    return false;
+
+  Register Dst = ICmp.getReg(0);
+  LLT DstTy = MRI.getType(Dst);
+
+  if (!isConstantLegalOrBeforeLegalizer(DstTy))
+    return false;
+
+  CmpInst::Predicate Pred = ICmp.getCond();
+  APInt LHS = LHSCst.getScalarValue();
+  APInt RHS = RHSCst.getScalarValue();
+
+  bool Result;
+
+  switch (Pred) {
+  case CmpInst::Predicate::ICMP_EQ:
+    Result = LHS.eq(RHS);
+    break;
+  case CmpInst::Predicate::ICMP_NE:
+    Result = LHS.ne(RHS);
+    break;
+  case CmpInst::Predicate::ICMP_UGT:
+    Result = LHS.ugt(RHS);
+    break;
+  case CmpInst::Predicate::ICMP_UGE:
+    Result = LHS.uge(RHS);
+    break;
+  case CmpInst::Predicate::ICMP_ULT:
+    Result = LHS.ult(RHS);
+    break;
+  case CmpInst::Predicate::ICMP_ULE:
+    Result = LHS.ule(RHS);
+    break;
+  case CmpInst::Predicate::ICMP_SGT:
+    Result = LHS.sgt(RHS);
+    break;
+  case CmpInst::Predicate::ICMP_SGE:
+    Result = LHS.sge(RHS);
+    break;
+  case CmpInst::Predicate::ICMP_SLT:
+    Result = LHS.slt(RHS);
+    break;
+  case CmpInst::Predicate::ICMP_SLE:
+    Result = LHS.sle(RHS);
+    break;
+  default:
+    llvm_unreachable("Unexpected predicate");
+  }
+
+  MatchInfo = [=](MachineIRBuilder &B) {
+    if (Result)
+      B.buildConstant(Dst, getICmpTrueVal(getTargetLowering(),
+                                          /*IsVector=*/DstTy.isVector(),
+                                          /*IsFP=*/false));
+    else
+      B.buildConstant(Dst, 0);
+  };
+
+  return true;
+}
+
+bool CombinerHelper::visitICmp(const MachineInstr &MI, BuildFnTy &MatchInfo) {
+  const GICmp *Cmp = cast<GICmp>(&MI);
+
+  Register Dst = Cmp->getReg(0);
+  LLT DstTy = MRI.getType(Dst);
+  Register LHS = Cmp->getLHSReg();
+  Register RHS = Cmp->getRHSReg();
+
+  CmpInst::Predicate Pred = Cmp->getCond();
+  assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!");
+  if (auto CLHS = GIConstant::getConstant(LHS, MRI)) {
+    if (auto CRHS = GIConstant::getConstant(RHS, MRI))
+      return constantFoldICmp(*Cmp, *CLHS, *CRHS, MatchInfo);
+
+    // If we have a constant, make sure it is on the RHS.
+    std::swap(LHS, RHS);
+    Pred = CmpInst::getSwappedPredicate(Pred);
+
+    MatchInfo = [=](MachineIRBuilder &B) { B.buildICmp(Pred, Dst, LHS, RHS); };
+    return true;
+  }
+
+  [[maybe_unused]] MachineInstr *MILHS = MRI.getVRegDef(LHS);
+  MachineInstr *MIRHS = MRI.getVRegDef(RHS);
+
+  // For EQ and NE, we can always pick a value for the undef to make the
+  // predicate pass or fail, so we can return undef.
+  // Matches behavior in llvm::ConstantFoldCompareInstruction.
+  if (isa<GImplicitDef>(MIRHS) && ICmpInst::isEquality(Pred) &&
+      isLegalOrBeforeLegalizer({TargetOpcode::G_IMPLICIT_DEF, {DstTy}})) {
+    MatchInfo = [=](MachineIRBuilder &B) { B.buildUndef(Dst); };
+    return true;
+  }
+
+  // icmp X, X -> true/false
+  // icmp X, undef -> true/false because undef could be X.
+  if ((LHS == RHS || isa<GImplicitDef>(MIRHS)) &&
+      isConstantLegalOrBeforeLegalizer(DstTy)) {
+    MatchInfo = [=](MachineIRBuilder &B) {
+      if (CmpInst::isTrueWhenEqual(Pred))
+        B.buildConstant(Dst, getICmpTrueVal(getTargetLowering(),
+                                            /*IsVector=*/DstTy.isVector(),
+                                            /*IsFP=*/false));
+      else
+        B.buildConstant(Dst, 0);
+    };
+    return true;
+  }
+
+  return false;
+}
+
+bool CombinerHelper::matchSextOfICmp(const MachineInstr &MI,
+                                     BuildFnTy &MatchInfo) {
+  const GICmp *Cmp = cast<GICmp>(&MI);
+
+  Register Dst = Cmp->getReg(0);
+  LLT DstTy = MRI.getType(Dst);
+  Register LHS = Cmp->getLHSReg();
+  Register RHS = Cmp->getRHSReg();
+  CmpInst::Predicate Pred = Cmp->getCond();
+
+  GSext *SL = cast<GSext>(MRI.getVRegDef(LHS));
+  GSext *SR = cast<GSext>(MRI.getVRegDef(RHS));
+
+  LLT SLTy = MRI.getType(SL->getSrcReg());
+  LLT SRTy = MRI.getType(SR->getSrcReg());
+
+  // Turn icmp (sext X), (sext Y) into a compare of X and Y if they have the
+  // same type.
+  if (SLTy != SRTy)
+    return false;
+
+  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ICMP, {DstTy, SLTy}}))
+    return false;
+
+  // Compare X and Y. Note that the predicate does not change.
+  MatchInfo = [=](MachineIRBuilder &B) {
+    B.buildICmp(Pred, Dst, SL->getSrcReg(), SR->getSrcReg());
+  };
+  return true;
+}
+
+bool CombinerHelper::matchZextOfICmp(const MachineInstr &MI,
+                                     BuildFnTy &MatchInfo) {
+  const GICmp *Cmp = cast<GICmp>(&MI);
+
+  Register Dst = Cmp->getReg(0);
+  LLT DstTy = MRI.getType(Dst);
+  Register LHS = Cmp->getLHSReg();
+  Register RHS = Cmp->getRHSReg();
+  CmpInst::Predicate Pred = Cmp->getCond();
+
+  /*
+    %x:_(p0) = COPY $x0
+    %y:_(p0) = COPY $x1
+    %zero:_(p0) = G_CONSTANT i64 0
+    %cmp1:_(s1) = G_ICMP intpred(eq), %x:_(p0), %zero:_
+   */
+
+  if (MRI.getType(LHS).isPointer() || MRI.getType(RHS).isPointer())
+    return false;
+
+  if (!MRI.getType(LHS).isScalar() || !MRI.getType(RHS).isScalar())
+    return false;
+
+  GZext *ZL = cast<GZext>(MRI.getVRegDef(LHS));
+  GZext *ZR = cast<GZext>(MRI.getVRegDef(RHS));
+
+  LLT ZLTy = MRI.getType(ZL->getSrcReg());
+  LLT ZRTy = MRI.getType(ZR->getSrcReg());
+
+  // Turn icmp (zext X), (zext Y) into a compare of X and Y if they have
+  // the same type.
+  if (ZLTy != ZRTy)
+    return false;
+
+  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ICMP, {DstTy, ZLTy}}))
+    return false;
+
+  // Compare X and Y. Note that signed predicates become unsigned.
+  MatchInfo = [=](MachineIRBuilder &B) {
+    B.buildICmp(ICmpInst::getUnsignedPredicate(Pred), Dst, ZL->getSrcReg(),
+                ZR->getSrcReg());
+  };
+  return true;
+}
+
+bool CombinerHelper::matchCmpOfZero(const MachineInstr &MI,
+                                    BuildFnTy &MatchInfo) {
+  const GICmp *Cmp = cast<GICmp>(&MI);
+
+  Register Dst = Cmp->getReg(0);
+  LLT DstTy = MRI.getType(Dst);
+  Register LHS = Cmp->getLHSReg();
+  CmpInst::Predicate Pred = Cmp->getCond();
+
+  if (!isConstantLegalOrBeforeLegalizer(DstTy))
+    return false;
+
+  std::optional<bool> Result;
+
+  switch (Pred) {
+  default:
+    llvm_unreachable("Unknown ICmp predicate!");
+  case ICmpInst::ICMP_ULT:
+    Result = false;
+    break;
+  case ICmpInst::ICMP_UGE:
+    Result = true;
+    break;
+  case ICmpInst::ICMP_EQ:
+  case ICmpInst::ICMP_ULE:
+    if (isKnownNonZero(LHS, MRI, KB))
+      Result = false;
+    break;
+  case ICmpInst::ICMP_NE:
+  case ICmpInst::ICMP_UGT:
+    if (isKnownNonZero(LHS, MRI, KB))
+      Result = true;
+    break;
+  case ICmpInst::ICMP_SLT: {
+    KnownBits LHSKnown = KB->getKnownBits(LHS);
+    if (LHSKnown.isNegative())
+      Result = true;
+    if (LHSKnown.isNonNegative())
+      Result = false;
+    break;
+  }
+  case ICmpInst::ICMP_SLE: {
+    KnownBits LHSKnown = KB->getKnownBits(LHS);
+    if (LHSKnown.isNegative())
+      Result = true;
+    if (LHSKnown.isNonNegative() && isKnownNonZero(LHS, MRI, KB))
+      Result = false;
+    break;
+  }
+  case ICmpInst::ICMP_SGE: {
+    KnownBits LHSKnown = KB->getKnownBits(LHS);
+    if (LHSKnown.isNegative())
+      Result = false;
+    if (LHSKnown.isNonNegative())
+      Result = true;
+    break;
+  }
+  case ICmpInst::ICMP_SGT: {
+    KnownBits LHSKnown = KB->getKnownBits(LHS);
+    if (LHSKnown.isNegative())
+      Result = false;
+    if (LHSKnown.isNonNegative() && isKnownNonZero(LHS, MRI, KB))
+      Result = true;
+    break;
+  }
+  }
+
+  if (!Result)
+    return false;
+
+  MatchInfo = [=](MachineIRBuilder &B) {
+    if (*Result)
+      B.buildConstant(Dst, getICmpTrueVal(getTargetLowering(),
+                                          /*IsVector=*/DstTy.isVector(),
+                                          /*IsFP=*/false));
+    else
+      B.buildConstant(Dst, 0);
+  };
+
+  return true;
+}
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index cfdd9905c16fa6..e8b9d995a22768 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1984,3 +1984,326 @@ Type *llvm::getTypeForLLT(LLT Ty, LLVMContext &C) {
                            Ty.getElementCount());
   return IntegerType::get(C, Ty.getSizeInBits());
 }
+
+APInt llvm::GIConstant::getScalarValue() const {
+  assert(Kind == GIConstantKind::Scalar && "Expected scalar constant");
+
+  return Value;
+}
+
+std::optional<GIConstant>
+llvm::GIConstant::getConstant(Register Const, const MachineRegisterInfo &MRI) {
+  MachineInstr *Constant = getDefIgnoringCopies(Const, MRI);
+
+  if (GSplatVector *Splat = dyn_cast<GSplatVector>(Constant)) {
+    std::optional<ValueAndVReg> MayBeConstant =
+        getIConstantVRegValWithLookThrough(Splat->getValueReg(), MRI);
+    if (!MayBeConstant)
+      return std::nullopt;
+    return GIConstant(MayBeConstant->Value, GIConstantKind::ScalableVector);
+  }
+
+  if (GBuildVector *Build = dyn_cast<GBuildVector>(Constant)) {
+    SmallVector<APInt> Values;
+    unsigned NumSources = Build->getNumSources();
+    for (unsigned I = 0; I < NumSources; ++I) {
+      Register SrcReg = Build->getSourceReg(I);
+      std::optional<ValueAndVReg> MayBeConstant =
+          getIConstantVRegValWithLookThrough(SrcReg, MRI);
+      if (!MayBeConstant)
+        return std::nullopt;
+      Values.push_back(MayBeConstant->Value);
+    }
+    return GIConstant(Values);
+  }
+
+  std::optional<ValueAndVReg> MayBeConstant =
+      getIConstantVRegValWithLookThrough(Const, MRI);
+  if (!MayBeConstant)
+    return std::nullopt;
+
+  return GIConstant(MayBeConstant->Value, GIConstantKind::Scalar);
+}
+
+static bool isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI,
+                           GISelKnownBits *KB, unsigned Depth);
+
+bool llvm::isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI,
+                          GISelKnownBits *KB, unsigned Depth) {
+  if (!Reg.isVirtual())
+    return false;
+
+  LLT Ty = MRI.getType(Reg);
+  if (!Ty.isValid())
+    return false;
+
+  if (Ty.isPointer())
+    return false;
+
+  if (!Ty.isScalar())
+    errs() << "type: " << Ty << '\n';
+
+  assert(...
[truncated]

@@ -212,13 +212,10 @@ define float @sitofp_i128_to_f32(i128 %x) {
; GISEL-NEXT: v_or_b32_e32 v16, v10, v12
; GISEL-NEXT: v_lshrrev_b64 v[11:12], v14, -1
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v5
; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v15, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5
@tschuett (Author):
canonicalize constant to the right + matchCmpOfZero

@nikic removed their request for review August 26, 2024 07:44

@nikic (Contributor) left a comment:
I don't have time to review GlobalISel changes, but I'll say this: Transforms from the middle-end optimizer should only be reimplemented in GlobalISel (or SDAG) if the patterns can plausibly be introduced as a result of legalization. The input to ISel is always assumed to be canonical IR, so anything taken care of by middle-end canonicalization does not need to be handled by GlobalISel (unless GlobalISel itself can introduce the pattern, e.g. via legalization).

@tschuett (Author):
It is an open discussion whether the backend combiners purely clean up legalization artefacts or go beyond that:
#92309
For the DAG combiner, I would argue that it goes far beyond. In my first comment above and in the AMDGPU code, you can see that the combine hits.

The move-constants-to-the-right combine also hits.

@tschuett (Author):
AArch64 runs a combiner pre-legalization. How could it clean up legalization artefacts?

@nikic (Contributor) commented Aug 26, 2024

Okay, maybe my comment didn't spell it out enough. I'm referring to combines that already exist as canonicalizations in the target-independent optimizer. Of course there are also combines during isel that are target-dependent, undoing middle-end canonicalizations, or similar. Those are not targeted at legalization artifacts and can be useful pre-legalization.

What I want to push back against here is blindly copying over optimizations from InstSimplify or InstCombine, without any justification for why these optimizations need to be replicated inside GlobalISel. I'm not necessarily saying that the combines proposed here are bad, but they need more justification than "inspired by simplifyICmpInst and simplifyICmpWithZero". You should understand, and be able to articulate to reviewers, why it is beneficial to have these combines replicated in GlobalISel.

@tschuett (Author):
See my first comment. They reduce code size.

@tschuett (Author):
Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) {

is ported almost exactly in both combiners.
https://github.com/llvm/llvm-project/blob/main/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
creates freeze instructions while legalizing illegal freeze instructions, but I could not find other places. Both combiners create freeze instructions while combining, e.g., selects. It is hard to argue that we ported visitFreeze to combine legalization artefacts.

@tschuett (Author):
I removed the ext-of-icmp combines. Please search again for regressions.

@aemerson (Contributor) left a comment:
@nikic Thanks for pointing this out. This is something I've been asking for a while too but hadn't expressed right. What we need to see, in the PR description, is a justification for why a combine is good or necessary. If we don't see that, then unless it happens to improve some already existing tests, I have to take time out of my day to build the PR and manually benchmark it for changes. So if we can avoid using "do the same thing as X in InstCombine" as a justification, it's clearer why it's a good thing.

@tschuett (Author):
IR is in perfect canonical state. Codegen may do some target specific optimizations, but I would be surprised if they move constants to the left. The IRTranslator faithfully translates IR into gMIR. Thus porting middle-end optimizations to the backend is a waste of time because they cannot give any size improvements.

I mentioned visitFreeze above. It is not dead code. It actually gives code size improvements, which is at odds with my previous statement.

The same applies to the canonicalization of constants to the right for icmps. You can see hits in the AMDGPU code. It also gives size improvements in AArch64 code. It is not dead code. The icmp x, 0 combine also hits and gives code size improvements, see itofp.i128.ll.

@tschuett (Author):
Regarding optimizations that exist in the middle-end: if that becomes a big issue, we could move them one floor up and discuss on Discourse the relationship between optimizations in the middle-end and the two backends.

@tschuett (Author) commented Sep 2, 2024

Another waste of energy: #77855

@aemerson (Contributor) commented Sep 3, 2024

Some more comments. Frankly I'm having trouble following some of your comments like this:

IR is in perfect canonical state. Codegen may do some target specific optimizations, but I would be surprised if they move constants to the left. The IRTranslator faithfully translates IR into gMIR. Thus porting middle-end optimizations to the backend is a waste of time because they cannot give any size improvements.

Are you agreeing or disagreeing with your current patch? At this point I have no idea.

Comment on lines +598 to +621
/// An integer-like constant.
class GIConstant {
GIConstantKind Kind;
SmallVector<APInt> Values;
APInt Value;

public:
GIConstant(ArrayRef<APInt> Values)
: Kind(GIConstantKind::FixedVector), Values(Values) {};
GIConstant(const APInt &Value, GIConstantKind Kind)
: Kind(Kind), Value(Value) {};

GIConstantKind getKind() const { return Kind; }

APInt getScalarValue() const;

static std::optional<GIConstant> getConstant(Register Const,
const MachineRegisterInfo &MRI);
};

/// Return true if the given value is known to be non-zero when defined.
bool isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI,
GISelKnownBits *KB, unsigned Depth = 0);

Contributor:
We have similar interfaces like isConstantOrConstantVector and friends. Can we extend those or unify in some more consistent way rather than have 2 different ways of querying constant values?

@tschuett (Author):
Note that isConstantOrConstantVector starts with is; it is a test. GIConstant::getConstant gives you a constant: a vector of APInts. We can add arbitrary arithmetic, e.g. GIConstant + GIConstant.
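For reference, a minimal sketch of how a combine consumes it (this mirrors visitICmp from the patch; the names are the ones used in the diff above):

    // Fold the compare when both operands are integer-like constants
    // (scalar, fixed vector, or splat), going through the GIConstant wrapper.
    if (auto CLHS = GIConstant::getConstant(LHS, MRI))
      if (auto CRHS = GIConstant::getConstant(RHS, MRI))
        return constantFoldICmp(*Cmp, *CLHS, *CRHS, MatchInfo);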

Contributor:
Yes I'm aware, I said isConstantOrConstantVector and friends. One of those friends is this:

std::optional<APInt>
llvm::isConstantOrConstantSplatVector(

which returns an APInt. So if we add GIConstant now we have even more ways to do similar things. I'm not saying your approach is necessarily wrong here, but taking a step back maybe this change should be a separate effort to unify our utilities better.

@tschuett (Author):
We were limited to splat vectors before. GIConstant does not have this restriction.

@tschuett (Author):
I am fine with a family of tests for various kinds of constness, but I would argue that GIConstant is powerful enough to become the constant. It is a class. We can add arbitrary member functions for tests and arithmetic. Constants can be scalars, fixed vectors, or scalable vectors.

Contributor:
I am fine with a family of tests for various kinds of constness, but I would argue that GIConstant is powerful enough to become the constant. It is a class. We can add arbitrary member functions for tests and arithmetic. Constants can be scalars, fixed vectors, or scalable vectors.

Perhaps you're right, all the more reason to do it in a separate change where we can think about the design with more care.

@tschuett (Author):
The design is minimal, it has uses, and getting rid of the splat vector variants will take a lot of energy. I would prefer to continue with this PR.

In a separate change, it would be dead code.

@tschuett (Author) commented Sep 3, 2024

Some more comments. Frankly I'm having trouble following some of your comments like this:

IR is in perfect canonical state. Codegen may do some target specific optimizations, but I would be surprised if they move constants to the left. The IRTranslator faithfully translates IR into gMIR. Thus porting middle-end optimizations to the backend is a waste of time because they cannot give any size improvements.

Are you agreeing or disagreeing with your current patch? At this point I have no idea.

I am supportive of this patch. I was only stating the common assumption. But the cmp x, 0 combine is also at odds with that statement.

@tschuett (Author) commented Sep 3, 2024

The general assumption is that IR is in canonical form and middle-end optimizations should not be ported to the backends.

But reality is at odds with that statement. There are size improvements in this PR.

@tschuett (Author) commented Sep 4, 2024

I added two combines in this PR. I have no interest in extending the first one. I would rather see more combines with disjoint and precise patterns, e.g., icmp_of_zero.

@tschuett (Author) commented Sep 8, 2024

I strongly disagree with the change request. Compares and only compares deserve special treatment. Placing several optimizations into one function is beneficial. They share resources and order matters. Spreading them over several combines would be less efficient. In most cases, the pattern would be G_ICMP, which I am not interested in.

@tschuett (Author):
Could you point me to evidence of:
it adds lot of expensive combines based on things like KnownBits

Thanks.

@nikic (Contributor) commented Sep 10, 2024

I see at least another unrelated undef combine in here, but I was more talking about the entire infrastructure being added here. You may technically be adding one "combine", but the underlying analysis has lots of cases, most of which do not appear to be tested, and where I seriously doubt the need to replicate them in GlobalISel. This PR has very exotic stuff like "x | (x != 0) is non-zero" -- is this really important for the simplification of legalization artifacts?

I'm sure that there is some subset of the simplifications proposed here that is indeed useful, but I don't think that the PR as a whole, as you have presented it, is appropriate.

The general approach of "take some methods from InstSimplify and copy them to GlobalISel" is something I very much want to discourage. If you want to copy simplifications from somewhere, at least look at DAGCombine instead, where there is at least a chance that somebody has exercised some degree of due diligence when adding things there. But in any case, any handling you add needs to have test coverage, including any cases in underlying analyses.

@tschuett (Author) commented Sep 10, 2024

That didn't answer my question!

The isKnownNonZero analysis is in Utils.cpp and test coverage is in combine-visit-icmp.mir and icmp2.ll.
Combine.td shows that precisely two combines were added and only one of them relies on known bits.

@aemerson (Contributor):
That didn't answer my question!

The isKnownNonZero analysis is in Utils.cpp and test coverage is in combine-visit-icmp.mir and icmp2.ll. Combine.td shows that precisely two combines were added and only one of them relies on known bits.

I hadn't even closely looked at that combine yet, but from what I can tell you're using KB a lot in that matchCmpOfZero combine. It's quite a lot of code, don't you think?

@tschuett (Author):
CombinerHelper::matchCmpOfZero uses KB directly at most once per case in the switch over the icmp predicate; for reference:

static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS,

A different question is the isKnownNonZero analysis, but it is depth limited:
bool isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI, GISelKnownBits *KB, unsigned Depth = 0);
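For context, a sketch of the usual shape of such a depth limit (assumption: this only illustrates the ValueTracking-style guard; the exact body of the GlobalISel port is truncated in the diff above, and the helper name here is made up):

    // Hypothetical helper showing the bail-out; the real entry point in this
    // PR is llvm::isKnownNonZero(Reg, MRI, KB, Depth).
    static bool isKnownNonZeroSketch(Register Reg, const MachineRegisterInfo &MRI,
                                     GISelKnownBits *KB, unsigned Depth) {
      if (Depth >= MaxAnalysisRecursionDepth)
        return false; // Recursion budget exhausted; assume nothing.
      // ... per-opcode reasoning, recursing with Depth + 1 ...
      return false;
    }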

@tschuett (Author):
@nikic requested changes with the statement
"This PR is particularly problematic because it adds lot of expensive combines based on things like KnownBits -- these are very much not harmless folds you can add "just because"."

I am still looking for evidence of that claim.

@aemerson (Contributor):
CombinerHelper::matchCmpOfZero uses KB directly at most once per case in the switch over the icmp predicate; for reference:

static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS,

A different question is the isKnownNonZero analysis, but it is depth limited: bool isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI, GISelKnownBits *KB, unsigned Depth = 0);

MaxAnalysisRecursionDepth is defaulted to 6. We're going to potentially do a 6-deep KB analysis on every icmp. Maybe we don't need to start with that, and only use it if the data supports that it's worth it from code size or performance metrics? Have you run those benchmarks? Or do I need to do it myself again?

I'm going to level with you here: at this point in the conversation, bringing up an IR combine as a reference is not helping your case in the way you think. I really suggest you take a step back from this and consider why we have such resistance to this change.

You basically have a lot of code, and you're resisting splitting up the visitICmp() function on the basis of efficiency, but you're also ok with adding a KB-dependent combine without having data about its cost/benefit. We need a fundamental rethink in how we add changes to GlobalISel. Individual patches like these are not important. They really aren't. Setting good examples and precedents, along with pragmatic choices backed up by data, are what's important.

@tschuett (Author):
I said above visitICmp and matchCmpOfZero cooperate.

I said several times llvm/test/CodeGen/AMDGPU/itofp.i128.ll shows evidence of the effects of this PR.

Your resistance was based on naming and not on the combines or code.

@aemerson (Contributor):
I said above visitICmp and matchCmpOfZero cooperate.

I said several times llvm/test/CodeGen/AMDGPU/itofp.i128.ll shows evidence of the effects of this PR.

Your resistance was based on naming and not on the combines or code.

I don't feel you're making a good faith effort to listen to our concerns. I'm no longer going to participate in this PR review.

@aemerson dismissed their stale review September 10, 2024 17:41 ("Resigning")

@aemerson requested review from aemerson and removed request for aemerson September 10, 2024 17:42
@tschuett (Author):
MaxAnalysisRecursionDepth comes from LLVM-IR. The constant is not invented by GlobalIsel. We already use it in production:

if (Depth >= MaxAnalysisRecursionDepth)

I have never heard, in a GlobalISel combiner review, a discussion of data-driven decisions about, e.g., MaxAnalysisRecursionDepth. Is 4, 5, 6, or 7 optimal for GlobalISel? My feeling so far has been that it is sufficient for new combines:

If these rules have changed, we should move the discussion to #92309

@tschuett (Author):
I said above visitICmp and matchCmpOfZero cooperate.
I said several times llvm/test/CodeGen/AMDGPU/itofp.i128.ll shows evidence of the effects of this PR.
Your resistance was based on naming and not on the combines or code.

I don't feel you're making a good faith effort to listen to our concerns. I'm no longer going to participate in this PR review.

Sorry for my bad communication.

@dc03-work (Contributor):
MaxAnalysisRecursionDepth comes from LLVM-IR. The constant is not invented by GlobalIsel. We already use it in production:

if (Depth >= MaxAnalysisRecursionDepth)

I have never heard, in a GlobalISel combiner review, a discussion of data-driven decisions about, e.g., MaxAnalysisRecursionDepth. Is 4, 5, 6, or 7 optimal for GlobalISel? My feeling so far has been that it is sufficient for new combines:

If these rules have changed, we should move the discussion to #92309

The problem is neither the numeric value nor where it comes from. KnownBits analysis is expensive, period. I have profiled InstCombine in the past, and KB analysis in functions like isKnownNonZero and SimplifyDemandedBits takes up a non-trivial amount of execution time, up to several percent of the overall execution time of the pass. Unless the combine yields a tangible improvement in benchmark performance that offsets the compile-time cost, there is no good reason to add it.

Do also consider that GlobalISel is an instruction selector, not a middle-end pass like InstCombine. We don't need to redo everything the middle-end does, because the middle-end is supposed to do that for us :)

@tschuett (Author):
This combiner combines opcodes that do not come from the legalizer as legalization artefacts. It is a generic optimizing combiner. We use the combiner to reduce code size and to make the code cheaper to execute (G_MUL -> G_ADD). It is not the first time that we benefit from the expertise of the middle-end in analysis and optimizations. The isKnownNonZero analysis is established in the middle-end, and the icmp x, 0 combine shows code size improvements. For reasons completely unknown to me, this PR has blown up in comments and a new review style.

I stated that if we change the order of the two undef optimizations, the results will change. I am asked to hoist these optimizations into separate combines, where I lose control of the order.

#90618 blindly copied code from the middle-end with almost unnoticeable code size improvement and without cost benefit analysis. It wasn't run on a set of benchmarks and there was no compile-time analysis.

I mentioned above v_cmp_eq_u32_e64 s[4:5], 0, v5. We need the canonicalization of constants on icmps to the RHS and the icmp x, 0 combine to play together. At the same time, the requested changes state:
"This needs to be split up into separate PRs, where each only adds a single fold, and each fold is well justified."

I am asked to separate the canonicalization and the icmp x, 0 combine into two separate PRs, neither of which is profitable in isolation. I am stuck again. I am asked to do tasks that I cannot do?!?

@tschuett (Author):
" at least look at DAGCombine instead, "
I do not see the DAGCombiner as a reference. It has an odd architecture and limitations that smell.

@tschuett (Author):
Alex Bradbury: IMHO use of MachineIR is more of a defining feature than “global” scope.

We test the correctness of combines in MIR. For the DAGCombiner, noise in end-to-end tests seems to be sufficient for acceptance.

@tschuett (Author) commented Sep 12, 2024

I tried a new version:

  • canonicalization of icmps
  • icmp x, 0

I removed the undef optimizations. There was too much discussion. Canonicalization is not an optimization. We do it blindly for as many opcodes as possible.

The isKnownNonZero analysis sits in Utils.cpp. It might be a bit of code, but code size/complexity correlates with precision. I ported it from ValueTracking with adaptations for MIR. The version in ValueTracking is much larger and more complex. You will still find the original comments. We need some precision for the icmp x, 0 combine to have an effect on code size. I am interested in follow-on PRs to increase precision.

@tschuett requested a review from aemerson September 12, 2024 08:27
@aemerson (Contributor):
Please split this up into multiple patches as @nikic requested. Start with the canonicalization. Maybe just use the name canonicalize_icmp instead of prepare to be more specific. Then we merge the first part.

I'm not saying that the rest of the changes will be accepted after splitting, we still need to come to an agreement about the known bits issue.

It might be a bit of code, but code size/complexity correlates with precision. I ported it from ValueTracking with adaptations for MIR. The version in ValueTracking is much larger and more complex. You will still find the original comments. We need some precision for the icmp x, 0 combine to have an effect on code size. I am interested in follow-on PRs to increase precision.

As a reminder: I don't care one iota about how precise the analysis is in the combiner if it doesn't pay for its compile time and complexity cost with real world improvements. Not synthetic MIR tests, real world improvements in code quality. This could be systemic improvements across the LLVM test suite at -Os/O3, or optimizing a key loop in a common benchmark. Nothing is landing into the tree without having test cases that exercise the code without synthetic input.

@tschuett (Author):
I still have an unanswered question.

@aemerson (Contributor):
I still have an unanswered question.

Which question?

@tschuett (Author):
I still have an unanswered question.

Which question?

@nikic requested changes with the statement
"This PR is particularly problematic because it adds lot of expensive combines based on things like KnownBits -- these are very much not harmless folds you can add "just because"."

I am still looking for evidence of that claim.

@nikic (Contributor) commented Sep 16, 2024

Sorry, my phrasing there was bad. It's not "a lot of expensive combines", it's one combine that is based on isKnownNonZero() analysis being introduced here, which in turn has a lot of recursive KnownBits calls.

FWIW from IR experience, we know that simplifyICmpInst and KnownBits-based icmp simplification take up a large fraction of InstCombine time for many programs, so the cost/benefit question here is not an idle concern.

@tschuett (Author):
That makes sense. Thanks.

@tschuett closed this Oct 2, 2024
Comment on lines +953 to +976
/// Represents a splat vector.
class GSplatVector : public GenericMachineInstr {
public:
Register getValueReg() const { return getOperand(1).getReg(); }

static bool classof(const MachineInstr *MI) {
return MI->getOpcode() == TargetOpcode::G_SPLAT_VECTOR;
};
};

/// Represents an integer-like extending operation.
class GZextOrSextOp : public GCastOp {
public:
static bool classof(const MachineInstr *MI) {
switch (MI->getOpcode()) {
case TargetOpcode::G_SEXT:
case TargetOpcode::G_ZEXT:
return true;
default:
return false;
}
};
};

Contributor:
Should extract the infrastructure bits out of this

@tschuett (Author):
GSplatVector is already upstream. GZextOrSextOp probably has a lower market share.

static bool isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI,
GISelKnownBits *KB, unsigned Depth);

bool llvm::isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI,
Contributor:
We do want such a helper, but that doesn't mean we should use it aggressively.

@tschuett (Author):
This was one of the most controversial questions of this PR. I give up on this one.

Comment on lines +2017 to +2022
if (!Reg.isVirtual())
return false;

LLT Ty = MRI.getType(Reg);
if (!Ty.isValid())
return false;
Contributor:
These cases are forbidden and shouldn't require defending against
