[GlobalIsel] Visit ICmp #105991
Conversation
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-llvm-globalisel

Author: Thorsten Schütt (tschuett)

Changes: inspired by simplifyICmpInst and simplifyICmpWithZero.

Patch is 97.96 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/105991.diff

15 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 9b62d6067be39c..da9c7fdbd2a093 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -20,6 +20,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGenTypes/LowLevelType.h"
#include "llvm/IR/InstrTypes.h"
@@ -299,6 +300,12 @@ class CombinerHelper {
/// $whatever = COPY $addr
bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen = 0);
+ bool visitICmp(const MachineInstr &MI, BuildFnTy &MatchInfo);
+ bool matchSextOfICmp(const MachineInstr &MI, BuildFnTy &MatchInfo);
+ bool matchZextOfICmp(const MachineInstr &MI, BuildFnTy &MatchInfo);
+ /// Try hard to fold icmp with zero RHS because this is a common case.
+ bool matchCmpOfZero(const MachineInstr &MI, BuildFnTy &MatchInfo);
+
bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo);
void applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo);
@@ -1017,6 +1024,9 @@ class CombinerHelper {
bool tryFoldLogicOfFCmps(GLogicalBinOp *Logic, BuildFnTy &MatchInfo);
bool isCastFree(unsigned Opcode, LLT ToTy, LLT FromTy) const;
+
+ bool constantFoldICmp(const GICmp &ICmp, const GIConstant &LHS,
+ const GIConstant &RHS, BuildFnTy &MatchInfo);
};
} // namespace llvm
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index ef1171d9f1f64d..427b5a86b6e0c4 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -950,6 +950,30 @@ class GExtOrTruncOp : public GCastOp {
};
};
+/// Represents a splat vector.
+class GSplatVector : public GenericMachineInstr {
+public:
+ Register getValueReg() const { return getOperand(1).getReg(); }
+
+ static bool classof(const MachineInstr *MI) {
+ return MI->getOpcode() == TargetOpcode::G_SPLAT_VECTOR;
+ };
+};
+
+/// Represents an integer-like extending operation.
+class GZextOrSextOp : public GCastOp {
+public:
+ static bool classof(const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_ZEXT:
+ return true;
+ default:
+ return false;
+ }
+ };
+};
+
} // namespace llvm
#endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index cf5fd6d6f288bd..a8bf2e722881ac 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -593,5 +593,31 @@ bool isGuaranteedNotToBeUndef(Register Reg, const MachineRegisterInfo &MRI,
/// estimate of the type.
Type *getTypeForLLT(LLT Ty, LLVMContext &C);
+enum class GIConstantKind { Scalar, FixedVector, ScalableVector };
+
+/// An integer-like constant.
+class GIConstant {
+ GIConstantKind Kind;
+ SmallVector<APInt> Values;
+ APInt Value;
+
+public:
+ GIConstant(ArrayRef<APInt> Values)
+ : Kind(GIConstantKind::FixedVector), Values(Values) {};
+ GIConstant(const APInt &Value, GIConstantKind Kind)
+ : Kind(Kind), Value(Value) {};
+
+ GIConstantKind getKind() const { return Kind; }
+
+ APInt getScalarValue() const;
+
+ static std::optional<GIConstant> getConstant(Register Const,
+ const MachineRegisterInfo &MRI);
+};
+
+/// Return true if the given value is known to be non-zero when defined.
+bool isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI,
+ GISelKnownBits *KB, unsigned Depth = 0);
+
} // End namespace llvm.
#endif
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 525cc815e73cef..175a8ed57b2669 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1007,9 +1007,6 @@ def double_icmp_zero_or_combine: GICombineRule<
(G_ICMP $root, $p, $ordst, 0))
>;
-def double_icmp_zero_and_or_combine : GICombineGroup<[double_icmp_zero_and_combine,
- double_icmp_zero_or_combine]>;
-
def and_or_disjoint_mask : GICombineRule<
(defs root:$root, build_fn_matchinfo:$info),
(match (wip_match_opcode G_AND):$root,
@@ -1884,6 +1881,46 @@ def cast_combines: GICombineGroup<[
buildvector_of_truncate
]>;
+def visit_icmp : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_ICMP $root, $pred, $lhs, $rhs):$cmp,
+ [{ return Helper.visitICmp(*${cmp}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${cmp}, ${matchinfo}); }])>;
+
+def sext_icmp : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_SEXT $rhs, $inputR),
+ (G_SEXT $lhs, $inputL),
+ (G_ICMP $root, $pred, $lhs, $rhs):$cmp,
+ [{ return Helper.matchSextOfICmp(*${cmp}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${cmp}, ${matchinfo}); }])>;
+
+def zext_icmp : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_ZEXT $rhs, $inputR),
+ (G_ZEXT $lhs, $inputL),
+ (G_ICMP $root, $pred, $lhs, $rhs):$cmp,
+ [{ return Helper.matchZextOfICmp(*${cmp}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${cmp}, ${matchinfo}); }])>;
+
+def icmp_of_zero : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_CONSTANT $zero, 0),
+ (G_ICMP $root, $pred, $lhs, $zero):$cmp,
+ [{ return Helper.matchCmpOfZero(*${cmp}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${cmp}, ${matchinfo}); }])>;
+
+def icmp_combines: GICombineGroup<[
+ visit_icmp,
+ sext_icmp,
+ zext_icmp,
+ icmp_of_zero,
+ icmp_to_true_false_known_bits,
+ icmp_to_lhs_known_bits,
+ double_icmp_zero_and_combine,
+ double_icmp_zero_or_combine,
+ redundant_binop_in_equality
+]>;
// FIXME: These should use the custom predicate feature once it lands.
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
@@ -1917,7 +1954,7 @@ def const_combines : GICombineGroup<[constant_fold_fp_ops, const_ptradd_to_i2p,
def known_bits_simplifications : GICombineGroup<[
redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask,
- zext_trunc_fold, icmp_to_true_false_known_bits, icmp_to_lhs_known_bits,
+ zext_trunc_fold,
sext_inreg_to_zext_inreg]>;
def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend,
@@ -1944,7 +1981,7 @@ def constant_fold_binops : GICombineGroup<[constant_fold_binop,
def prefer_sign_combines : GICombineGroup<[nneg_zext]>;
-def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
+def all_combines : GICombineGroup<[icmp_combines, integer_reassoc_combines, trivial_combines,
vector_ops_combines, freeze_combines, cast_combines,
insert_vec_elt_combines, extract_vec_elt_combines, combines_for_extload,
combine_extracted_vector_load,
@@ -1964,9 +2001,9 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
constant_fold_cast_op, fabs_fneg_fold,
intdiv_combines, mulh_combines, redundant_neg_operands,
and_or_disjoint_mask, fma_combines, fold_binop_into_select,
- sub_add_reg, select_to_minmax, redundant_binop_in_equality,
+ sub_add_reg, select_to_minmax,
fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
- combine_concat_vector, double_icmp_zero_and_or_combine, match_addos,
+ combine_concat_vector, match_addos,
sext_trunc, zext_trunc, prefer_sign_combines, combine_shuffle_concat]>;
// A combine group used to for prelegalizer combiners at -O0. The combines in
diff --git a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
index a15b76440364b1..af1717dbf76f39 100644
--- a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
+++ b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
@@ -7,6 +7,7 @@ add_llvm_component_library(LLVMGlobalISel
Combiner.cpp
CombinerHelper.cpp
CombinerHelperCasts.cpp
+ CombinerHelperCompares.cpp
CombinerHelperVectorOps.cpp
GIMatchTableExecutor.cpp
GISelChangeObserver.cpp
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
new file mode 100644
index 00000000000000..415768fb07e59f
--- /dev/null
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
@@ -0,0 +1,305 @@
+//===- CombinerHelperCompares.cpp -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements CombinerHelper for G_ICMP
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cstdlib>
+
+#define DEBUG_TYPE "gi-combiner"
+
+using namespace llvm;
+
+bool CombinerHelper::constantFoldICmp(const GICmp &ICmp,
+ const GIConstant &LHSCst,
+ const GIConstant &RHSCst,
+ BuildFnTy &MatchInfo) {
+ if (LHSCst.getKind() != GIConstantKind::Scalar)
+ return false;
+
+ Register Dst = ICmp.getReg(0);
+ LLT DstTy = MRI.getType(Dst);
+
+ if (!isConstantLegalOrBeforeLegalizer(DstTy))
+ return false;
+
+ CmpInst::Predicate Pred = ICmp.getCond();
+ APInt LHS = LHSCst.getScalarValue();
+ APInt RHS = RHSCst.getScalarValue();
+
+ bool Result;
+
+ switch (Pred) {
+ case CmpInst::Predicate::ICMP_EQ:
+ Result = LHS.eq(RHS);
+ break;
+ case CmpInst::Predicate::ICMP_NE:
+ Result = LHS.ne(RHS);
+ break;
+ case CmpInst::Predicate::ICMP_UGT:
+ Result = LHS.ugt(RHS);
+ break;
+ case CmpInst::Predicate::ICMP_UGE:
+ Result = LHS.uge(RHS);
+ break;
+ case CmpInst::Predicate::ICMP_ULT:
+ Result = LHS.ult(RHS);
+ break;
+ case CmpInst::Predicate::ICMP_ULE:
+ Result = LHS.ule(RHS);
+ break;
+ case CmpInst::Predicate::ICMP_SGT:
+ Result = LHS.sgt(RHS);
+ break;
+ case CmpInst::Predicate::ICMP_SGE:
+ Result = LHS.sge(RHS);
+ break;
+ case CmpInst::Predicate::ICMP_SLT:
+ Result = LHS.slt(RHS);
+ break;
+ case CmpInst::Predicate::ICMP_SLE:
+ Result = LHS.sle(RHS);
+ break;
+ default:
+ llvm_unreachable("Unexpected predicate");
+ }
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ if (Result)
+ B.buildConstant(Dst, getICmpTrueVal(getTargetLowering(),
+ /*IsVector=*/DstTy.isVector(),
+ /*IsFP=*/false));
+ else
+ B.buildConstant(Dst, 0);
+ };
+
+ return true;
+}
+
+bool CombinerHelper::visitICmp(const MachineInstr &MI, BuildFnTy &MatchInfo) {
+ const GICmp *Cmp = cast<GICmp>(&MI);
+
+ Register Dst = Cmp->getReg(0);
+ LLT DstTy = MRI.getType(Dst);
+ Register LHS = Cmp->getLHSReg();
+ Register RHS = Cmp->getRHSReg();
+
+ CmpInst::Predicate Pred = Cmp->getCond();
+ assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!");
+ if (auto CLHS = GIConstant::getConstant(LHS, MRI)) {
+ if (auto CRHS = GIConstant::getConstant(RHS, MRI))
+ return constantFoldICmp(*Cmp, *CLHS, *CRHS, MatchInfo);
+
+ // If we have a constant, make sure it is on the RHS.
+ std::swap(LHS, RHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+
+ MatchInfo = [=](MachineIRBuilder &B) { B.buildICmp(Pred, Dst, LHS, RHS); };
+ return true;
+ }
+
+ [[maybe_unused]] MachineInstr *MILHS = MRI.getVRegDef(LHS);
+ MachineInstr *MIRHS = MRI.getVRegDef(RHS);
+
+ // For EQ and NE, we can always pick a value for the undef to make the
+ // predicate pass or fail, so we can return undef.
+ // Matches behavior in llvm::ConstantFoldCompareInstruction.
+ if (isa<GImplicitDef>(MIRHS) && ICmpInst::isEquality(Pred) &&
+ isLegalOrBeforeLegalizer({TargetOpcode::G_IMPLICIT_DEF, {DstTy}})) {
+ MatchInfo = [=](MachineIRBuilder &B) { B.buildUndef(Dst); };
+ return true;
+ }
+
+ // icmp X, X -> true/false
+ // icmp X, undef -> true/false because undef could be X.
+ if ((LHS == RHS || isa<GImplicitDef>(MIRHS)) &&
+ isConstantLegalOrBeforeLegalizer(DstTy)) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ if (CmpInst::isTrueWhenEqual(Pred))
+ B.buildConstant(Dst, getICmpTrueVal(getTargetLowering(),
+ /*IsVector=*/DstTy.isVector(),
+ /*IsFP=*/false));
+ else
+ B.buildConstant(Dst, 0);
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchSextOfICmp(const MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ const GICmp *Cmp = cast<GICmp>(&MI);
+
+ Register Dst = Cmp->getReg(0);
+ LLT DstTy = MRI.getType(Dst);
+ Register LHS = Cmp->getLHSReg();
+ Register RHS = Cmp->getRHSReg();
+ CmpInst::Predicate Pred = Cmp->getCond();
+
+ GSext *SL = cast<GSext>(MRI.getVRegDef(LHS));
+ GSext *SR = cast<GSext>(MRI.getVRegDef(RHS));
+
+ LLT SLTy = MRI.getType(SL->getSrcReg());
+ LLT SRTy = MRI.getType(SR->getSrcReg());
+
+ // Turn icmp (sext X), (sext Y) into a compare of X and Y if they have the
+ // same type.
+ if (SLTy != SRTy)
+ return false;
+
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ICMP, {DstTy, SLTy}}))
+ return false;
+
+ // Compare X and Y. Note that the predicate does not change.
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildICmp(Pred, Dst, SL->getSrcReg(), SR->getSrcReg());
+ };
+ return true;
+}
+
+bool CombinerHelper::matchZextOfICmp(const MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ const GICmp *Cmp = cast<GICmp>(&MI);
+
+ Register Dst = Cmp->getReg(0);
+ LLT DstTy = MRI.getType(Dst);
+ Register LHS = Cmp->getLHSReg();
+ Register RHS = Cmp->getRHSReg();
+ CmpInst::Predicate Pred = Cmp->getCond();
+
+ /*
+ %x:_(p0) = COPY $x0
+ %y:_(p0) = COPY $x1
+ %zero:_(p0) = G_CONSTANT i64 0
+ %cmp1:_(s1) = G_ICMP intpred(eq), %x:_(p0), %zero:_
+ */
+
+ if (MRI.getType(LHS).isPointer() || MRI.getType(RHS).isPointer())
+ return false;
+
+ if (!MRI.getType(LHS).isScalar() || !MRI.getType(RHS).isScalar())
+ return false;
+
+ GZext *ZL = cast<GZext>(MRI.getVRegDef(LHS));
+ GZext *ZR = cast<GZext>(MRI.getVRegDef(RHS));
+
+ LLT ZLTy = MRI.getType(ZL->getSrcReg());
+ LLT ZRTy = MRI.getType(ZR->getSrcReg());
+
+ // Turn icmp (zext X), (zext Y) into a compare of X and Y if they have
+ // the same type.
+ if (ZLTy != ZRTy)
+ return false;
+
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ICMP, {DstTy, ZLTy}}))
+ return false;
+
+ // Compare X and Y. Note that signed predicates become unsigned.
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildICmp(ICmpInst::getUnsignedPredicate(Pred), Dst, ZL->getSrcReg(),
+ ZR->getSrcReg());
+ };
+ return true;
+}
+
+bool CombinerHelper::matchCmpOfZero(const MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ const GICmp *Cmp = cast<GICmp>(&MI);
+
+ Register Dst = Cmp->getReg(0);
+ LLT DstTy = MRI.getType(Dst);
+ Register LHS = Cmp->getLHSReg();
+ CmpInst::Predicate Pred = Cmp->getCond();
+
+ if (!isConstantLegalOrBeforeLegalizer(DstTy))
+ return false;
+
+ std::optional<bool> Result;
+
+ switch (Pred) {
+ default:
+ llvm_unreachable("Unkonwn ICmp predicate!");
+ case ICmpInst::ICMP_ULT:
+ Result = false;
+ break;
+ case ICmpInst::ICMP_UGE:
+ Result = true;
+ break;
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_ULE:
+ if (isKnownNonZero(LHS, MRI, KB))
+ Result = false;
+ break;
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_UGT:
+ if (isKnownNonZero(LHS, MRI, KB))
+ Result = true;
+ break;
+ case ICmpInst::ICMP_SLT: {
+ KnownBits LHSKnown = KB->getKnownBits(LHS);
+ if (LHSKnown.isNegative())
+ Result = true;
+ if (LHSKnown.isNonNegative())
+ Result = false;
+ break;
+ }
+ case ICmpInst::ICMP_SLE: {
+ KnownBits LHSKnown = KB->getKnownBits(LHS);
+ if (LHSKnown.isNegative())
+ Result = true;
+ if (LHSKnown.isNonNegative() && isKnownNonZero(LHS, MRI, KB))
+ Result = false;
+ break;
+ }
+ case ICmpInst::ICMP_SGE: {
+ KnownBits LHSKnown = KB->getKnownBits(LHS);
+ if (LHSKnown.isNegative())
+ Result = false;
+ if (LHSKnown.isNonNegative())
+ Result = true;
+ break;
+ }
+ case ICmpInst::ICMP_SGT: {
+ KnownBits LHSKnown = KB->getKnownBits(LHS);
+ if (LHSKnown.isNegative())
+ Result = false;
+ if (LHSKnown.isNonNegative() && isKnownNonZero(LHS, MRI, KB))
+ Result = true;
+ break;
+ }
+ }
+
+ if (!Result)
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ if (*Result)
+ B.buildConstant(Dst, getICmpTrueVal(getTargetLowering(),
+ /*IsVector=*/DstTy.isVector(),
+ /*IsFP=*/false));
+ else
+ B.buildConstant(Dst, 0);
+ };
+
+ return true;
+}
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index cfdd9905c16fa6..e8b9d995a22768 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1984,3 +1984,326 @@ Type *llvm::getTypeForLLT(LLT Ty, LLVMContext &C) {
Ty.getElementCount());
return IntegerType::get(C, Ty.getSizeInBits());
}
+
+APInt llvm::GIConstant::getScalarValue() const {
+ assert(Kind == GIConstantKind::Scalar && "Expected scalar constant");
+
+ return Value;
+}
+
+std::optional<GIConstant>
+llvm::GIConstant::getConstant(Register Const, const MachineRegisterInfo &MRI) {
+ MachineInstr *Constant = getDefIgnoringCopies(Const, MRI);
+
+ if (GSplatVector *Splat = dyn_cast<GSplatVector>(Constant)) {
+ std::optional<ValueAndVReg> MayBeConstant =
+ getIConstantVRegValWithLookThrough(Splat->getValueReg(), MRI);
+ if (!MayBeConstant)
+ return std::nullopt;
+ return GIConstant(MayBeConstant->Value, GIConstantKind::ScalableVector);
+ }
+
+ if (GBuildVector *Build = dyn_cast<GBuildVector>(Constant)) {
+ SmallVector<APInt> Values;
+ unsigned NumSources = Build->getNumSources();
+ for (unsigned I = 0; I < NumSources; ++I) {
+ Register SrcReg = Build->getSourceReg(I);
+ std::optional<ValueAndVReg> MayBeConstant =
+ getIConstantVRegValWithLookThrough(SrcReg, MRI);
+ if (!MayBeConstant)
+ return std::nullopt;
+ Values.push_back(MayBeConstant->Value);
+ }
+ return GIConstant(Values);
+ }
+
+ std::optional<ValueAndVReg> MayBeConstant =
+ getIConstantVRegValWithLookThrough(Const, MRI);
+ if (!MayBeConstant)
+ return std::nullopt;
+
+ return GIConstant(MayBeConstant->Value, GIConstantKind::Scalar);
+}
+
+static bool isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI,
+ GISelKnownBits *KB, unsigned Depth);
+
+bool llvm::isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI,
+ GISelKnownBits *KB, unsigned Depth) {
+ if (!Reg.isVirtual())
+ return false;
+
+ LLT Ty = MRI.getType(Reg);
+ if (!Ty.isValid())
+ return false;
+
+ if (Ty.isPointer())
+ return false;
+
+ if (!Ty.isScalar())
+ errs() << "type: " << Ty << '\n';
+
+ assert(...
[truncated]
@@ -212,13 +212,10 @@ define float @sitofp_i128_to_f32(i128 %x) {
; GISEL-NEXT: v_or_b32_e32 v16, v10, v12
; GISEL-NEXT: v_lshrrev_b64 v[11:12], v14, -1
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v5
; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v15, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5
canonicalize constant to the right + matchCmpOfZero
I don't have time to review GlobalISel changes, but I'll say this: Transforms from the middle-end optimizer should only be reimplemented in GlobalISel (or SDAG) if the patterns can plausibly be introduced as a result of legalization. The input to ISel is always assumed to be canonical IR, so anything taken care of by middle-end canonicalization does not need to be handled by GlobalISel (unless GlobalISel itself can introduce the pattern, e.g. via legalization).
It is an open discussion whether the backend combiners purely clean up legalization artefacts or go beyond that: the move-constants-to-the-right combine also hits.

AArch64 runs a combiner pre-legalization. How could it clean up legalization artefacts?
Okay, maybe my comment didn't spell it out enough. I'm referring to combines that already exist as canonicalizations in the target-independent optimizer. Of course there are also combines during isel that are target-dependent, undoing middle-end canonicalizations, or similar. Those are not targeted at legalization artifacts and can be useful pre-legalization. What I want to push back against here is blindly copying over optimizations from InstSimplify or InstCombine, without any justification for why these optimizations need to be replicated inside GlobalISel. I'm not necessarily saying that the combines proposed here are bad, but they need more justification than "inspired by simplifyICmpInst and simplifyICmpWithZero". You should understand, and be able to articulate to reviewers, why it is beneficial to have these combines replicated in GlobalISel.

See my first comment. They reduce code size.

visitFreeze is ported almost exactly in both combiners. https://github.com/llvm/llvm-project/blob/main/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp creates freeze instructions while legalizing illegal freeze instructions, but I could not find other places. Both combiners create freeze instructions while combining, e.g., selects. It is hard to argue that we ported visitFreeze to combine legalization artefacts.

I removed the ext of icmp combines. Please search again for regressions.
@nikic Thanks for pointing this out. This is something I've been asking for a while too but hadn't expressed right. What we need to see, in the PR description, is a justification for why a combine is good or necessary. If we don't see that, then unless it happens to improve some already existing tests, I have to take time out of my day to build the PR and manually benchmark it for changes. So if we can avoid using "do the same thing as X in InstCombine" as a justification then it's clearer why it's a good thing.

IR is in perfect canonical state. Codegen may do some target-specific optimizations, but I would be surprised if they move constants to the left. The IRTranslator faithfully translates IR into gMIR. Thus porting middle-end optimizations to the backend is a waste of time because they cannot give any size improvements. I mentioned visitFreeze above. It is not dead code. It actually gives code size improvements, which is at odds with my previous statement. The same applies to the canonicalization of constants to the right for icmps. You can see hits in the AMDGPU code. It also gives size improvements in AArch64 code. It is not dead code.
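For illustration, the canonicalization under discussion is the small operand swap in visitICmp quoted in the diff above; a minimal sketch, assuming the CombinerHelper API from this patch:

  // If only the LHS is a constant, move it to the RHS and swap (not
  // negate) the predicate: icmp slt 42, %x --> icmp sgt %x, 42.
  if (auto CLHS = GIConstant::getConstant(LHS, MRI)) {
    std::swap(LHS, RHS);
    Pred = CmpInst::getSwappedPredicate(Pred);
    MatchInfo = [=](MachineIRBuilder &B) { B.buildICmp(Pred, Dst, LHS, RHS); };
    return true;
  }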
Regarding optimizations that exist in the middle-end: if that becomes a big issue, we could move it one floor up and discuss on Discourse the relationship between optimizations in the middle-end and the two backends.

Another waste of energy: #77855

Some more comments. Frankly I'm having trouble following some of your comments, like the one above. Are you agreeing or disagreeing with your current patch? At this point I have no idea.
/// An integer-like constant.
class GIConstant {
  GIConstantKind Kind;
  SmallVector<APInt> Values;
  APInt Value;

public:
  GIConstant(ArrayRef<APInt> Values)
      : Kind(GIConstantKind::FixedVector), Values(Values) {};
  GIConstant(const APInt &Value, GIConstantKind Kind)
      : Kind(Kind), Value(Value) {};

  GIConstantKind getKind() const { return Kind; }

  APInt getScalarValue() const;

  static std::optional<GIConstant> getConstant(Register Const,
                                               const MachineRegisterInfo &MRI);
};

/// Return true if the given value is known to be non-zero when defined.
bool isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI,
                    GISelKnownBits *KB, unsigned Depth = 0);
We have similar interfaces like isConstantOrConstantVector and friends. Can we extend those or unify in some more consistent way rather than have 2 different ways of querying constant values?
Note that isConstantOrConstantVector starts with "is". It is a test. GIConstant::getConstant gives you a constant. It is a vector of APInts. We can add arbitrary arithmetic: GIConstant + GIConstant.
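For illustration, a minimal sketch of how a combine could consume GIConstant; the wrapper function is hypothetical, and it assumes constantFoldICmp (from the diff above) is reachable from the call site:

  // Constant-fold an integer compare once both operands materialize as
  // GIConstants; otherwise bail out so other combines can run.
  bool tryConstantFoldICmp(CombinerHelper &Helper, const GICmp &Cmp,
                           const MachineRegisterInfo &MRI,
                           BuildFnTy &MatchInfo) {
    auto LHS = GIConstant::getConstant(Cmp.getLHSReg(), MRI);
    auto RHS = GIConstant::getConstant(Cmp.getRHSReg(), MRI);
    if (!LHS || !RHS)
      return false;
    return Helper.constantFoldICmp(Cmp, *LHS, *RHS, MatchInfo);
  }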
Yes I'm aware, I said isConstantOrConstantVector and friends. One of those friends is std::optional<APInt> llvm::isConstantOrConstantSplatVector(...), which returns an APInt. So if we add GIConstant now we have even more ways to do similar things. I'm not saying your approach is necessarily wrong here, but taking a step back, maybe this change should be a separate effort to unify our utilities better.
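For reference, a sketch contrasting the two query styles; signatures follow GlobalISel/Utils.h and the diff above, and Reg/MRI are assumed to be in scope:

  // Existing helper: a single APInt, and only for uniform (splat) values.
  std::optional<APInt> Splat =
      isConstantOrConstantSplatVector(*MRI.getVRegDef(Reg), MRI);
  // Proposed GIConstant: covers scalar, fixed-vector, and scalable-vector
  // constants behind one interface.
  std::optional<GIConstant> Cst = GIConstant::getConstant(Reg, MRI);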
We were limited to splat vectors before. GIConstant does not have this restriction.
I am fine with a family of tests for various kinds of constness, but I would argue that GIConstant is powerful enough to become the constant. It is a class. We can add arbitrary member functions for tests and arithmetic. Constants can be scalars, fixed vectors, and scalable vectors.
Perhaps you're right, all the more reason to do it in a separate change where we can think about the design with more care.
The design is minimal, it has uses, and getting rid of the splat vector variants will take a lot of energy. I would prefer to continue with this PR.
In a separate change, it would be dead code.
I am supportive of this patch. I was only stating the common assumption. But the cmp x, 0 combine is also at odds with this statement.

The general assumption is that IR is in canonical form and middle-end optimizations should not be ported to the backends. But reality is at odds with that statement. There are size improvements in this PR.

I added two combines in this PR. I have no interest in extending the first one. I would rather see more combines with disjoint and precise patterns, e.g., icmp_of_zero.

I strongly disagree with the change request. Compares, and only compares, deserve special treatment. Placing several optimizations into one function is beneficial: they share resources, and order matters. Spreading them over several combines would be less efficient. In most cases, the pattern would be just G_ICMP, which I am not interested in.
Could you point me to evidence of that? Thanks.
I see at least another unrelated undef combine in here, but I was more talking about the entire infrastructure being added here. You may technically be adding one "combine", but the underlying analysis has lots of cases, most of which do not appear to be tested, and where I seriously doubt the need to replicate them in GlobalISel. This PR has some very exotic stuff in it. I'm sure that there is some subset of the simplifications proposed here that is indeed useful, but I don't think that the PR as a whole, as you have presented it, is appropriate. The general approach of "take some methods from InstSimplify and copy them to GlobalISel" is something I very much want to discourage. If you want to copy simplifications from somewhere, at least look at DAGCombine instead, where there is at least a chance that somebody has exercised some degree of due diligence when adding things there. But in any case, any handling you add needs to have test coverage, including any cases in underlying analyses.

That didn't answer my question! The isKnownNonZero analysis is in Utils.cpp and test coverage is in combine-visit-icmp.mir and icmp2.ll.
I hadn't even closely looked at that combine yet, but from what I can tell you're using KB a lot in that code.
CombinerHelper::matchCmpOfZero uses KB directly at most once per case in the switch over the icmp predicate; see the switch in the diff above for reference.

A different question is the isKnownNonZero analysis, but it is depth-limited; see isKnownNonZero in Utils.cpp above.
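For illustration, a minimal sketch of the depth-limiting pattern at issue; it is simplified (the real recursion in Utils.cpp handles many more opcodes), and MaxAnalysisRecursionDepth = 6 comes from llvm/Analysis/ValueTracking.h:

  static bool isKnownNonZeroSketch(Register Reg,
                                   const MachineRegisterInfo &MRI,
                                   GISelKnownBits *KB, unsigned Depth) {
    // Give up instead of recursing without bound.
    if (Depth >= MaxAnalysisRecursionDepth)
      return false;
    MachineInstr *Def = MRI.getVRegDef(Reg);
    // A zero-extend preserves non-zero-ness; look one level deeper.
    if (Def->getOpcode() == TargetOpcode::G_ZEXT)
      return isKnownNonZeroSketch(Def->getOperand(1).getReg(), MRI, KB,
                                  Depth + 1);
    // Otherwise fall back to a single KnownBits query at this node.
    return KB->getKnownBits(Reg).isNonZero();
  }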
@nikic requested changes with the statement that this PR adds "a lot of expensive combines". I am still looking for evidence of that claim.
MaxAnalysisRecursionDepth is defaulted to 6. We're going to potentially do a 6-deep KB analysis on every icmp. Maybe we don't need to start with that and only use it if the data supports that it's worth it from code size or performance metrics? Have you run those benchmarks? Or do I need to do it myself again? I'm going to level with you here: at this point in the conversation, any time you bring up an IR combine for reference, it is not helping your case in the way you think. I really suggest you take a step back from this and consider why we have such resistance to this change. You basically have a lot of code, and you're resisting splitting up the visitICmp() function on the basis of efficiency, but you're also ok with adding a KB-dependent combine without having data about its cost/benefit. We need a fundamental rethink in how we add changes to GlobalISel. Individual patches like these are not important. They really aren't. Setting good examples and precedents, along with pragmatic choices backed up by data, is what's important.

I said above that visitICmp and matchCmpOfZero cooperate. I have said several times that llvm/test/CodeGen/AMDGPU/itofp.i128.ll shows evidence of the effects of this PR. Your resistance was based on naming and not on the combines or code.

I don't feel you're making a good-faith effort to listen to our concerns. I'm no longer going to participate in this PR review.
MaxAnalysisRecursionDepth comes from LLVM IR. The constant was not invented by GlobalISel. We already use it in production.

I have never heard, in a GlobalISel combiner review, discussion of data-driven decisions about, e.g., MaxAnalysisRecursionDepth. Is 4, 5, 6, or 7 optimal for GlobalISel? My feeling so far has been that it is sufficient for new combines.

If these rules have changed, we should move the discussion to #92309.

Sorry for my bad communication.

The problem is neither the numeric value nor where it comes from. Do also consider that GlobalISel is an instruction selector, not a middle-end pass like InstCombine. We don't need to redo everything the middle-end does, because the middle-end is supposed to do that for us :)

This combiner combines opcodes that are not coming out of the legalizer as legalization artefacts. It is a generic optimizing combiner. We use the combiner to reduce code size and make the code more amenable to execution (G_MUL -> G_ADD). It is not the first time that we benefit from the expertise of the middle-end in analysis and optimizations. I stated that if we change the order of the two undef optimizations, the results will change. I am asked to hoist these optimizations into separate combines where I lose control of the order. #90618 blindly copied code from the middle-end with almost unnoticeable code size improvement and without cost-benefit analysis. It wasn't run on a set of benchmarks and there was no compile-time analysis. I mentioned above that I am asked to separate canonicalization and the icmp x, 0 combine into two separate PRs, neither of which is profitable in isolation. I am stuck again. I am asked to do tasks that I cannot do?!?
" at least look at DAGCombine instead, " |
Alex Bradbury: IMHO use of MachineIR is more of a defining feature than “global” scope. We test the correctness of combines in MIR. For the DAGCombiner noise in end-to-end tests seems to be sufficient for acceptance. |
I tried a new version:
I removed the undef optimizations. There was too much discussion. Canonicalization is not an optimization. We do it blindly for as many opcodes as possible.
Please split this up into multiple patches as @nikic requested. Start with the canonicalization, perhaps under a clearer name. I'm not saying that the rest of the changes will be accepted after splitting; we still need to come to an agreement about the known bits issue.
As a reminder: I don't care one iota about how precise the analysis is in the combiner if it doesn't pay for its compile-time and complexity cost with real-world improvements. Not synthetic MIR tests, but real-world improvements in code quality. This could be systemic improvements across the LLVM test suite at -Os/O3, or optimizing a key loop in a common benchmark. Nothing is landing in the tree without test cases that exercise the code without synthetic input.
I still have an unanswered question.

Which question?

@nikic requested changes with the statement that this PR adds "a lot of expensive combines". I am still looking for evidence of that claim.

Sorry, my phrasing there was bad. It's not "a lot of expensive combines", it's one combine based on the isKnownNonZero() analysis being introduced here, which in turn has a lot of recursive KnownBits calls. FWIW, from IR experience we know that simplifyICmpInst and KnownBits-based icmp simplification take up a large fraction of InstCombine time for many programs, so the cost/benefit question here is not an idle concern.

That makes sense. Thanks.
/// Represents a splat vector.
class GSplatVector : public GenericMachineInstr {
public:
  Register getValueReg() const { return getOperand(1).getReg(); }

  static bool classof(const MachineInstr *MI) {
    return MI->getOpcode() == TargetOpcode::G_SPLAT_VECTOR;
  };
};

/// Represents an integer-like extending operation.
class GZextOrSextOp : public GCastOp {
public:
  static bool classof(const MachineInstr *MI) {
    switch (MI->getOpcode()) {
    case TargetOpcode::G_SEXT:
    case TargetOpcode::G_ZEXT:
      return true;
    default:
      return false;
    }
  };
};
Should extract the infrastructure bits out of this
GSplatVector is already upstream. GZextOrSextOp probably has a lower market share.
static bool isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI,
                           GISelKnownBits *KB, unsigned Depth);

bool llvm::isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI,
We do want such a helper, but that doesn't mean we should use it aggressively.
This was one of the most controversial questions of this PR. I give up on this one.
if (!Reg.isVirtual())
  return false;

LLT Ty = MRI.getType(Reg);
if (!Ty.isValid())
  return false;
These cases are forbidden and shouldn't require defending against