From a6f927620c11769590dbb802f35c0f5f6ae38fdb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Sun, 11 Aug 2024 21:47:55 +0200 Subject: [PATCH 1/7] [GlobalIsel] Visit ICmp inspired by simplifyICmpInst and simplifyICmpWithZero --- .../llvm/CodeGen/GlobalISel/CombinerHelper.h | 10 + .../CodeGen/GlobalISel/GenericMachineInstrs.h | 24 ++ llvm/include/llvm/CodeGen/GlobalISel/Utils.h | 26 ++ .../include/llvm/Target/GlobalISel/Combine.td | 51 ++- llvm/lib/CodeGen/GlobalISel/CMakeLists.txt | 1 + .../GlobalISel/CombinerHelperCompares.cpp | 305 +++++++++++++++++ llvm/lib/CodeGen/GlobalISel/Utils.cpp | 323 ++++++++++++++++++ .../AArch64/GlobalISel/arm64-atomic.ll | 96 +++--- .../AArch64/GlobalISel/arm64-pcsections.ll | 56 +-- .../AArch64/GlobalISel/combine-visit-icmp.mir | 167 +++++++++ llvm/test/CodeGen/AArch64/arm64-ccmp.ll | 60 +--- llvm/test/CodeGen/AArch64/icmp2.ll | 295 ++++++++++++++++ .../CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll | 18 +- llvm/test/CodeGen/AMDGPU/itofp.i128.ll | 154 ++++----- llvm/test/CodeGen/AMDGPU/rsq.f64.ll | 46 +-- 15 files changed, 1401 insertions(+), 231 deletions(-) create mode 100644 llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/combine-visit-icmp.mir create mode 100644 llvm/test/CodeGen/AArch64/icmp2.ll diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index 9b62d6067be39..da9c7fdbd2a09 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -20,6 +20,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/Register.h" #include "llvm/CodeGenTypes/LowLevelType.h" #include "llvm/IR/InstrTypes.h" @@ -299,6 +300,12 @@ class CombinerHelper { /// $whatever = COPY $addr bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen = 0); + bool visitICmp(const MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchSextOfICmp(const MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchZextOfICmp(const MachineInstr &MI, BuildFnTy &MatchInfo); + /// Try hard to fold icmp with zero RHS because this is a common case. + bool matchCmpOfZero(const MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo); void applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo); @@ -1017,6 +1024,9 @@ class CombinerHelper { bool tryFoldLogicOfFCmps(GLogicalBinOp *Logic, BuildFnTy &MatchInfo); bool isCastFree(unsigned Opcode, LLT ToTy, LLT FromTy) const; + + bool constantFoldICmp(const GICmp &ICmp, const GIConstant &LHS, + const GIConstant &RHS, BuildFnTy &MatchInfo); }; } // namespace llvm diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h index ef1171d9f1f64..427b5a86b6e0c 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h @@ -950,6 +950,30 @@ class GExtOrTruncOp : public GCastOp { }; }; +/// Represents a splat vector. 
+class GSplatVector : public GenericMachineInstr {
+public:
+  Register getValueReg() const { return getOperand(1).getReg(); }
+
+  static bool classof(const MachineInstr *MI) {
+    return MI->getOpcode() == TargetOpcode::G_SPLAT_VECTOR;
+  };
+};
+
+/// Represents an integer-like extending operation.
+class GZextOrSextOp : public GCastOp {
+public:
+  static bool classof(const MachineInstr *MI) {
+    switch (MI->getOpcode()) {
+    case TargetOpcode::G_SEXT:
+    case TargetOpcode::G_ZEXT:
+      return true;
+    default:
+      return false;
+    }
+  };
+};
+
 } // namespace llvm

 #endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index cf5fd6d6f288b..a8bf2e722881a 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -593,5 +593,31 @@ bool isGuaranteedNotToBeUndef(Register Reg, const MachineRegisterInfo &MRI,
 /// estimate of the type.
 Type *getTypeForLLT(LLT Ty, LLVMContext &C);

+enum class GIConstantKind { Scalar, FixedVector, ScalableVector };
+
+/// An integer-like constant.
+class GIConstant {
+  GIConstantKind Kind;
+  SmallVector<APInt> Values;
+  APInt Value;
+
+public:
+  GIConstant(ArrayRef<APInt> Values)
+      : Kind(GIConstantKind::FixedVector), Values(Values) {};
+  GIConstant(const APInt &Value, GIConstantKind Kind)
+      : Kind(Kind), Value(Value) {};
+
+  GIConstantKind getKind() const { return Kind; }
+
+  APInt getScalarValue() const;
+
+  static std::optional<GIConstant> getConstant(Register Const,
+                                               const MachineRegisterInfo &MRI);
+};
+
+/// Return true if the given value is known to be non-zero when defined.
+bool isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI,
+                    GISelKnownBits *KB, unsigned Depth = 0);
+
 } // End namespace llvm.
#endif diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 525cc815e73ce..175a8ed57b266 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -1007,9 +1007,6 @@ def double_icmp_zero_or_combine: GICombineRule< (G_ICMP $root, $p, $ordst, 0)) >; -def double_icmp_zero_and_or_combine : GICombineGroup<[double_icmp_zero_and_combine, - double_icmp_zero_or_combine]>; - def and_or_disjoint_mask : GICombineRule< (defs root:$root, build_fn_matchinfo:$info), (match (wip_match_opcode G_AND):$root, @@ -1884,6 +1881,46 @@ def cast_combines: GICombineGroup<[ buildvector_of_truncate ]>; +def visit_icmp : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (G_ICMP $root, $pred, $lhs, $rhs):$cmp, + [{ return Helper.visitICmp(*${cmp}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFn(*${cmp}, ${matchinfo}); }])>; + +def sext_icmp : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (G_SEXT $rhs, $inputR), + (G_SEXT $lhs, $inputL), + (G_ICMP $root, $pred, $lhs, $rhs):$cmp, + [{ return Helper.matchSextOfICmp(*${cmp}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFn(*${cmp}, ${matchinfo}); }])>; + +def zext_icmp : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (G_ZEXT $rhs, $inputR), + (G_ZEXT $lhs, $inputL), + (G_ICMP $root, $pred, $lhs, $rhs):$cmp, + [{ return Helper.matchZextOfICmp(*${cmp}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFn(*${cmp}, ${matchinfo}); }])>; + +def icmp_of_zero : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (G_CONSTANT $zero, 0), + (G_ICMP $root, $pred, $lhs, $zero):$cmp, + [{ return Helper.matchCmpOfZero(*${cmp}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFn(*${cmp}, ${matchinfo}); }])>; + +def icmp_combines: GICombineGroup<[ + visit_icmp, + sext_icmp, + zext_icmp, + icmp_of_zero, + icmp_to_true_false_known_bits, + icmp_to_lhs_known_bits, + double_icmp_zero_and_combine, + double_icmp_zero_or_combine, + redundant_binop_in_equality +]>; // FIXME: These should use the custom predicate feature once it lands. 
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero, @@ -1917,7 +1954,7 @@ def const_combines : GICombineGroup<[constant_fold_fp_ops, const_ptradd_to_i2p, def known_bits_simplifications : GICombineGroup<[ redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask, - zext_trunc_fold, icmp_to_true_false_known_bits, icmp_to_lhs_known_bits, + zext_trunc_fold, sext_inreg_to_zext_inreg]>; def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend, @@ -1944,7 +1981,7 @@ def constant_fold_binops : GICombineGroup<[constant_fold_binop, def prefer_sign_combines : GICombineGroup<[nneg_zext]>; -def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines, +def all_combines : GICombineGroup<[icmp_combines, integer_reassoc_combines, trivial_combines, vector_ops_combines, freeze_combines, cast_combines, insert_vec_elt_combines, extract_vec_elt_combines, combines_for_extload, combine_extracted_vector_load, @@ -1964,9 +2001,9 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines, constant_fold_cast_op, fabs_fneg_fold, intdiv_combines, mulh_combines, redundant_neg_operands, and_or_disjoint_mask, fma_combines, fold_binop_into_select, - sub_add_reg, select_to_minmax, redundant_binop_in_equality, + sub_add_reg, select_to_minmax, fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors, - combine_concat_vector, double_icmp_zero_and_or_combine, match_addos, + combine_concat_vector, match_addos, sext_trunc, zext_trunc, prefer_sign_combines, combine_shuffle_concat]>; // A combine group used to for prelegalizer combiners at -O0. The combines in diff --git a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt index a15b76440364b..af1717dbf76f3 100644 --- a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt +++ b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt @@ -7,6 +7,7 @@ add_llvm_component_library(LLVMGlobalISel Combiner.cpp CombinerHelper.cpp CombinerHelperCasts.cpp + CombinerHelperCompares.cpp CombinerHelperVectorOps.cpp GIMatchTableExecutor.cpp GISelChangeObserver.cpp diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp new file mode 100644 index 0000000000000..415768fb07e59 --- /dev/null +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp @@ -0,0 +1,305 @@ +//===- CombinerHelperCompares.cpp------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements CombinerHelper for G_ICMP.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cstdlib>
+
+#define DEBUG_TYPE "gi-combiner"
+
+using namespace llvm;
+
+bool CombinerHelper::constantFoldICmp(const GICmp &ICmp,
+                                      const GIConstant &LHSCst,
+                                      const GIConstant &RHSCst,
+                                      BuildFnTy &MatchInfo) {
+  if (LHSCst.getKind() != GIConstantKind::Scalar)
+    return false;
+
+  Register Dst = ICmp.getReg(0);
+  LLT DstTy = MRI.getType(Dst);
+
+  if (!isConstantLegalOrBeforeLegalizer(DstTy))
+    return false;
+
+  CmpInst::Predicate Pred = ICmp.getCond();
+  APInt LHS = LHSCst.getScalarValue();
+  APInt RHS = RHSCst.getScalarValue();
+
+  bool Result;
+
+  switch (Pred) {
+  case CmpInst::Predicate::ICMP_EQ:
+    Result = LHS.eq(RHS);
+    break;
+  case CmpInst::Predicate::ICMP_NE:
+    Result = LHS.ne(RHS);
+    break;
+  case CmpInst::Predicate::ICMP_UGT:
+    Result = LHS.ugt(RHS);
+    break;
+  case CmpInst::Predicate::ICMP_UGE:
+    Result = LHS.uge(RHS);
+    break;
+  case CmpInst::Predicate::ICMP_ULT:
+    Result = LHS.ult(RHS);
+    break;
+  case CmpInst::Predicate::ICMP_ULE:
+    Result = LHS.ule(RHS);
+    break;
+  case CmpInst::Predicate::ICMP_SGT:
+    Result = LHS.sgt(RHS);
+    break;
+  case CmpInst::Predicate::ICMP_SGE:
+    Result = LHS.sge(RHS);
+    break;
+  case CmpInst::Predicate::ICMP_SLT:
+    Result = LHS.slt(RHS);
+    break;
+  case CmpInst::Predicate::ICMP_SLE:
+    Result = LHS.sle(RHS);
+    break;
+  default:
+    llvm_unreachable("Unexpected predicate");
+  }
+
+  MatchInfo = [=](MachineIRBuilder &B) {
+    if (Result)
+      B.buildConstant(Dst, getICmpTrueVal(getTargetLowering(),
+                                          /*IsVector=*/DstTy.isVector(),
+                                          /*IsFP=*/false));
+    else
+      B.buildConstant(Dst, 0);
+  };
+
+  return true;
+}
+
+bool CombinerHelper::visitICmp(const MachineInstr &MI, BuildFnTy &MatchInfo) {
+  const GICmp *Cmp = cast<GICmp>(&MI);
+
+  Register Dst = Cmp->getReg(0);
+  LLT DstTy = MRI.getType(Dst);
+  Register LHS = Cmp->getLHSReg();
+  Register RHS = Cmp->getRHSReg();
+
+  CmpInst::Predicate Pred = Cmp->getCond();
+  assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!");
+  if (auto CLHS = GIConstant::getConstant(LHS, MRI)) {
+    if (auto CRHS = GIConstant::getConstant(RHS, MRI))
+      return constantFoldICmp(*Cmp, *CLHS, *CRHS, MatchInfo);
+
+    // If we have a constant, make sure it is on the RHS.
+    std::swap(LHS, RHS);
+    Pred = CmpInst::getSwappedPredicate(Pred);
+
+    MatchInfo = [=](MachineIRBuilder &B) { B.buildICmp(Pred, Dst, LHS, RHS); };
+    return true;
+  }
+
+  [[maybe_unused]] MachineInstr *MILHS = MRI.getVRegDef(LHS);
+  MachineInstr *MIRHS = MRI.getVRegDef(RHS);
+
+  // For EQ and NE, we can always pick a value for the undef to make the
+  // predicate pass or fail, so we can return undef.
+  // Matches behavior in llvm::ConstantFoldCompareInstruction.
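+  // For example (exercised by the combine-visit-icmp.mir tests below):
+  //   %res:_(s32) = G_ICMP intpred(ne), %lhs:_(s64), %undef:_(s64)
+  // becomes
+  //   %res:_(s32) = G_IMPLICIT_DEF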
+  if (isa<GImplicitDef>(MIRHS) && ICmpInst::isEquality(Pred) &&
+      isLegalOrBeforeLegalizer({TargetOpcode::G_IMPLICIT_DEF, {DstTy}})) {
+    MatchInfo = [=](MachineIRBuilder &B) { B.buildUndef(Dst); };
+    return true;
+  }
+
+  // icmp X, X -> true/false
+  // icmp X, undef -> true/false because undef could be X.
+  if ((LHS == RHS || isa<GImplicitDef>(MIRHS)) &&
+      isConstantLegalOrBeforeLegalizer(DstTy)) {
+    MatchInfo = [=](MachineIRBuilder &B) {
+      if (CmpInst::isTrueWhenEqual(Pred))
+        B.buildConstant(Dst, getICmpTrueVal(getTargetLowering(),
+                                            /*IsVector=*/DstTy.isVector(),
+                                            /*IsFP=*/false));
+      else
+        B.buildConstant(Dst, 0);
+    };
+    return true;
+  }
+
+  return false;
+}
+
+bool CombinerHelper::matchSextOfICmp(const MachineInstr &MI,
+                                     BuildFnTy &MatchInfo) {
+  const GICmp *Cmp = cast<GICmp>(&MI);
+
+  Register Dst = Cmp->getReg(0);
+  LLT DstTy = MRI.getType(Dst);
+  Register LHS = Cmp->getLHSReg();
+  Register RHS = Cmp->getRHSReg();
+  CmpInst::Predicate Pred = Cmp->getCond();
+
+  GSext *SL = cast<GSext>(MRI.getVRegDef(LHS));
+  GSext *SR = cast<GSext>(MRI.getVRegDef(RHS));
+
+  LLT SLTy = MRI.getType(SL->getSrcReg());
+  LLT SRTy = MRI.getType(SR->getSrcReg());
+
+  // Turn icmp (sext X), (sext Y) into a compare of X and Y if they have the
+  // same type.
+  if (SLTy != SRTy)
+    return false;
+
+  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ICMP, {DstTy, SLTy}}))
+    return false;
+
+  // Compare X and Y. Note that the predicate does not change.
+  MatchInfo = [=](MachineIRBuilder &B) {
+    B.buildICmp(Pred, Dst, SL->getSrcReg(), SR->getSrcReg());
+  };
+  return true;
+}
+
+bool CombinerHelper::matchZextOfICmp(const MachineInstr &MI,
+                                     BuildFnTy &MatchInfo) {
+  const GICmp *Cmp = cast<GICmp>(&MI);
+
+  Register Dst = Cmp->getReg(0);
+  LLT DstTy = MRI.getType(Dst);
+  Register LHS = Cmp->getLHSReg();
+  Register RHS = Cmp->getRHSReg();
+  CmpInst::Predicate Pred = Cmp->getCond();
+
+  // Bail out on pointer and non-scalar operands, e.g. compares such as
+  // the following must not be rewritten here:
+  /*
+    %x:_(p0) = COPY $x0
+    %y:_(p0) = COPY $x1
+    %zero:_(p0) = G_CONSTANT i64 0
+    %cmp1:_(s1) = G_ICMP intpred(eq), %x:_(p0), %zero:_
+  */
+
+  if (MRI.getType(LHS).isPointer() || MRI.getType(RHS).isPointer())
+    return false;
+
+  if (!MRI.getType(LHS).isScalar() || !MRI.getType(RHS).isScalar())
+    return false;
+
+  GZext *ZL = cast<GZext>(MRI.getVRegDef(LHS));
+  GZext *ZR = cast<GZext>(MRI.getVRegDef(RHS));
+
+  LLT ZLTy = MRI.getType(ZL->getSrcReg());
+  LLT ZRTy = MRI.getType(ZR->getSrcReg());
+
+  // Turn icmp (zext X), (zext Y) into a compare of X and Y if they have
+  // the same type.
+  if (ZLTy != ZRTy)
+    return false;
+
+  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ICMP, {DstTy, ZLTy}}))
+    return false;
+
+  // Compare X and Y. Note that signed predicates become unsigned.
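+  // For example (see test_icmp_of_zext_and_zext in the MIR tests below),
+  // icmp sgt (zext X), (zext Y) becomes icmp ugt X, Y.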
+  MatchInfo = [=](MachineIRBuilder &B) {
+    B.buildICmp(ICmpInst::getUnsignedPredicate(Pred), Dst, ZL->getSrcReg(),
+                ZR->getSrcReg());
+  };
+  return true;
+}
+
+bool CombinerHelper::matchCmpOfZero(const MachineInstr &MI,
+                                    BuildFnTy &MatchInfo) {
+  const GICmp *Cmp = cast<GICmp>(&MI);
+
+  Register Dst = Cmp->getReg(0);
+  LLT DstTy = MRI.getType(Dst);
+  Register LHS = Cmp->getLHSReg();
+  CmpInst::Predicate Pred = Cmp->getCond();
+
+  if (!isConstantLegalOrBeforeLegalizer(DstTy))
+    return false;
+
+  std::optional<bool> Result;
+
+  switch (Pred) {
+  default:
+    llvm_unreachable("Unknown ICmp predicate!");
+  case ICmpInst::ICMP_ULT:
+    Result = false;
+    break;
+  case ICmpInst::ICMP_UGE:
+    Result = true;
+    break;
+  case ICmpInst::ICMP_EQ:
+  case ICmpInst::ICMP_ULE:
+    if (isKnownNonZero(LHS, MRI, KB))
+      Result = false;
+    break;
+  case ICmpInst::ICMP_NE:
+  case ICmpInst::ICMP_UGT:
+    if (isKnownNonZero(LHS, MRI, KB))
+      Result = true;
+    break;
+  case ICmpInst::ICMP_SLT: {
+    KnownBits LHSKnown = KB->getKnownBits(LHS);
+    if (LHSKnown.isNegative())
+      Result = true;
+    if (LHSKnown.isNonNegative())
+      Result = false;
+    break;
+  }
+  case ICmpInst::ICMP_SLE: {
+    KnownBits LHSKnown = KB->getKnownBits(LHS);
+    if (LHSKnown.isNegative())
+      Result = true;
+    if (LHSKnown.isNonNegative() && isKnownNonZero(LHS, MRI, KB))
+      Result = false;
+    break;
+  }
+  case ICmpInst::ICMP_SGE: {
+    KnownBits LHSKnown = KB->getKnownBits(LHS);
+    if (LHSKnown.isNegative())
+      Result = false;
+    if (LHSKnown.isNonNegative())
+      Result = true;
+    break;
+  }
+  case ICmpInst::ICMP_SGT: {
+    KnownBits LHSKnown = KB->getKnownBits(LHS);
+    if (LHSKnown.isNegative())
+      Result = false;
+    if (LHSKnown.isNonNegative() && isKnownNonZero(LHS, MRI, KB))
+      Result = true;
+    break;
+  }
+  }
+
+  if (!Result)
+    return false;
+
+  MatchInfo = [=](MachineIRBuilder &B) {
+    if (*Result)
+      B.buildConstant(Dst, getICmpTrueVal(getTargetLowering(),
+                                          /*IsVector=*/DstTy.isVector(),
+                                          /*IsFP=*/false));
+    else
+      B.buildConstant(Dst, 0);
+  };
+
+  return true;
+}
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 1713a582d5cfe..5e247c1210113 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1968,3 +1968,326 @@ Type *llvm::getTypeForLLT(LLT Ty, LLVMContext &C) {
                            Ty.getElementCount());
   return IntegerType::get(C, Ty.getSizeInBits());
 }
+
+APInt llvm::GIConstant::getScalarValue() const {
+  assert(Kind == GIConstantKind::Scalar && "Expected scalar constant");
+
+  return Value;
+}
+
+std::optional<GIConstant>
+llvm::GIConstant::getConstant(Register Const, const MachineRegisterInfo &MRI) {
+  MachineInstr *Constant = getDefIgnoringCopies(Const, MRI);
+
+  if (GSplatVector *Splat = dyn_cast<GSplatVector>(Constant)) {
+    std::optional<ValueAndVReg> MayBeConstant =
+        getIConstantVRegValWithLookThrough(Splat->getValueReg(), MRI);
+    if (!MayBeConstant)
+      return std::nullopt;
+    return GIConstant(MayBeConstant->Value, GIConstantKind::ScalableVector);
+  }
+
+  if (GBuildVector *Build = dyn_cast<GBuildVector>(Constant)) {
+    SmallVector<APInt> Values;
+    unsigned NumSources = Build->getNumSources();
+    for (unsigned I = 0; I < NumSources; ++I) {
+      Register SrcReg = Build->getSourceReg(I);
+      std::optional<ValueAndVReg> MayBeConstant =
+          getIConstantVRegValWithLookThrough(SrcReg, MRI);
+      if (!MayBeConstant)
+        return std::nullopt;
+      Values.push_back(MayBeConstant->Value);
+    }
+    return GIConstant(Values);
+  }
+
+  std::optional<ValueAndVReg> MayBeConstant =
+      getIConstantVRegValWithLookThrough(Const, MRI);
+  if (!MayBeConstant)
+    return std::nullopt;
+
+  return GIConstant(MayBeConstant->Value, GIConstantKind::Scalar);
+}
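+
+// A minimal usage sketch for GIConstant (hypothetical caller, not part of
+// this patch): scalar constants can be read back directly.
+//   if (std::optional<GIConstant> C = GIConstant::getConstant(Reg, MRI))
+//     if (C->getKind() == GIConstantKind::Scalar)
+//       APInt V = C->getScalarValue();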
+
+static bool isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI,
+                           GISelKnownBits *KB, unsigned Depth);
+
+bool llvm::isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI,
+                          GISelKnownBits *KB, unsigned Depth) {
+  if (!Reg.isVirtual())
+    return false;
+
+  LLT Ty = MRI.getType(Reg);
+  if (!Ty.isValid())
+    return false;
+
+  if (Ty.isPointer())
+    return false;
+
+  assert(Ty.isScalar() && "Expected a scalar value");
+  return ::isKnownNonZero(Reg, MRI, KB, Depth);
+}
+
+static bool matchOpWithOpEqZero(Register Op0, Register Op1,
+                                const MachineRegisterInfo &MRI) {
+  MachineInstr *MI = MRI.getVRegDef(Op0);
+
+  bool Result = false;
+
+  if (GZextOrSextOp *ZS = dyn_cast<GZextOrSextOp>(MI)) {
+    MachineInstr *SrcMI = MRI.getVRegDef(ZS->getSrcReg());
+    if (GICmp *Cmp = dyn_cast<GICmp>(SrcMI)) {
+      std::optional<ValueAndVReg> MayBeConstant =
+          getIConstantVRegValWithLookThrough(Cmp->getRHSReg(), MRI);
+      if (MayBeConstant)
+        Result |= (MayBeConstant->Value == 0) && (Cmp->getLHSReg() == Op1) &&
+                  (Cmp->getCond() == ICmpInst::ICMP_EQ);
+    }
+  }
+
+  MI = MRI.getVRegDef(Op1);
+  if (GZextOrSextOp *ZS = dyn_cast<GZextOrSextOp>(MI)) {
+    MachineInstr *SrcMI = MRI.getVRegDef(ZS->getSrcReg());
+    if (GICmp *Cmp = dyn_cast<GICmp>(SrcMI)) {
+      std::optional<ValueAndVReg> MayBeConstant =
+          getIConstantVRegValWithLookThrough(Cmp->getRHSReg(), MRI);
+      if (MayBeConstant)
+        Result |= (MayBeConstant->Value == 0) && (Cmp->getLHSReg() == Op0) &&
+                  (Cmp->getCond() == ICmpInst::ICMP_EQ);
+    }
+  }
+
+  return Result;
+}
+
+static bool isNonZeroAdd(const GBinOp &Add, const MachineRegisterInfo &MRI,
+                         GISelKnownBits *KB, unsigned Depth,
+                         unsigned BitWidth) {
+  bool NSW = Add.getFlag(MachineInstr::MIFlag::NoSWrap);
+  bool NUW = Add.getFlag(MachineInstr::MIFlag::NoUWrap);
+  Register LHS = Add.getLHSReg();
+  Register RHS = Add.getRHSReg();
+
+  // (X + (X != 0)) is non zero
+  if (matchOpWithOpEqZero(LHS, RHS, MRI))
+    return true;
+
+  if (NUW)
+    return ::isKnownNonZero(RHS, MRI, KB, Depth) ||
+           ::isKnownNonZero(LHS, MRI, KB, Depth);
+
+  KnownBits LHSKnown = KB->getKnownBits(LHS);
+  KnownBits RHSKnown = KB->getKnownBits(RHS);
+
+  // If LHS and RHS are both non-negative (as signed values) then their sum is
+  // not zero unless both LHS and RHS are zero.
+  if (LHSKnown.isNonNegative() && RHSKnown.isNonNegative())
+    if (::isKnownNonZero(LHS, MRI, KB, Depth) ||
+        ::isKnownNonZero(RHS, MRI, KB, Depth))
+      return true;
+
+  // If LHS and RHS are both negative (as signed values) then their sum is not
+  // zero unless both LHS and RHS equal INT_MIN.
+  if (LHSKnown.isNegative() && RHSKnown.isNegative()) {
+    APInt Mask = APInt::getSignedMaxValue(BitWidth);
+    // The sign bit of LHS is set. If some other bit is set then LHS is not
+    // equal to INT_MIN.
+    if (LHSKnown.One.intersects(Mask))
+      return true;
+    // The sign bit of RHS is set. If some other bit is set then RHS is not
+    // equal to INT_MIN.
+    if (RHSKnown.One.intersects(Mask))
+      return true;
+  }
+
+  // The sum of a non-negative number and a power of two is not zero.
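+  // (A non-negative value is at most 2^(BitWidth-1) - 1 and a power of two is
+  // at most 2^(BitWidth-1), so their sum is at least 1 and at most
+  // 2^BitWidth - 1; it cannot wrap to zero.)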
+  if (LHSKnown.isNonNegative() && ::isKnownToBeAPowerOfTwo(RHS, MRI, KB))
+    return true;
+  if (RHSKnown.isNonNegative() && ::isKnownToBeAPowerOfTwo(LHS, MRI, KB))
+    return true;
+
+  return KnownBits::add(LHSKnown, RHSKnown, NSW, NUW).isNonZero();
+}
+
+static bool isKnownNonZeroBinOp(const GBinOp &BinOp,
+                                const MachineRegisterInfo &MRI,
+                                GISelKnownBits *KB, unsigned Depth) {
+  unsigned BitWidth = MRI.getType(BinOp.getReg(0)).getScalarSizeInBits();
+  switch (BinOp.getOpcode()) {
+  case TargetOpcode::G_XOR:
+    // (X ^ (X != 0)) is non zero
+    if (matchOpWithOpEqZero(BinOp.getLHSReg(), BinOp.getRHSReg(), MRI))
+      return true;
+    break;
+  case TargetOpcode::G_OR: {
+    // (X | (X != 0)) is non zero
+    if (matchOpWithOpEqZero(BinOp.getLHSReg(), BinOp.getRHSReg(), MRI))
+      return true;
+    // X | Y != 0 if X != 0 or Y != 0.
+    return ::isKnownNonZero(BinOp.getRHSReg(), MRI, KB, Depth) ||
+           ::isKnownNonZero(BinOp.getLHSReg(), MRI, KB, Depth);
+  }
+  case TargetOpcode::G_ADD: {
+    // X + Y. If the add has the nuw flag, the result is non-zero if either X
+    // or Y is non-zero; isNonZeroAdd handles this and the wrapping cases.
+    return isNonZeroAdd(BinOp, MRI, KB, Depth, BitWidth);
+  }
+  default:
+    return false;
+  }
+
+  return false;
+}
+
+static bool isKnownNonZeroCastOp(const GCastOp &CastOp,
+                                 const MachineRegisterInfo &MRI,
+                                 GISelKnownBits *KB, unsigned Depth) {
+  switch (CastOp.getOpcode()) {
+  case TargetOpcode::G_SEXT:
+  case TargetOpcode::G_ZEXT:
+    // ext X != 0 if X != 0.
+    return ::isKnownNonZero(CastOp.getSrcReg(), MRI, KB, Depth);
+  case TargetOpcode::G_TRUNC:
+    // nuw/nsw trunc preserves zero/non-zero status of input.
+    if (CastOp.getFlag(MachineInstr::MIFlag::NoSWrap) ||
+        CastOp.getFlag(MachineInstr::MIFlag::NoUWrap))
+      return ::isKnownNonZero(CastOp.getSrcReg(), MRI, KB, Depth);
+    break;
+  default:
+    return false;
+  }
+
+  return false;
+}
+
+static bool isNonZeroShift(const MachineInstr *MI,
+                           const MachineRegisterInfo &MRI, GISelKnownBits *KB,
+                           unsigned Depth, const KnownBits &KnownVal) {
+  auto ShiftOp = [&](const APInt &Lhs, const APInt &Rhs) {
+    switch (MI->getOpcode()) {
+    case TargetOpcode::G_SHL:
+      return Lhs.shl(Rhs);
+    case TargetOpcode::G_LSHR:
+      return Lhs.lshr(Rhs);
+    case TargetOpcode::G_ASHR:
+      return Lhs.ashr(Rhs);
+    default:
+      llvm_unreachable("Unknown Shift Opcode");
+    }
+  };
+
+  auto InvShiftOp = [&](const APInt &Lhs, const APInt &Rhs) {
+    switch (MI->getOpcode()) {
+    case TargetOpcode::G_SHL:
+      return Lhs.lshr(Rhs);
+    case TargetOpcode::G_LSHR:
+    case TargetOpcode::G_ASHR:
+      return Lhs.shl(Rhs);
+    default:
+      llvm_unreachable("Unknown Shift Opcode");
+    }
+  };
+
+  if (KnownVal.isUnknown())
+    return false;
+
+  KnownBits KnownCnt = KB->getKnownBits(MI->getOperand(2).getReg());
+  APInt MaxShift = KnownCnt.getMaxValue();
+  unsigned NumBits = KnownVal.getBitWidth();
+  if (MaxShift.uge(NumBits))
+    return false;
+
+  if (!ShiftOp(KnownVal.One, MaxShift).isZero())
+    return true;
+
+  // If all of the bits shifted out are known to be zero, and Val is known
+  // non-zero then at least one non-zero bit must remain.
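+  // E.g. a non-zero 8-bit value whose low 4 bits are known zero stays
+  // non-zero under a G_LSHR by at most 4: no set bit can be discarded.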
+  if (InvShiftOp(KnownVal.Zero, NumBits - MaxShift)
+          .eq(InvShiftOp(APInt::getAllOnes(NumBits), NumBits - MaxShift)) &&
+      ::isKnownNonZero(MI->getOperand(1).getReg(), MRI, KB, Depth))
+    return true;
+
+  return false;
+}
+
+static bool isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI,
+                           GISelKnownBits *KB, unsigned Depth) {
+  if (!Reg.isVirtual())
+    return false;
+
+  std::optional<ValueAndVReg> MayBeConstant =
+      getIConstantVRegValWithLookThrough(Reg, MRI);
+
+  if (MayBeConstant)
+    return MayBeConstant->Value != 0;
+
+  // Some of the tests below are recursive, so bail out if we hit the limit.
+  if (Depth++ >= MaxAnalysisRecursionDepth)
+    return false;
+
+  MachineInstr *MI = getDefIgnoringCopies(Reg, MRI);
+
+  if (GBinOp *BinOp = dyn_cast<GBinOp>(MI))
+    return isKnownNonZeroBinOp(*BinOp, MRI, KB, Depth);
+
+  if (GCastOp *CastOp = dyn_cast<GCastOp>(MI))
+    return isKnownNonZeroCastOp(*CastOp, MRI, KB, Depth);
+
+  switch (MI->getOpcode()) {
+  case TargetOpcode::G_SHL: {
+    // shl nsw/nuw can't remove any non-zero bits.
+    if (MI->getFlag(MachineInstr::MIFlag::NoUWrap) ||
+        MI->getFlag(MachineInstr::MIFlag::NoSWrap))
+      return ::isKnownNonZero(MI->getOperand(1).getReg(), MRI, KB, Depth);
+
+    // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined
+    // if the lowest bit is shifted off the end.
+    KnownBits Known = KB->getKnownBits(MI->getOperand(1).getReg());
+    if (Known.One[0])
+      return true;
+
+    return isNonZeroShift(MI, MRI, KB, Depth, Known);
+  }
+  case TargetOpcode::G_LSHR:
+  case TargetOpcode::G_ASHR: {
+    // shr exact can only shift out zero bits.
+    if (MI->getFlag(MachineInstr::MIFlag::IsExact))
+      return ::isKnownNonZero(MI->getOperand(1).getReg(), MRI, KB, Depth);
+
+    // shr X, Y != 0 if X is negative. Note that the value of the shift is not
+    // defined if the sign bit is shifted off the end.
+    KnownBits Known = KB->getKnownBits(MI->getOperand(1).getReg());
+    if (Known.isNegative())
+      return true;
+
+    return isNonZeroShift(MI, MRI, KB, Depth, Known);
+  }
+  case TargetOpcode::G_FREEZE:
+    return ::isKnownNonZero(MI->getOperand(1).getReg(), MRI, KB, Depth) &&
+           ::isGuaranteedNotToBePoison(MI->getOperand(1).getReg(), MRI, Depth);
+  case TargetOpcode::G_SMIN: {
+    // If either arg is negative the result is non-zero. Otherwise
+    // the result is non-zero if both ops are non-zero.
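+    // E.g. smin(X, -1) is at most -1 and therefore never zero.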
+ KnownBits Op1Known = KB->getKnownBits(MI->getOperand(2).getReg()); + if (Op1Known.isNegative()) + return true; + KnownBits Op0Known = KB->getKnownBits(MI->getOperand(1).getReg()); + if (Op0Known.isNegative()) + return true; + + if (Op1Known.isNonZero() && Op0Known.isNonZero()) + return true; + } + [[fallthrough]]; + case TargetOpcode::G_UMIN: + return ::isKnownNonZero(MI->getOperand(1).getReg(), MRI, KB, Depth) && + ::isKnownNonZero(MI->getOperand(2).getReg(), MRI, KB, Depth); + default: + return false; + } +} diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll index de3f323891a36..816f7c3debcd3 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll @@ -2655,13 +2655,15 @@ define i8 @atomicrmw_max_i8(ptr %ptr, i8 %rhs) { define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) { ; CHECK-NOLSE-O1-LABEL: atomicrmw_umin_i8: ; CHECK-NOLSE-O1: ; %bb.0: -; CHECK-NOLSE-O1-NEXT: and w9, w1, #0xff +; CHECK-NOLSE-O1-NEXT: ; kill: def $w1 killed $w1 def $x1 +; CHECK-NOLSE-O1-NEXT: and x9, x1, #0xff ; CHECK-NOLSE-O1-NEXT: LBB35_1: ; %atomicrmw.start ; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-NOLSE-O1-NEXT: ldaxrb w8, [x0] -; CHECK-NOLSE-O1-NEXT: and w8, w8, #0xff -; CHECK-NOLSE-O1-NEXT: cmp w8, w9 -; CHECK-NOLSE-O1-NEXT: csel w10, w8, w9, lo +; CHECK-NOLSE-O1-NEXT: ldaxrb w10, [x0] +; CHECK-NOLSE-O1-NEXT: and w8, w10, #0xff +; CHECK-NOLSE-O1-NEXT: and x10, x10, #0xff +; CHECK-NOLSE-O1-NEXT: cmp w8, w1, uxtb +; CHECK-NOLSE-O1-NEXT: csel x10, x10, x9, ls ; CHECK-NOLSE-O1-NEXT: stlxrb w11, w10, [x0] ; CHECK-NOLSE-O1-NEXT: cbnz w11, LBB35_1 ; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end @@ -2670,13 +2672,15 @@ define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) { ; ; CHECK-OUTLINE-O1-LABEL: atomicrmw_umin_i8: ; CHECK-OUTLINE-O1: ; %bb.0: -; CHECK-OUTLINE-O1-NEXT: and w9, w1, #0xff +; CHECK-OUTLINE-O1-NEXT: ; kill: def $w1 killed $w1 def $x1 +; CHECK-OUTLINE-O1-NEXT: and x9, x1, #0xff ; CHECK-OUTLINE-O1-NEXT: LBB35_1: ; %atomicrmw.start ; CHECK-OUTLINE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-OUTLINE-O1-NEXT: ldaxrb w8, [x0] -; CHECK-OUTLINE-O1-NEXT: and w8, w8, #0xff -; CHECK-OUTLINE-O1-NEXT: cmp w8, w9 -; CHECK-OUTLINE-O1-NEXT: csel w10, w8, w9, lo +; CHECK-OUTLINE-O1-NEXT: ldaxrb w10, [x0] +; CHECK-OUTLINE-O1-NEXT: and w8, w10, #0xff +; CHECK-OUTLINE-O1-NEXT: and x10, x10, #0xff +; CHECK-OUTLINE-O1-NEXT: cmp w8, w1, uxtb +; CHECK-OUTLINE-O1-NEXT: csel x10, x10, x9, ls ; CHECK-OUTLINE-O1-NEXT: stlxrb w11, w10, [x0] ; CHECK-OUTLINE-O1-NEXT: cbnz w11, LBB35_1 ; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end @@ -2777,13 +2781,15 @@ define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) { define i8 @atomicrmw_umax_i8(ptr %ptr, i8 %rhs) { ; CHECK-NOLSE-O1-LABEL: atomicrmw_umax_i8: ; CHECK-NOLSE-O1: ; %bb.0: -; CHECK-NOLSE-O1-NEXT: and w9, w1, #0xff +; CHECK-NOLSE-O1-NEXT: ; kill: def $w1 killed $w1 def $x1 +; CHECK-NOLSE-O1-NEXT: and x9, x1, #0xff ; CHECK-NOLSE-O1-NEXT: LBB36_1: ; %atomicrmw.start ; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-NOLSE-O1-NEXT: ldxrb w8, [x0] -; CHECK-NOLSE-O1-NEXT: and w8, w8, #0xff -; CHECK-NOLSE-O1-NEXT: cmp w8, w9 -; CHECK-NOLSE-O1-NEXT: csel w10, w8, w9, hi +; CHECK-NOLSE-O1-NEXT: ldxrb w10, [x0] +; CHECK-NOLSE-O1-NEXT: and w8, w10, #0xff +; CHECK-NOLSE-O1-NEXT: and x10, x10, #0xff +; CHECK-NOLSE-O1-NEXT: cmp w8, w1, uxtb +; CHECK-NOLSE-O1-NEXT: csel x10, x10, x9, hi ; CHECK-NOLSE-O1-NEXT: 
stxrb w11, w10, [x0] ; CHECK-NOLSE-O1-NEXT: cbnz w11, LBB36_1 ; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end @@ -2792,13 +2798,15 @@ define i8 @atomicrmw_umax_i8(ptr %ptr, i8 %rhs) { ; ; CHECK-OUTLINE-O1-LABEL: atomicrmw_umax_i8: ; CHECK-OUTLINE-O1: ; %bb.0: -; CHECK-OUTLINE-O1-NEXT: and w9, w1, #0xff +; CHECK-OUTLINE-O1-NEXT: ; kill: def $w1 killed $w1 def $x1 +; CHECK-OUTLINE-O1-NEXT: and x9, x1, #0xff ; CHECK-OUTLINE-O1-NEXT: LBB36_1: ; %atomicrmw.start ; CHECK-OUTLINE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-OUTLINE-O1-NEXT: ldxrb w8, [x0] -; CHECK-OUTLINE-O1-NEXT: and w8, w8, #0xff -; CHECK-OUTLINE-O1-NEXT: cmp w8, w9 -; CHECK-OUTLINE-O1-NEXT: csel w10, w8, w9, hi +; CHECK-OUTLINE-O1-NEXT: ldxrb w10, [x0] +; CHECK-OUTLINE-O1-NEXT: and w8, w10, #0xff +; CHECK-OUTLINE-O1-NEXT: and x10, x10, #0xff +; CHECK-OUTLINE-O1-NEXT: cmp w8, w1, uxtb +; CHECK-OUTLINE-O1-NEXT: csel x10, x10, x9, hi ; CHECK-OUTLINE-O1-NEXT: stxrb w11, w10, [x0] ; CHECK-OUTLINE-O1-NEXT: cbnz w11, LBB36_1 ; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end @@ -3710,13 +3718,15 @@ define i16 @atomicrmw_max_i16(ptr %ptr, i16 %rhs) { define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) { ; CHECK-NOLSE-O1-LABEL: atomicrmw_umin_i16: ; CHECK-NOLSE-O1: ; %bb.0: -; CHECK-NOLSE-O1-NEXT: and w9, w1, #0xffff +; CHECK-NOLSE-O1-NEXT: ; kill: def $w1 killed $w1 def $x1 +; CHECK-NOLSE-O1-NEXT: and x9, x1, #0xffff ; CHECK-NOLSE-O1-NEXT: LBB45_1: ; %atomicrmw.start ; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-NOLSE-O1-NEXT: ldaxrh w8, [x0] -; CHECK-NOLSE-O1-NEXT: and w8, w8, #0xffff -; CHECK-NOLSE-O1-NEXT: cmp w8, w9 -; CHECK-NOLSE-O1-NEXT: csel w10, w8, w9, lo +; CHECK-NOLSE-O1-NEXT: ldaxrh w10, [x0] +; CHECK-NOLSE-O1-NEXT: and w8, w10, #0xffff +; CHECK-NOLSE-O1-NEXT: and x10, x10, #0xffff +; CHECK-NOLSE-O1-NEXT: cmp w8, w1, uxth +; CHECK-NOLSE-O1-NEXT: csel x10, x10, x9, ls ; CHECK-NOLSE-O1-NEXT: stlxrh w11, w10, [x0] ; CHECK-NOLSE-O1-NEXT: cbnz w11, LBB45_1 ; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end @@ -3725,13 +3735,15 @@ define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) { ; ; CHECK-OUTLINE-O1-LABEL: atomicrmw_umin_i16: ; CHECK-OUTLINE-O1: ; %bb.0: -; CHECK-OUTLINE-O1-NEXT: and w9, w1, #0xffff +; CHECK-OUTLINE-O1-NEXT: ; kill: def $w1 killed $w1 def $x1 +; CHECK-OUTLINE-O1-NEXT: and x9, x1, #0xffff ; CHECK-OUTLINE-O1-NEXT: LBB45_1: ; %atomicrmw.start ; CHECK-OUTLINE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-OUTLINE-O1-NEXT: ldaxrh w8, [x0] -; CHECK-OUTLINE-O1-NEXT: and w8, w8, #0xffff -; CHECK-OUTLINE-O1-NEXT: cmp w8, w9 -; CHECK-OUTLINE-O1-NEXT: csel w10, w8, w9, lo +; CHECK-OUTLINE-O1-NEXT: ldaxrh w10, [x0] +; CHECK-OUTLINE-O1-NEXT: and w8, w10, #0xffff +; CHECK-OUTLINE-O1-NEXT: and x10, x10, #0xffff +; CHECK-OUTLINE-O1-NEXT: cmp w8, w1, uxth +; CHECK-OUTLINE-O1-NEXT: csel x10, x10, x9, ls ; CHECK-OUTLINE-O1-NEXT: stlxrh w11, w10, [x0] ; CHECK-OUTLINE-O1-NEXT: cbnz w11, LBB45_1 ; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end @@ -3832,13 +3844,15 @@ define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) { define i16 @atomicrmw_umax_i16(ptr %ptr, i16 %rhs) { ; CHECK-NOLSE-O1-LABEL: atomicrmw_umax_i16: ; CHECK-NOLSE-O1: ; %bb.0: -; CHECK-NOLSE-O1-NEXT: and w9, w1, #0xffff +; CHECK-NOLSE-O1-NEXT: ; kill: def $w1 killed $w1 def $x1 +; CHECK-NOLSE-O1-NEXT: and x9, x1, #0xffff ; CHECK-NOLSE-O1-NEXT: LBB46_1: ; %atomicrmw.start ; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-NOLSE-O1-NEXT: ldxrh w8, [x0] -; CHECK-NOLSE-O1-NEXT: and w8, w8, #0xffff -; 
CHECK-NOLSE-O1-NEXT: cmp w8, w9 -; CHECK-NOLSE-O1-NEXT: csel w10, w8, w9, hi +; CHECK-NOLSE-O1-NEXT: ldxrh w10, [x0] +; CHECK-NOLSE-O1-NEXT: and w8, w10, #0xffff +; CHECK-NOLSE-O1-NEXT: and x10, x10, #0xffff +; CHECK-NOLSE-O1-NEXT: cmp w8, w1, uxth +; CHECK-NOLSE-O1-NEXT: csel x10, x10, x9, hi ; CHECK-NOLSE-O1-NEXT: stxrh w11, w10, [x0] ; CHECK-NOLSE-O1-NEXT: cbnz w11, LBB46_1 ; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end @@ -3847,13 +3861,15 @@ define i16 @atomicrmw_umax_i16(ptr %ptr, i16 %rhs) { ; ; CHECK-OUTLINE-O1-LABEL: atomicrmw_umax_i16: ; CHECK-OUTLINE-O1: ; %bb.0: -; CHECK-OUTLINE-O1-NEXT: and w9, w1, #0xffff +; CHECK-OUTLINE-O1-NEXT: ; kill: def $w1 killed $w1 def $x1 +; CHECK-OUTLINE-O1-NEXT: and x9, x1, #0xffff ; CHECK-OUTLINE-O1-NEXT: LBB46_1: ; %atomicrmw.start ; CHECK-OUTLINE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-OUTLINE-O1-NEXT: ldxrh w8, [x0] -; CHECK-OUTLINE-O1-NEXT: and w8, w8, #0xffff -; CHECK-OUTLINE-O1-NEXT: cmp w8, w9 -; CHECK-OUTLINE-O1-NEXT: csel w10, w8, w9, hi +; CHECK-OUTLINE-O1-NEXT: ldxrh w10, [x0] +; CHECK-OUTLINE-O1-NEXT: and w8, w10, #0xffff +; CHECK-OUTLINE-O1-NEXT: and x10, x10, #0xffff +; CHECK-OUTLINE-O1-NEXT: cmp w8, w1, uxth +; CHECK-OUTLINE-O1-NEXT: csel x10, x10, x9, hi ; CHECK-OUTLINE-O1-NEXT: stxrh w11, w10, [x0] ; CHECK-OUTLINE-O1-NEXT: cbnz w11, LBB46_1 ; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll index c6819ff39ed33..0e4750d381592 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll @@ -919,16 +919,18 @@ define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w9 = ANDWri killed renamable $w1, 7 + ; CHECK-NEXT: renamable $w1 = KILL $w1, implicit-def $x1 + ; CHECK-NEXT: renamable $x9 = ANDXri renamable $x1, 4103, pcsections !0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.atomicrmw.start: ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) - ; CHECK-NEXT: liveins: $w9, $x0 + ; CHECK-NEXT: liveins: $x0, $x1, $x9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) - ; CHECK-NEXT: renamable $w8 = ANDWri renamable $w8, 7, implicit killed $x8 - ; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0 - ; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 3, implicit killed $nzcv, implicit-def $x10, pcsections !0 + ; CHECK-NEXT: renamable $w10 = LDAXRB renamable $x0, implicit-def $x10, pcsections !0 :: (volatile load (s8) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = ANDWri renamable $w10, 7 + ; CHECK-NEXT: renamable $x10 = ANDXri killed renamable $x10, 4103, pcsections !0 + ; CHECK-NEXT: dead $wzr = SUBSWrx renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0 + ; CHECK-NEXT: renamable $x10 = CSELXr killed renamable $x10, renamable $x9, 9, implicit killed $nzcv, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w11 = STLXRB renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s8) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -947,16 +949,18 @@ define i8 @atomicrmw_umax_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: 
liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w9 = ANDWri killed renamable $w1, 7 + ; CHECK-NEXT: renamable $w1 = KILL $w1, implicit-def $x1 + ; CHECK-NEXT: renamable $x9 = ANDXri renamable $x1, 4103, pcsections !0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.atomicrmw.start: ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) - ; CHECK-NEXT: liveins: $w9, $x0 + ; CHECK-NEXT: liveins: $x0, $x1, $x9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) - ; CHECK-NEXT: renamable $w8 = ANDWri renamable $w8, 7, implicit killed $x8 - ; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0 - ; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 8, implicit killed $nzcv, implicit-def $x10, pcsections !0 + ; CHECK-NEXT: renamable $w10 = LDXRB renamable $x0, implicit-def $x10, pcsections !0 :: (volatile load (s8) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = ANDWri renamable $w10, 7 + ; CHECK-NEXT: renamable $x10 = ANDXri killed renamable $x10, 4103, pcsections !0 + ; CHECK-NEXT: dead $wzr = SUBSWrx renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0 + ; CHECK-NEXT: renamable $x10 = CSELXr killed renamable $x10, renamable $x9, 8, implicit killed $nzcv, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w11 = STXRB renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s8) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -1172,16 +1176,18 @@ define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w9 = ANDWri killed renamable $w1, 15 + ; CHECK-NEXT: renamable $w1 = KILL $w1, implicit-def $x1 + ; CHECK-NEXT: renamable $x9 = ANDXri renamable $x1, 4111, pcsections !0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.atomicrmw.start: ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) - ; CHECK-NEXT: liveins: $w9, $x0 + ; CHECK-NEXT: liveins: $x0, $x1, $x9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) - ; CHECK-NEXT: renamable $w8 = ANDWri renamable $w8, 15, implicit killed $x8 - ; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0 - ; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 3, implicit killed $nzcv, implicit-def $x10, pcsections !0 + ; CHECK-NEXT: renamable $w10 = LDAXRH renamable $x0, implicit-def $x10, pcsections !0 :: (volatile load (s16) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = ANDWri renamable $w10, 15 + ; CHECK-NEXT: renamable $x10 = ANDXri killed renamable $x10, 4111, pcsections !0 + ; CHECK-NEXT: dead $wzr = SUBSWrx renamable $w8, renamable $w1, 8, implicit-def $nzcv, pcsections !0 + ; CHECK-NEXT: renamable $x10 = CSELXr killed renamable $x10, renamable $x9, 9, implicit killed $nzcv, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w11 = STLXRH renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s16) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -1200,16 +1206,18 @@ define i16 @atomicrmw_umax_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w9 = 
ANDWri killed renamable $w1, 15 + ; CHECK-NEXT: renamable $w1 = KILL $w1, implicit-def $x1 + ; CHECK-NEXT: renamable $x9 = ANDXri renamable $x1, 4111, pcsections !0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.atomicrmw.start: ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) - ; CHECK-NEXT: liveins: $w9, $x0 + ; CHECK-NEXT: liveins: $x0, $x1, $x9 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) - ; CHECK-NEXT: renamable $w8 = ANDWri renamable $w8, 15, implicit killed $x8 - ; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0 - ; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 8, implicit killed $nzcv, implicit-def $x10, pcsections !0 + ; CHECK-NEXT: renamable $w10 = LDXRH renamable $x0, implicit-def $x10, pcsections !0 :: (volatile load (s16) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = ANDWri renamable $w10, 15 + ; CHECK-NEXT: renamable $x10 = ANDXri killed renamable $x10, 4111, pcsections !0 + ; CHECK-NEXT: dead $wzr = SUBSWrx renamable $w8, renamable $w1, 8, implicit-def $nzcv, pcsections !0 + ; CHECK-NEXT: renamable $x10 = CSELXr killed renamable $x10, renamable $x9, 8, implicit killed $nzcv, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w11 = STXRH renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s16) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-visit-icmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-visit-icmp.mir new file mode 100644 index 0000000000000..e0eaa6d63b7fc --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-visit-icmp.mir @@ -0,0 +1,167 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s --check-prefixes=CHECK + +--- +name: test_icmp_of_eq_and_right_undef +body: | + bb.1: + ; CHECK-LABEL: name: test_icmp_of_eq_and_right_undef + ; CHECK: %res:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: $w0 = COPY %res(s32) + %lhs:_(s64) = COPY $x0 + %rhs:_(s64) = G_IMPLICIT_DEF + %res:_(s32) = G_ICMP intpred(ne), %lhs(s64), %rhs + $w0 = COPY %res(s32) +... +--- +name: test_icmp_of_not_eq_and_right_undef +body: | + bb.1: + ; CHECK-LABEL: name: test_icmp_of_not_eq_and_right_undef + ; CHECK: %res:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: $w0 = COPY %res(s32) + %lhs:_(s64) = COPY $x0 + %rhs:_(s64) = G_IMPLICIT_DEF + %res:_(s32) = G_ICMP intpred(ugt), %lhs(s64), %rhs + $w0 = COPY %res(s32) +... +--- +name: test_icmp_of_is_eq_and_right_undef +body: | + bb.1: + ; CHECK-LABEL: name: test_icmp_of_is_eq_and_right_undef + ; CHECK: %res:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: $w0 = COPY %res(s32) + %lhs:_(s64) = COPY $x0 + %rhs:_(s64) = G_IMPLICIT_DEF + %res:_(s32) = G_ICMP intpred(ne), %lhs(s64), %rhs + $w0 = COPY %res(s32) +... +--- +name: test_icmp_of_eq_not_eq +body: | + bb.1: + ; CHECK-LABEL: name: test_icmp_of_eq_not_eq + ; CHECK: %res:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: $w0 = COPY %res(s32) + %lhs:_(s64) = COPY $x0 + %rhs:_(s64) = COPY $x0 + %res:_(s32) = G_ICMP intpred(ugt), %lhs(s64), %lhs + $w0 = COPY %res(s32) +... 
+--- +name: test_icmp_of_eq_is_eq +body: | + bb.1: + ; CHECK-LABEL: name: test_icmp_of_eq_is_eq + ; CHECK: %res:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: $w0 = COPY %res(s32) + %lhs:_(s64) = COPY $x0 + %rhs:_(s64) = COPY $x0 + %res:_(s32) = G_ICMP intpred(eq), %lhs(s64), %lhs + $w0 = COPY %res(s32) +... +--- +name: test_icmp_of_zext_and_zext +body: | + bb.1: + ; CHECK-LABEL: name: test_icmp_of_zext_and_zext + ; CHECK: %lhs1:_(s32) = COPY $w0 + ; CHECK-NEXT: %rhs1:_(s32) = COPY $w0 + ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(ugt), %lhs1(s32), %rhs1 + ; CHECK-NEXT: $w0 = COPY %res(s32) + %lhs1:_(s32) = COPY $w0 + %rhs1:_(s32) = COPY $w0 + %lhs:_(s64) = G_ZEXT %lhs1 + %rhs:_(s64) = G_ZEXT %rhs1 + %res:_(s32) = G_ICMP intpred(sgt), %lhs(s64), %rhs + $w0 = COPY %res(s32) +... +--- +name: test_icmp_of_sext_and_sext +body: | + bb.1: + ; CHECK-LABEL: name: test_icmp_of_sext_and_sext + ; CHECK: %lhs1:_(s32) = COPY $w0 + ; CHECK-NEXT: %rhs1:_(s32) = COPY $w0 + ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(sgt), %lhs1(s32), %rhs1 + ; CHECK-NEXT: $w0 = COPY %res(s32) + %lhs1:_(s32) = COPY $w0 + %rhs1:_(s32) = COPY $w0 + %lhs:_(s64) = G_SEXT %lhs1 + %rhs:_(s64) = G_SEXT %rhs1 + %res:_(s32) = G_ICMP intpred(sgt), %lhs(s64), %rhs + $w0 = COPY %res(s32) +... +--- +name: test_ugt_icmp_of_sext_and_sext +body: | + bb.1: + ; CHECK-LABEL: name: test_ugt_icmp_of_sext_and_sext + ; CHECK: %lhs1:_(s32) = COPY $w0 + ; CHECK-NEXT: %rhs1:_(s32) = COPY $w0 + ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(ugt), %lhs1(s32), %rhs1 + ; CHECK-NEXT: $w0 = COPY %res(s32) + %lhs1:_(s32) = COPY $w0 + %rhs1:_(s32) = COPY $w0 + %lhs:_(s64) = G_SEXT %lhs1 + %rhs:_(s64) = G_SEXT %rhs1 + %res:_(s32) = G_ICMP intpred(ugt), %lhs(s64), %rhs + $w0 = COPY %res(s32) +... +--- +name: test_uge_icmp_of_zero +body: | + bb.1: + ; CHECK-LABEL: name: test_uge_icmp_of_zero + ; CHECK: %res:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: $w0 = COPY %res(s32) + %lhs:_(s64) = COPY $x0 + %zero:_(s64) = G_CONSTANT i64 0 + %res:_(s32) = G_ICMP intpred(uge), %lhs(s64), %zero + $w0 = COPY %res(s32) +... +--- +name: test_slt_icmp_of_zero +body: | + bb.1: + ; CHECK-LABEL: name: test_slt_icmp_of_zero + ; CHECK: %lhs:_(s64) = COPY $x0 + ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(slt), %lhs(s64), %zero + ; CHECK-NEXT: $w0 = COPY %res(s32) + %lhs:_(s64) = COPY $x0 + %zero:_(s64) = G_CONSTANT i64 0 + %res:_(s32) = G_ICMP intpred(slt), %lhs(s64), %zero + $w0 = COPY %res(s32) +... +--- +name: test_ugt_icmp_of_zero_known_non_zero +body: | + bb.1: + ; CHECK-LABEL: name: test_ugt_icmp_of_zero_known_non_zero + ; CHECK: %res:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: $w0 = COPY %res(s32) + %amount:_(s64) = COPY $x0 + %zero:_(s64) = G_CONSTANT i64 0 + %lhs:_(s64) = G_SHL %zero, %amount + %res:_(s32) = G_ICMP intpred(ugt), %lhs(s64), %zero + $w0 = COPY %res(s32) +... 
+--- +name: test_ugt_icmp_of_zero_xor +body: | + bb.1: + ; CHECK-LABEL: name: test_ugt_icmp_of_zero_xor + ; CHECK: %x:_(s64) = COPY $x0 + ; CHECK-NEXT: %y:_(s64) = COPY $x0 + ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: %lhs:_(s64) = G_XOR %x, %y + ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(ugt), %lhs(s64), %zero + ; CHECK-NEXT: $w0 = COPY %res(s32) + %x:_(s64) = COPY $x0 + %y:_(s64) = COPY $x0 + %zero:_(s64) = G_CONSTANT i64 0 + %lhs:_(s64) = G_XOR %x, %y + %res:_(s32) = G_ICMP intpred(ugt), %lhs(s64), %zero + $w0 = COPY %res(s32) diff --git a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll index 50afc79a5a576..06e957fdcc6a2 100644 --- a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll +++ b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll @@ -457,20 +457,12 @@ sw.bb.i.i: } define i64 @select_and(i32 %w0, i32 %w1, i64 %x2, i64 %x3) { -; SDISEL-LABEL: select_and: -; SDISEL: ; %bb.0: -; SDISEL-NEXT: cmp w1, #5 -; SDISEL-NEXT: ccmp w0, w1, #0, ne -; SDISEL-NEXT: csel x0, x2, x3, lt -; SDISEL-NEXT: ret -; -; GISEL-LABEL: select_and: -; GISEL: ; %bb.0: -; GISEL-NEXT: mov w8, #5 ; =0x5 -; GISEL-NEXT: cmp w8, w1 -; GISEL-NEXT: ccmp w0, w1, #0, ne -; GISEL-NEXT: csel x0, x2, x3, lt -; GISEL-NEXT: ret +; CHECK-LABEL: select_and: +; CHECK: ; %bb.0: +; CHECK-NEXT: cmp w1, #5 +; CHECK-NEXT: ccmp w0, w1, #0, ne +; CHECK-NEXT: csel x0, x2, x3, lt +; CHECK-NEXT: ret %1 = icmp slt i32 %w0, %w1 %2 = icmp ne i32 5, %w1 %3 = and i1 %1, %2 @@ -479,20 +471,12 @@ define i64 @select_and(i32 %w0, i32 %w1, i64 %x2, i64 %x3) { } define i64 @select_or(i32 %w0, i32 %w1, i64 %x2, i64 %x3) { -; SDISEL-LABEL: select_or: -; SDISEL: ; %bb.0: -; SDISEL-NEXT: cmp w1, #5 -; SDISEL-NEXT: ccmp w0, w1, #8, eq -; SDISEL-NEXT: csel x0, x2, x3, lt -; SDISEL-NEXT: ret -; -; GISEL-LABEL: select_or: -; GISEL: ; %bb.0: -; GISEL-NEXT: mov w8, #5 ; =0x5 -; GISEL-NEXT: cmp w8, w1 -; GISEL-NEXT: ccmp w0, w1, #8, eq -; GISEL-NEXT: csel x0, x2, x3, lt -; GISEL-NEXT: ret +; CHECK-LABEL: select_or: +; CHECK: ; %bb.0: +; CHECK-NEXT: cmp w1, #5 +; CHECK-NEXT: ccmp w0, w1, #8, eq +; CHECK-NEXT: csel x0, x2, x3, lt +; CHECK-NEXT: ret %1 = icmp slt i32 %w0, %w1 %2 = icmp ne i32 5, %w1 %3 = or i1 %1, %2 @@ -501,20 +485,12 @@ define i64 @select_or(i32 %w0, i32 %w1, i64 %x2, i64 %x3) { } define float @select_or_float(i32 %w0, i32 %w1, float %x2, float %x3) { -; SDISEL-LABEL: select_or_float: -; SDISEL: ; %bb.0: -; SDISEL-NEXT: cmp w1, #5 -; SDISEL-NEXT: ccmp w0, w1, #8, eq -; SDISEL-NEXT: fcsel s0, s0, s1, lt -; SDISEL-NEXT: ret -; -; GISEL-LABEL: select_or_float: -; GISEL: ; %bb.0: -; GISEL-NEXT: mov w8, #5 ; =0x5 -; GISEL-NEXT: cmp w8, w1 -; GISEL-NEXT: ccmp w0, w1, #8, eq -; GISEL-NEXT: fcsel s0, s0, s1, lt -; GISEL-NEXT: ret +; CHECK-LABEL: select_or_float: +; CHECK: ; %bb.0: +; CHECK-NEXT: cmp w1, #5 +; CHECK-NEXT: ccmp w0, w1, #8, eq +; CHECK-NEXT: fcsel s0, s0, s1, lt +; CHECK-NEXT: ret %1 = icmp slt i32 %w0, %w1 %2 = icmp ne i32 5, %w1 %3 = or i1 %1, %2 diff --git a/llvm/test/CodeGen/AArch64/icmp2.ll b/llvm/test/CodeGen/AArch64/icmp2.ll new file mode 100644 index 0000000000000..1ab8d0e3331b3 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/icmp2.ll @@ -0,0 +1,295 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI 
+ +define i1 @i64_i64_canon(i64 %a, i64 %b) { +; CHECK-SD-LABEL: i64_i64_canon: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: cmp x0, #0 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: i64_i64_canon: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp x0, #0 +; CHECK-GI-NEXT: cset w0, ls +; CHECK-GI-NEXT: ret +entry: + %c = icmp uge i64 0, %a + ret i1 %c +} + +define <2 x i1> @i64_i64_canon_2x64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: i64_i64_canon_2x64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, .LCPI1_0 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0] +; CHECK-NEXT: cmhs v0.2d, v1.2d, v0.2d +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: ret +entry: + %c = icmp uge <2 x i64> , %a + ret <2 x i1> %c +} + +define i1 @i64_i64_undef_eq(i64 %a, i64 %b) { +; CHECK-LABEL: i64_i64_undef_eq: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +entry: + %c = icmp eq i64 %a, undef + ret i1 %c +} + +define i1 @i64_i64_slt_eq(i64 %a, i64 %b) { +; CHECK-LABEL: i64_i64_slt_eq: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +entry: + %c = icmp slt i64 %a, %a + ret i1 %c +} + +define i1 @i64_i64_not_eq_undef(i64 %a, i64 %b) { +; CHECK-LABEL: i64_i64_not_eq_undef: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +entry: + %c = icmp slt i64 %a, undef + ret i1 %c +} + +define i1 @i64_i64_sext(i32 %a, i32 %b) { +; CHECK-SD-LABEL: i64_i64_sext: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-SD-NEXT: sxtw x8, w0 +; CHECK-SD-NEXT: cmp x8, w1, sxtw +; CHECK-SD-NEXT: cset w0, lt +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: i64_i64_sext: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w0, lt +; CHECK-GI-NEXT: ret +entry: + %sextedlhs = sext i32 %a to i64 + %sextedrhs = sext i32 %b to i64 + %c = icmp slt i64 %sextedlhs, %sextedrhs + ret i1 %c +} + +define i1 @i64_i64_zext(i32 %a, i32 %b) { +; CHECK-SD-LABEL: i64_i64_zext: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: mov w8, w0 +; CHECK-SD-NEXT: cmp x8, w1, uxtw +; CHECK-SD-NEXT: cset w0, lt +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: i64_i64_zext: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w0, lo +; CHECK-GI-NEXT: ret +entry: + %zextedlhs = zext i32 %a to i64 + %zextedrhs = zext i32 %b to i64 + %c = icmp slt i64 %zextedlhs, %zextedrhs + ret i1 %c +} + +define i1 @i64_i64_ule_or(i64 %a, i64 %b, i64 %c) { +; CHECK-SD-LABEL: i64_i64_ule_or: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: orr x8, x2, #0x1 +; CHECK-SD-NEXT: cmp x8, #0 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: i64_i64_ule_or: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov w0, wzr +; CHECK-GI-NEXT: ret +entry: + %or = or i64 1, %c + %cmp = icmp ule i64 %or, 0 + ret i1 %cmp +} + +define i1 @i64_i64_ugt_or(i64 %a, i64 %b, i64 %c) { +; CHECK-SD-LABEL: i64_i64_ugt_or: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: orr x8, x2, #0x1 +; CHECK-SD-NEXT: cmp x8, #0 +; CHECK-SD-NEXT: cset w0, ne +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: i64_i64_ugt_or: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov w0, #1 // =0x1 +; CHECK-GI-NEXT: ret +entry: + %or = or i64 1, %c + %cmp = icmp ugt i64 %or, 0 + ret i1 %cmp +} + +define i1 @i64_i64_eq_or(i64 %a, i64 %b, i64 %c) { +; CHECK-SD-LABEL: i64_i64_eq_or: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: orr x8, x2, #0x1 +; CHECK-SD-NEXT: cmp x8, #0 +; CHECK-SD-NEXT: 
cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: i64_i64_eq_or: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov w0, wzr +; CHECK-GI-NEXT: ret +entry: + %or = or i64 1, %c + %cmp = icmp eq i64 %or, 0 + ret i1 %cmp +} + +define i1 @i64_i64_eq_freeze_or(i64 %a, i64 %b, i64 %c) { +; CHECK-SD-LABEL: i64_i64_eq_freeze_or: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: orr x8, x2, #0x1 +; CHECK-SD-NEXT: cmp x8, #0 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: i64_i64_eq_freeze_or: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov w0, wzr +; CHECK-GI-NEXT: ret +entry: + %or = or i64 1, %c + %free = freeze i64 %or + %cmp = icmp eq i64 %free, 0 + ret i1 %cmp +} + +define i1 @i64_i64_eq_freeze_add(i64 %a, i64 %b, i64 %c) { +; CHECK-LABEL: i64_i64_eq_freeze_add: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmn x2, #1 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret +entry: + %add = add nuw i64 1, %c + %free = freeze i64 %add + %cmp = icmp eq i64 %free, 0 + ret i1 %cmp +} + +define i1 @i64_i64_eq_lshr(i64 %a, i64 %b, i64 %c) { +; CHECK-LABEL: i64_i64_eq_lshr: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +entry: + %lshr = lshr exact i64 1, %c + %cmp = icmp eq i64 %lshr, 0 + ret i1 %cmp +} + +define i1 @i64_i64_eq_zext(i64 %a, i64 %b, i32 %c) { +; CHECK-SD-LABEL: i64_i64_eq_zext: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: orr w8, w2, #0x1 +; CHECK-SD-NEXT: cmp w8, #0 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: i64_i64_eq_zext: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov w0, wzr +; CHECK-GI-NEXT: ret +entry: + %or = or i32 1, %c + %ze = zext i32 %or to i64 + %cmp = icmp eq i64 %ze, 0 + ret i1 %cmp +} + +define i1 @i64_i64_canon_ule(i64 %a, i64 %b, i64 %c) { +; CHECK-LABEL: i64_i64_canon_ule: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w0, #1 // =0x1 +; CHECK-NEXT: ret +entry: + %cmp = icmp ule i64 0, %a + ret i1 %cmp +} + +define i1 @i64_i64_canon_ugt(i64 %a, i64 %b, i64 %c) { +; CHECK-LABEL: i64_i64_canon_ugt: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +entry: + %cmp = icmp ugt i64 0, %a + ret i1 %cmp +} + +define i1 @i64_i64_trunc_eq(i64 %a, i64 %b, i64 %c) { +; CHECK-SD-LABEL: i64_i64_trunc_eq: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: orr w8, w2, #0x1 +; CHECK-SD-NEXT: cmp w8, #0 +; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: i64_i64_trunc_eq: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov w0, wzr +; CHECK-GI-NEXT: ret +entry: + %or = or i64 1, %c + %tr = trunc nsw i64 %or to i32 + %cmp = icmp eq i32 %tr, 0 + ret i1 %cmp +} + +define i1 @i64_i64_umin_eq(i64 %a, i64 %b, i64 %c) { +; CHECK-LABEL: i64_i64_umin_eq: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: orr x8, x2, #0x1 +; CHECK-NEXT: orr x9, x2, #0x2 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: cmp x8, #0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret +entry: + %or1 = or i64 1, %c + %or2 = or i64 2, %c + %umin = call i64 @llvm.umin.i64(i64 %or1, i64 %or2) + %cmp = icmp eq i64 %umin, 0 + ret i1 %cmp +} + +define i1 @i64_i64_smin_eq(i64 %a, i64 %b, i64 %c) { +; CHECK-LABEL: i64_i64_smin_eq: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: orr x8, x2, #0x1 +; CHECK-NEXT: orr x9, x2, #0x2 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: csel x8, x8, x9, lt +; CHECK-NEXT: cmp x8, #0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret +entry: + %or1 = or i64 1, %c + %or2 = or i64 2, %c + %smin = call i64 @llvm.smin.i64(i64 %or1, i64 
%or2) + %cmp = icmp eq i64 %smin, 0 + ret i1 %cmp +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll index b1cdf553b7242..0b66185d25f3e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll @@ -253,7 +253,7 @@ define double @v_rcp_f64(double %x) { ; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; GFX6-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; GFX6-NEXT: v_mov_b32_e32 v10, 0x3ff00000 -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3 ; GFX6-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] @@ -346,7 +346,7 @@ define double @v_rcp_f64_arcp(double %x) { ; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; GFX6-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; GFX6-NEXT: v_mov_b32_e32 v10, 0x3ff00000 -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3 ; GFX6-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] @@ -483,7 +483,7 @@ define double @v_rcp_f64_ulp25(double %x) { ; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; GFX6-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; GFX6-NEXT: v_mov_b32_e32 v10, 0x3ff00000 -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3 ; GFX6-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] @@ -1115,7 +1115,7 @@ define <2 x double> @v_rcp_v2f64(<2 x double> %x) { ; GFX6-NEXT: v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0 ; GFX6-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15] ; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7] -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v20, v9 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v20 ; GFX6-NEXT: v_mul_f64 v[12:13], v[8:9], v[6:7] ; GFX6-NEXT: v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0 ; GFX6-NEXT: v_fma_f64 v[18:19], -v[4:5], v[12:13], v[8:9] @@ -1126,7 +1126,7 @@ define <2 x double> @v_rcp_v2f64(<2 x double> %x) { ; GFX6-NEXT: v_mul_f64 v[8:9], v[16:17], v[4:5] ; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[18:19], v[6:7], v[12:13] ; GFX6-NEXT: v_fma_f64 v[12:13], -v[10:11], v[8:9], v[16:17] -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v20, v17 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v17, v20 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v11 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0 @@ -1275,7 +1275,7 @@ define <2 x double> @v_rcp_v2f64_arcp(<2 x double> %x) { ; GFX6-NEXT: v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0 ; GFX6-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15] ; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7] -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v20, v9 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v20 ; GFX6-NEXT: v_mul_f64 v[12:13], v[8:9], v[6:7] ; GFX6-NEXT: v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0 ; GFX6-NEXT: v_fma_f64 v[18:19], -v[4:5], v[12:13], v[8:9] @@ -1286,7 +1286,7 @@ define <2 x double> @v_rcp_v2f64_arcp(<2 x double> %x) { ; GFX6-NEXT: v_mul_f64 v[8:9], v[16:17], v[4:5] ; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[18:19], v[6:7], v[12:13] ; GFX6-NEXT: v_fma_f64 v[12:13], -v[10:11], v[8:9], v[16:17] -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v20, v17 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v17, v20 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v11 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; GFX6-NEXT: 
v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0 @@ -1502,7 +1502,7 @@ define <2 x double> @v_rcp_v2f64_ulp25(<2 x double> %x) { ; GFX6-NEXT: v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0 ; GFX6-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15] ; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7] -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v20, v9 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v20 ; GFX6-NEXT: v_mul_f64 v[12:13], v[8:9], v[6:7] ; GFX6-NEXT: v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0 ; GFX6-NEXT: v_fma_f64 v[18:19], -v[4:5], v[12:13], v[8:9] @@ -1513,7 +1513,7 @@ define <2 x double> @v_rcp_v2f64_ulp25(<2 x double> %x) { ; GFX6-NEXT: v_mul_f64 v[8:9], v[16:17], v[4:5] ; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[18:19], v[6:7], v[12:13] ; GFX6-NEXT: v_fma_f64 v[12:13], -v[10:11], v[8:9], v[16:17] -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v20, v17 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v17, v20 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v11 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0 diff --git a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll index c5198cdb421a5..98f09db4925ec 100644 --- a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll +++ b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll @@ -138,7 +138,7 @@ define float @sitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] ; GISEL-NEXT: s_mov_b32 s4, 0 ; GISEL-NEXT: v_mov_b32_e32 v4, s4 -; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GISEL-NEXT: s_cbranch_execz .LBB0_14 ; GISEL-NEXT: ; %bb.1: ; %itofp-if-end ; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v3 @@ -165,8 +165,8 @@ define float @sitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: v_sub_u32_e32 v7, 0x7f, v5 ; GISEL-NEXT: v_cmp_ge_i32_e32 vcc, 24, v8 ; GISEL-NEXT: ; implicit-def: $vgpr4 -; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GISEL-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GISEL-NEXT: s_xor_b64 s[6:7], exec, s[6:7] ; GISEL-NEXT: ; %bb.2: ; %itofp-if-else ; GISEL-NEXT: v_add_u32_e32 v2, 0xffffff98, v5 ; GISEL-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1] @@ -177,16 +177,16 @@ define float @sitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: ; implicit-def: $vgpr5 ; GISEL-NEXT: ; implicit-def: $vgpr2 ; GISEL-NEXT: ; %bb.3: ; %Flow3 -; GISEL-NEXT: s_andn2_saveexec_b64 s[8:9], s[4:5] +; GISEL-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7] ; GISEL-NEXT: s_cbranch_execz .LBB0_13 ; GISEL-NEXT: ; %bb.4: ; %NodeBlock ; GISEL-NEXT: v_cmp_le_i32_e32 vcc, 26, v8 -; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GISEL-NEXT: s_xor_b64 s[10:11], exec, s[4:5] +; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] ; GISEL-NEXT: s_cbranch_execz .LBB0_8 ; GISEL-NEXT: ; %bb.5: ; %LeafBlock ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 26, v8 -; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc +; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc ; GISEL-NEXT: s_cbranch_execz .LBB0_7 ; GISEL-NEXT: ; %bb.6: ; %itofp-sw-default ; GISEL-NEXT: v_sub_u32_e32 v4, 0x66, v5 @@ -212,13 +212,10 @@ define float @sitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: v_or_b32_e32 v16, v10, v12 ; GISEL-NEXT: v_lshrrev_b64 v[11:12], v14, -1 ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v5 -; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v15, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5 ; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v10, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v5, v11, 
-1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v11, v12, -1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v5, v11, v15, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v11, v12, v16, vcc ; GISEL-NEXT: v_and_b32_e32 v2, v9, v2 ; GISEL-NEXT: v_and_b32_e32 v3, v10, v3 ; GISEL-NEXT: v_and_or_b32 v0, v5, v0, v2 @@ -231,13 +228,13 @@ define float @sitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: v_mov_b32_e32 v2, v5 ; GISEL-NEXT: v_mov_b32_e32 v3, v6 ; GISEL-NEXT: .LBB0_7: ; %Flow1 -; GISEL-NEXT: s_or_b64 exec, exec, s[12:13] +; GISEL-NEXT: s_or_b64 exec, exec, s[10:11] ; GISEL-NEXT: .LBB0_8: ; %Flow2 -; GISEL-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11] +; GISEL-NEXT: s_andn2_saveexec_b64 s[8:9], s[8:9] ; GISEL-NEXT: ; %bb.9: ; %itofp-sw-bb ; GISEL-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] ; GISEL-NEXT: ; %bb.10: ; %itofp-sw-epilog -; GISEL-NEXT: s_or_b64 exec, exec, s[4:5] +; GISEL-NEXT: s_or_b64 exec, exec, s[8:9] ; GISEL-NEXT: v_bfe_u32 v2, v0, 2, 1 ; GISEL-NEXT: v_or_b32_e32 v0, v0, v2 ; GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0 @@ -246,20 +243,20 @@ define float @sitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: v_mov_b32_e32 v3, 0 ; GISEL-NEXT: v_lshrrev_b64 v[4:5], 2, v[0:1] ; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] -; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GISEL-NEXT: ; %bb.11: ; %itofp-if-then20 ; GISEL-NEXT: v_lshrrev_b64 v[4:5], 3, v[0:1] ; GISEL-NEXT: v_mov_b32_e32 v7, v8 ; GISEL-NEXT: ; %bb.12: ; %Flow -; GISEL-NEXT: s_or_b64 exec, exec, s[4:5] -; GISEL-NEXT: .LBB0_13: ; %Flow4 ; GISEL-NEXT: s_or_b64 exec, exec, s[8:9] +; GISEL-NEXT: .LBB0_13: ; %Flow4 +; GISEL-NEXT: s_or_b64 exec, exec, s[6:7] ; GISEL-NEXT: v_and_b32_e32 v0, 0x80000000, v6 ; GISEL-NEXT: v_lshl_add_u32 v1, v7, 23, 1.0 ; GISEL-NEXT: v_and_b32_e32 v2, 0x7fffff, v4 ; GISEL-NEXT: v_or3_b32 v4, v2, v0, v1 ; GISEL-NEXT: .LBB0_14: ; %Flow5 -; GISEL-NEXT: s_or_b64 exec, exec, s[6:7] +; GISEL-NEXT: s_or_b64 exec, exec, s[4:5] ; GISEL-NEXT: v_mov_b32_e32 v0, v4 ; GISEL-NEXT: s_setpc_b64 s[30:31] %cvt = sitofp i128 %x to float @@ -392,7 +389,7 @@ define float @uitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] ; GISEL-NEXT: s_mov_b32 s4, 0 ; GISEL-NEXT: v_mov_b32_e32 v4, s4 -; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GISEL-NEXT: s_cbranch_execz .LBB1_14 ; GISEL-NEXT: ; %bb.1: ; %itofp-if-end ; GISEL-NEXT: v_ffbh_u32_e32 v5, v0 @@ -410,8 +407,8 @@ define float @uitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: v_sub_u32_e32 v6, 0x7f, v5 ; GISEL-NEXT: v_cmp_ge_i32_e32 vcc, 24, v7 ; GISEL-NEXT: ; implicit-def: $vgpr4 -; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GISEL-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GISEL-NEXT: s_xor_b64 s[6:7], exec, s[6:7] ; GISEL-NEXT: ; %bb.2: ; %itofp-if-else ; GISEL-NEXT: v_add_u32_e32 v2, 0xffffff98, v5 ; GISEL-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1] @@ -422,16 +419,16 @@ define float @uitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: ; implicit-def: $vgpr5 ; GISEL-NEXT: ; implicit-def: $vgpr2 ; GISEL-NEXT: ; %bb.3: ; %Flow3 -; GISEL-NEXT: s_andn2_saveexec_b64 s[8:9], s[4:5] +; GISEL-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7] ; GISEL-NEXT: s_cbranch_execz .LBB1_13 ; GISEL-NEXT: ; %bb.4: ; %NodeBlock ; GISEL-NEXT: v_cmp_le_i32_e32 vcc, 26, v7 -; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GISEL-NEXT: s_xor_b64 s[10:11], exec, s[4:5] +; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] ; GISEL-NEXT: s_cbranch_execz .LBB1_8 ; 
GISEL-NEXT: ; %bb.5: ; %LeafBlock ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 26, v7 -; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc +; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc ; GISEL-NEXT: s_cbranch_execz .LBB1_7 ; GISEL-NEXT: ; %bb.6: ; %itofp-sw-default ; GISEL-NEXT: v_sub_u32_e32 v4, 0x66, v5 @@ -457,13 +454,10 @@ define float @uitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: v_or_b32_e32 v15, v9, v11 ; GISEL-NEXT: v_lshrrev_b64 v[10:11], v13, -1 ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v5 -; GISEL-NEXT: v_cndmask_b32_e32 v10, v10, v14, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v15, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5 ; GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v8, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v5, v10, -1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v10, v11, -1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v5, v10, v14, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v10, v11, v15, vcc ; GISEL-NEXT: v_and_b32_e32 v2, v8, v2 ; GISEL-NEXT: v_and_b32_e32 v3, v9, v3 ; GISEL-NEXT: v_and_or_b32 v0, v5, v0, v2 @@ -476,13 +470,13 @@ define float @uitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: v_mov_b32_e32 v2, v5 ; GISEL-NEXT: v_mov_b32_e32 v3, v6 ; GISEL-NEXT: .LBB1_7: ; %Flow1 -; GISEL-NEXT: s_or_b64 exec, exec, s[12:13] +; GISEL-NEXT: s_or_b64 exec, exec, s[10:11] ; GISEL-NEXT: .LBB1_8: ; %Flow2 -; GISEL-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11] +; GISEL-NEXT: s_andn2_saveexec_b64 s[8:9], s[8:9] ; GISEL-NEXT: ; %bb.9: ; %itofp-sw-bb ; GISEL-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] ; GISEL-NEXT: ; %bb.10: ; %itofp-sw-epilog -; GISEL-NEXT: s_or_b64 exec, exec, s[4:5] +; GISEL-NEXT: s_or_b64 exec, exec, s[8:9] ; GISEL-NEXT: v_bfe_u32 v2, v0, 2, 1 ; GISEL-NEXT: v_or_b32_e32 v0, v0, v2 ; GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0 @@ -491,19 +485,19 @@ define float @uitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: v_mov_b32_e32 v3, 0 ; GISEL-NEXT: v_lshrrev_b64 v[4:5], 2, v[0:1] ; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] -; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GISEL-NEXT: ; %bb.11: ; %itofp-if-then20 ; GISEL-NEXT: v_lshrrev_b64 v[4:5], 3, v[0:1] ; GISEL-NEXT: v_mov_b32_e32 v6, v7 ; GISEL-NEXT: ; %bb.12: ; %Flow -; GISEL-NEXT: s_or_b64 exec, exec, s[4:5] -; GISEL-NEXT: .LBB1_13: ; %Flow4 ; GISEL-NEXT: s_or_b64 exec, exec, s[8:9] +; GISEL-NEXT: .LBB1_13: ; %Flow4 +; GISEL-NEXT: s_or_b64 exec, exec, s[6:7] ; GISEL-NEXT: v_lshl_add_u32 v0, v6, 23, 1.0 ; GISEL-NEXT: v_mov_b32_e32 v1, 0x7fffff ; GISEL-NEXT: v_and_or_b32 v4, v4, v1, v0 ; GISEL-NEXT: .LBB1_14: ; %Flow5 -; GISEL-NEXT: s_or_b64 exec, exec, s[6:7] +; GISEL-NEXT: s_or_b64 exec, exec, s[4:5] ; GISEL-NEXT: v_mov_b32_e32 v0, v4 ; GISEL-NEXT: s_setpc_b64 s[30:31] %cvt = uitofp i128 %x to float @@ -744,13 +738,10 @@ define double @sitofp_i128_to_f64(i128 %x) { ; GISEL-NEXT: v_or_b32_e32 v17, v10, v12 ; GISEL-NEXT: v_lshrrev_b64 v[11:12], v15, -1 ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14 -; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v16, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v17, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14 ; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v10, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v11, v11, -1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v12, v12, -1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v16, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v17, vcc ; GISEL-NEXT: v_and_b32_e32 v2, v9, v2 ; GISEL-NEXT: v_and_b32_e32 v3, v10, v3 ; GISEL-NEXT: v_and_or_b32 v0, v11, v0, v2 
@@ -1021,13 +1012,10 @@ define double @uitofp_i128_to_f64(i128 %x) { ; GISEL-NEXT: v_or_b32_e32 v17, v5, v13 ; GISEL-NEXT: v_lshrrev_b64 v[12:13], v15, -1 ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v8 -; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v13, v13, v17, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8 ; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v5, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v8, v12, -1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v12, v13, -1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v8, v12, v16, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v12, v13, v17, vcc ; GISEL-NEXT: v_and_b32_e32 v2, v4, v2 ; GISEL-NEXT: v_and_b32_e32 v3, v5, v3 ; GISEL-NEXT: v_and_or_b32 v0, v8, v0, v2 @@ -1229,7 +1217,7 @@ define half @sitofp_i128_to_f16(i128 %x) { ; GISEL-NEXT: s_mov_b32 s4, 0 ; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] ; GISEL-NEXT: v_mov_b32_e32 v4, s4 -; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GISEL-NEXT: s_cbranch_execz .LBB4_14 ; GISEL-NEXT: ; %bb.1: ; %itofp-if-end ; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v3 @@ -1256,8 +1244,8 @@ define half @sitofp_i128_to_f16(i128 %x) { ; GISEL-NEXT: v_sub_u32_e32 v7, 0x7f, v5 ; GISEL-NEXT: v_cmp_ge_i32_e32 vcc, 24, v8 ; GISEL-NEXT: ; implicit-def: $vgpr4 -; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GISEL-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GISEL-NEXT: s_xor_b64 s[6:7], exec, s[6:7] ; GISEL-NEXT: ; %bb.2: ; %itofp-if-else ; GISEL-NEXT: v_add_u32_e32 v2, 0xffffff98, v5 ; GISEL-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1] @@ -1268,16 +1256,16 @@ define half @sitofp_i128_to_f16(i128 %x) { ; GISEL-NEXT: ; implicit-def: $vgpr5 ; GISEL-NEXT: ; implicit-def: $vgpr2 ; GISEL-NEXT: ; %bb.3: ; %Flow3 -; GISEL-NEXT: s_andn2_saveexec_b64 s[8:9], s[4:5] +; GISEL-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7] ; GISEL-NEXT: s_cbranch_execz .LBB4_13 ; GISEL-NEXT: ; %bb.4: ; %NodeBlock ; GISEL-NEXT: v_cmp_le_i32_e32 vcc, 26, v8 -; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GISEL-NEXT: s_xor_b64 s[10:11], exec, s[4:5] +; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] ; GISEL-NEXT: s_cbranch_execz .LBB4_8 ; GISEL-NEXT: ; %bb.5: ; %LeafBlock ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 26, v8 -; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc +; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc ; GISEL-NEXT: s_cbranch_execz .LBB4_7 ; GISEL-NEXT: ; %bb.6: ; %itofp-sw-default ; GISEL-NEXT: v_sub_u32_e32 v4, 0x66, v5 @@ -1303,13 +1291,10 @@ define half @sitofp_i128_to_f16(i128 %x) { ; GISEL-NEXT: v_or_b32_e32 v16, v10, v12 ; GISEL-NEXT: v_lshrrev_b64 v[11:12], v14, -1 ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v5 -; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v15, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5 ; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v10, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v5, v11, -1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v11, v12, -1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v5, v11, v15, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v11, v12, v16, vcc ; GISEL-NEXT: v_and_b32_e32 v2, v9, v2 ; GISEL-NEXT: v_and_b32_e32 v3, v10, v3 ; GISEL-NEXT: v_and_or_b32 v0, v5, v0, v2 @@ -1322,13 +1307,13 @@ define half @sitofp_i128_to_f16(i128 %x) { ; GISEL-NEXT: v_mov_b32_e32 v2, v5 ; GISEL-NEXT: v_mov_b32_e32 v3, v6 ; GISEL-NEXT: .LBB4_7: ; %Flow1 -; GISEL-NEXT: s_or_b64 exec, 
exec, s[12:13] +; GISEL-NEXT: s_or_b64 exec, exec, s[10:11] ; GISEL-NEXT: .LBB4_8: ; %Flow2 -; GISEL-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11] +; GISEL-NEXT: s_andn2_saveexec_b64 s[8:9], s[8:9] ; GISEL-NEXT: ; %bb.9: ; %itofp-sw-bb ; GISEL-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] ; GISEL-NEXT: ; %bb.10: ; %itofp-sw-epilog -; GISEL-NEXT: s_or_b64 exec, exec, s[4:5] +; GISEL-NEXT: s_or_b64 exec, exec, s[8:9] ; GISEL-NEXT: v_bfe_u32 v2, v0, 2, 1 ; GISEL-NEXT: v_or_b32_e32 v0, v0, v2 ; GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0 @@ -1337,21 +1322,21 @@ define half @sitofp_i128_to_f16(i128 %x) { ; GISEL-NEXT: v_mov_b32_e32 v3, 0 ; GISEL-NEXT: v_lshrrev_b64 v[4:5], 2, v[0:1] ; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] -; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GISEL-NEXT: ; %bb.11: ; %itofp-if-then20 ; GISEL-NEXT: v_lshrrev_b64 v[4:5], 3, v[0:1] ; GISEL-NEXT: v_mov_b32_e32 v7, v8 ; GISEL-NEXT: ; %bb.12: ; %Flow -; GISEL-NEXT: s_or_b64 exec, exec, s[4:5] -; GISEL-NEXT: .LBB4_13: ; %Flow4 ; GISEL-NEXT: s_or_b64 exec, exec, s[8:9] +; GISEL-NEXT: .LBB4_13: ; %Flow4 +; GISEL-NEXT: s_or_b64 exec, exec, s[6:7] ; GISEL-NEXT: v_and_b32_e32 v0, 0x80000000, v6 ; GISEL-NEXT: v_lshl_add_u32 v1, v7, 23, 1.0 ; GISEL-NEXT: v_and_b32_e32 v2, 0x7fffff, v4 ; GISEL-NEXT: v_or3_b32 v0, v2, v0, v1 ; GISEL-NEXT: v_cvt_f16_f32_e32 v4, v0 ; GISEL-NEXT: .LBB4_14: ; %Flow5 -; GISEL-NEXT: s_or_b64 exec, exec, s[6:7] +; GISEL-NEXT: s_or_b64 exec, exec, s[4:5] ; GISEL-NEXT: v_mov_b32_e32 v0, v4 ; GISEL-NEXT: s_setpc_b64 s[30:31] %cvt = sitofp i128 %x to half @@ -1485,7 +1470,7 @@ define half @uitofp_i128_to_f16(i128 %x) { ; GISEL-NEXT: s_mov_b32 s4, 0 ; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] ; GISEL-NEXT: v_mov_b32_e32 v4, s4 -; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GISEL-NEXT: s_cbranch_execz .LBB5_14 ; GISEL-NEXT: ; %bb.1: ; %itofp-if-end ; GISEL-NEXT: v_ffbh_u32_e32 v5, v0 @@ -1503,8 +1488,8 @@ define half @uitofp_i128_to_f16(i128 %x) { ; GISEL-NEXT: v_sub_u32_e32 v6, 0x7f, v5 ; GISEL-NEXT: v_cmp_ge_i32_e32 vcc, 24, v7 ; GISEL-NEXT: ; implicit-def: $vgpr4 -; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GISEL-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GISEL-NEXT: s_xor_b64 s[6:7], exec, s[6:7] ; GISEL-NEXT: ; %bb.2: ; %itofp-if-else ; GISEL-NEXT: v_add_u32_e32 v2, 0xffffff98, v5 ; GISEL-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1] @@ -1515,16 +1500,16 @@ define half @uitofp_i128_to_f16(i128 %x) { ; GISEL-NEXT: ; implicit-def: $vgpr5 ; GISEL-NEXT: ; implicit-def: $vgpr2 ; GISEL-NEXT: ; %bb.3: ; %Flow3 -; GISEL-NEXT: s_andn2_saveexec_b64 s[8:9], s[4:5] +; GISEL-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7] ; GISEL-NEXT: s_cbranch_execz .LBB5_13 ; GISEL-NEXT: ; %bb.4: ; %NodeBlock ; GISEL-NEXT: v_cmp_le_i32_e32 vcc, 26, v7 -; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GISEL-NEXT: s_xor_b64 s[10:11], exec, s[4:5] +; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] ; GISEL-NEXT: s_cbranch_execz .LBB5_8 ; GISEL-NEXT: ; %bb.5: ; %LeafBlock ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 26, v7 -; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc +; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc ; GISEL-NEXT: s_cbranch_execz .LBB5_7 ; GISEL-NEXT: ; %bb.6: ; %itofp-sw-default ; GISEL-NEXT: v_sub_u32_e32 v4, 0x66, v5 @@ -1550,13 +1535,10 @@ define half @uitofp_i128_to_f16(i128 %x) { ; GISEL-NEXT: v_or_b32_e32 v15, v9, v11 ; GISEL-NEXT: v_lshrrev_b64 v[10:11], v13, -1 
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v5 -; GISEL-NEXT: v_cndmask_b32_e32 v10, v10, v14, vcc -; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v15, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5 ; GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v8, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v5, v10, -1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v10, v11, -1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v5, v10, v14, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v10, v11, v15, vcc ; GISEL-NEXT: v_and_b32_e32 v2, v8, v2 ; GISEL-NEXT: v_and_b32_e32 v3, v9, v3 ; GISEL-NEXT: v_and_or_b32 v0, v5, v0, v2 @@ -1569,13 +1551,13 @@ define half @uitofp_i128_to_f16(i128 %x) { ; GISEL-NEXT: v_mov_b32_e32 v2, v5 ; GISEL-NEXT: v_mov_b32_e32 v3, v6 ; GISEL-NEXT: .LBB5_7: ; %Flow1 -; GISEL-NEXT: s_or_b64 exec, exec, s[12:13] +; GISEL-NEXT: s_or_b64 exec, exec, s[10:11] ; GISEL-NEXT: .LBB5_8: ; %Flow2 -; GISEL-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11] +; GISEL-NEXT: s_andn2_saveexec_b64 s[8:9], s[8:9] ; GISEL-NEXT: ; %bb.9: ; %itofp-sw-bb ; GISEL-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] ; GISEL-NEXT: ; %bb.10: ; %itofp-sw-epilog -; GISEL-NEXT: s_or_b64 exec, exec, s[4:5] +; GISEL-NEXT: s_or_b64 exec, exec, s[8:9] ; GISEL-NEXT: v_bfe_u32 v2, v0, 2, 1 ; GISEL-NEXT: v_or_b32_e32 v0, v0, v2 ; GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0 @@ -1584,20 +1566,20 @@ define half @uitofp_i128_to_f16(i128 %x) { ; GISEL-NEXT: v_mov_b32_e32 v3, 0 ; GISEL-NEXT: v_lshrrev_b64 v[4:5], 2, v[0:1] ; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] -; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GISEL-NEXT: ; %bb.11: ; %itofp-if-then20 ; GISEL-NEXT: v_lshrrev_b64 v[4:5], 3, v[0:1] ; GISEL-NEXT: v_mov_b32_e32 v6, v7 ; GISEL-NEXT: ; %bb.12: ; %Flow -; GISEL-NEXT: s_or_b64 exec, exec, s[4:5] -; GISEL-NEXT: .LBB5_13: ; %Flow4 ; GISEL-NEXT: s_or_b64 exec, exec, s[8:9] +; GISEL-NEXT: .LBB5_13: ; %Flow4 +; GISEL-NEXT: s_or_b64 exec, exec, s[6:7] ; GISEL-NEXT: v_lshl_add_u32 v0, v6, 23, 1.0 ; GISEL-NEXT: v_mov_b32_e32 v1, 0x7fffff ; GISEL-NEXT: v_and_or_b32 v0, v4, v1, v0 ; GISEL-NEXT: v_cvt_f16_f32_e32 v4, v0 ; GISEL-NEXT: .LBB5_14: ; %Flow5 -; GISEL-NEXT: s_or_b64 exec, exec, s[6:7] +; GISEL-NEXT: s_or_b64 exec, exec, s[4:5] ; GISEL-NEXT: v_mov_b32_e32 v0, v4 ; GISEL-NEXT: s_setpc_b64 s[30:31] %cvt = uitofp i128 %x to half diff --git a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll index bd6e1f54e636d..8f4a4b5afcdc1 100644 --- a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll @@ -87,7 +87,7 @@ define amdgpu_ps <2 x i32> @s_rsq_f64(double inreg %x) { ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[0:1], 1.0, v[0:1], 1.0 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3 -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[0:1] ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] @@ -267,7 +267,7 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_fabs(double inreg %x) { ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[0:1], 1.0, v[0:1], 1.0 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3 -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[0:1] ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], 
v[6:7], v[4:5] @@ -448,7 +448,7 @@ define amdgpu_ps <2 x i32> @s_neg_rsq_f64(double inreg %x) { ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[0:1], -1.0, v[0:1], -1.0 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3 -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[0:1] ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] @@ -628,7 +628,7 @@ define amdgpu_ps <2 x i32> @s_neg_rsq_neg_f64(double inreg %x) { ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[0:1], -1.0, v[0:1], -1.0 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3 -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[0:1] ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] @@ -809,7 +809,7 @@ define double @v_rsq_f64(double %x) { ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3 -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] @@ -978,7 +978,7 @@ define double @v_rsq_f64_fabs(double %x) { ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3 -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] @@ -1148,7 +1148,7 @@ define double @v_rsq_f64_missing_contract0(double %x) { ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3 -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] @@ -1317,7 +1317,7 @@ define double @v_rsq_f64_missing_contract1(double %x) { ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3 -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] @@ -1486,7 +1486,7 @@ define double @v_neg_rsq_f64(double %x) { ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], -1.0, v[0:1], -1.0 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3 -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] @@ -1716,7 +1716,7 @@ define <2 x double> @v_rsq_v2f64(<2 x double> %x) { ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[6:7], 
v[4:5], v[6:7] ; SI-GISEL-NEXT: v_rcp_f64_e32 v[6:7], v[8:9] ; SI-GISEL-NEXT: v_mul_f64 v[14:15], v[12:13], v[4:5] -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v20, v13 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v13, v20 ; SI-GISEL-NEXT: v_fma_f64 v[16:17], -v[10:11], v[14:15], v[12:13] ; SI-GISEL-NEXT: v_fma_f64 v[18:19], -v[8:9], v[6:7], 1.0 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[18:19], v[6:7] @@ -1728,7 +1728,7 @@ define <2 x double> @v_rsq_v2f64(<2 x double> %x) { ; SI-GISEL-NEXT: v_mul_f64 v[10:11], v[18:19], v[6:7] ; SI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[16:17], v[4:5], v[14:15] ; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[10:11], v[18:19] -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v20, v19 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v19, v20 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0 @@ -2019,7 +2019,7 @@ define <2 x double> @v_neg_rsq_v2f64(<2 x double> %x) { ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[6:7], v[4:5], v[6:7] ; SI-GISEL-NEXT: v_rcp_f64_e32 v[6:7], v[8:9] ; SI-GISEL-NEXT: v_mul_f64 v[14:15], v[12:13], v[4:5] -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v20, v13 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v13, v20 ; SI-GISEL-NEXT: v_fma_f64 v[16:17], -v[10:11], v[14:15], v[12:13] ; SI-GISEL-NEXT: v_fma_f64 v[18:19], -v[8:9], v[6:7], 1.0 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[18:19], v[6:7] @@ -2031,7 +2031,7 @@ define <2 x double> @v_neg_rsq_v2f64(<2 x double> %x) { ; SI-GISEL-NEXT: v_mul_f64 v[10:11], v[18:19], v[6:7] ; SI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[16:17], v[4:5], v[14:15] ; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[10:11], v[18:19] -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v20, v19 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v19, v20 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0 @@ -2293,7 +2293,7 @@ define <2 x double> @v_neg_rsq_v2f64_poisonelt(<2 x double> %x) { ; SI-GISEL-NEXT: v_fma_f64 v[18:19], -v[8:9], v[6:7], 1.0 ; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0xbff00000 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[18:19], v[6:7] -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v13 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v13, v10 ; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[6:7], 1.0 ; SI-GISEL-NEXT: v_div_scale_f64 v[18:19], s[4:5], s[4:5], v[2:3], s[4:5] ; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7] @@ -2568,7 +2568,7 @@ define <2 x double> @v_neg_pos_rsq_v2f64(<2 x double> %x) { ; SI-GISEL-NEXT: v_fma_f64 v[18:19], -v[8:9], v[6:7], 1.0 ; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0xbff00000 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[18:19], v[6:7] -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v13 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v13, v10 ; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[6:7], 1.0 ; SI-GISEL-NEXT: v_div_scale_f64 v[18:19], s[4:5], 1.0, v[2:3], 1.0 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7] @@ -2578,7 +2578,7 @@ define <2 x double> @v_neg_pos_rsq_v2f64(<2 x double> %x) { ; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[10:11], v[18:19] ; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0x3ff00000 ; SI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[16:17], v[4:5], v[14:15] -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v8, v19 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v19, v8 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0 @@ -2808,7 +2808,7 @@ 
define double @v_rsq_f64_fneg_fabs(double %x) { ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3 -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] @@ -2979,7 +2979,7 @@ define double @v_rsq_f64__afn_sqrt(double %x) { ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3 -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] @@ -4167,7 +4167,7 @@ define double @v_rsq_f64__nnan_ninf(double %x) { ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3 -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] @@ -4846,7 +4846,7 @@ define double @v_rsq_amdgcn_sqrt_f64(double %x) { ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3 -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] @@ -4927,7 +4927,7 @@ define double @v_neg_rsq_amdgcn_sqrt_f64(double %x) { ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], -1.0, v[0:1], -1.0 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3 -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] @@ -5008,7 +5008,7 @@ define amdgpu_ps <2 x i32> @s_rsq_amdgcn_sqrt_f64(double inreg %x) { ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[0:1], 1.0, v[0:1], 1.0 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3 -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[0:1] ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] @@ -5649,7 +5649,7 @@ define double @v_div_const_contract_sqrt_f64(double %x) { ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], s[6:7], v[0:1], s[6:7] ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3 -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] From eb751b6f7d9af5514e35613b55525dbac00fdd70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Sun, 25 Aug 
2024 19:46:44 +0200 Subject: [PATCH 2/7] remove debug leftover --- .../CodeGen/GlobalISel/CombinerHelperCompares.cpp | 13 ------------- llvm/lib/CodeGen/GlobalISel/Utils.cpp | 3 --- 2 files changed, 16 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp index 415768fb07e59..9fa7c347d917e 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp @@ -187,19 +187,6 @@ bool CombinerHelper::matchZextOfICmp(const MachineInstr &MI, Register RHS = Cmp->getRHSReg(); CmpInst::Predicate Pred = Cmp->getCond(); - /* - %x:_(p0) = COPY $x0 - %y:_(p0) = COPY $x1 - %zero:_(p0) = G_CONSTANT i64 0 - %cmp1:_(s1) = G_ICMP intpred(eq), %x:_(p0), %zero:_ - */ - - if (MRI.getType(LHS).isPointer() || MRI.getType(RHS).isPointer()) - return false; - - if (!MRI.getType(LHS).isScalar() || !MRI.getType(RHS).isScalar()) - return false; - GZext *ZL = cast(MRI.getVRegDef(LHS)); GZext *ZR = cast(MRI.getVRegDef(RHS)); diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 5e247c1210113..3a94f4adb9e4e 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -2024,9 +2024,6 @@ bool llvm::isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI, if (Ty.isPointer()) return false; - if (!Ty.isScalar()) - errs() << "type: " << Ty << '\n'; - assert(Ty.isScalar() && "Expected a scalar value"); return ::isKnownNonZero(Reg, MRI, KB, Depth); } From 39e49c7138c4d6aa6cb018695ff4598ef0eb0451 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Sun, 25 Aug 2024 22:29:45 +0200 Subject: [PATCH 3/7] style fix --- llvm/lib/CodeGen/GlobalISel/Utils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 3a94f4adb9e4e..6b67f6567525c 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -2148,7 +2148,7 @@ static bool isKnownNonZeroCastOp(const GCastOp &CastOp, case TargetOpcode::G_SEXT: case TargetOpcode::G_ZEXT: // ext X != 0 if X != 0. - return isKnownNonZero(CastOp.getSrcReg(), MRI, KB); + return ::isKnownNonZero(CastOp.getSrcReg(), MRI, KB, Depth); case Instruction::Trunc: // nuw/nsw trunc preserves zero/non-zero status of input. 
if (CastOp.getFlag(MachineInstr::MIFlag::NoSWrap) || From ef71a605a7eeca57a5a2609e5669fbc283db52aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Mon, 26 Aug 2024 07:05:36 +0200 Subject: [PATCH 4/7] address review comments --- .../include/llvm/Target/GlobalISel/Combine.td | 4 +- .../GlobalISel/CombinerHelperCompares.cpp | 3 +- llvm/lib/CodeGen/GlobalISel/Utils.cpp | 46 +++++++------------ 3 files changed, 20 insertions(+), 33 deletions(-) diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 175a8ed57b266..ef4ca8a2552f4 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -1981,8 +1981,8 @@ def constant_fold_binops : GICombineGroup<[constant_fold_binop, def prefer_sign_combines : GICombineGroup<[nneg_zext]>; -def all_combines : GICombineGroup<[icmp_combines, integer_reassoc_combines, trivial_combines, - vector_ops_combines, freeze_combines, cast_combines, +def all_combines : GICombineGroup<[icmp_combines, integer_reassoc_combines, + trivial_combines, vector_ops_combines, freeze_combines, cast_combines, insert_vec_elt_combines, extract_vec_elt_combines, combines_for_extload, combine_extracted_vector_load, undef_combines, identity_combines, phi_combines, diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp index 9fa7c347d917e..57f4804fa10f6 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// This file implements CombinerHelper for G_ICMP +// This file implements CombinerHelper for G_ICMP. 
// //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" @@ -116,7 +116,6 @@ bool CombinerHelper::visitICmp(const MachineInstr &MI, BuildFnTy &MatchInfo) { return true; } - [[maybe_unused]] MachineInstr *MILHS = MRI.getVRegDef(LHS); MachineInstr *MIRHS = MRI.getVRegDef(RHS); // For EQ and NE, we can always pick a value for the undef to make the diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 6b67f6567525c..6f7bc97b55dc8 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -2030,34 +2030,22 @@ bool llvm::isKnownNonZero(Register Reg, const MachineRegisterInfo &MRI, static bool matchOpWithOpEqZero(Register Op0, Register Op1, const MachineRegisterInfo &MRI) { - MachineInstr *MI = MRI.getVRegDef(Op0); - - bool Result = false; - - if (GZextOrSextOp *ZS = dyn_cast(MI)) { - MachineInstr *SrcMI = MRI.getVRegDef(ZS->getSrcReg()); - if (GICmp *Cmp = dyn_cast(SrcMI)) { - std::optional MayBeConstant = - getIConstantVRegValWithLookThrough(Cmp->getRHSReg(), MRI); - if (MayBeConstant) - Result |= (MayBeConstant->Value == 0) && (Cmp->getLHSReg() == Op1) && - (Cmp->getCond() == ICmpInst::ICMP_EQ); - } - } - - MI = MRI.getVRegDef(Op1); - if (GZextOrSextOp *ZS = dyn_cast(MI)) { - MachineInstr *SrcMI = MRI.getVRegDef(ZS->getSrcReg()); - if (GICmp *Cmp = dyn_cast(SrcMI)) { - std::optional MayBeConstant = - getIConstantVRegValWithLookThrough(Cmp->getRHSReg(), MRI); - if (MayBeConstant) - Result |= (MayBeConstant->Value == 0) && (Cmp->getLHSReg() == Op0) && - (Cmp->getCond() == ICmpInst::ICMP_EQ); + auto MatchIt = [&MRI](const Register Reg0, const Register Reg1) { + MachineInstr *MI = MRI.getVRegDef(Reg0); + if (GZextOrSextOp *ZS = dyn_cast(MI)) { + MachineInstr *SrcMI = MRI.getVRegDef(ZS->getSrcReg()); + if (GICmp *Cmp = dyn_cast(SrcMI)) { + std::optional MayBeConstant = + getIConstantVRegValWithLookThrough(Cmp->getRHSReg(), MRI); + return MayBeConstant && (MayBeConstant->Value == 0) && + (Cmp->getLHSReg() == Reg1) && + (Cmp->getCond() == ICmpInst::ICMP_EQ); + } } - } + return false; + }; - return Result; + return MatchIt(Op0, Op1) || MatchIt(Op1, Op0); } static bool isNonZeroAdd(const GBinOp &Add, const MachineRegisterInfo &MRI, @@ -2068,7 +2056,7 @@ static bool isNonZeroAdd(const GBinOp &Add, const MachineRegisterInfo &MRI, Register LHS = Add.getLHSReg(); Register RHS = Add.getRHSReg(); - // (X + (X != 0)) is non zero + // (X + (X != 0)) is non zero. if (matchOpWithOpEqZero(LHS, RHS, MRI)) return true; @@ -2115,12 +2103,12 @@ static bool isKnownNonZeroBinOp(const GBinOp &BinOp, unsigned BitWidth = MRI.getType(BinOp.getReg(0)).getScalarSizeInBits(); switch (BinOp.getOpcode()) { case TargetOpcode::G_XOR: - // (X ^ (X != 0)) is non zero + // (X ^ (X != 0)) is non zero. if (matchOpWithOpEqZero(BinOp.getLHSReg(), BinOp.getRHSReg(), MRI)) return true; break; case TargetOpcode::G_OR: { - // (X | (X != 0)) is non zero + // (X | (X != 0)) is non zero. if (matchOpWithOpEqZero(BinOp.getLHSReg(), BinOp.getRHSReg(), MRI)) return true; // X | Y != 0 if X != 0 or Y != 0. 
From 077a9db460a64a9e140d7219a876980d702570f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Mon, 26 Aug 2024 08:02:34 +0200 Subject: [PATCH 5/7] remove switch --- .../GlobalISel/CombinerHelperCompares.cpp | 38 +------------------ 1 file changed, 2 insertions(+), 36 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp index 57f4804fa10f6..88e1eb0304335 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/IR/Instructions.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include @@ -45,42 +46,7 @@ bool CombinerHelper::constantFoldICmp(const GICmp &ICmp, APInt LHS = LHSCst.getScalarValue(); APInt RHS = RHSCst.getScalarValue(); - bool Result; - - switch (Pred) { - case CmpInst::Predicate::ICMP_EQ: - Result = LHS.eq(RHS); - break; - case CmpInst::Predicate::ICMP_NE: - Result = LHS.ne(RHS); - break; - case CmpInst::Predicate::ICMP_UGT: - Result = LHS.ugt(RHS); - break; - case CmpInst::Predicate::ICMP_UGE: - Result = LHS.uge(RHS); - break; - case CmpInst::Predicate::ICMP_ULT: - Result = LHS.ult(RHS); - break; - case CmpInst::Predicate::ICMP_ULE: - Result = LHS.ule(RHS); - break; - case CmpInst::Predicate::ICMP_SGT: - Result = LHS.sgt(RHS); - break; - case CmpInst::Predicate::ICMP_SGE: - Result = LHS.sge(RHS); - break; - case CmpInst::Predicate::ICMP_SLT: - Result = LHS.slt(RHS); - break; - case CmpInst::Predicate::ICMP_SLE: - Result = LHS.sle(RHS); - break; - default: - llvm_unreachable("Unexpected predicate"); - } + bool Result = ICmpInst::compare(LHS, RHS, Pred); MatchInfo = [=](MachineIRBuilder &B) { if (Result) From 328c62bc0ca983ede222946e15953955aaba06c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Mon, 26 Aug 2024 14:07:58 +0200 Subject: [PATCH 6/7] remove ext of icmp combines --- .../llvm/CodeGen/GlobalISel/CombinerHelper.h | 2 - .../include/llvm/Target/GlobalISel/Combine.td | 18 ---- .../GlobalISel/CombinerHelperCompares.cpp | 63 ------------ .../AArch64/GlobalISel/arm64-atomic.ll | 96 ++++++++----------- .../AArch64/GlobalISel/arm64-pcsections.ll | 56 +++++------ .../AArch64/GlobalISel/combine-visit-icmp.mir | 12 ++- llvm/test/CodeGen/AArch64/icmp2.ll | 38 +++----- 7 files changed, 86 insertions(+), 199 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index da9c7fdbd2a09..a8026a659b16a 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -301,8 +301,6 @@ class CombinerHelper { bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen = 0); bool visitICmp(const MachineInstr &MI, BuildFnTy &MatchInfo); - bool matchSextOfICmp(const MachineInstr &MI, BuildFnTy &MatchInfo); - bool matchZextOfICmp(const MachineInstr &MI, BuildFnTy &MatchInfo); /// Try hard to fold icmp with zero RHS because this is a common case. 
bool matchCmpOfZero(const MachineInstr &MI, BuildFnTy &MatchInfo); diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index ef4ca8a2552f4..0d9bafc26d0c6 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -1887,22 +1887,6 @@ def visit_icmp : GICombineRule< [{ return Helper.visitICmp(*${cmp}, ${matchinfo}); }]), (apply [{ Helper.applyBuildFn(*${cmp}, ${matchinfo}); }])>; -def sext_icmp : GICombineRule< - (defs root:$root, build_fn_matchinfo:$matchinfo), - (match (G_SEXT $rhs, $inputR), - (G_SEXT $lhs, $inputL), - (G_ICMP $root, $pred, $lhs, $rhs):$cmp, - [{ return Helper.matchSextOfICmp(*${cmp}, ${matchinfo}); }]), - (apply [{ Helper.applyBuildFn(*${cmp}, ${matchinfo}); }])>; - -def zext_icmp : GICombineRule< - (defs root:$root, build_fn_matchinfo:$matchinfo), - (match (G_ZEXT $rhs, $inputR), - (G_ZEXT $lhs, $inputL), - (G_ICMP $root, $pred, $lhs, $rhs):$cmp, - [{ return Helper.matchZextOfICmp(*${cmp}, ${matchinfo}); }]), - (apply [{ Helper.applyBuildFn(*${cmp}, ${matchinfo}); }])>; - def icmp_of_zero : GICombineRule< (defs root:$root, build_fn_matchinfo:$matchinfo), (match (G_CONSTANT $zero, 0), @@ -1912,8 +1896,6 @@ def icmp_of_zero : GICombineRule< def icmp_combines: GICombineGroup<[ visit_icmp, - sext_icmp, - zext_icmp, icmp_of_zero, icmp_to_true_false_known_bits, icmp_to_lhs_known_bits, diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp index 88e1eb0304335..67dd0c4332093 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp @@ -111,69 +111,6 @@ bool CombinerHelper::visitICmp(const MachineInstr &MI, BuildFnTy &MatchInfo) { return false; } -bool CombinerHelper::matchSextOfICmp(const MachineInstr &MI, - BuildFnTy &MatchInfo) { - const GICmp *Cmp = cast(&MI); - - Register Dst = Cmp->getReg(0); - LLT DstTy = MRI.getType(Dst); - Register LHS = Cmp->getLHSReg(); - Register RHS = Cmp->getRHSReg(); - CmpInst::Predicate Pred = Cmp->getCond(); - - GSext *SL = cast(MRI.getVRegDef(LHS)); - GSext *SR = cast(MRI.getVRegDef(RHS)); - - LLT SLTy = MRI.getType(SL->getSrcReg()); - LLT SRTy = MRI.getType(SR->getSrcReg()); - - // Turn icmp (sext X), (sext Y) into a compare of X and Y if they have the - // same type. - if (SLTy != SRTy) - return false; - - if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ICMP, {DstTy, SLTy}})) - return false; - - // Compare X and Y. Note that the predicate does not change. - MatchInfo = [=](MachineIRBuilder &B) { - B.buildICmp(Pred, Dst, SL->getSrcReg(), SR->getSrcReg()); - }; - return true; -} - -bool CombinerHelper::matchZextOfICmp(const MachineInstr &MI, - BuildFnTy &MatchInfo) { - const GICmp *Cmp = cast(&MI); - - Register Dst = Cmp->getReg(0); - LLT DstTy = MRI.getType(Dst); - Register LHS = Cmp->getLHSReg(); - Register RHS = Cmp->getRHSReg(); - CmpInst::Predicate Pred = Cmp->getCond(); - - GZext *ZL = cast(MRI.getVRegDef(LHS)); - GZext *ZR = cast(MRI.getVRegDef(RHS)); - - LLT ZLTy = MRI.getType(ZL->getSrcReg()); - LLT ZRTy = MRI.getType(ZR->getSrcReg()); - - // Turn icmp (zext X), (zext Y) into a compare of X and Y if they have - // the same type. - if (ZLTy != ZRTy) - return false; - - if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ICMP, {DstTy, ZLTy}})) - return false; - - // Compare X and Y. Note that signed predicates become unsigned. 
- MatchInfo = [=](MachineIRBuilder &B) { - B.buildICmp(ICmpInst::getUnsignedPredicate(Pred), Dst, ZL->getSrcReg(), - ZR->getSrcReg()); - }; - return true; -} - bool CombinerHelper::matchCmpOfZero(const MachineInstr &MI, BuildFnTy &MatchInfo) { const GICmp *Cmp = cast(&MI); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll index 816f7c3debcd3..de3f323891a36 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll @@ -2655,15 +2655,13 @@ define i8 @atomicrmw_max_i8(ptr %ptr, i8 %rhs) { define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) { ; CHECK-NOLSE-O1-LABEL: atomicrmw_umin_i8: ; CHECK-NOLSE-O1: ; %bb.0: -; CHECK-NOLSE-O1-NEXT: ; kill: def $w1 killed $w1 def $x1 -; CHECK-NOLSE-O1-NEXT: and x9, x1, #0xff +; CHECK-NOLSE-O1-NEXT: and w9, w1, #0xff ; CHECK-NOLSE-O1-NEXT: LBB35_1: ; %atomicrmw.start ; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-NOLSE-O1-NEXT: ldaxrb w10, [x0] -; CHECK-NOLSE-O1-NEXT: and w8, w10, #0xff -; CHECK-NOLSE-O1-NEXT: and x10, x10, #0xff -; CHECK-NOLSE-O1-NEXT: cmp w8, w1, uxtb -; CHECK-NOLSE-O1-NEXT: csel x10, x10, x9, ls +; CHECK-NOLSE-O1-NEXT: ldaxrb w8, [x0] +; CHECK-NOLSE-O1-NEXT: and w8, w8, #0xff +; CHECK-NOLSE-O1-NEXT: cmp w8, w9 +; CHECK-NOLSE-O1-NEXT: csel w10, w8, w9, lo ; CHECK-NOLSE-O1-NEXT: stlxrb w11, w10, [x0] ; CHECK-NOLSE-O1-NEXT: cbnz w11, LBB35_1 ; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end @@ -2672,15 +2670,13 @@ define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) { ; ; CHECK-OUTLINE-O1-LABEL: atomicrmw_umin_i8: ; CHECK-OUTLINE-O1: ; %bb.0: -; CHECK-OUTLINE-O1-NEXT: ; kill: def $w1 killed $w1 def $x1 -; CHECK-OUTLINE-O1-NEXT: and x9, x1, #0xff +; CHECK-OUTLINE-O1-NEXT: and w9, w1, #0xff ; CHECK-OUTLINE-O1-NEXT: LBB35_1: ; %atomicrmw.start ; CHECK-OUTLINE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-OUTLINE-O1-NEXT: ldaxrb w10, [x0] -; CHECK-OUTLINE-O1-NEXT: and w8, w10, #0xff -; CHECK-OUTLINE-O1-NEXT: and x10, x10, #0xff -; CHECK-OUTLINE-O1-NEXT: cmp w8, w1, uxtb -; CHECK-OUTLINE-O1-NEXT: csel x10, x10, x9, ls +; CHECK-OUTLINE-O1-NEXT: ldaxrb w8, [x0] +; CHECK-OUTLINE-O1-NEXT: and w8, w8, #0xff +; CHECK-OUTLINE-O1-NEXT: cmp w8, w9 +; CHECK-OUTLINE-O1-NEXT: csel w10, w8, w9, lo ; CHECK-OUTLINE-O1-NEXT: stlxrb w11, w10, [x0] ; CHECK-OUTLINE-O1-NEXT: cbnz w11, LBB35_1 ; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end @@ -2781,15 +2777,13 @@ define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) { define i8 @atomicrmw_umax_i8(ptr %ptr, i8 %rhs) { ; CHECK-NOLSE-O1-LABEL: atomicrmw_umax_i8: ; CHECK-NOLSE-O1: ; %bb.0: -; CHECK-NOLSE-O1-NEXT: ; kill: def $w1 killed $w1 def $x1 -; CHECK-NOLSE-O1-NEXT: and x9, x1, #0xff +; CHECK-NOLSE-O1-NEXT: and w9, w1, #0xff ; CHECK-NOLSE-O1-NEXT: LBB36_1: ; %atomicrmw.start ; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-NOLSE-O1-NEXT: ldxrb w10, [x0] -; CHECK-NOLSE-O1-NEXT: and w8, w10, #0xff -; CHECK-NOLSE-O1-NEXT: and x10, x10, #0xff -; CHECK-NOLSE-O1-NEXT: cmp w8, w1, uxtb -; CHECK-NOLSE-O1-NEXT: csel x10, x10, x9, hi +; CHECK-NOLSE-O1-NEXT: ldxrb w8, [x0] +; CHECK-NOLSE-O1-NEXT: and w8, w8, #0xff +; CHECK-NOLSE-O1-NEXT: cmp w8, w9 +; CHECK-NOLSE-O1-NEXT: csel w10, w8, w9, hi ; CHECK-NOLSE-O1-NEXT: stxrb w11, w10, [x0] ; CHECK-NOLSE-O1-NEXT: cbnz w11, LBB36_1 ; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end @@ -2798,15 +2792,13 @@ define i8 @atomicrmw_umax_i8(ptr %ptr, i8 %rhs) { ; ; CHECK-OUTLINE-O1-LABEL: atomicrmw_umax_i8: ; 
CHECK-OUTLINE-O1: ; %bb.0: -; CHECK-OUTLINE-O1-NEXT: ; kill: def $w1 killed $w1 def $x1 -; CHECK-OUTLINE-O1-NEXT: and x9, x1, #0xff +; CHECK-OUTLINE-O1-NEXT: and w9, w1, #0xff ; CHECK-OUTLINE-O1-NEXT: LBB36_1: ; %atomicrmw.start ; CHECK-OUTLINE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-OUTLINE-O1-NEXT: ldxrb w10, [x0] -; CHECK-OUTLINE-O1-NEXT: and w8, w10, #0xff -; CHECK-OUTLINE-O1-NEXT: and x10, x10, #0xff -; CHECK-OUTLINE-O1-NEXT: cmp w8, w1, uxtb -; CHECK-OUTLINE-O1-NEXT: csel x10, x10, x9, hi +; CHECK-OUTLINE-O1-NEXT: ldxrb w8, [x0] +; CHECK-OUTLINE-O1-NEXT: and w8, w8, #0xff +; CHECK-OUTLINE-O1-NEXT: cmp w8, w9 +; CHECK-OUTLINE-O1-NEXT: csel w10, w8, w9, hi ; CHECK-OUTLINE-O1-NEXT: stxrb w11, w10, [x0] ; CHECK-OUTLINE-O1-NEXT: cbnz w11, LBB36_1 ; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end @@ -3718,15 +3710,13 @@ define i16 @atomicrmw_max_i16(ptr %ptr, i16 %rhs) { define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) { ; CHECK-NOLSE-O1-LABEL: atomicrmw_umin_i16: ; CHECK-NOLSE-O1: ; %bb.0: -; CHECK-NOLSE-O1-NEXT: ; kill: def $w1 killed $w1 def $x1 -; CHECK-NOLSE-O1-NEXT: and x9, x1, #0xffff +; CHECK-NOLSE-O1-NEXT: and w9, w1, #0xffff ; CHECK-NOLSE-O1-NEXT: LBB45_1: ; %atomicrmw.start ; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-NOLSE-O1-NEXT: ldaxrh w10, [x0] -; CHECK-NOLSE-O1-NEXT: and w8, w10, #0xffff -; CHECK-NOLSE-O1-NEXT: and x10, x10, #0xffff -; CHECK-NOLSE-O1-NEXT: cmp w8, w1, uxth -; CHECK-NOLSE-O1-NEXT: csel x10, x10, x9, ls +; CHECK-NOLSE-O1-NEXT: ldaxrh w8, [x0] +; CHECK-NOLSE-O1-NEXT: and w8, w8, #0xffff +; CHECK-NOLSE-O1-NEXT: cmp w8, w9 +; CHECK-NOLSE-O1-NEXT: csel w10, w8, w9, lo ; CHECK-NOLSE-O1-NEXT: stlxrh w11, w10, [x0] ; CHECK-NOLSE-O1-NEXT: cbnz w11, LBB45_1 ; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end @@ -3735,15 +3725,13 @@ define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) { ; ; CHECK-OUTLINE-O1-LABEL: atomicrmw_umin_i16: ; CHECK-OUTLINE-O1: ; %bb.0: -; CHECK-OUTLINE-O1-NEXT: ; kill: def $w1 killed $w1 def $x1 -; CHECK-OUTLINE-O1-NEXT: and x9, x1, #0xffff +; CHECK-OUTLINE-O1-NEXT: and w9, w1, #0xffff ; CHECK-OUTLINE-O1-NEXT: LBB45_1: ; %atomicrmw.start ; CHECK-OUTLINE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-OUTLINE-O1-NEXT: ldaxrh w10, [x0] -; CHECK-OUTLINE-O1-NEXT: and w8, w10, #0xffff -; CHECK-OUTLINE-O1-NEXT: and x10, x10, #0xffff -; CHECK-OUTLINE-O1-NEXT: cmp w8, w1, uxth -; CHECK-OUTLINE-O1-NEXT: csel x10, x10, x9, ls +; CHECK-OUTLINE-O1-NEXT: ldaxrh w8, [x0] +; CHECK-OUTLINE-O1-NEXT: and w8, w8, #0xffff +; CHECK-OUTLINE-O1-NEXT: cmp w8, w9 +; CHECK-OUTLINE-O1-NEXT: csel w10, w8, w9, lo ; CHECK-OUTLINE-O1-NEXT: stlxrh w11, w10, [x0] ; CHECK-OUTLINE-O1-NEXT: cbnz w11, LBB45_1 ; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end @@ -3844,15 +3832,13 @@ define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) { define i16 @atomicrmw_umax_i16(ptr %ptr, i16 %rhs) { ; CHECK-NOLSE-O1-LABEL: atomicrmw_umax_i16: ; CHECK-NOLSE-O1: ; %bb.0: -; CHECK-NOLSE-O1-NEXT: ; kill: def $w1 killed $w1 def $x1 -; CHECK-NOLSE-O1-NEXT: and x9, x1, #0xffff +; CHECK-NOLSE-O1-NEXT: and w9, w1, #0xffff ; CHECK-NOLSE-O1-NEXT: LBB46_1: ; %atomicrmw.start ; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-NOLSE-O1-NEXT: ldxrh w10, [x0] -; CHECK-NOLSE-O1-NEXT: and w8, w10, #0xffff -; CHECK-NOLSE-O1-NEXT: and x10, x10, #0xffff -; CHECK-NOLSE-O1-NEXT: cmp w8, w1, uxth -; CHECK-NOLSE-O1-NEXT: csel x10, x10, x9, hi +; CHECK-NOLSE-O1-NEXT: ldxrh w8, [x0] +; CHECK-NOLSE-O1-NEXT: and w8, w8, #0xffff +; CHECK-NOLSE-O1-NEXT: cmp w8, w9 
+; CHECK-NOLSE-O1-NEXT: csel w10, w8, w9, hi ; CHECK-NOLSE-O1-NEXT: stxrh w11, w10, [x0] ; CHECK-NOLSE-O1-NEXT: cbnz w11, LBB46_1 ; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end @@ -3861,15 +3847,13 @@ define i16 @atomicrmw_umax_i16(ptr %ptr, i16 %rhs) { ; ; CHECK-OUTLINE-O1-LABEL: atomicrmw_umax_i16: ; CHECK-OUTLINE-O1: ; %bb.0: -; CHECK-OUTLINE-O1-NEXT: ; kill: def $w1 killed $w1 def $x1 -; CHECK-OUTLINE-O1-NEXT: and x9, x1, #0xffff +; CHECK-OUTLINE-O1-NEXT: and w9, w1, #0xffff ; CHECK-OUTLINE-O1-NEXT: LBB46_1: ; %atomicrmw.start ; CHECK-OUTLINE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-OUTLINE-O1-NEXT: ldxrh w10, [x0] -; CHECK-OUTLINE-O1-NEXT: and w8, w10, #0xffff -; CHECK-OUTLINE-O1-NEXT: and x10, x10, #0xffff -; CHECK-OUTLINE-O1-NEXT: cmp w8, w1, uxth -; CHECK-OUTLINE-O1-NEXT: csel x10, x10, x9, hi +; CHECK-OUTLINE-O1-NEXT: ldxrh w8, [x0] +; CHECK-OUTLINE-O1-NEXT: and w8, w8, #0xffff +; CHECK-OUTLINE-O1-NEXT: cmp w8, w9 +; CHECK-OUTLINE-O1-NEXT: csel w10, w8, w9, hi ; CHECK-OUTLINE-O1-NEXT: stxrh w11, w10, [x0] ; CHECK-OUTLINE-O1-NEXT: cbnz w11, LBB46_1 ; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll index 0e4750d381592..c6819ff39ed33 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll @@ -919,18 +919,16 @@ define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w1 = KILL $w1, implicit-def $x1 - ; CHECK-NEXT: renamable $x9 = ANDXri renamable $x1, 4103, pcsections !0 + ; CHECK-NEXT: renamable $w9 = ANDWri killed renamable $w1, 7 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.atomicrmw.start: ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) - ; CHECK-NEXT: liveins: $x0, $x1, $x9 + ; CHECK-NEXT: liveins: $w9, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w10 = LDAXRB renamable $x0, implicit-def $x10, pcsections !0 :: (volatile load (s8) from %ir.ptr) - ; CHECK-NEXT: renamable $w8 = ANDWri renamable $w10, 7 - ; CHECK-NEXT: renamable $x10 = ANDXri killed renamable $x10, 4103, pcsections !0 - ; CHECK-NEXT: dead $wzr = SUBSWrx renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0 - ; CHECK-NEXT: renamable $x10 = CSELXr killed renamable $x10, renamable $x9, 9, implicit killed $nzcv, pcsections !0 + ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) + ; CHECK-NEXT: renamable $w8 = ANDWri renamable $w8, 7, implicit killed $x8 + ; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0 + ; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 3, implicit killed $nzcv, implicit-def $x10, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w11 = STLXRB renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s8) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -949,18 +947,16 @@ define i8 @atomicrmw_umax_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w1 = KILL $w1, implicit-def $x1 - ; CHECK-NEXT: renamable $x9 = ANDXri renamable $x1, 4103, pcsections !0 + ; CHECK-NEXT: renamable $w9 = ANDWri killed renamable $w1, 7 ; CHECK-NEXT: {{ 
 ; CHECK-NEXT: bb.1.atomicrmw.start:
 ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
- ; CHECK-NEXT: liveins: $x0, $x1, $x9
+ ; CHECK-NEXT: liveins: $w9, $x0
 ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w10 = LDXRB renamable $x0, implicit-def $x10, pcsections !0 :: (volatile load (s8) from %ir.ptr)
- ; CHECK-NEXT: renamable $w8 = ANDWri renamable $w10, 7
- ; CHECK-NEXT: renamable $x10 = ANDXri killed renamable $x10, 4103, pcsections !0
- ; CHECK-NEXT: dead $wzr = SUBSWrx renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0
- ; CHECK-NEXT: renamable $x10 = CSELXr killed renamable $x10, renamable $x9, 8, implicit killed $nzcv, pcsections !0
+ ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = ANDWri renamable $w8, 7, implicit killed $x8
+ ; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0
+ ; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 8, implicit killed $nzcv, implicit-def $x10, pcsections !0
 ; CHECK-NEXT: early-clobber renamable $w11 = STXRB renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s8) into %ir.ptr)
 ; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0
 ; CHECK-NEXT: {{ $}}
@@ -1176,18 +1172,16 @@ define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) {
 ; CHECK-NEXT: successors: %bb.1(0x80000000)
 ; CHECK-NEXT: liveins: $w1, $x0
 ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w1 = KILL $w1, implicit-def $x1
- ; CHECK-NEXT: renamable $x9 = ANDXri renamable $x1, 4111, pcsections !0
+ ; CHECK-NEXT: renamable $w9 = ANDWri killed renamable $w1, 15
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: bb.1.atomicrmw.start:
 ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
- ; CHECK-NEXT: liveins: $x0, $x1, $x9
+ ; CHECK-NEXT: liveins: $w9, $x0
 ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w10 = LDAXRH renamable $x0, implicit-def $x10, pcsections !0 :: (volatile load (s16) from %ir.ptr)
- ; CHECK-NEXT: renamable $w8 = ANDWri renamable $w10, 15
- ; CHECK-NEXT: renamable $x10 = ANDXri killed renamable $x10, 4111, pcsections !0
- ; CHECK-NEXT: dead $wzr = SUBSWrx renamable $w8, renamable $w1, 8, implicit-def $nzcv, pcsections !0
- ; CHECK-NEXT: renamable $x10 = CSELXr killed renamable $x10, renamable $x9, 9, implicit killed $nzcv, pcsections !0
+ ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = ANDWri renamable $w8, 15, implicit killed $x8
+ ; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0
+ ; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 3, implicit killed $nzcv, implicit-def $x10, pcsections !0
 ; CHECK-NEXT: early-clobber renamable $w11 = STLXRH renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s16) into %ir.ptr)
 ; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0
 ; CHECK-NEXT: {{ $}}
@@ -1206,18 +1200,16 @@ define i16 @atomicrmw_umax_i16(ptr %ptr, i16 %rhs) {
 ; CHECK-NEXT: successors: %bb.1(0x80000000)
 ; CHECK-NEXT: liveins: $w1, $x0
 ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w1 = KILL $w1, implicit-def $x1
- ; CHECK-NEXT: renamable $x9 = ANDXri renamable $x1, 4111, pcsections !0
+ ; CHECK-NEXT: renamable $w9 = ANDWri killed renamable $w1, 15
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: bb.1.atomicrmw.start:
 ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
- ; CHECK-NEXT: liveins: $x0, $x1, $x9
+ ; CHECK-NEXT: liveins: $w9, $x0
 ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w10 = LDXRH renamable $x0, implicit-def $x10, pcsections !0 :: (volatile load (s16) from %ir.ptr)
- ; CHECK-NEXT: renamable $w8 = ANDWri renamable $w10, 15
- ; CHECK-NEXT: renamable $x10 = ANDXri killed renamable $x10, 4111, pcsections !0
- ; CHECK-NEXT: dead $wzr = SUBSWrx renamable $w8, renamable $w1, 8, implicit-def $nzcv, pcsections !0
- ; CHECK-NEXT: renamable $x10 = CSELXr killed renamable $x10, renamable $x9, 8, implicit killed $nzcv, pcsections !0
+ ; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = ANDWri renamable $w8, 15, implicit killed $x8
+ ; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0
+ ; CHECK-NEXT: renamable $w10 = CSELWr renamable $w8, renamable $w9, 8, implicit killed $nzcv, implicit-def $x10, pcsections !0
 ; CHECK-NEXT: early-clobber renamable $w11 = STXRH renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s16) into %ir.ptr)
 ; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0
 ; CHECK-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-visit-icmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-visit-icmp.mir
index e0eaa6d63b7fc..605fb06a57235 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-visit-icmp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-visit-icmp.mir
@@ -68,7 +68,9 @@ body: |
 ; CHECK-LABEL: name: test_icmp_of_zext_and_zext
 ; CHECK: %lhs1:_(s32) = COPY $w0
 ; CHECK-NEXT: %rhs1:_(s32) = COPY $w0
- ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(ugt), %lhs1(s32), %rhs1
+ ; CHECK-NEXT: %lhs:_(s64) = G_ZEXT %lhs1(s32)
+ ; CHECK-NEXT: %rhs:_(s64) = G_ZEXT %rhs1(s32)
+ ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(sgt), %lhs(s64), %rhs
 ; CHECK-NEXT: $w0 = COPY %res(s32)
 %lhs1:_(s32) = COPY $w0
 %rhs1:_(s32) = COPY $w0
@@ -84,7 +86,9 @@ body: |
 ; CHECK-LABEL: name: test_icmp_of_sext_and_sext
 ; CHECK: %lhs1:_(s32) = COPY $w0
 ; CHECK-NEXT: %rhs1:_(s32) = COPY $w0
- ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(sgt), %lhs1(s32), %rhs1
+ ; CHECK-NEXT: %lhs:_(s64) = G_SEXT %lhs1(s32)
+ ; CHECK-NEXT: %rhs:_(s64) = G_SEXT %rhs1(s32)
+ ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(sgt), %lhs(s64), %rhs
 ; CHECK-NEXT: $w0 = COPY %res(s32)
 %lhs1:_(s32) = COPY $w0
 %rhs1:_(s32) = COPY $w0
@@ -100,7 +104,9 @@ body: |
 ; CHECK-LABEL: name: test_ugt_icmp_of_sext_and_sext
 ; CHECK: %lhs1:_(s32) = COPY $w0
 ; CHECK-NEXT: %rhs1:_(s32) = COPY $w0
- ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(ugt), %lhs1(s32), %rhs1
+ ; CHECK-NEXT: %lhs:_(s64) = G_SEXT %lhs1(s32)
+ ; CHECK-NEXT: %rhs:_(s64) = G_SEXT %rhs1(s32)
+ ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(ugt), %lhs(s64), %rhs
 ; CHECK-NEXT: $w0 = COPY %res(s32)
 %lhs1:_(s32) = COPY $w0
 %rhs1:_(s32) = COPY $w0
diff --git a/llvm/test/CodeGen/AArch64/icmp2.ll b/llvm/test/CodeGen/AArch64/icmp2.ll
index 1ab8d0e3331b3..431c2b1b3540f 100644
--- a/llvm/test/CodeGen/AArch64/icmp2.ll
+++ b/llvm/test/CodeGen/AArch64/icmp2.ll
@@ -63,19 +63,13 @@ entry:
 }
 
 define i1 @i64_i64_sext(i32 %a, i32 %b) {
-; CHECK-SD-LABEL: i64_i64_sext:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-SD-NEXT: sxtw x8, w0
-; CHECK-SD-NEXT: cmp x8, w1, sxtw
-; CHECK-SD-NEXT: cset w0, lt
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: i64_i64_sext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: cmp w0, w1
-; CHECK-GI-NEXT: cset w0, lt
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: i64_i64_sext:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: cmp x8, w1, sxtw
+; CHECK-NEXT: cset w0, lt
+; CHECK-NEXT: ret
 entry:
 %sextedlhs = sext i32 %a to i64
 %sextedrhs = sext i32 %b to i64
@@ -84,18 +78,12 @@ entry:
 }
 
 define i1 @i64_i64_zext(i32 %a, i32 %b) {
-; CHECK-SD-LABEL: i64_i64_zext:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: mov w8, w0
-; CHECK-SD-NEXT: cmp x8, w1, uxtw
-; CHECK-SD-NEXT: cset w0, lt
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: i64_i64_zext:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: cmp w0, w1
-; CHECK-GI-NEXT: cset w0, lo
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: i64_i64_zext:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: cmp x8, w1, uxtw
+; CHECK-NEXT: cset w0, lt
+; CHECK-NEXT: ret
 entry:
 %zextedlhs = zext i32 %a to i64
 %zextedrhs = zext i32 %b to i64

From 286061c4ca5dc9483440b3032e842ebf655e5428 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?=
Date: Thu, 12 Sep 2024 10:18:34 +0200
Subject: [PATCH 7/7] remove undef combine

---
 .../llvm/CodeGen/GlobalISel/CombinerHelper.h  |  2 +-
 .../include/llvm/Target/GlobalISel/Combine.td |  6 +--
 .../GlobalISel/CombinerHelperCompares.cpp     | 29 +------------
 .../AArch64/GlobalISel/combine-visit-icmp.mir | 18 +++++---
 llvm/test/CodeGen/AArch64/icmp2.ll            | 42 +++++++++++++------
 5 files changed, 48 insertions(+), 49 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index a8026a659b16a..3a30d18ec5b32 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -300,7 +300,7 @@ class CombinerHelper {
   /// $whatever = COPY $addr
   bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen = 0);
 
-  bool visitICmp(const MachineInstr &MI, BuildFnTy &MatchInfo);
+  bool matchICmp(const MachineInstr &MI, BuildFnTy &MatchInfo);
   /// Try hard to fold icmp with zero RHS because this is a common case.
   bool matchCmpOfZero(const MachineInstr &MI, BuildFnTy &MatchInfo);
 
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 0d9bafc26d0c6..8c0585e4f70ad 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1881,10 +1881,10 @@ def cast_combines: GICombineGroup<[
   buildvector_of_truncate
 ]>;
 
-def visit_icmp : GICombineRule<
+def prepare_icmp : GICombineRule<
   (defs root:$root, build_fn_matchinfo:$matchinfo),
   (match (G_ICMP $root, $pred, $lhs, $rhs):$cmp,
-        [{ return Helper.visitICmp(*${cmp}, ${matchinfo}); }]),
+        [{ return Helper.matchICmp(*${cmp}, ${matchinfo}); }]),
   (apply [{ Helper.applyBuildFn(*${cmp}, ${matchinfo}); }])>;
 
 def icmp_of_zero : GICombineRule<
@@ -1895,7 +1895,7 @@ def icmp_of_zero : GICombineRule<
   (apply [{ Helper.applyBuildFn(*${cmp}, ${matchinfo}); }])>;
 
 def icmp_combines: GICombineGroup<[
-  visit_icmp,
+  prepare_icmp,
   icmp_of_zero,
   icmp_to_true_false_known_bits,
   icmp_to_lhs_known_bits,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
index 67dd0c4332093..0e88525c47171 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp
@@ -60,11 +60,10 @@ bool CombinerHelper::constantFoldICmp(const GICmp &ICmp,
   return true;
 }
 
-bool CombinerHelper::visitICmp(const MachineInstr &MI, BuildFnTy &MatchInfo) {
+bool CombinerHelper::matchICmp(const MachineInstr &MI, BuildFnTy &MatchInfo) {
   const GICmp *Cmp = cast<GICmp>(&MI);
 
   Register Dst = Cmp->getReg(0);
-  LLT DstTy = MRI.getType(Dst);
   Register LHS = Cmp->getLHSReg();
   Register RHS = Cmp->getRHSReg();
@@ -82,32 +81,6 @@ bool CombinerHelper::visitICmp(const MachineInstr &MI, BuildFnTy &MatchInfo) {
     return true;
   }
 
-  MachineInstr *MIRHS = MRI.getVRegDef(RHS);
-
-  // For EQ and NE, we can always pick a value for the undef to make the
-  // predicate pass or fail, so we can return undef.
-  // Matches behavior in llvm::ConstantFoldCompareInstruction.
-  if (isa<GImplicitDef>(MIRHS) && ICmpInst::isEquality(Pred) &&
-      isLegalOrBeforeLegalizer({TargetOpcode::G_IMPLICIT_DEF, {DstTy}})) {
-    MatchInfo = [=](MachineIRBuilder &B) { B.buildUndef(Dst); };
-    return true;
-  }
-
-  // icmp X, X -> true/false
-  // icmp X, undef -> true/false because undef could be X.
-  if ((LHS == RHS || isa<GImplicitDef>(MIRHS)) &&
-      isConstantLegalOrBeforeLegalizer(DstTy)) {
-    MatchInfo = [=](MachineIRBuilder &B) {
-      if (CmpInst::isTrueWhenEqual(Pred))
-        B.buildConstant(Dst, getICmpTrueVal(getTargetLowering(),
-                                            /*IsVector=*/DstTy.isVector(),
-                                            /*IsFP=*/false));
-      else
-        B.buildConstant(Dst, 0);
-    };
-    return true;
-  }
-
   return false;
 }
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-visit-icmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-visit-icmp.mir
index 605fb06a57235..d454e60b034ea 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-visit-icmp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-visit-icmp.mir
@@ -6,7 +6,9 @@ name: test_icmp_of_eq_and_right_undef
 body: |
 bb.1:
 ; CHECK-LABEL: name: test_icmp_of_eq_and_right_undef
- ; CHECK: %res:_(s32) = G_IMPLICIT_DEF
+ ; CHECK: %lhs:_(s64) = COPY $x0
+ ; CHECK-NEXT: %rhs:_(s64) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(ne), %lhs(s64), %rhs
 ; CHECK-NEXT: $w0 = COPY %res(s32)
 %lhs:_(s64) = COPY $x0
 %rhs:_(s64) = G_IMPLICIT_DEF
@@ -18,7 +20,9 @@ name: test_icmp_of_not_eq_and_right_undef
 body: |
 bb.1:
 ; CHECK-LABEL: name: test_icmp_of_not_eq_and_right_undef
- ; CHECK: %res:_(s32) = G_CONSTANT i32 0
+ ; CHECK: %lhs:_(s64) = COPY $x0
+ ; CHECK-NEXT: %rhs:_(s64) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(ugt), %lhs(s64), %rhs
 ; CHECK-NEXT: $w0 = COPY %res(s32)
 %lhs:_(s64) = COPY $x0
 %rhs:_(s64) = G_IMPLICIT_DEF
@@ -30,7 +34,9 @@ name: test_icmp_of_is_eq_and_right_undef
 body: |
 bb.1:
 ; CHECK-LABEL: name: test_icmp_of_is_eq_and_right_undef
- ; CHECK: %res:_(s32) = G_IMPLICIT_DEF
+ ; CHECK: %lhs:_(s64) = COPY $x0
+ ; CHECK-NEXT: %rhs:_(s64) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(ne), %lhs(s64), %rhs
 ; CHECK-NEXT: $w0 = COPY %res(s32)
 %lhs:_(s64) = COPY $x0
 %rhs:_(s64) = G_IMPLICIT_DEF
@@ -42,7 +48,8 @@ name: test_icmp_of_eq_not_eq
 body: |
 bb.1:
 ; CHECK-LABEL: name: test_icmp_of_eq_not_eq
- ; CHECK: %res:_(s32) = G_CONSTANT i32 0
+ ; CHECK: %lhs:_(s64) = COPY $x0
+ ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(ugt), %lhs(s64), %lhs
 ; CHECK-NEXT: $w0 = COPY %res(s32)
 %lhs:_(s64) = COPY $x0
 %rhs:_(s64) = COPY $x0
@@ -54,7+61,8 @@ name: test_icmp_of_eq_is_eq
 body: |
 bb.1:
 ; CHECK-LABEL: name: test_icmp_of_eq_is_eq
- ; CHECK: %res:_(s32) = G_CONSTANT i32 1
+ ; CHECK: %lhs:_(s64) = COPY $x0
+ ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(eq), %lhs(s64), %lhs
 ; CHECK-NEXT: $w0 = COPY %res(s32)
 %lhs:_(s64) = COPY $x0
 %rhs:_(s64) = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/icmp2.ll b/llvm/test/CodeGen/AArch64/icmp2.ll
index 431c2b1b3540f..963501da78475 100644
--- a/llvm/test/CodeGen/AArch64/icmp2.ll
+++ b/llvm/test/CodeGen/AArch64/icmp2.ll
@@ -33,30 +33,48 @@ entry:
 }
 
 define i1 @i64_i64_undef_eq(i64 %a, i64 %b) {
-; CHECK-LABEL: i64_i64_undef_eq:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: i64_i64_undef_eq:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mov w0, wzr
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: i64_i64_undef_eq:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp x0, x8
+; CHECK-GI-NEXT: cset w0, eq
+; CHECK-GI-NEXT: ret
 entry:
 %c = icmp eq i64 %a, undef
 ret i1 %c
 }
 
 define i1 @i64_i64_slt_eq(i64 %a, i64 %b) {
-; CHECK-LABEL: i64_i64_slt_eq:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: i64_i64_slt_eq:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mov w0, wzr
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: i64_i64_slt_eq:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp x0, x0
+; CHECK-GI-NEXT: cset w0, lt
+; CHECK-GI-NEXT: ret
 entry:
 %c = icmp slt i64 %a, %a
 ret i1 %c
 }
 
 define i1 @i64_i64_not_eq_undef(i64 %a, i64 %b) {
-; CHECK-LABEL: i64_i64_not_eq_undef:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: i64_i64_not_eq_undef:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mov w0, wzr
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: i64_i64_not_eq_undef:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp x0, x8
+; CHECK-GI-NEXT: cset w0, lt
+; CHECK-GI-NEXT: ret
 entry:
 %c = icmp slt i64 %a, undef
 ret i1 %c
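
For context on the tests above: the combine removed in [PATCH 7/7] folded compares with identical operands, and equality compares against undef, mirroring llvm::ConstantFoldCompareInstruction; with it gone, GlobalISel emits the real compares seen in the updated CHECK-GI lines. A minimal IR sketch of the two folds the removed code's comments describe (function names are illustrative, not from the patch):

    define i1 @slt_self(i64 %a) {
      ; icmp X, X with a strict predicate such as slt can never be true,
      ; so this may fold to "ret i1 false".
      %c = icmp slt i64 %a, %a
      ret i1 %c
    }

    define i1 @eq_undef(i64 %a) {
      ; For eq/ne, undef may be picked to make the predicate pass or fail,
      ; so the compare itself may fold to undef.
      %c = icmp eq i64 %a, undef
      ret i1 %c
    }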