diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 50dc7d5c54c54..caa3a57ebabc2 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2451,7 +2451,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { CmpIntrinsic::getLTPredicate(IID), CostKind); - if (TLI->shouldExpandCmpUsingSelects()) { + EVT VT = TLI->getValueType(DL, CmpTy, true); + if (TLI->shouldExpandCmpUsingSelects(VT)) { // x < y ? -1 : (x > y ? 1 : 0) Cost += 2 * thisT()->getCmpSelInstrCost( BinaryOperator::Select, RetTy, CondTy, diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index e17d68d2690c8..802510dd0e4fa 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -3409,7 +3409,7 @@ class TargetLoweringBase { /// Should we expand [US]CMP nodes using two selects and two compares, or by /// doing arithmetic on boolean types - virtual bool shouldExpandCmpUsingSelects() const { return false; } + virtual bool shouldExpandCmpUsingSelects(EVT VT) const { return false; } /// Does this target support complex deinterleaving virtual bool isComplexDeinterleavingSupported() const { return false; } diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index ca379a691da91..9593788628068 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -10681,7 +10681,7 @@ SDValue TargetLowering::expandCMP(SDNode *Node, SelectionDAG &DAG) const { // because one of the conditions can be merged with one of the selects. // And finally, if we don't know the contents of high bits of a boolean value // we can't perform any arithmetic either. - if (shouldExpandCmpUsingSelects() || BoolVT.getScalarSizeInBits() == 1 || + if (shouldExpandCmpUsingSelects(VT) || BoolVT.getScalarSizeInBits() == 1 || getBooleanContents(BoolVT) == UndefinedBooleanContent) { SDValue SelectZeroOrOne = DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT), diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 47da9d577cd82..d41f45ac0ce82 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -27781,6 +27781,12 @@ bool AArch64TargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT, return TargetLowering::shouldConvertFpToSat(Op, FPVT, VT); } +bool AArch64TargetLowering::shouldExpandCmpUsingSelects(EVT VT) const { + // Expand scalar and SVE operations using selects. Neon vectors prefer sub to + // avoid vselect becoming bsl / unrolling. + return !VT.isFixedLengthVector(); +} + MachineInstr * AArch64TargetLowering::EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index f9d45b02d30e3..06b918f9ccaa2 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -914,7 +914,7 @@ class AArch64TargetLowering : public TargetLowering { bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override; - bool shouldExpandCmpUsingSelects() const override { return true; } + bool shouldExpandCmpUsingSelects(EVT VT) const override; bool isComplexDeinterleavingSupported() const override; bool isComplexDeinterleavingOperationSupported( diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h index 1e7285e3e0fc5..4a18bde00a0b9 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -507,7 +507,7 @@ class SystemZTargetLowering : public TargetLowering { bool shouldConsiderGEPOffsetSplit() const override { return true; } - bool shouldExpandCmpUsingSelects() const override { return true; } + bool shouldExpandCmpUsingSelects(EVT VT) const override { return true; } const char *getTargetNodeName(unsigned Opcode) const override; std::pair diff --git a/llvm/test/Analysis/CostModel/AArch64/cmp.ll b/llvm/test/Analysis/CostModel/AArch64/cmp.ll index 1b4b5eb616b5a..a56ca8890e307 100644 --- a/llvm/test/Analysis/CostModel/AArch64/cmp.ll +++ b/llvm/test/Analysis/CostModel/AArch64/cmp.ll @@ -128,16 +128,16 @@ define void @uscmp() { ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %u16 = call i16 @llvm.ucmp.i16.i16(i16 undef, i16 undef) ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %u32 = call i32 @llvm.ucmp.i32.i32(i32 undef, i32 undef) ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %u64 = call i64 @llvm.ucmp.i64.i64(i64 undef, i64 undef) -; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %uv16i8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef) -; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %uv8i16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef) -; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %uv4i32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %uv16i8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %uv8i16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %uv4i32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef) ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s8 = call i8 @llvm.scmp.i8.i8(i8 undef, i8 undef) ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s16 = call i16 @llvm.scmp.i16.i16(i16 undef, i16 undef) ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s32 = call i32 @llvm.scmp.i32.i32(i32 undef, i32 undef) ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s64 = call i64 @llvm.scmp.i64.i64(i64 undef, i64 undef) -; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %sv16i8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef) -; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %sv8i16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef) -; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %sv4i32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %sv16i8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %sv8i16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef) +; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %sv4i32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef) ; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SIZE-LABEL: 'uscmp' @@ -145,16 +145,16 @@ define void @uscmp() { ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %u16 = call i16 @llvm.ucmp.i16.i16(i16 undef, i16 undef) ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %u32 = call i32 @llvm.ucmp.i32.i32(i32 undef, i32 undef) ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %u64 = call i64 @llvm.ucmp.i64.i64(i64 undef, i64 undef) -; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %uv16i8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef) -; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %uv8i16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef) -; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %uv4i32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %uv16i8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %uv8i16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %uv4i32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef) ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s8 = call i8 @llvm.scmp.i8.i8(i8 undef, i8 undef) ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s16 = call i16 @llvm.scmp.i16.i16(i16 undef, i16 undef) ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s32 = call i32 @llvm.scmp.i32.i32(i32 undef, i32 undef) ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s64 = call i64 @llvm.scmp.i64.i64(i64 undef, i64 undef) -; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sv16i8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef) -; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sv8i16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef) -; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sv4i32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %sv16i8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %sv8i16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef) +; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %sv4i32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef) ; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %u8 = call i8 @llvm.ucmp(i8 undef, i8 undef) diff --git a/llvm/test/CodeGen/AArch64/scmp.ll b/llvm/test/CodeGen/AArch64/scmp.ll index 3d18a904ed2d3..4aff5a836e1a1 100644 --- a/llvm/test/CodeGen/AArch64/scmp.ll +++ b/llvm/test/CodeGen/AArch64/scmp.ll @@ -136,11 +136,9 @@ define i64 @scmp.64.64(i64 %x, i64 %y) nounwind { define <8 x i8> @s_v8i8(<8 x i8> %a, <8 x i8> %b) { ; CHECK-SD-LABEL: s_v8i8: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: movi v2.8b, #1 -; CHECK-SD-NEXT: cmgt v3.8b, v0.8b, v1.8b +; CHECK-SD-NEXT: cmgt v2.8b, v0.8b, v1.8b ; CHECK-SD-NEXT: cmgt v0.8b, v1.8b, v0.8b -; CHECK-SD-NEXT: and v1.8b, v3.8b, v2.8b -; CHECK-SD-NEXT: orr v0.8b, v1.8b, v0.8b +; CHECK-SD-NEXT: sub v0.8b, v0.8b, v2.8b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: s_v8i8: @@ -160,11 +158,9 @@ entry: define <16 x i8> @s_v16i8(<16 x i8> %a, <16 x i8> %b) { ; CHECK-SD-LABEL: s_v16i8: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: movi v2.16b, #1 -; CHECK-SD-NEXT: cmgt v3.16b, v0.16b, v1.16b +; CHECK-SD-NEXT: cmgt v2.16b, v0.16b, v1.16b ; CHECK-SD-NEXT: cmgt v0.16b, v1.16b, v0.16b -; CHECK-SD-NEXT: and v1.16b, v3.16b, v2.16b -; CHECK-SD-NEXT: orr v0.16b, v1.16b, v0.16b +; CHECK-SD-NEXT: sub v0.16b, v0.16b, v2.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: s_v16i8: @@ -184,11 +180,9 @@ entry: define <4 x i16> @s_v4i16(<4 x i16> %a, <4 x i16> %b) { ; CHECK-SD-LABEL: s_v4i16: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: movi v2.4h, #1 -; CHECK-SD-NEXT: cmgt v3.4h, v0.4h, v1.4h +; CHECK-SD-NEXT: cmgt v2.4h, v0.4h, v1.4h ; CHECK-SD-NEXT: cmgt v0.4h, v1.4h, v0.4h -; CHECK-SD-NEXT: and v1.8b, v3.8b, v2.8b -; CHECK-SD-NEXT: orr v0.8b, v1.8b, v0.8b +; CHECK-SD-NEXT: sub v0.4h, v0.4h, v2.4h ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: s_v4i16: @@ -208,11 +202,9 @@ entry: define <8 x i16> @s_v8i16(<8 x i16> %a, <8 x i16> %b) { ; CHECK-SD-LABEL: s_v8i16: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: movi v2.8h, #1 -; CHECK-SD-NEXT: cmgt v3.8h, v0.8h, v1.8h +; CHECK-SD-NEXT: cmgt v2.8h, v0.8h, v1.8h ; CHECK-SD-NEXT: cmgt v0.8h, v1.8h, v0.8h -; CHECK-SD-NEXT: and v1.16b, v3.16b, v2.16b -; CHECK-SD-NEXT: orr v0.16b, v1.16b, v0.16b +; CHECK-SD-NEXT: sub v0.8h, v0.8h, v2.8h ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: s_v8i16: @@ -232,15 +224,12 @@ entry: define <16 x i16> @s_v16i16(<16 x i16> %a, <16 x i16> %b) { ; CHECK-SD-LABEL: s_v16i16: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: movi v4.8h, #1 +; CHECK-SD-NEXT: cmgt v4.8h, v1.8h, v3.8h ; CHECK-SD-NEXT: cmgt v5.8h, v0.8h, v2.8h -; CHECK-SD-NEXT: cmgt v6.8h, v1.8h, v3.8h ; CHECK-SD-NEXT: cmgt v0.8h, v2.8h, v0.8h ; CHECK-SD-NEXT: cmgt v1.8h, v3.8h, v1.8h -; CHECK-SD-NEXT: and v2.16b, v5.16b, v4.16b -; CHECK-SD-NEXT: and v3.16b, v6.16b, v4.16b -; CHECK-SD-NEXT: orr v0.16b, v2.16b, v0.16b -; CHECK-SD-NEXT: orr v1.16b, v3.16b, v1.16b +; CHECK-SD-NEXT: sub v0.8h, v0.8h, v5.8h +; CHECK-SD-NEXT: sub v1.8h, v1.8h, v4.8h ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: s_v16i16: @@ -264,11 +253,9 @@ entry: define <2 x i32> @s_v2i32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-SD-LABEL: s_v2i32: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: movi v2.2s, #1 -; CHECK-SD-NEXT: cmgt v3.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: cmgt v2.2s, v0.2s, v1.2s ; CHECK-SD-NEXT: cmgt v0.2s, v1.2s, v0.2s -; CHECK-SD-NEXT: and v1.8b, v3.8b, v2.8b -; CHECK-SD-NEXT: orr v0.8b, v1.8b, v0.8b +; CHECK-SD-NEXT: sub v0.2s, v0.2s, v2.2s ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: s_v2i32: @@ -288,11 +275,9 @@ entry: define <4 x i32> @s_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-SD-LABEL: s_v4i32: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: movi v2.4s, #1 -; CHECK-SD-NEXT: cmgt v3.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: cmgt v2.4s, v0.4s, v1.4s ; CHECK-SD-NEXT: cmgt v0.4s, v1.4s, v0.4s -; CHECK-SD-NEXT: and v1.16b, v3.16b, v2.16b -; CHECK-SD-NEXT: orr v0.16b, v1.16b, v0.16b +; CHECK-SD-NEXT: sub v0.4s, v0.4s, v2.4s ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: s_v4i32: @@ -312,15 +297,12 @@ entry: define <8 x i32> @s_v8i32(<8 x i32> %a, <8 x i32> %b) { ; CHECK-SD-LABEL: s_v8i32: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: movi v4.4s, #1 +; CHECK-SD-NEXT: cmgt v4.4s, v1.4s, v3.4s ; CHECK-SD-NEXT: cmgt v5.4s, v0.4s, v2.4s -; CHECK-SD-NEXT: cmgt v6.4s, v1.4s, v3.4s ; CHECK-SD-NEXT: cmgt v0.4s, v2.4s, v0.4s ; CHECK-SD-NEXT: cmgt v1.4s, v3.4s, v1.4s -; CHECK-SD-NEXT: and v2.16b, v5.16b, v4.16b -; CHECK-SD-NEXT: and v3.16b, v6.16b, v4.16b -; CHECK-SD-NEXT: orr v0.16b, v2.16b, v0.16b -; CHECK-SD-NEXT: orr v1.16b, v3.16b, v1.16b +; CHECK-SD-NEXT: sub v0.4s, v0.4s, v5.4s +; CHECK-SD-NEXT: sub v1.4s, v1.4s, v4.4s ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: s_v8i32: @@ -344,12 +326,9 @@ entry: define <2 x i64> @s_v2i64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-SD-LABEL: s_v2i64: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: mov w8, #1 // =0x1 ; CHECK-SD-NEXT: cmgt v2.2d, v0.2d, v1.2d ; CHECK-SD-NEXT: cmgt v0.2d, v1.2d, v0.2d -; CHECK-SD-NEXT: dup v3.2d, x8 -; CHECK-SD-NEXT: and v1.16b, v2.16b, v3.16b -; CHECK-SD-NEXT: orr v0.16b, v1.16b, v0.16b +; CHECK-SD-NEXT: sub v0.2d, v0.2d, v2.2d ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: s_v2i64: @@ -370,16 +349,12 @@ entry: define <4 x i64> @s_v4i64(<4 x i64> %a, <4 x i64> %b) { ; CHECK-SD-LABEL: s_v4i64: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: mov w8, #1 // =0x1 -; CHECK-SD-NEXT: cmgt v4.2d, v0.2d, v2.2d -; CHECK-SD-NEXT: cmgt v6.2d, v1.2d, v3.2d -; CHECK-SD-NEXT: dup v5.2d, x8 +; CHECK-SD-NEXT: cmgt v4.2d, v1.2d, v3.2d +; CHECK-SD-NEXT: cmgt v5.2d, v0.2d, v2.2d ; CHECK-SD-NEXT: cmgt v0.2d, v2.2d, v0.2d ; CHECK-SD-NEXT: cmgt v1.2d, v3.2d, v1.2d -; CHECK-SD-NEXT: and v2.16b, v4.16b, v5.16b -; CHECK-SD-NEXT: and v3.16b, v6.16b, v5.16b -; CHECK-SD-NEXT: orr v0.16b, v2.16b, v0.16b -; CHECK-SD-NEXT: orr v1.16b, v3.16b, v1.16b +; CHECK-SD-NEXT: sub v0.2d, v0.2d, v5.2d +; CHECK-SD-NEXT: sub v1.2d, v1.2d, v4.2d ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: s_v4i64: @@ -404,122 +379,13 @@ entry: define <16 x i8> @signOf_neon_scmp(<8 x i16> %s0_lo, <8 x i16> %s0_hi, <8 x i16> %s1_lo, <8 x i16> %s1_hi) { ; CHECK-SD-LABEL: signOf_neon_scmp: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: cmgt v5.8h, v0.8h, v2.8h -; CHECK-SD-NEXT: cmgt v2.8h, v2.8h, v0.8h ; CHECK-SD-NEXT: cmgt v4.8h, v1.8h, v3.8h ; CHECK-SD-NEXT: cmgt v1.8h, v3.8h, v1.8h -; CHECK-SD-NEXT: umov w8, v5.h[1] -; CHECK-SD-NEXT: umov w9, v2.h[1] -; CHECK-SD-NEXT: umov w10, v5.h[0] -; CHECK-SD-NEXT: umov w11, v2.h[0] -; CHECK-SD-NEXT: tst w8, #0xffff -; CHECK-SD-NEXT: cset w8, ne -; CHECK-SD-NEXT: tst w9, #0xffff -; CHECK-SD-NEXT: csinv w8, w8, wzr, eq -; CHECK-SD-NEXT: tst w10, #0xffff -; CHECK-SD-NEXT: umov w10, v5.h[2] -; CHECK-SD-NEXT: cset w9, ne -; CHECK-SD-NEXT: tst w11, #0xffff -; CHECK-SD-NEXT: umov w11, v2.h[2] -; CHECK-SD-NEXT: csinv w9, w9, wzr, eq -; CHECK-SD-NEXT: fmov s0, w9 -; CHECK-SD-NEXT: tst w10, #0xffff -; CHECK-SD-NEXT: umov w10, v2.h[3] -; CHECK-SD-NEXT: cset w9, ne -; CHECK-SD-NEXT: tst w11, #0xffff -; CHECK-SD-NEXT: mov v0.b[1], w8 -; CHECK-SD-NEXT: umov w8, v5.h[3] -; CHECK-SD-NEXT: csinv w9, w9, wzr, eq -; CHECK-SD-NEXT: mov v0.b[2], w9 -; CHECK-SD-NEXT: tst w8, #0xffff -; CHECK-SD-NEXT: umov w8, v5.h[4] -; CHECK-SD-NEXT: cset w9, ne -; CHECK-SD-NEXT: tst w10, #0xffff -; CHECK-SD-NEXT: umov w10, v2.h[4] -; CHECK-SD-NEXT: csinv w9, w9, wzr, eq -; CHECK-SD-NEXT: mov v0.b[3], w9 -; CHECK-SD-NEXT: tst w8, #0xffff -; CHECK-SD-NEXT: umov w8, v5.h[5] -; CHECK-SD-NEXT: cset w9, ne -; CHECK-SD-NEXT: tst w10, #0xffff -; CHECK-SD-NEXT: umov w10, v2.h[5] -; CHECK-SD-NEXT: csinv w9, w9, wzr, eq -; CHECK-SD-NEXT: mov v0.b[4], w9 -; CHECK-SD-NEXT: tst w8, #0xffff -; CHECK-SD-NEXT: umov w8, v5.h[6] -; CHECK-SD-NEXT: cset w9, ne -; CHECK-SD-NEXT: tst w10, #0xffff -; CHECK-SD-NEXT: umov w10, v2.h[6] -; CHECK-SD-NEXT: csinv w9, w9, wzr, eq -; CHECK-SD-NEXT: mov v0.b[5], w9 -; CHECK-SD-NEXT: umov w9, v5.h[7] -; CHECK-SD-NEXT: tst w8, #0xffff -; CHECK-SD-NEXT: cset w8, ne -; CHECK-SD-NEXT: tst w10, #0xffff -; CHECK-SD-NEXT: umov w10, v2.h[7] -; CHECK-SD-NEXT: csinv w8, w8, wzr, eq -; CHECK-SD-NEXT: mov v0.b[6], w8 -; CHECK-SD-NEXT: tst w9, #0xffff -; CHECK-SD-NEXT: umov w8, v4.h[0] -; CHECK-SD-NEXT: cset w9, ne -; CHECK-SD-NEXT: tst w10, #0xffff -; CHECK-SD-NEXT: umov w10, v1.h[0] -; CHECK-SD-NEXT: csinv w9, w9, wzr, eq -; CHECK-SD-NEXT: mov v0.b[7], w9 -; CHECK-SD-NEXT: tst w8, #0xffff -; CHECK-SD-NEXT: umov w8, v4.h[1] -; CHECK-SD-NEXT: cset w9, ne -; CHECK-SD-NEXT: tst w10, #0xffff -; CHECK-SD-NEXT: umov w10, v1.h[1] -; CHECK-SD-NEXT: csinv w9, w9, wzr, eq -; CHECK-SD-NEXT: mov v0.b[8], w9 -; CHECK-SD-NEXT: tst w8, #0xffff -; CHECK-SD-NEXT: umov w8, v4.h[2] -; CHECK-SD-NEXT: cset w9, ne -; CHECK-SD-NEXT: tst w10, #0xffff -; CHECK-SD-NEXT: umov w10, v1.h[2] -; CHECK-SD-NEXT: csinv w9, w9, wzr, eq -; CHECK-SD-NEXT: mov v0.b[9], w9 -; CHECK-SD-NEXT: tst w8, #0xffff -; CHECK-SD-NEXT: umov w8, v4.h[3] -; CHECK-SD-NEXT: cset w9, ne -; CHECK-SD-NEXT: tst w10, #0xffff -; CHECK-SD-NEXT: umov w10, v1.h[3] -; CHECK-SD-NEXT: csinv w9, w9, wzr, eq -; CHECK-SD-NEXT: mov v0.b[10], w9 -; CHECK-SD-NEXT: tst w8, #0xffff -; CHECK-SD-NEXT: umov w8, v4.h[4] -; CHECK-SD-NEXT: cset w9, ne -; CHECK-SD-NEXT: tst w10, #0xffff -; CHECK-SD-NEXT: umov w10, v1.h[4] -; CHECK-SD-NEXT: csinv w9, w9, wzr, eq -; CHECK-SD-NEXT: mov v0.b[11], w9 -; CHECK-SD-NEXT: tst w8, #0xffff -; CHECK-SD-NEXT: umov w8, v4.h[5] -; CHECK-SD-NEXT: cset w9, ne -; CHECK-SD-NEXT: tst w10, #0xffff -; CHECK-SD-NEXT: umov w10, v1.h[5] -; CHECK-SD-NEXT: csinv w9, w9, wzr, eq -; CHECK-SD-NEXT: mov v0.b[12], w9 -; CHECK-SD-NEXT: tst w8, #0xffff -; CHECK-SD-NEXT: umov w8, v4.h[6] -; CHECK-SD-NEXT: cset w9, ne -; CHECK-SD-NEXT: tst w10, #0xffff -; CHECK-SD-NEXT: umov w10, v1.h[6] -; CHECK-SD-NEXT: csinv w9, w9, wzr, eq -; CHECK-SD-NEXT: mov v0.b[13], w9 -; CHECK-SD-NEXT: tst w8, #0xffff -; CHECK-SD-NEXT: umov w8, v4.h[7] -; CHECK-SD-NEXT: cset w9, ne -; CHECK-SD-NEXT: tst w10, #0xffff -; CHECK-SD-NEXT: umov w10, v1.h[7] -; CHECK-SD-NEXT: csinv w9, w9, wzr, eq -; CHECK-SD-NEXT: mov v0.b[14], w9 -; CHECK-SD-NEXT: tst w8, #0xffff -; CHECK-SD-NEXT: cset w8, ne -; CHECK-SD-NEXT: tst w10, #0xffff -; CHECK-SD-NEXT: csinv w8, w8, wzr, eq -; CHECK-SD-NEXT: mov v0.b[15], w8 +; CHECK-SD-NEXT: cmgt v3.8h, v0.8h, v2.8h +; CHECK-SD-NEXT: cmgt v0.8h, v2.8h, v0.8h +; CHECK-SD-NEXT: sub v1.8h, v1.8h, v4.8h +; CHECK-SD-NEXT: sub v0.8h, v0.8h, v3.8h +; CHECK-SD-NEXT: uzp1 v0.16b, v0.16b, v1.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: signOf_neon_scmp: diff --git a/llvm/test/CodeGen/AArch64/ucmp.ll b/llvm/test/CodeGen/AArch64/ucmp.ll index 7e94cb6c103b5..125ac7f61a41e 100644 --- a/llvm/test/CodeGen/AArch64/ucmp.ll +++ b/llvm/test/CodeGen/AArch64/ucmp.ll @@ -176,11 +176,9 @@ define <1 x i64> @ucmp.1.64.65(<1 x i65> %x, <1 x i65> %y) { define <8 x i8> @u_v8i8(<8 x i8> %a, <8 x i8> %b) { ; CHECK-SD-LABEL: u_v8i8: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: movi v2.8b, #1 -; CHECK-SD-NEXT: cmhi v3.8b, v0.8b, v1.8b +; CHECK-SD-NEXT: cmhi v2.8b, v0.8b, v1.8b ; CHECK-SD-NEXT: cmhi v0.8b, v1.8b, v0.8b -; CHECK-SD-NEXT: and v1.8b, v3.8b, v2.8b -; CHECK-SD-NEXT: orr v0.8b, v1.8b, v0.8b +; CHECK-SD-NEXT: sub v0.8b, v0.8b, v2.8b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: u_v8i8: @@ -200,11 +198,9 @@ entry: define <16 x i8> @u_v16i8(<16 x i8> %a, <16 x i8> %b) { ; CHECK-SD-LABEL: u_v16i8: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: movi v2.16b, #1 -; CHECK-SD-NEXT: cmhi v3.16b, v0.16b, v1.16b +; CHECK-SD-NEXT: cmhi v2.16b, v0.16b, v1.16b ; CHECK-SD-NEXT: cmhi v0.16b, v1.16b, v0.16b -; CHECK-SD-NEXT: and v1.16b, v3.16b, v2.16b -; CHECK-SD-NEXT: orr v0.16b, v1.16b, v0.16b +; CHECK-SD-NEXT: sub v0.16b, v0.16b, v2.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: u_v16i8: @@ -224,11 +220,9 @@ entry: define <4 x i16> @u_v4i16(<4 x i16> %a, <4 x i16> %b) { ; CHECK-SD-LABEL: u_v4i16: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: movi v2.4h, #1 -; CHECK-SD-NEXT: cmhi v3.4h, v0.4h, v1.4h +; CHECK-SD-NEXT: cmhi v2.4h, v0.4h, v1.4h ; CHECK-SD-NEXT: cmhi v0.4h, v1.4h, v0.4h -; CHECK-SD-NEXT: and v1.8b, v3.8b, v2.8b -; CHECK-SD-NEXT: orr v0.8b, v1.8b, v0.8b +; CHECK-SD-NEXT: sub v0.4h, v0.4h, v2.4h ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: u_v4i16: @@ -248,11 +242,9 @@ entry: define <8 x i16> @u_v8i16(<8 x i16> %a, <8 x i16> %b) { ; CHECK-SD-LABEL: u_v8i16: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: movi v2.8h, #1 -; CHECK-SD-NEXT: cmhi v3.8h, v0.8h, v1.8h +; CHECK-SD-NEXT: cmhi v2.8h, v0.8h, v1.8h ; CHECK-SD-NEXT: cmhi v0.8h, v1.8h, v0.8h -; CHECK-SD-NEXT: and v1.16b, v3.16b, v2.16b -; CHECK-SD-NEXT: orr v0.16b, v1.16b, v0.16b +; CHECK-SD-NEXT: sub v0.8h, v0.8h, v2.8h ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: u_v8i16: @@ -272,15 +264,12 @@ entry: define <16 x i16> @u_v16i16(<16 x i16> %a, <16 x i16> %b) { ; CHECK-SD-LABEL: u_v16i16: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: movi v4.8h, #1 +; CHECK-SD-NEXT: cmhi v4.8h, v1.8h, v3.8h ; CHECK-SD-NEXT: cmhi v5.8h, v0.8h, v2.8h -; CHECK-SD-NEXT: cmhi v6.8h, v1.8h, v3.8h ; CHECK-SD-NEXT: cmhi v0.8h, v2.8h, v0.8h ; CHECK-SD-NEXT: cmhi v1.8h, v3.8h, v1.8h -; CHECK-SD-NEXT: and v2.16b, v5.16b, v4.16b -; CHECK-SD-NEXT: and v3.16b, v6.16b, v4.16b -; CHECK-SD-NEXT: orr v0.16b, v2.16b, v0.16b -; CHECK-SD-NEXT: orr v1.16b, v3.16b, v1.16b +; CHECK-SD-NEXT: sub v0.8h, v0.8h, v5.8h +; CHECK-SD-NEXT: sub v1.8h, v1.8h, v4.8h ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: u_v16i16: @@ -304,11 +293,9 @@ entry: define <2 x i32> @u_v2i32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-SD-LABEL: u_v2i32: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: movi v2.2s, #1 -; CHECK-SD-NEXT: cmhi v3.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: cmhi v2.2s, v0.2s, v1.2s ; CHECK-SD-NEXT: cmhi v0.2s, v1.2s, v0.2s -; CHECK-SD-NEXT: and v1.8b, v3.8b, v2.8b -; CHECK-SD-NEXT: orr v0.8b, v1.8b, v0.8b +; CHECK-SD-NEXT: sub v0.2s, v0.2s, v2.2s ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: u_v2i32: @@ -328,11 +315,9 @@ entry: define <4 x i32> @u_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-SD-LABEL: u_v4i32: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: movi v2.4s, #1 -; CHECK-SD-NEXT: cmhi v3.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: cmhi v2.4s, v0.4s, v1.4s ; CHECK-SD-NEXT: cmhi v0.4s, v1.4s, v0.4s -; CHECK-SD-NEXT: and v1.16b, v3.16b, v2.16b -; CHECK-SD-NEXT: orr v0.16b, v1.16b, v0.16b +; CHECK-SD-NEXT: sub v0.4s, v0.4s, v2.4s ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: u_v4i32: @@ -352,15 +337,12 @@ entry: define <8 x i32> @u_v8i32(<8 x i32> %a, <8 x i32> %b) { ; CHECK-SD-LABEL: u_v8i32: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: movi v4.4s, #1 +; CHECK-SD-NEXT: cmhi v4.4s, v1.4s, v3.4s ; CHECK-SD-NEXT: cmhi v5.4s, v0.4s, v2.4s -; CHECK-SD-NEXT: cmhi v6.4s, v1.4s, v3.4s ; CHECK-SD-NEXT: cmhi v0.4s, v2.4s, v0.4s ; CHECK-SD-NEXT: cmhi v1.4s, v3.4s, v1.4s -; CHECK-SD-NEXT: and v2.16b, v5.16b, v4.16b -; CHECK-SD-NEXT: and v3.16b, v6.16b, v4.16b -; CHECK-SD-NEXT: orr v0.16b, v2.16b, v0.16b -; CHECK-SD-NEXT: orr v1.16b, v3.16b, v1.16b +; CHECK-SD-NEXT: sub v0.4s, v0.4s, v5.4s +; CHECK-SD-NEXT: sub v1.4s, v1.4s, v4.4s ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: u_v8i32: @@ -384,12 +366,9 @@ entry: define <2 x i64> @u_v2i64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-SD-LABEL: u_v2i64: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: mov w8, #1 // =0x1 ; CHECK-SD-NEXT: cmhi v2.2d, v0.2d, v1.2d ; CHECK-SD-NEXT: cmhi v0.2d, v1.2d, v0.2d -; CHECK-SD-NEXT: dup v3.2d, x8 -; CHECK-SD-NEXT: and v1.16b, v2.16b, v3.16b -; CHECK-SD-NEXT: orr v0.16b, v1.16b, v0.16b +; CHECK-SD-NEXT: sub v0.2d, v0.2d, v2.2d ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: u_v2i64: @@ -410,16 +389,12 @@ entry: define <4 x i64> @u_v4i64(<4 x i64> %a, <4 x i64> %b) { ; CHECK-SD-LABEL: u_v4i64: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: mov w8, #1 // =0x1 -; CHECK-SD-NEXT: cmhi v4.2d, v0.2d, v2.2d -; CHECK-SD-NEXT: cmhi v6.2d, v1.2d, v3.2d -; CHECK-SD-NEXT: dup v5.2d, x8 +; CHECK-SD-NEXT: cmhi v4.2d, v1.2d, v3.2d +; CHECK-SD-NEXT: cmhi v5.2d, v0.2d, v2.2d ; CHECK-SD-NEXT: cmhi v0.2d, v2.2d, v0.2d ; CHECK-SD-NEXT: cmhi v1.2d, v3.2d, v1.2d -; CHECK-SD-NEXT: and v2.16b, v4.16b, v5.16b -; CHECK-SD-NEXT: and v3.16b, v6.16b, v5.16b -; CHECK-SD-NEXT: orr v0.16b, v2.16b, v0.16b -; CHECK-SD-NEXT: orr v1.16b, v3.16b, v1.16b +; CHECK-SD-NEXT: sub v0.2d, v0.2d, v5.2d +; CHECK-SD-NEXT: sub v1.2d, v1.2d, v4.2d ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: u_v4i64: @@ -444,122 +419,13 @@ entry: define <16 x i8> @signOf_neon(<8 x i16> %s0_lo, <8 x i16> %s0_hi, <8 x i16> %s1_lo, <8 x i16> %s1_hi) { ; CHECK-SD-LABEL: signOf_neon: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: cmhi v5.8h, v0.8h, v2.8h -; CHECK-SD-NEXT: cmhi v2.8h, v2.8h, v0.8h ; CHECK-SD-NEXT: cmhi v4.8h, v1.8h, v3.8h ; CHECK-SD-NEXT: cmhi v1.8h, v3.8h, v1.8h -; CHECK-SD-NEXT: umov w8, v5.h[1] -; CHECK-SD-NEXT: umov w9, v2.h[1] -; CHECK-SD-NEXT: umov w10, v5.h[0] -; CHECK-SD-NEXT: umov w11, v2.h[0] -; CHECK-SD-NEXT: tst w8, #0xffff -; CHECK-SD-NEXT: cset w8, ne -; CHECK-SD-NEXT: tst w9, #0xffff -; CHECK-SD-NEXT: csinv w8, w8, wzr, eq -; CHECK-SD-NEXT: tst w10, #0xffff -; CHECK-SD-NEXT: umov w10, v5.h[2] -; CHECK-SD-NEXT: cset w9, ne -; CHECK-SD-NEXT: tst w11, #0xffff -; CHECK-SD-NEXT: umov w11, v2.h[2] -; CHECK-SD-NEXT: csinv w9, w9, wzr, eq -; CHECK-SD-NEXT: fmov s0, w9 -; CHECK-SD-NEXT: tst w10, #0xffff -; CHECK-SD-NEXT: umov w10, v2.h[3] -; CHECK-SD-NEXT: cset w9, ne -; CHECK-SD-NEXT: tst w11, #0xffff -; CHECK-SD-NEXT: mov v0.b[1], w8 -; CHECK-SD-NEXT: umov w8, v5.h[3] -; CHECK-SD-NEXT: csinv w9, w9, wzr, eq -; CHECK-SD-NEXT: mov v0.b[2], w9 -; CHECK-SD-NEXT: tst w8, #0xffff -; CHECK-SD-NEXT: umov w8, v5.h[4] -; CHECK-SD-NEXT: cset w9, ne -; CHECK-SD-NEXT: tst w10, #0xffff -; CHECK-SD-NEXT: umov w10, v2.h[4] -; CHECK-SD-NEXT: csinv w9, w9, wzr, eq -; CHECK-SD-NEXT: mov v0.b[3], w9 -; CHECK-SD-NEXT: tst w8, #0xffff -; CHECK-SD-NEXT: umov w8, v5.h[5] -; CHECK-SD-NEXT: cset w9, ne -; CHECK-SD-NEXT: tst w10, #0xffff -; CHECK-SD-NEXT: umov w10, v2.h[5] -; CHECK-SD-NEXT: csinv w9, w9, wzr, eq -; CHECK-SD-NEXT: mov v0.b[4], w9 -; CHECK-SD-NEXT: tst w8, #0xffff -; CHECK-SD-NEXT: umov w8, v5.h[6] -; CHECK-SD-NEXT: cset w9, ne -; CHECK-SD-NEXT: tst w10, #0xffff -; CHECK-SD-NEXT: umov w10, v2.h[6] -; CHECK-SD-NEXT: csinv w9, w9, wzr, eq -; CHECK-SD-NEXT: mov v0.b[5], w9 -; CHECK-SD-NEXT: umov w9, v5.h[7] -; CHECK-SD-NEXT: tst w8, #0xffff -; CHECK-SD-NEXT: cset w8, ne -; CHECK-SD-NEXT: tst w10, #0xffff -; CHECK-SD-NEXT: umov w10, v2.h[7] -; CHECK-SD-NEXT: csinv w8, w8, wzr, eq -; CHECK-SD-NEXT: mov v0.b[6], w8 -; CHECK-SD-NEXT: tst w9, #0xffff -; CHECK-SD-NEXT: umov w8, v4.h[0] -; CHECK-SD-NEXT: cset w9, ne -; CHECK-SD-NEXT: tst w10, #0xffff -; CHECK-SD-NEXT: umov w10, v1.h[0] -; CHECK-SD-NEXT: csinv w9, w9, wzr, eq -; CHECK-SD-NEXT: mov v0.b[7], w9 -; CHECK-SD-NEXT: tst w8, #0xffff -; CHECK-SD-NEXT: umov w8, v4.h[1] -; CHECK-SD-NEXT: cset w9, ne -; CHECK-SD-NEXT: tst w10, #0xffff -; CHECK-SD-NEXT: umov w10, v1.h[1] -; CHECK-SD-NEXT: csinv w9, w9, wzr, eq -; CHECK-SD-NEXT: mov v0.b[8], w9 -; CHECK-SD-NEXT: tst w8, #0xffff -; CHECK-SD-NEXT: umov w8, v4.h[2] -; CHECK-SD-NEXT: cset w9, ne -; CHECK-SD-NEXT: tst w10, #0xffff -; CHECK-SD-NEXT: umov w10, v1.h[2] -; CHECK-SD-NEXT: csinv w9, w9, wzr, eq -; CHECK-SD-NEXT: mov v0.b[9], w9 -; CHECK-SD-NEXT: tst w8, #0xffff -; CHECK-SD-NEXT: umov w8, v4.h[3] -; CHECK-SD-NEXT: cset w9, ne -; CHECK-SD-NEXT: tst w10, #0xffff -; CHECK-SD-NEXT: umov w10, v1.h[3] -; CHECK-SD-NEXT: csinv w9, w9, wzr, eq -; CHECK-SD-NEXT: mov v0.b[10], w9 -; CHECK-SD-NEXT: tst w8, #0xffff -; CHECK-SD-NEXT: umov w8, v4.h[4] -; CHECK-SD-NEXT: cset w9, ne -; CHECK-SD-NEXT: tst w10, #0xffff -; CHECK-SD-NEXT: umov w10, v1.h[4] -; CHECK-SD-NEXT: csinv w9, w9, wzr, eq -; CHECK-SD-NEXT: mov v0.b[11], w9 -; CHECK-SD-NEXT: tst w8, #0xffff -; CHECK-SD-NEXT: umov w8, v4.h[5] -; CHECK-SD-NEXT: cset w9, ne -; CHECK-SD-NEXT: tst w10, #0xffff -; CHECK-SD-NEXT: umov w10, v1.h[5] -; CHECK-SD-NEXT: csinv w9, w9, wzr, eq -; CHECK-SD-NEXT: mov v0.b[12], w9 -; CHECK-SD-NEXT: tst w8, #0xffff -; CHECK-SD-NEXT: umov w8, v4.h[6] -; CHECK-SD-NEXT: cset w9, ne -; CHECK-SD-NEXT: tst w10, #0xffff -; CHECK-SD-NEXT: umov w10, v1.h[6] -; CHECK-SD-NEXT: csinv w9, w9, wzr, eq -; CHECK-SD-NEXT: mov v0.b[13], w9 -; CHECK-SD-NEXT: tst w8, #0xffff -; CHECK-SD-NEXT: umov w8, v4.h[7] -; CHECK-SD-NEXT: cset w9, ne -; CHECK-SD-NEXT: tst w10, #0xffff -; CHECK-SD-NEXT: umov w10, v1.h[7] -; CHECK-SD-NEXT: csinv w9, w9, wzr, eq -; CHECK-SD-NEXT: mov v0.b[14], w9 -; CHECK-SD-NEXT: tst w8, #0xffff -; CHECK-SD-NEXT: cset w8, ne -; CHECK-SD-NEXT: tst w10, #0xffff -; CHECK-SD-NEXT: csinv w8, w8, wzr, eq -; CHECK-SD-NEXT: mov v0.b[15], w8 +; CHECK-SD-NEXT: cmhi v3.8h, v0.8h, v2.8h +; CHECK-SD-NEXT: cmhi v0.8h, v2.8h, v0.8h +; CHECK-SD-NEXT: sub v1.8h, v1.8h, v4.8h +; CHECK-SD-NEXT: sub v0.8h, v0.8h, v3.8h +; CHECK-SD-NEXT: uzp1 v0.16b, v0.16b, v1.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: signOf_neon: