
Commit 77fccb3

[AArch64] Replace AND with LSL#2 for LDR target (llvm#34101) (llvm#89531)
Currently, the process of replacing bitwise operations consisting of `LSR`/`LSL` with `AND` is performed by `DAGCombiner`. However, in certain cases the `AND` generated by this process can itself be removed. Consider the following case:

```
lsr x8, x8, #56
and x8, x8, #0xfc
ldr w0, [x2, x8]
ret
```

Here we can remove the `AND` by changing the `LDR` addressing mode to `[X2, X8, LSL #2]` and changing the right-shift amount from 56 to 58. After the change:

```
lsr x8, x8, #58
ldr w0, [x2, x8, lsl #2]
ret
```

This patch checks whether such a shift + `AND` feeding a load address can be optimized this way, and performs the optimization when it can.
1 parent 43b8885 commit 77fccb3
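For illustration, a minimal C source pattern that lowers to the shape above (a hypothetical example, not part of the commit): a 32-bit table indexed by the top six bits of a 64-bit value.

```c
#include <stdint.h>

/* Hypothetical example: table[x >> 58] loads a 4-byte element, so the
 * address is table + ((x >> 58) << 2). Before this patch, DAGCombiner
 * rewrote (shl (srl x, 58), 2) as (and (srl x, 56), 0xfc), forcing a
 * separate AND instruction; with it, the shift pair is kept and the
 * scaling folds into the LDR addressing mode as [x2, x8, lsl #2]. */
uint32_t lookup(const uint32_t *table, uint64_t x) {
  return table[x >> 58];
}
```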

File tree

2 files changed: +155 -0 lines

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

+17
@@ -18023,6 +18023,23 @@ bool AArch64TargetLowering::shouldFoldConstantShiftPairToMask(
     return (!C1 || !C2 || C1->getZExtValue() >= C2->getZExtValue());
   }
 
+  // We do not need to fold when this shift is used in a specific load case:
+  //   (ldr x, (add x, (shl (srl x, c1) 2)))
+  if (N->getOpcode() == ISD::SHL && N->hasOneUse()) {
+    if (auto C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+      unsigned ShlAmt = C2->getZExtValue();
+      if (auto ShouldADD = *N->use_begin();
+          ShouldADD->getOpcode() == ISD::ADD && ShouldADD->hasOneUse()) {
+        if (auto ShouldLOAD = dyn_cast<LoadSDNode>(*ShouldADD->use_begin())) {
+          unsigned ByteVT = ShouldLOAD->getMemoryVT().getSizeInBits() / 8;
+          if ((1ULL << ShlAmt) == ByteVT &&
+              isIndexedLoadLegal(ISD::PRE_INC, ShouldLOAD->getMemoryVT()))
+            return false;
+        }
+      }
+    }
+  }
+
   return true;
 }
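The guard only fires when the shift amount matches the load width exactly: for a 4-byte `ldr w`, `ShlAmt` must be 2, since `1 << 2 == 4`. Declining the fold is safe because both forms compute the same address; a quick sanity check of that identity as a standalone C sketch (hypothetical, not part of the patch):

```c
#include <assert.h>
#include <stdint.h>

int main(void) {
  /* shl(srl(x, 58), 2) versus and(srl(x, 56), 0xfc): both isolate
     bits 58..63 of x and place them at bit positions 2..7, so the
     DAGCombiner fold is value-preserving and keeping the shl form
     is purely a codegen (addressing-mode) choice. */
  uint64_t x = 0x8f3bd5e902a17c46ull;
  assert(((x >> 58) << 2) == ((x >> 56) & 0xfcull));
  return 0;
}
```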

(new test file, +138)
@@ -0,0 +1,138 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
+;
+
+define i16 @load16_shr63(i64 %a, i64 %b, ptr %table) {
+; CHECK-LABEL: load16_shr63:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mul x8, x1, x0
+; CHECK-NEXT:    lsr x8, x8, #63
+; CHECK-NEXT:    ldrh w0, [x2, x8, lsl #1]
+; CHECK-NEXT:    ret
+entry:
+  %mul = mul i64 %b, %a
+  %shr = lshr i64 %mul, 63
+  %arrayidx = getelementptr inbounds i16, ptr %table, i64 %shr
+  %0 = load i16, ptr %arrayidx, align 2
+  ret i16 %0
+}
+
+define i16 @load16_shr2(i64 %a, i64 %b, ptr %table) {
+; CHECK-LABEL: load16_shr2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mul x8, x1, x0
+; CHECK-NEXT:    lsr x8, x8, #2
+; CHECK-NEXT:    ldrh w0, [x2, x8, lsl #1]
+; CHECK-NEXT:    ret
+entry:
+  %mul = mul i64 %b, %a
+  %shr = lshr i64 %mul, 2
+  %arrayidx = getelementptr inbounds i16, ptr %table, i64 %shr
+  %0 = load i16, ptr %arrayidx, align 2
+  ret i16 %0
+}
+
+define i16 @load16_shr1(i64 %a, i64 %b, ptr %table) {
+; CHECK-LABEL: load16_shr1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mul x8, x1, x0
+; CHECK-NEXT:    lsr x8, x8, #1
+; CHECK-NEXT:    ldrh w0, [x2, x8, lsl #1]
+; CHECK-NEXT:    ret
+entry:
+  %mul = mul i64 %b, %a
+  %shr = lshr i64 %mul, 1
+  %arrayidx = getelementptr inbounds i16, ptr %table, i64 %shr
+  %0 = load i16, ptr %arrayidx, align 2
+  ret i16 %0
+}
+
+define i32 @load32_shr63(i64 %a, i64 %b, ptr %table) {
+; CHECK-LABEL: load32_shr63:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mul x8, x1, x0
+; CHECK-NEXT:    lsr x8, x8, #63
+; CHECK-NEXT:    ldr w0, [x2, x8, lsl #2]
+; CHECK-NEXT:    ret
+entry:
+  %mul = mul i64 %b, %a
+  %shr = lshr i64 %mul, 63
+  %arrayidx = getelementptr inbounds i32, ptr %table, i64 %shr
+  %0 = load i32, ptr %arrayidx, align 4
+  ret i32 %0
+}
+
+define i32 @load32_shr2(i64 %a, i64 %b, ptr %table) {
+; CHECK-LABEL: load32_shr2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mul x8, x1, x0
+; CHECK-NEXT:    lsr x8, x8, #2
+; CHECK-NEXT:    ldr w0, [x2, x8, lsl #2]
+; CHECK-NEXT:    ret
+entry:
+  %mul = mul i64 %b, %a
+  %shr = lshr i64 %mul, 2
+  %arrayidx = getelementptr inbounds i32, ptr %table, i64 %shr
+  %0 = load i32, ptr %arrayidx, align 4
+  ret i32 %0
+}
+
+define i32 @load32_shr1(i64 %a, i64 %b, ptr %table) {
+; CHECK-LABEL: load32_shr1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mul x8, x1, x0
+; CHECK-NEXT:    lsr x8, x8, #1
+; CHECK-NEXT:    ldr w0, [x2, x8, lsl #2]
+; CHECK-NEXT:    ret
+entry:
+  %mul = mul i64 %b, %a
+  %shr = lshr i64 %mul, 1
+  %arrayidx = getelementptr inbounds i32, ptr %table, i64 %shr
+  %0 = load i32, ptr %arrayidx, align 4
+  ret i32 %0
+}
+
+define i64 @load64_shr63(i64 %a, i64 %b, ptr %table) {
+; CHECK-LABEL: load64_shr63:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mul x8, x1, x0
+; CHECK-NEXT:    lsr x8, x8, #63
+; CHECK-NEXT:    ldr x0, [x2, x8, lsl #3]
+; CHECK-NEXT:    ret
+entry:
+  %mul = mul i64 %b, %a
+  %shr = lshr i64 %mul, 63
+  %arrayidx = getelementptr inbounds i64, ptr %table, i64 %shr
+  %0 = load i64, ptr %arrayidx, align 8
+  ret i64 %0
+}
+
+define i64 @load64_shr2(i64 %a, i64 %b, ptr %table) {
+; CHECK-LABEL: load64_shr2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mul x8, x1, x0
+; CHECK-NEXT:    lsr x8, x8, #2
+; CHECK-NEXT:    ldr x0, [x2, x8, lsl #3]
+; CHECK-NEXT:    ret
+entry:
+  %mul = mul i64 %b, %a
+  %shr = lshr i64 %mul, 2
+  %arrayidx = getelementptr inbounds i64, ptr %table, i64 %shr
+  %0 = load i64, ptr %arrayidx, align 8
+  ret i64 %0
+}
+
+define i64 @load64_shr1(i64 %a, i64 %b, ptr %table) {
+; CHECK-LABEL: load64_shr1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mul x8, x1, x0
+; CHECK-NEXT:    lsr x8, x8, #1
+; CHECK-NEXT:    ldr x0, [x2, x8, lsl #3]
+; CHECK-NEXT:    ret
+entry:
+  %mul = mul i64 %b, %a
+  %shr = lshr i64 %mul, 1
+  %arrayidx = getelementptr inbounds i64, ptr %table, i64 %shr
+  %0 = load i64, ptr %arrayidx, align 8
+  ret i64 %0
+}
