Skip to content

Commit a607f64

Browse files
committed
Revert "[LV] Print remark when loop cannot be vectorized due to invalid costs."
This reverts commit efaf309. This reverts commit dc7bdc1. Reverting patches due to buildbot failures.
1 parent 05eb59e commit a607f64

File tree

2 files changed

+11
-138
lines changed

2 files changed

+11
-138
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

+4-69
Original file line number | Diff line number | Diff line change
@@ -1676,13 +1676,8 @@ class LoopVectorizationCostModel {
16761676
/// Returns the expected execution cost. The unit of the cost does
16771677
/// not matter because we use the 'cost' units to compare different
16781678
/// vector widths. The cost that is returned is *not* normalized by
1679-
/// the factor width. If \p Invalid is not nullptr, this function
1680-
/// will add a pair(Instruction*, ElementCount) to \p Invalid for
1681-
/// each instruction that has an Invalid cost for the given VF.
1682-
using InstructionVFPair = std::pair<Instruction *, ElementCount>;
1683-
VectorizationCostTy
1684-
expectedCost(ElementCount VF,
1685-
SmallVectorImpl<InstructionVFPair> *Invalid = nullptr);
1679+
/// the factor width.
1680+
VectorizationCostTy expectedCost(ElementCount VF);
16861681

16871682
/// Returns the execution time cost of an instruction for a given vector
16881683
/// width. Vector width of one means scalar.
@@ -6080,13 +6075,12 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
60806075
ChosenFactor.Cost = InstructionCost::getMax();
60816076
}
60826077

6083-
SmallVector<InstructionVFPair> InvalidCosts;
60846078
for (const auto &i : VFCandidates) {
60856079
// The cost for scalar VF=1 is already calculated, so ignore it.
60866080
if (i.isScalar())
60876081
continue;
60886082

6089-
VectorizationCostTy C = expectedCost(i, &InvalidCosts);
6083+
VectorizationCostTy C = expectedCost(i);
60906084
VectorizationFactor Candidate(i, C.first);
60916085
LLVM_DEBUG(
60926086
dbgs() << "LV: Vector loop of width " << i << " costs: "
@@ -6109,60 +6103,6 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
61096103
ChosenFactor = Candidate;
61106104
}
61116105

6112-
// Emit a report of VFs with invalid costs in the loop.
6113-
if (!InvalidCosts.empty()) {
6114-
// Sort/group per instruction (lexicographically within basic blocks).
6115-
llvm::sort(InvalidCosts, [](InstructionVFPair &A, InstructionVFPair &B) {
6116-
const Instruction *AI = A.first, *BI = B.first;
6117-
if (AI->getParent() != BI->getParent())
6118-
return AI->getParent() < BI->getParent();
6119-
ElementCountComparator ECC;
6120-
if (AI != BI)
6121-
return AI->comesBefore(BI);
6122-
return ECC(A.second, B.second);
6123-
});
6124-
6125-
// For a list of ordered instruction-vf pairs:
6126-
// [(load, vf1), (load, vf2), (store, vf1)]
6127-
// Group the instructions together to emit separate remarks for:
6128-
// load (vf1, vf2)
6129-
// store (vf1)
6130-
auto Tail = ArrayRef<InstructionVFPair>(InvalidCosts);
6131-
auto Subset = ArrayRef<InstructionVFPair>();
6132-
do {
6133-
if (Subset.empty())
6134-
Subset = Tail.take_front(1);
6135-
6136-
Instruction *I = Subset.front().first;
6137-
6138-
// If the next instruction is different, or if there are no other pairs,
6139-
// emit a remark for the collated subset. e.g.
6140-
// [(load, vf1), (load, vf2))]
6141-
// to emit:
6142-
// remark: invalid costs for 'load' at VF=(vf, vf2)
6143-
if (Subset == Tail || Tail[Subset.size()].first != I) {
6144-
std::string OutString;
6145-
raw_string_ostream OS(OutString);
6146-
assert(!Subset.empty() && "Unexpected empty range");
6147-
OS << "Instruction with invalid costs prevented vectorization at VF=(";
6148-
for (auto &Pair : Subset)
6149-
OS << (Pair.second == Subset.front().second ? "" : ", ")
6150-
<< Pair.second;
6151-
OS << "):";
6152-
if (auto *CI = dyn_cast<CallInst>(I))
6153-
OS << " call to " << CI->getCalledFunction()->getName();
6154-
else
6155-
OS << " " << I->getOpcodeName();
6156-
OS.flush();
6157-
reportVectorizationInfo(OutString, "InvalidCost", ORE, TheLoop, I);
6158-
Tail = Tail.drop_front(Subset.size());
6159-
Subset = {};
6160-
} else
6161-
// Grow the subset by one element
6162-
Subset = Tail.take_front(Subset.size() + 1);
6163-
} while (!Tail.empty());
6164-
}
6165-
61666106
if (!EnableCondStoresVectorization && NumPredStores) {
61676107
reportVectorizationFailure("There are conditional stores.",
61686108
"store that is conditionally executed prevents vectorization",
@@ -6944,8 +6884,7 @@ int LoopVectorizationCostModel::computePredInstDiscount(
69446884
}
69456885

69466886
LoopVectorizationCostModel::VectorizationCostTy
6947-
LoopVectorizationCostModel::expectedCost(
6948-
ElementCount VF, SmallVectorImpl<InstructionVFPair> *Invalid) {
6887+
LoopVectorizationCostModel::expectedCost(ElementCount VF) {
69496888
VectorizationCostTy Cost;
69506889

69516890
// For each block.
@@ -6965,10 +6904,6 @@ LoopVectorizationCostModel::expectedCost(
69656904
if (ForceTargetInstructionCost.getNumOccurrences() > 0)
69666905
C.first = InstructionCost(ForceTargetInstructionCost);
69676906

6968-
// Keep a list of instructions with invalid costs.
6969-
if (Invalid && !C.first.isValid())
6970-
Invalid->emplace_back(&I, VF);
6971-
69726907
BlockCost.first += C.first;
69736908
BlockCost.second |= C.second;
69746909
LLVM_DEBUG(dbgs() << "LV: Found an estimated cost of " << C.first

llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll

+7-69
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,4 @@
1-
; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -instcombine -mattr=+sve -mtriple aarch64-unknown-linux-gnu -scalable-vectorization=on \
2-
; RUN: -pass-remarks-missed=loop-vectorize < %s 2>%t | FileCheck %s
3-
; RUN: cat %t | FileCheck %s --check-prefix=CHECK-REMARKS
1+
; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -instcombine -mattr=+sve -mtriple aarch64-unknown-linux-gnu -scalable-vectorization=on < %s | FileCheck %s
42

53
define void @vec_load(i64 %N, double* nocapture %a, double* nocapture readonly %b) {
64
; CHECK-LABEL: @vec_load
@@ -97,10 +95,6 @@ for.end:
9795
ret void
9896
}
9997

100-
; CHECK-REMARKS: UserVF ignored because of invalid costs.
101-
; CHECK-REMARKS-NEXT: t.c:3:10: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): load
102-
; CHECK-REMARKS-NEXT: t.c:3:20: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): call to llvm.sin.f32
103-
; CHECK-REMARKS-NEXT: t.c:3:30: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): store
10498
define void @vec_sin_no_mapping(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %n) {
10599
; CHECK: @vec_sin_no_mapping
106100
; CHECK: call fast <2 x float> @llvm.sin.v2f32
@@ -111,46 +105,10 @@ entry:
111105
for.body: ; preds = %entry, %for.body
112106
%i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
113107
%arrayidx = getelementptr inbounds float, float* %src, i64 %i.07
114-
%0 = load float, float* %arrayidx, align 4, !dbg !11
115-
%1 = tail call fast float @llvm.sin.f32(float %0), !dbg !12
116-
%arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07
117-
store float %1, float* %arrayidx1, align 4, !dbg !13
118-
%inc = add nuw nsw i64 %i.07, 1
119-
%exitcond.not = icmp eq i64 %inc, %n
120-
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !1
121-
122-
for.cond.cleanup: ; preds = %for.body
123-
ret void
124-
}
125-
126-
; CHECK-REMARKS: UserVF ignored because of invalid costs.
127-
; CHECK-REMARKS-NEXT: t.c:3:10: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): load
128-
; CHECK-REMARKS-NEXT: t.c:3:40: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): store
129-
; CHECK-REMARKS-NEXT: t.c:3:20: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): call to llvm.sin.f32
130-
; CHECK-REMARKS-NEXT: t.c:3:30: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): call to llvm.sin.f32
131-
define void @vec_sin_no_mapping_ite(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %n) {
132-
; CHECK: @vec_sin_no_mapping_ite
133-
; CHECK-NOT: <vscale x
134-
; CHECK: ret
135-
entry:
136-
br label %for.body
137-
138-
for.body: ; preds = %entry, %if.end
139-
%i.07 = phi i64 [ %inc, %if.end ], [ 0, %entry ]
140-
%arrayidx = getelementptr inbounds float, float* %src, i64 %i.07
141-
%0 = load float, float* %arrayidx, align 4, !dbg !11
142-
%cmp = fcmp ugt float %0, 0.0000
143-
br i1 %cmp, label %if.then, label %if.else
144-
if.then:
145-
%1 = tail call fast float @llvm.sin.f32(float %0), !dbg !12
146-
br label %if.end
147-
if.else:
148-
%2 = tail call fast float @llvm.sin.f32(float 0.0), !dbg !13
149-
br label %if.end
150-
if.end:
151-
%3 = phi float [%1, %if.then], [%2, %if.else]
108+
%0 = load float, float* %arrayidx, align 4
109+
%1 = tail call fast float @llvm.sin.f32(float %0)
152110
%arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07
153-
store float %3, float* %arrayidx1, align 4, !dbg !14
111+
store float %1, float* %arrayidx1, align 4
154112
%inc = add nuw nsw i64 %i.07, 1
155113
%exitcond.not = icmp eq i64 %inc, %n
156114
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !1
@@ -159,10 +117,6 @@ for.cond.cleanup: ; preds = %for.body
159117
ret void
160118
}
161119

162-
; CHECK-REMARKS: UserVF ignored because of invalid costs.
163-
; CHECK-REMARKS-NEXT: t.c:3:10: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): load
164-
; CHECK-REMARKS-NEXT: t.c:3:20: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): call to llvm.sin.f32
165-
; CHECK-REMARKS-NEXT: t.c:3:30: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): store
166120
define void @vec_sin_fixed_mapping(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %n) {
167121
; CHECK: @vec_sin_fixed_mapping
168122
; CHECK: call fast <2 x float> @llvm.sin.v2f32
@@ -173,10 +127,10 @@ entry:
173127
for.body: ; preds = %entry, %for.body
174128
%i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
175129
%arrayidx = getelementptr inbounds float, float* %src, i64 %i.07
176-
%0 = load float, float* %arrayidx, align 4, !dbg !11
177-
%1 = tail call fast float @llvm.sin.f32(float %0) #3, !dbg !12
130+
%0 = load float, float* %arrayidx, align 4
131+
%1 = tail call fast float @llvm.sin.f32(float %0) #3
178132
%arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07
179-
store float %1, float* %arrayidx1, align 4, !dbg !13
133+
store float %1, float* %arrayidx1, align 4
180134
%inc = add nuw nsw i64 %i.07, 1
181135
%exitcond.not = icmp eq i64 %inc, %n
182136
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !1
@@ -229,19 +183,3 @@ attributes #3 = { "vector-function-abi-variant"="_ZGV_LLVM_N2v_llvm.sin.f64(sin_
229183
!1 = distinct !{!1, !2, !3}
230184
!2 = !{!"llvm.loop.vectorize.width", i32 2}
231185
!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
232-
233-
!llvm.dbg.cu = !{!4}
234-
!llvm.module.flags = !{!7}
235-
!llvm.ident = !{!8}
236-
237-
!4 = distinct !DICompileUnit(language: DW_LANG_C99, file: !5, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !6, splitDebugInlining: false, nameTableKind: None)
238-
!5 = !DIFile(filename: "t.c", directory: "somedir")
239-
!6 = !{}
240-
!7 = !{i32 2, !"Debug Info Version", i32 3}
241-
!8 = !{!"clang"}
242-
!9 = distinct !DISubprogram(name: "foo", scope: !5, file: !5, line: 2, type: !10, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6)
243-
!10 = !DISubroutineType(types: !6)
244-
!11 = !DILocation(line: 3, column: 10, scope: !9)
245-
!12 = !DILocation(line: 3, column: 20, scope: !9)
246-
!13 = !DILocation(line: 3, column: 30, scope: !9)
247-
!14 = !DILocation(line: 3, column: 40, scope: !9)

0 commit comments

Comments
 (0)