Skip to content

Commit ffe86e3

Browse files
committed
[AMDGPU] Update SIInsertHardClauses for GFX11
Changes for GFX11:
- Clauses may not mix instructions of different types, and there are more types. For example, image instructions with and without a sampler are now different types.
- The max size of a clause is explicitly documented as 63 instructions. Previously it was implicitly assumed to be 64. This is such a tiny difference that it does not seem worth making it conditional on the subtarget.
- It can be beneficial to clause stores as well as loads.

Differential Revision: https://reviews.llvm.org/D127391
1 parent 87b4677 commit ffe86e3

File tree

2 files changed

+299
-17
lines changed

2 files changed

+299
-17
lines changed

llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp

+64-12
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,39 @@ using namespace llvm;
4343

4444
namespace {
4545

46+
// A clause length of 64 instructions could be encoded in the s_clause
47+
// instruction, but the hardware documentation (at least for GFX11) says that
48+
// 63 is the maximum allowed.
49+
constexpr unsigned MaxInstructionsInClause = 63;
50+
4651
enum HardClauseType {
52+
// For GFX10:
53+
4754
// Texture, buffer, global or scratch memory instructions.
4855
HARDCLAUSE_VMEM,
4956
// Flat (not global or scratch) memory instructions.
5057
HARDCLAUSE_FLAT,
58+
59+
// For GFX11:
60+
61+
// Texture memory instructions.
62+
HARDCLAUSE_MIMG_LOAD,
63+
HARDCLAUSE_MIMG_STORE,
64+
HARDCLAUSE_MIMG_ATOMIC,
65+
HARDCLAUSE_MIMG_SAMPLE,
66+
// Buffer, global or scratch memory instructions.
67+
HARDCLAUSE_VMEM_LOAD,
68+
HARDCLAUSE_VMEM_STORE,
69+
HARDCLAUSE_VMEM_ATOMIC,
70+
// Flat (not global or scratch) memory instructions.
71+
HARDCLAUSE_FLAT_LOAD,
72+
HARDCLAUSE_FLAT_STORE,
73+
HARDCLAUSE_FLAT_ATOMIC,
74+
// BVH instructions.
75+
HARDCLAUSE_BVH,
76+
77+
// Common:
78+
5179
// Instructions that access LDS.
5280
HARDCLAUSE_LDS,
5381
// Scalar memory instructions.
@@ -79,19 +107,43 @@ class SIInsertHardClauses : public MachineFunctionPass {
79107
}
80108

81109
HardClauseType getHardClauseType(const MachineInstr &MI) {
82-
83-
// On current architectures we only get a benefit from clausing loads.
84-
if (MI.mayLoad()) {
85-
if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) {
86-
if (ST->hasNSAClauseBug()) {
110+
if (MI.mayLoad() || (MI.mayStore() && ST->shouldClusterStores())) {
111+
if (ST->getGeneration() == AMDGPUSubtarget::GFX10) {
112+
if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) {
113+
if (ST->hasNSAClauseBug()) {
114+
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
115+
if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA)
116+
return HARDCLAUSE_ILLEGAL;
117+
}
118+
return HARDCLAUSE_VMEM;
119+
}
120+
if (SIInstrInfo::isFLAT(MI))
121+
return HARDCLAUSE_FLAT;
122+
} else {
123+
assert(ST->getGeneration() >= AMDGPUSubtarget::GFX11);
124+
if (SIInstrInfo::isMIMG(MI)) {
87125
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
88-
if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA)
89-
return HARDCLAUSE_ILLEGAL;
126+
const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo =
127+
AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
128+
if (BaseInfo->BVH)
129+
return HARDCLAUSE_BVH;
130+
if (BaseInfo->Sampler)
131+
return HARDCLAUSE_MIMG_SAMPLE;
132+
return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_MIMG_ATOMIC
133+
: HARDCLAUSE_MIMG_LOAD
134+
: HARDCLAUSE_MIMG_STORE;
135+
}
136+
if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) {
137+
return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_VMEM_ATOMIC
138+
: HARDCLAUSE_VMEM_LOAD
139+
: HARDCLAUSE_VMEM_STORE;
140+
}
141+
if (SIInstrInfo::isFLAT(MI)) {
142+
return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_FLAT_ATOMIC
143+
: HARDCLAUSE_FLAT_LOAD
144+
: HARDCLAUSE_FLAT_STORE;
90145
}
91-
return HARDCLAUSE_VMEM;
92146
}
93-
if (SIInstrInfo::isFLAT(MI))
94-
return HARDCLAUSE_FLAT;
95147
// TODO: LDS
96148
if (SIInstrInfo::isSMRD(MI))
97149
return HARDCLAUSE_SMEM;
@@ -130,7 +182,7 @@ class SIInsertHardClauses : public MachineFunctionPass {
130182
bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) {
131183
if (CI.First == CI.Last)
132184
return false;
133-
assert(CI.Length <= 64 && "Hard clause is too long!");
185+
assert(CI.Length <= MaxInstructionsInClause && "Hard clause is too long!");
134186

135187
auto &MBB = *CI.First->getParent();
136188
auto ClauseMI =
@@ -171,7 +223,7 @@ class SIInsertHardClauses : public MachineFunctionPass {
171223
}
172224
}
173225

174-
if (CI.Length == 64 ||
226+
if (CI.Length == MaxInstructionsInClause ||
175227
(CI.Length && Type != HARDCLAUSE_INTERNAL &&
176228
Type != HARDCLAUSE_IGNORE &&
177229
(Type != CI.Type ||

0 commit comments

Comments
 (0)