@@ -43,11 +43,39 @@ using namespace llvm;
43
43
44
44
namespace {
45
45
46
+ // A clause length of 64 instructions could be encoded in the s_clause
47
+ // instruction, but the hardware documentation (at least for GFX11) says that
48
+ // 63 is the maximum allowed.
49
+ constexpr unsigned MaxInstructionsInClause = 63 ;
50
+
46
51
enum HardClauseType {
52
+ // For GFX10:
53
+
47
54
// Texture, buffer, global or scratch memory instructions.
48
55
HARDCLAUSE_VMEM,
49
56
// Flat (not global or scratch) memory instructions.
50
57
HARDCLAUSE_FLAT,
58
+
59
+ // For GFX11:
60
+
61
+ // Texture memory instructions.
62
+ HARDCLAUSE_MIMG_LOAD,
63
+ HARDCLAUSE_MIMG_STORE,
64
+ HARDCLAUSE_MIMG_ATOMIC,
65
+ HARDCLAUSE_MIMG_SAMPLE,
66
+ // Buffer, global or scratch memory instructions.
67
+ HARDCLAUSE_VMEM_LOAD,
68
+ HARDCLAUSE_VMEM_STORE,
69
+ HARDCLAUSE_VMEM_ATOMIC,
70
+ // Flat (not global or scratch) memory instructions.
71
+ HARDCLAUSE_FLAT_LOAD,
72
+ HARDCLAUSE_FLAT_STORE,
73
+ HARDCLAUSE_FLAT_ATOMIC,
74
+ // BVH instructions.
75
+ HARDCLAUSE_BVH,
76
+
77
+ // Common:
78
+
51
79
// Instructions that access LDS.
52
80
HARDCLAUSE_LDS,
53
81
// Scalar memory instructions.
@@ -79,19 +107,43 @@ class SIInsertHardClauses : public MachineFunctionPass {
79
107
}
80
108
81
109
HardClauseType getHardClauseType (const MachineInstr &MI) {
82
-
83
- // On current architectures we only get a benefit from clausing loads.
84
- if (MI.mayLoad ()) {
85
- if (SIInstrInfo::isVMEM (MI) || SIInstrInfo::isSegmentSpecificFLAT (MI)) {
86
- if (ST->hasNSAClauseBug ()) {
110
+ if (MI.mayLoad () || (MI.mayStore () && ST->shouldClusterStores ())) {
111
+ if (ST->getGeneration () == AMDGPUSubtarget::GFX10) {
112
+ if (SIInstrInfo::isVMEM (MI) || SIInstrInfo::isSegmentSpecificFLAT (MI)) {
113
+ if (ST->hasNSAClauseBug ()) {
114
+ const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo (MI.getOpcode ());
115
+ if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA)
116
+ return HARDCLAUSE_ILLEGAL;
117
+ }
118
+ return HARDCLAUSE_VMEM;
119
+ }
120
+ if (SIInstrInfo::isFLAT (MI))
121
+ return HARDCLAUSE_FLAT;
122
+ } else {
123
+ assert (ST->getGeneration () >= AMDGPUSubtarget::GFX11);
124
+ if (SIInstrInfo::isMIMG (MI)) {
87
125
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo (MI.getOpcode ());
88
- if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA)
89
- return HARDCLAUSE_ILLEGAL;
126
+ const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo =
127
+ AMDGPU::getMIMGBaseOpcodeInfo (Info->BaseOpcode );
128
+ if (BaseInfo->BVH )
129
+ return HARDCLAUSE_BVH;
130
+ if (BaseInfo->Sampler )
131
+ return HARDCLAUSE_MIMG_SAMPLE;
132
+ return MI.mayLoad () ? MI.mayStore () ? HARDCLAUSE_MIMG_ATOMIC
133
+ : HARDCLAUSE_MIMG_LOAD
134
+ : HARDCLAUSE_MIMG_STORE;
135
+ }
136
+ if (SIInstrInfo::isVMEM (MI) || SIInstrInfo::isSegmentSpecificFLAT (MI)) {
137
+ return MI.mayLoad () ? MI.mayStore () ? HARDCLAUSE_VMEM_ATOMIC
138
+ : HARDCLAUSE_VMEM_LOAD
139
+ : HARDCLAUSE_VMEM_STORE;
140
+ }
141
+ if (SIInstrInfo::isFLAT (MI)) {
142
+ return MI.mayLoad () ? MI.mayStore () ? HARDCLAUSE_FLAT_ATOMIC
143
+ : HARDCLAUSE_FLAT_LOAD
144
+ : HARDCLAUSE_FLAT_STORE;
90
145
}
91
- return HARDCLAUSE_VMEM;
92
146
}
93
- if (SIInstrInfo::isFLAT (MI))
94
- return HARDCLAUSE_FLAT;
95
147
// TODO: LDS
96
148
if (SIInstrInfo::isSMRD (MI))
97
149
return HARDCLAUSE_SMEM;
@@ -130,7 +182,7 @@ class SIInsertHardClauses : public MachineFunctionPass {
130
182
bool emitClause (const ClauseInfo &CI, const SIInstrInfo *SII) {
131
183
if (CI.First == CI.Last )
132
184
return false ;
133
- assert (CI.Length <= 64 && " Hard clause is too long!" );
185
+ assert (CI.Length <= MaxInstructionsInClause && " Hard clause is too long!" );
134
186
135
187
auto &MBB = *CI.First ->getParent ();
136
188
auto ClauseMI =
@@ -171,7 +223,7 @@ class SIInsertHardClauses : public MachineFunctionPass {
171
223
}
172
224
}
173
225
174
- if (CI.Length == 64 ||
226
+ if (CI.Length == MaxInstructionsInClause ||
175
227
(CI.Length && Type != HARDCLAUSE_INTERNAL &&
176
228
Type != HARDCLAUSE_IGNORE &&
177
229
(Type != CI.Type ||
0 commit comments