|
6 | 6 |
|
7 | 7 | #include "compiler/plugins/target/ROCM/ROCMTargetUtils.h"
|
8 | 8 |
|
| 9 | +#include "compiler/plugins/target/ROCM/builtins/ukernel/iree_uk_amdgpu_gfx1030.h" |
| 10 | +#include "compiler/plugins/target/ROCM/builtins/ukernel/iree_uk_amdgpu_gfx1100.h" |
| 11 | +#include "compiler/plugins/target/ROCM/builtins/ukernel/iree_uk_amdgpu_gfx90a.h" |
| 12 | +#include "compiler/plugins/target/ROCM/builtins/ukernel/iree_uk_amdgpu_gfx942.h" |
9 | 13 | #include "iree/compiler/Codegen/Utils/GPUUtils.h"
|
10 | 14 | #include "iree/compiler/Dialect/HAL/Utils/LLVMLinkerUtils.h"
|
11 | 15 | #include "iree/compiler/Utils/ToolUtils.h"
|
| 16 | +#include "llvm/ADT/StringSwitch.h" |
12 | 17 | #include "llvm/IR/Constants.h"
|
13 | 18 | #include "llvm/IR/Module.h"
|
14 | 19 | #include "llvm/IRReader/IRReader.h"
|
@@ -79,76 +84,28 @@ static LogicalResult linkWithBitcodeFiles(Location loc, llvm::Module *module,
|
79 | 84 | }
|
80 | 85 |
|
81 | 86 | static LogicalResult linkBitcodeFile(Location loc, llvm::Linker &linker,
|
82 |
| - unsigned linkerFlags, StringRef path, |
| 87 | + unsigned linkerFlags, StringRef filename, |
| 88 | + StringRef contents, |
83 | 89 | llvm::TargetMachine &targetMachine,
|
84 | 90 | llvm::LLVMContext &context) {
|
85 |
| - auto bitcodeBufferRef = llvm::MemoryBuffer::getFile(path); |
86 |
| - if (auto ec = bitcodeBufferRef.getError()) { |
87 |
| - return mlir::emitError(loc) << "failed reading user bitcode file `" << path |
88 |
| - << "`: " << ec.message(); |
89 |
| - } |
| 91 | + llvm::MemoryBufferRef bitcodeBufferRef(contents, filename); |
90 | 92 | auto setAlwaysInline = [&](llvm::Module &module) {
|
91 |
| - if (targetMachine.getTargetCPU().contains("gfx10") || |
92 |
| - targetMachine.getTargetCPU().contains("gfx11")) { |
93 |
| - // Some ROCM/HIP functions for gfx10 or gfx11 has accuracy issue if |
94 |
| - // inlined. |
95 |
| - return; |
96 |
| - } |
97 | 93 | for (auto &func : module.getFunctionList()) {
|
98 |
| - // Some ROCM/HIP builtin functions have Optnone and NoInline for default. |
99 |
| - if (targetMachine.getTargetTriple().isAMDGCN()) { |
100 |
| - if (func.hasFnAttribute(llvm::Attribute::OptimizeNone)) { |
101 |
| - func.removeFnAttr(llvm::Attribute::OptimizeNone); |
102 |
| - } |
103 |
| - if (targetMachine.getTargetTriple().isAMDGCN() && |
104 |
| - func.hasFnAttribute(llvm::Attribute::NoInline)) { |
105 |
| - func.removeFnAttr(llvm::Attribute::NoInline); |
106 |
| - } |
107 |
| - } |
108 | 94 | func.addFnAttr(llvm::Attribute::AlwaysInline);
|
109 | 95 | }
|
110 | 96 | };
|
111 |
| - if (failed(linkBitcodeModule( |
112 |
| - loc, linker, linkerFlags, targetMachine, path, |
113 |
| - llvm::parseBitcodeFile(*bitcodeBufferRef->get(), context), |
114 |
| - setAlwaysInline))) { |
| 97 | + if (failed( |
| 98 | + linkBitcodeModule(loc, linker, linkerFlags, targetMachine, filename, |
| 99 | + llvm::parseBitcodeFile(bitcodeBufferRef, context), |
| 100 | + setAlwaysInline))) { |
115 | 101 | return mlir::emitError(loc) << "failed linking in user bitcode file `"
|
116 |
| - << path << "` for target triple '" |
| 102 | + << filename << "` for target triple '" |
117 | 103 | << targetMachine.getTargetTriple().str() << "'";
|
118 | 104 | }
|
119 | 105 |
|
120 | 106 | return success();
|
121 | 107 | }
|
122 | 108 |
|
123 |
| -static std::vector<std::string> getUkernelPaths(StringRef enabledUkernelsStr, |
124 |
| - StringRef targetChip, |
125 |
| - StringRef bitcodePath) { |
126 |
| - std::vector<std::string> selectedUkernelNames; |
127 |
| - if (enabledUkernelsStr == "all") { |
128 |
| - const char *allUkernelNames[] = {"argmax"}; |
129 |
| - size_t numUkernels = sizeof(allUkernelNames) / sizeof(allUkernelNames[0]); |
130 |
| - for (int i = 0; i < numUkernels; i++) { |
131 |
| - selectedUkernelNames.push_back(allUkernelNames[i]); |
132 |
| - } |
133 |
| - } else { |
134 |
| - while (!enabledUkernelsStr.empty()) { |
135 |
| - auto split = enabledUkernelsStr.split(','); |
136 |
| - selectedUkernelNames.push_back(split.first.str()); |
137 |
| - enabledUkernelsStr = split.second; |
138 |
| - } |
139 |
| - } |
140 |
| - |
141 |
| - // Construct full path to ROCDL bitcode libraries. |
142 |
| - std::vector<std::string> result; |
143 |
| - std::string app = "/"; |
144 |
| - for (auto &kernelName : selectedUkernelNames) { |
145 |
| - std::string filename = |
146 |
| - "rocm_" + kernelName + "_ukernel_" + targetChip.str(); |
147 |
| - result.push_back(bitcodePath.str() + app + filename + ".bc"); |
148 |
| - } |
149 |
| - return result; |
150 |
| -} |
151 |
| - |
152 | 109 | static void overridePlatformGlobal(llvm::Module *module, StringRef globalName,
|
153 | 110 | uint32_t newValue, llvm::Type *globalTy) {
|
154 | 111 | // NOTE: the global will not be defined if it is not used in the module.
|
@@ -228,24 +185,36 @@ LogicalResult linkHIPBitcodeIfNeeded(Location loc, llvm::Module *module,
|
228 | 185 | return linkWithBitcodeFiles(loc, module, bitcodePaths);
|
229 | 186 | }
|
230 | 187 |
|
| 188 | +static std::tuple<const iree_file_toc_t *, int> |
| 189 | +getUkernelBitcodeTOC(StringRef gpuArch) { |
| 190 | + return llvm::StringSwitch<std::tuple<const iree_file_toc_t *, int>>(gpuArch) |
| 191 | + .Case("gfx90a", |
| 192 | + {iree_uk_amdgpu_gfx90a_create(), iree_uk_amdgpu_gfx90a_size()}) |
| 193 | + .Case("gfx942", |
| 194 | + {iree_uk_amdgpu_gfx942_create(), iree_uk_amdgpu_gfx942_size()}) |
| 195 | + .Case("gfx1030", |
| 196 | + {iree_uk_amdgpu_gfx1030_create(), iree_uk_amdgpu_gfx1030_size()}) |
| 197 | + .Case("gfx1100", |
| 198 | + {iree_uk_amdgpu_gfx1100_create(), iree_uk_amdgpu_gfx1100_size()}) |
| 199 | + .Default({nullptr, 0}); |
| 200 | +} |
| 201 | + |
231 | 202 | // Links optimized Ukernel bitcode into the given module if the module needs it.
|
232 | 203 | LogicalResult linkUkernelBitcodeFiles(Location loc, llvm::Module *module,
|
233 | 204 | StringRef enabledUkernelsStr,
|
234 | 205 | StringRef targetChip,
|
235 | 206 | StringRef bitcodePath,
|
236 | 207 | unsigned linkerFlags,
|
237 | 208 | llvm::TargetMachine &targetMachine) {
|
238 |
| - // Early exit if Ukernel not supported on target chip. |
239 |
| - if (!iree_compiler::hasUkernelSupportedRocmArch(targetChip)) { |
240 |
| - return mlir::emitError(loc) |
241 |
| - << "ukernel '" << enabledUkernelsStr |
242 |
| - << "' not supported on target chip: " << targetChip; |
| 209 | + auto [toc, toc_size] = getUkernelBitcodeTOC(targetChip); |
| 210 | + if (!toc) { |
| 211 | + return failure(); |
243 | 212 | }
|
244 |
| - std::vector<std::string> ukernelPaths = |
245 |
| - getUkernelPaths(enabledUkernelsStr, targetChip, bitcodePath); |
| 213 | + |
246 | 214 | llvm::Linker linker(*module);
|
247 |
| - for (auto &path : ukernelPaths) { |
248 |
| - if (failed(linkBitcodeFile(loc, linker, linkerFlags, StringRef(path), |
| 215 | + for (int i = 0; i < toc_size; ++i) { |
| 216 | + if (failed(linkBitcodeFile(loc, linker, linkerFlags, toc[i].name, |
| 217 | + llvm::StringRef(toc[i].data, toc[i].size), |
249 | 218 | targetMachine, module->getContext())))
|
250 | 219 | return failure();
|
251 | 220 | }
|
|
0 commit comments