diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index 01cfe286c491b..83699f8897f66 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -52,7 +52,7 @@ const char *CudaVersionToString(CudaVersion V);
 // Input is "Major.Minor"
 CudaVersion CudaStringToVersion(const llvm::Twine &S);
 
-enum class CudaArch {
+enum class OffloadArch {
   UNUSED,
   UNKNOWN,
   // TODO: Deprecate and remove GPU architectures older than sm_52.
@@ -133,8 +133,8 @@ enum class CudaArch {
   // public one.
   LAST,
 
-  CudaDefault = CudaArch::SM_52,
-  HIPDefault = CudaArch::GFX906,
+  CudaDefault = OffloadArch::SM_52,
+  HIPDefault = OffloadArch::GFX906,
 };
 
 enum class CUDAFunctionTarget {
@@ -145,26 +145,26 @@ enum class CUDAFunctionTarget {
   InvalidTarget
 };
 
-static inline bool IsNVIDIAGpuArch(CudaArch A) {
-  return A >= CudaArch::SM_20 && A < CudaArch::GFX600;
+static inline bool IsNVIDIAOffloadArch(OffloadArch A) {
+  return A >= OffloadArch::SM_20 && A < OffloadArch::GFX600;
 }
 
-static inline bool IsAMDGpuArch(CudaArch A) {
+static inline bool IsAMDOffloadArch(OffloadArch A) {
   // Generic processor model is for testing only.
-  return A >= CudaArch::GFX600 && A < CudaArch::Generic;
+  return A >= OffloadArch::GFX600 && A < OffloadArch::Generic;
 }
 
-const char *CudaArchToString(CudaArch A);
-const char *CudaArchToVirtualArchString(CudaArch A);
+const char *OffloadArchToString(OffloadArch A);
+const char *OffloadArchToVirtualArchString(OffloadArch A);
 
 // The input should have the form "sm_20".
-CudaArch StringToCudaArch(llvm::StringRef S);
+OffloadArch StringToOffloadArch(llvm::StringRef S);
 
-/// Get the earliest CudaVersion that supports the given CudaArch.
-CudaVersion MinVersionForCudaArch(CudaArch A);
+/// Get the earliest CudaVersion that supports the given OffloadArch.
+CudaVersion MinVersionForOffloadArch(OffloadArch A);
 
-/// Get the latest CudaVersion that supports the given CudaArch.
-CudaVersion MaxVersionForCudaArch(CudaArch A);
+/// Get the latest CudaVersion that supports the given OffloadArch.
+CudaVersion MaxVersionForOffloadArch(OffloadArch A);
 
 // Various SDK-dependent features that affect CUDA compilation
 enum class CudaFeature {
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index af99c4d61021e..faf3878f064d2 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -72,23 +72,21 @@ CudaVersion ToCudaVersion(llvm::VersionTuple Version) {
 }
 
 namespace {
-struct CudaArchToStringMap {
-  CudaArch arch;
+struct OffloadArchToStringMap {
+  OffloadArch arch;
   const char *arch_name;
   const char *virtual_arch_name;
 };
 } // namespace
 
-#define SM2(sm, ca)                                                            \
-  { CudaArch::SM_##sm, "sm_" #sm, ca }
+#define SM2(sm, ca) {OffloadArch::SM_##sm, "sm_" #sm, ca}
 #define SM(sm) SM2(sm, "compute_" #sm)
-#define GFX(gpu)                                                               \
-  { CudaArch::GFX##gpu, "gfx" #gpu, "compute_amdgcn" }
-static const CudaArchToStringMap arch_names[] = {
+#define GFX(gpu) {OffloadArch::GFX##gpu, "gfx" #gpu, "compute_amdgcn"}
+static const OffloadArchToStringMap arch_names[] = {
     // clang-format off
-    {CudaArch::UNUSED, "", ""},
+    {OffloadArch::UNUSED, "", ""},
     SM2(20, "compute_20"), SM2(21, "compute_20"), // Fermi
-    SM(30), {CudaArch::SM_32_, "sm_32", "compute_32"}, SM(35), SM(37), // Kepler
+    SM(30), {OffloadArch::SM_32_, "sm_32", "compute_32"}, SM(35), SM(37), // Kepler
     SM(50), SM(52), SM(53),          // Maxwell
     SM(60), SM(61), SM(62),          // Pascal
     SM(70), SM(72),                  // Volta
@@ -112,7 +110,7 @@ static const CudaArchToStringMap arch_names[] = {
     GFX(803),  // gfx803
     GFX(805),  // gfx805
     GFX(810),  // gfx810
-    {CudaArch::GFX9_GENERIC, "gfx9-generic", "compute_amdgcn"},
+    {OffloadArch::GFX9_GENERIC, "gfx9-generic", "compute_amdgcn"},
     GFX(900),  // gfx900
     GFX(902),  // gfx902
     GFX(904),  // gfx903
@@ -124,12 +122,12 @@ static const CudaArchToStringMap arch_names[] = {
     GFX(940),  // gfx940
     GFX(941),  // gfx941
     GFX(942),  // gfx942
-    {CudaArch::GFX10_1_GENERIC, "gfx10-1-generic", "compute_amdgcn"},
+    {OffloadArch::GFX10_1_GENERIC, "gfx10-1-generic", "compute_amdgcn"},
     GFX(1010), // gfx1010
     GFX(1011), // gfx1011
     GFX(1012), // gfx1012
     GFX(1013), // gfx1013
-    {CudaArch::GFX10_3_GENERIC, "gfx10-3-generic", "compute_amdgcn"},
+    {OffloadArch::GFX10_3_GENERIC, "gfx10-3-generic", "compute_amdgcn"},
     GFX(1030), // gfx1030
     GFX(1031), // gfx1031
     GFX(1032), // gfx1032
@@ -137,7 +135,7 @@ static const CudaArchToStringMap arch_names[] = {
     GFX(1034), // gfx1034
     GFX(1035), // gfx1035
     GFX(1036), // gfx1036
-    {CudaArch::GFX11_GENERIC, "gfx11-generic", "compute_amdgcn"},
+    {OffloadArch::GFX11_GENERIC, "gfx11-generic", "compute_amdgcn"},
     GFX(1100), // gfx1100
     GFX(1101), // gfx1101
     GFX(1102), // gfx1102
@@ -145,105 +143,105 @@ static const CudaArchToStringMap arch_names[] = {
     GFX(1150), // gfx1150
     GFX(1151), // gfx1151
     GFX(1152), // gfx1152
-    {CudaArch::GFX12_GENERIC, "gfx12-generic", "compute_amdgcn"},
+    {OffloadArch::GFX12_GENERIC, "gfx12-generic", "compute_amdgcn"},
     GFX(1200), // gfx1200
     GFX(1201), // gfx1201
-    {CudaArch::AMDGCNSPIRV, "amdgcnspirv", "compute_amdgcn"},
-    {CudaArch::Generic, "generic", ""},
+    {OffloadArch::AMDGCNSPIRV, "amdgcnspirv", "compute_amdgcn"},
+    {OffloadArch::Generic, "generic", ""},
     // clang-format on
 };
 #undef SM
 #undef SM2
 #undef GFX
 
-const char *CudaArchToString(CudaArch A) {
+const char *OffloadArchToString(OffloadArch A) {
   auto result = std::find_if(
       std::begin(arch_names), std::end(arch_names),
-      [A](const CudaArchToStringMap &map) { return A == map.arch; });
+      [A](const OffloadArchToStringMap &map) { return A == map.arch; });
   if (result == std::end(arch_names))
     return "unknown";
   return result->arch_name;
 }
 
-const char *CudaArchToVirtualArchString(CudaArch A) {
+const char *OffloadArchToVirtualArchString(OffloadArch A) {
   auto result = std::find_if(
       std::begin(arch_names), std::end(arch_names),
-      [A](const CudaArchToStringMap &map) { return A == map.arch; });
+      [A](const OffloadArchToStringMap &map) { return A == map.arch; });
   if (result == std::end(arch_names))
     return "unknown";
   return result->virtual_arch_name;
 }
 
-CudaArch StringToCudaArch(llvm::StringRef S) {
+OffloadArch StringToOffloadArch(llvm::StringRef S) {
   auto result = std::find_if(
       std::begin(arch_names), std::end(arch_names),
-      [S](const CudaArchToStringMap &map) { return S == map.arch_name; });
+      [S](const OffloadArchToStringMap &map) { return S == map.arch_name; });
   if (result == std::end(arch_names))
-    return CudaArch::UNKNOWN;
+    return OffloadArch::UNKNOWN;
   return result->arch;
 }
 
-CudaVersion MinVersionForCudaArch(CudaArch A) {
-  if (A == CudaArch::UNKNOWN)
+CudaVersion MinVersionForOffloadArch(OffloadArch A) {
+  if (A == OffloadArch::UNKNOWN)
     return CudaVersion::UNKNOWN;
 
   // AMD GPUs do not depend on CUDA versions.
-  if (IsAMDGpuArch(A))
+  if (IsAMDOffloadArch(A))
     return CudaVersion::CUDA_70;
 
   switch (A) {
-  case CudaArch::SM_20:
-  case CudaArch::SM_21:
-  case CudaArch::SM_30:
-  case CudaArch::SM_32_:
-  case CudaArch::SM_35:
-  case CudaArch::SM_37:
-  case CudaArch::SM_50:
-  case CudaArch::SM_52:
-  case CudaArch::SM_53:
+  case OffloadArch::SM_20:
+  case OffloadArch::SM_21:
+  case OffloadArch::SM_30:
+  case OffloadArch::SM_32_:
+  case OffloadArch::SM_35:
+  case OffloadArch::SM_37:
+  case OffloadArch::SM_50:
+  case OffloadArch::SM_52:
+  case OffloadArch::SM_53:
     return CudaVersion::CUDA_70;
-  case CudaArch::SM_60:
-  case CudaArch::SM_61:
-  case CudaArch::SM_62:
+  case OffloadArch::SM_60:
+  case OffloadArch::SM_61:
+  case OffloadArch::SM_62:
     return CudaVersion::CUDA_80;
-  case CudaArch::SM_70:
+  case OffloadArch::SM_70:
    return CudaVersion::CUDA_90;
-  case CudaArch::SM_72:
+  case OffloadArch::SM_72:
     return CudaVersion::CUDA_91;
-  case CudaArch::SM_75:
+  case OffloadArch::SM_75:
     return CudaVersion::CUDA_100;
-  case CudaArch::SM_80:
+  case OffloadArch::SM_80:
     return CudaVersion::CUDA_110;
-  case CudaArch::SM_86:
+  case OffloadArch::SM_86:
     return CudaVersion::CUDA_111;
-  case CudaArch::SM_87:
+  case OffloadArch::SM_87:
     return CudaVersion::CUDA_114;
-  case CudaArch::SM_89:
-  case CudaArch::SM_90:
+  case OffloadArch::SM_89:
+  case OffloadArch::SM_90:
     return CudaVersion::CUDA_118;
-  case CudaArch::SM_90a:
+  case OffloadArch::SM_90a:
     return CudaVersion::CUDA_120;
   default:
     llvm_unreachable("invalid enum");
   }
 }
 
-CudaVersion MaxVersionForCudaArch(CudaArch A) {
+CudaVersion MaxVersionForOffloadArch(OffloadArch A) {
   // AMD GPUs do not depend on CUDA versions.
-  if (IsAMDGpuArch(A))
+  if (IsAMDOffloadArch(A))
     return CudaVersion::NEW;
 
   switch (A) {
-  case CudaArch::UNKNOWN:
+  case OffloadArch::UNKNOWN:
     return CudaVersion::UNKNOWN;
-  case CudaArch::SM_20:
-  case CudaArch::SM_21:
+  case OffloadArch::SM_20:
+  case OffloadArch::SM_21:
     return CudaVersion::CUDA_80;
-  case CudaArch::SM_30:
-  case CudaArch::SM_32_:
+  case OffloadArch::SM_30:
+  case OffloadArch::SM_32_:
     return CudaVersion::CUDA_102;
-  case CudaArch::SM_35:
-  case CudaArch::SM_37:
+  case OffloadArch::SM_35:
+  case OffloadArch::SM_37:
     return CudaVersion::CUDA_118;
   default:
     return CudaVersion::NEW;
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index 8e9006853db65..43b653dc52ce0 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -59,7 +59,7 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
   // Define available target features
   // These must be defined in sorted order!
   NoAsmVariants = true;
-  GPU = CudaArch::UNUSED;
+  GPU = OffloadArch::UNUSED;
 
   // PTX supports f16 as a fundamental type.
   HasLegalHalfType = true;
@@ -175,117 +175,117 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
   Builder.defineMacro("__NVPTX__");
 
   // Skip setting architecture dependent macros if undefined.
-  if (GPU == CudaArch::UNUSED && !HostTarget)
+  if (GPU == OffloadArch::UNUSED && !HostTarget)
     return;
 
   if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) {
     // Set __CUDA_ARCH__ for the GPU specified.
     std::string CUDAArchCode = [this] {
       switch (GPU) {
-      case CudaArch::GFX600:
-      case CudaArch::GFX601:
-      case CudaArch::GFX602:
-      case CudaArch::GFX700:
-      case CudaArch::GFX701:
-      case CudaArch::GFX702:
-      case CudaArch::GFX703:
-      case CudaArch::GFX704:
-      case CudaArch::GFX705:
-      case CudaArch::GFX801:
-      case CudaArch::GFX802:
-      case CudaArch::GFX803:
-      case CudaArch::GFX805:
-      case CudaArch::GFX810:
-      case CudaArch::GFX9_GENERIC:
-      case CudaArch::GFX900:
-      case CudaArch::GFX902:
-      case CudaArch::GFX904:
-      case CudaArch::GFX906:
-      case CudaArch::GFX908:
-      case CudaArch::GFX909:
-      case CudaArch::GFX90a:
-      case CudaArch::GFX90c:
-      case CudaArch::GFX940:
-      case CudaArch::GFX941:
-      case CudaArch::GFX942:
-      case CudaArch::GFX10_1_GENERIC:
-      case CudaArch::GFX1010:
-      case CudaArch::GFX1011:
-      case CudaArch::GFX1012:
-      case CudaArch::GFX1013:
-      case CudaArch::GFX10_3_GENERIC:
-      case CudaArch::GFX1030:
-      case CudaArch::GFX1031:
-      case CudaArch::GFX1032:
-      case CudaArch::GFX1033:
-      case CudaArch::GFX1034:
-      case CudaArch::GFX1035:
-      case CudaArch::GFX1036:
-      case CudaArch::GFX11_GENERIC:
-      case CudaArch::GFX1100:
-      case CudaArch::GFX1101:
-      case CudaArch::GFX1102:
-      case CudaArch::GFX1103:
-      case CudaArch::GFX1150:
-      case CudaArch::GFX1151:
-      case CudaArch::GFX1152:
-      case CudaArch::GFX12_GENERIC:
-      case CudaArch::GFX1200:
-      case CudaArch::GFX1201:
-      case CudaArch::AMDGCNSPIRV:
-      case CudaArch::Generic:
-      case CudaArch::LAST:
+      case OffloadArch::GFX600:
+      case OffloadArch::GFX601:
+      case OffloadArch::GFX602:
+      case OffloadArch::GFX700:
+      case OffloadArch::GFX701:
+      case OffloadArch::GFX702:
+      case OffloadArch::GFX703:
+      case OffloadArch::GFX704:
+      case OffloadArch::GFX705:
+      case OffloadArch::GFX801:
+      case OffloadArch::GFX802:
+      case OffloadArch::GFX803:
+      case OffloadArch::GFX805:
+      case OffloadArch::GFX810:
+      case OffloadArch::GFX9_GENERIC:
+      case OffloadArch::GFX900:
+      case OffloadArch::GFX902:
+      case OffloadArch::GFX904:
+      case OffloadArch::GFX906:
+      case OffloadArch::GFX908:
+      case OffloadArch::GFX909:
+      case OffloadArch::GFX90a:
+      case OffloadArch::GFX90c:
+      case OffloadArch::GFX940:
+      case OffloadArch::GFX941:
+      case OffloadArch::GFX942:
+      case OffloadArch::GFX10_1_GENERIC:
+      case OffloadArch::GFX1010:
+      case OffloadArch::GFX1011:
+      case OffloadArch::GFX1012:
+      case OffloadArch::GFX1013:
+      case OffloadArch::GFX10_3_GENERIC:
+      case OffloadArch::GFX1030:
+      case OffloadArch::GFX1031:
+      case OffloadArch::GFX1032:
+      case OffloadArch::GFX1033:
+      case OffloadArch::GFX1034:
+      case OffloadArch::GFX1035:
+      case OffloadArch::GFX1036:
+      case OffloadArch::GFX11_GENERIC:
+      case OffloadArch::GFX1100:
+      case OffloadArch::GFX1101:
+      case OffloadArch::GFX1102:
+      case OffloadArch::GFX1103:
+      case OffloadArch::GFX1150:
+      case OffloadArch::GFX1151:
+      case OffloadArch::GFX1152:
+      case OffloadArch::GFX12_GENERIC:
+      case OffloadArch::GFX1200:
+      case OffloadArch::GFX1201:
+      case OffloadArch::AMDGCNSPIRV:
+      case OffloadArch::Generic:
+      case OffloadArch::LAST:
         break;
-      case CudaArch::UNKNOWN:
+      case OffloadArch::UNKNOWN:
         assert(false && "No GPU arch when compiling CUDA device code.");
         return "";
-      case CudaArch::UNUSED:
-      case CudaArch::SM_20:
+      case OffloadArch::UNUSED:
+      case OffloadArch::SM_20:
         return "200";
-      case CudaArch::SM_21:
+      case OffloadArch::SM_21:
         return "210";
-      case CudaArch::SM_30:
+      case OffloadArch::SM_30:
         return "300";
-      case CudaArch::SM_32_:
+      case OffloadArch::SM_32_:
         return "320";
-      case CudaArch::SM_35:
+      case OffloadArch::SM_35:
         return "350";
-      case CudaArch::SM_37:
+      case OffloadArch::SM_37:
         return "370";
-      case CudaArch::SM_50:
+      case OffloadArch::SM_50:
         return "500";
-      case CudaArch::SM_52:
+      case OffloadArch::SM_52:
         return "520";
-      case CudaArch::SM_53:
+      case OffloadArch::SM_53:
         return "530";
-      case CudaArch::SM_60:
+      case OffloadArch::SM_60:
         return "600";
-      case CudaArch::SM_61:
+      case OffloadArch::SM_61:
         return "610";
-      case CudaArch::SM_62:
+      case OffloadArch::SM_62:
         return "620";
-      case CudaArch::SM_70:
+      case OffloadArch::SM_70:
         return "700";
-      case CudaArch::SM_72:
+      case OffloadArch::SM_72:
         return "720";
-      case CudaArch::SM_75:
+      case OffloadArch::SM_75:
         return "750";
-      case CudaArch::SM_80:
+      case OffloadArch::SM_80:
         return "800";
-      case CudaArch::SM_86:
+      case OffloadArch::SM_86:
         return "860";
-      case CudaArch::SM_87:
+      case OffloadArch::SM_87:
         return "870";
-      case CudaArch::SM_89:
+      case OffloadArch::SM_89:
         return "890";
-      case CudaArch::SM_90:
-      case CudaArch::SM_90a:
+      case OffloadArch::SM_90:
+      case OffloadArch::SM_90a:
         return "900";
       }
-      llvm_unreachable("unhandled CudaArch");
+      llvm_unreachable("unhandled OffloadArch");
     }();
     Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
 
-    if (GPU == CudaArch::SM_90a)
+    if (GPU == OffloadArch::SM_90a)
       Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1");
   }
 }
diff --git a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h
index f476d49047c01..a5daf36cfac72 100644
--- a/clang/lib/Basic/Targets/NVPTX.h
+++ b/clang/lib/Basic/Targets/NVPTX.h
@@ -62,7 +62,7 @@ static const int NVPTXDWARFAddrSpaceMap[] = {
 
 class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo {
   static const char *const GCCRegNames[];
-  CudaArch GPU;
+  OffloadArch GPU;
   uint32_t PTXVersion;
   std::unique_ptr<TargetInfo> HostTarget;
 
@@ -79,8 +79,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo {
   initFeatureMap(llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags,
                  StringRef CPU,
                  const std::vector<std::string> &FeaturesVec) const override {
-    if (GPU != CudaArch::UNUSED)
-      Features[CudaArchToString(GPU)] = true;
+    if (GPU != OffloadArch::UNUSED)
+      Features[OffloadArchToString(GPU)] = true;
     Features["ptx" + std::to_string(PTXVersion)] = true;
     return TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec);
   }
@@ -121,18 +121,18 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo {
   }
 
   bool isValidCPUName(StringRef Name) const override {
-    return StringToCudaArch(Name) != CudaArch::UNKNOWN;
+    return StringToOffloadArch(Name) != OffloadArch::UNKNOWN;
   }
 
   void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override {
-    for (int i = static_cast<int>(CudaArch::SM_20);
-         i < static_cast<int>(CudaArch::Generic); ++i)
-      Values.emplace_back(CudaArchToString(static_cast<CudaArch>(i)));
+    for (int i = static_cast<int>(OffloadArch::SM_20);
+         i < static_cast<int>(OffloadArch::Generic); ++i)
+      Values.emplace_back(OffloadArchToString(static_cast<OffloadArch>(i)));
   }
 
   bool setCPU(const std::string &Name) override {
-    GPU = StringToCudaArch(Name);
-    return GPU != CudaArch::UNKNOWN;
+    GPU = StringToOffloadArch(Name);
+    return GPU != OffloadArch::UNKNOWN;
   }
 
   void setSupportedOpenCLOpts() override {
@@ -183,7 +183,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo {
   bool hasBitIntType() const override { return true; }
   bool hasBFloat16Type() const override { return true; }
 
-  CudaArch getGPU() const { return GPU; }
+  OffloadArch getGPU() const { return GPU; }
 };
 } // namespace targets
 } // namespace clang
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index f4eba14da51a5..f5bd4a141cc2d 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -2227,113 +2227,112 @@ bool CGOpenMPRuntimeGPU::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
   return false;
 }
 
-// Get current CudaArch and ignore any unknown values
-static CudaArch getCudaArch(CodeGenModule &CGM) {
+// Get current OffloadArch and ignore any unknown values
+static OffloadArch getOffloadArch(CodeGenModule &CGM) {
   if (!CGM.getTarget().hasFeature("ptx"))
-    return CudaArch::UNKNOWN;
+    return OffloadArch::UNKNOWN;
   for (const auto &Feature : CGM.getTarget().getTargetOpts().FeatureMap) {
     if (Feature.getValue()) {
-      CudaArch Arch = StringToCudaArch(Feature.getKey());
-      if (Arch != CudaArch::UNKNOWN)
+      OffloadArch Arch = StringToOffloadArch(Feature.getKey());
+      if (Arch != OffloadArch::UNKNOWN)
         return Arch;
     }
   }
-  return CudaArch::UNKNOWN;
+  return OffloadArch::UNKNOWN;
 }
 
 /// Check to see if target architecture supports unified addressing which is
 /// a restriction for OpenMP requires clause "unified_shared_memory".
-void CGOpenMPRuntimeGPU::processRequiresDirective(
-    const OMPRequiresDecl *D) {
+void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {
   for (const OMPClause *Clause : D->clauselists()) {
     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
-      CudaArch Arch = getCudaArch(CGM);
+      OffloadArch Arch = getOffloadArch(CGM);
       switch (Arch) {
-      case CudaArch::SM_20:
-      case CudaArch::SM_21:
-      case CudaArch::SM_30:
-      case CudaArch::SM_32_:
-      case CudaArch::SM_35:
-      case CudaArch::SM_37:
-      case CudaArch::SM_50:
-      case CudaArch::SM_52:
-      case CudaArch::SM_53: {
+      case OffloadArch::SM_20:
+      case OffloadArch::SM_21:
+      case OffloadArch::SM_30:
+      case OffloadArch::SM_32_:
+      case OffloadArch::SM_35:
+      case OffloadArch::SM_37:
+      case OffloadArch::SM_50:
+      case OffloadArch::SM_52:
+      case OffloadArch::SM_53: {
         SmallString<256> Buffer;
         llvm::raw_svector_ostream Out(Buffer);
-        Out << "Target architecture " << CudaArchToString(Arch)
+        Out << "Target architecture " << OffloadArchToString(Arch)
             << " does not support unified addressing";
         CGM.Error(Clause->getBeginLoc(), Out.str());
         return;
       }
-      case CudaArch::SM_60:
-      case CudaArch::SM_61:
-      case CudaArch::SM_62:
-      case CudaArch::SM_70:
-      case CudaArch::SM_72:
-      case CudaArch::SM_75:
-      case CudaArch::SM_80:
-      case CudaArch::SM_86:
-      case CudaArch::SM_87:
-      case CudaArch::SM_89:
-      case CudaArch::SM_90:
-      case CudaArch::SM_90a:
-      case CudaArch::GFX600:
-      case CudaArch::GFX601:
-      case CudaArch::GFX602:
-      case CudaArch::GFX700:
-      case CudaArch::GFX701:
-      case CudaArch::GFX702:
-      case CudaArch::GFX703:
-      case CudaArch::GFX704:
-      case CudaArch::GFX705:
-      case CudaArch::GFX801:
-      case CudaArch::GFX802:
-      case CudaArch::GFX803:
-      case CudaArch::GFX805:
-      case CudaArch::GFX810:
-      case CudaArch::GFX9_GENERIC:
-      case CudaArch::GFX900:
-      case CudaArch::GFX902:
-      case CudaArch::GFX904:
-      case CudaArch::GFX906:
-      case CudaArch::GFX908:
-      case CudaArch::GFX909:
-      case CudaArch::GFX90a:
-      case CudaArch::GFX90c:
-      case CudaArch::GFX940:
-      case CudaArch::GFX941:
-      case CudaArch::GFX942:
-      case CudaArch::GFX10_1_GENERIC:
-      case CudaArch::GFX1010:
-      case CudaArch::GFX1011:
-      case CudaArch::GFX1012:
-      case CudaArch::GFX1013:
-      case CudaArch::GFX10_3_GENERIC:
-      case CudaArch::GFX1030:
-      case CudaArch::GFX1031:
-      case CudaArch::GFX1032:
-      case CudaArch::GFX1033:
-      case CudaArch::GFX1034:
-      case CudaArch::GFX1035:
-      case CudaArch::GFX1036:
-      case CudaArch::GFX11_GENERIC:
-      case CudaArch::GFX1100:
-      case CudaArch::GFX1101:
-      case CudaArch::GFX1102:
-      case CudaArch::GFX1103:
-      case CudaArch::GFX1150:
-      case CudaArch::GFX1151:
-      case CudaArch::GFX1152:
-      case CudaArch::GFX12_GENERIC:
-      case CudaArch::GFX1200:
-      case CudaArch::GFX1201:
-      case CudaArch::AMDGCNSPIRV:
-      case CudaArch::Generic:
-      case CudaArch::UNUSED:
-      case CudaArch::UNKNOWN:
+      case OffloadArch::SM_60:
+      case OffloadArch::SM_61:
+      case OffloadArch::SM_62:
+      case OffloadArch::SM_70:
+      case OffloadArch::SM_72:
+      case OffloadArch::SM_75:
+      case OffloadArch::SM_80:
+      case OffloadArch::SM_86:
+      case OffloadArch::SM_87:
+      case OffloadArch::SM_89:
+      case OffloadArch::SM_90:
+      case OffloadArch::SM_90a:
+      case OffloadArch::GFX600:
+      case OffloadArch::GFX601:
+      case OffloadArch::GFX602:
+      case OffloadArch::GFX700:
+      case OffloadArch::GFX701:
+      case OffloadArch::GFX702:
+      case OffloadArch::GFX703:
+      case OffloadArch::GFX704:
+      case OffloadArch::GFX705:
+      case OffloadArch::GFX801:
+      case OffloadArch::GFX802:
+      case OffloadArch::GFX803:
+      case OffloadArch::GFX805:
+      case OffloadArch::GFX810:
+      case OffloadArch::GFX9_GENERIC:
+      case OffloadArch::GFX900:
+      case OffloadArch::GFX902:
+      case OffloadArch::GFX904:
+      case OffloadArch::GFX906:
+      case OffloadArch::GFX908:
+      case OffloadArch::GFX909:
+      case OffloadArch::GFX90a:
+      case OffloadArch::GFX90c:
+      case OffloadArch::GFX940:
+      case OffloadArch::GFX941:
+      case OffloadArch::GFX942:
+      case OffloadArch::GFX10_1_GENERIC:
+      case OffloadArch::GFX1010:
+      case OffloadArch::GFX1011:
+      case OffloadArch::GFX1012:
+      case OffloadArch::GFX1013:
+      case OffloadArch::GFX10_3_GENERIC:
+      case OffloadArch::GFX1030:
+      case OffloadArch::GFX1031:
+      case OffloadArch::GFX1032:
+      case OffloadArch::GFX1033:
+      case OffloadArch::GFX1034:
+      case OffloadArch::GFX1035:
+      case OffloadArch::GFX1036:
+      case OffloadArch::GFX11_GENERIC:
+      case OffloadArch::GFX1100:
+      case OffloadArch::GFX1101:
+      case OffloadArch::GFX1102:
+      case OffloadArch::GFX1103:
+      case OffloadArch::GFX1150:
+      case OffloadArch::GFX1151:
+      case OffloadArch::GFX1152:
+      case OffloadArch::GFX12_GENERIC:
+      case OffloadArch::GFX1200:
+      case OffloadArch::GFX1201:
+      case OffloadArch::AMDGCNSPIRV:
+      case OffloadArch::Generic:
+      case OffloadArch::UNUSED:
+      case OffloadArch::UNKNOWN:
         break;
-      case CudaArch::LAST:
-        llvm_unreachable("Unexpected Cuda arch.");
+      case OffloadArch::LAST:
+        llvm_unreachable("Unexpected GPU arch.");
       }
     }
   }
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 6314bc5d61071..221e222bdd47d 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -899,11 +899,11 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
     }
 
     for (StringRef Arch : Archs) {
-      if (NVPTXTriple && IsNVIDIAGpuArch(StringToCudaArch(
-                             getProcessorFromTargetID(*NVPTXTriple, Arch)))) {
+      if (NVPTXTriple && IsNVIDIAOffloadArch(StringToOffloadArch(
+                             getProcessorFromTargetID(*NVPTXTriple, Arch)))) {
         DerivedArchs[NVPTXTriple->getTriple()].insert(Arch);
       } else if (AMDTriple &&
-                 IsAMDGpuArch(StringToCudaArch(
+                 IsAMDOffloadArch(StringToOffloadArch(
                      getProcessorFromTargetID(*AMDTriple, Arch)))) {
         DerivedArchs[AMDTriple->getTriple()].insert(Arch);
       } else {
@@ -2948,7 +2948,7 @@ class OffloadingActionBuilder final {
     struct TargetID {
       /// Target ID string which is persistent throughout the compilation.
       const char *ID;
-      TargetID(CudaArch Arch) { ID = CudaArchToString(Arch); }
+      TargetID(OffloadArch Arch) { ID = OffloadArchToString(Arch); }
      TargetID(const char *ID) : ID(ID) {}
       operator const char *() { return ID; }
       operator StringRef() { return StringRef(ID); }
@@ -2969,7 +2969,7 @@ class OffloadingActionBuilder final {
     bool Relocatable = false;
 
     /// Default GPU architecture if there's no one specified.
-    CudaArch DefaultCudaArch = CudaArch::UNKNOWN;
+    OffloadArch DefaultOffloadArch = OffloadArch::UNKNOWN;
 
     /// Method to generate compilation unit ID specified by option
     /// '-fuse-cuid='.
@@ -3098,7 +3098,7 @@ class OffloadingActionBuilder final {
 
       // If we have a fat binary, add it to the list.
       if (CudaFatBinary) {
-        AddTopLevel(CudaFatBinary, CudaArch::UNUSED);
+        AddTopLevel(CudaFatBinary, OffloadArch::UNUSED);
         CudaDeviceActions.clear();
         CudaFatBinary = nullptr;
         return;
@@ -3243,11 +3243,11 @@ class OffloadingActionBuilder final {
       if (GpuArchList.empty()) {
         if (ToolChains.front()->getTriple().isSPIRV()) {
           if (ToolChains.front()->getTriple().getVendor() == llvm::Triple::AMD)
-            GpuArchList.push_back(CudaArch::AMDGCNSPIRV);
+            GpuArchList.push_back(OffloadArch::AMDGCNSPIRV);
           else
-            GpuArchList.push_back(CudaArch::Generic);
+            GpuArchList.push_back(OffloadArch::Generic);
         } else {
-          GpuArchList.push_back(DefaultCudaArch);
+          GpuArchList.push_back(DefaultOffloadArch);
         }
       }
 
@@ -3262,16 +3262,16 @@ class OffloadingActionBuilder final {
     CudaActionBuilder(Compilation &C, DerivedArgList &Args,
                       const Driver::InputList &Inputs)
         : CudaActionBuilderBase(C, Args, Inputs, Action::OFK_Cuda) {
-      DefaultCudaArch = CudaArch::CudaDefault;
+      DefaultOffloadArch = OffloadArch::CudaDefault;
     }
 
     StringRef getCanonicalOffloadArch(StringRef ArchStr) override {
-      CudaArch Arch = StringToCudaArch(ArchStr);
-      if (Arch == CudaArch::UNKNOWN || !IsNVIDIAGpuArch(Arch)) {
+      OffloadArch Arch = StringToOffloadArch(ArchStr);
+      if (Arch == OffloadArch::UNKNOWN || !IsNVIDIAOffloadArch(Arch)) {
        C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr;
         return StringRef();
       }
-      return CudaArchToString(Arch);
+      return OffloadArchToString(Arch);
     }
 
     std::optional<std::pair<llvm::StringRef, llvm::StringRef>>
@@ -3401,7 +3401,7 @@ class OffloadingActionBuilder final {
                      const Driver::InputList &Inputs)
         : CudaActionBuilderBase(C, Args, Inputs, Action::OFK_HIP) {
 
-      DefaultCudaArch = CudaArch::HIPDefault;
+      DefaultOffloadArch = OffloadArch::HIPDefault;
 
       if (Args.hasArg(options::OPT_fhip_emit_relocatable,
                       options::OPT_fno_hip_emit_relocatable)) {
@@ -4408,23 +4408,24 @@ static StringRef getCanonicalArchString(Compilation &C,
                                         bool SuppressError = false) {
   // Lookup the CUDA / HIP architecture string. Only report an error if we were
   // expecting the triple to be only NVPTX / AMDGPU.
-  CudaArch Arch = StringToCudaArch(getProcessorFromTargetID(Triple, ArchStr));
+  OffloadArch Arch =
+      StringToOffloadArch(getProcessorFromTargetID(Triple, ArchStr));
   if (!SuppressError && Triple.isNVPTX() &&
-      (Arch == CudaArch::UNKNOWN || !IsNVIDIAGpuArch(Arch))) {
+      (Arch == OffloadArch::UNKNOWN || !IsNVIDIAOffloadArch(Arch))) {
     C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch)
         << "CUDA" << ArchStr;
     return StringRef();
   } else if (!SuppressError && Triple.isAMDGPU() &&
-             (Arch == CudaArch::UNKNOWN || !IsAMDGpuArch(Arch))) {
+             (Arch == OffloadArch::UNKNOWN || !IsAMDOffloadArch(Arch))) {
     C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch)
         << "HIP" << ArchStr;
     return StringRef();
   }
 
-  if (IsNVIDIAGpuArch(Arch))
-    return Args.MakeArgStringRef(CudaArchToString(Arch));
+  if (IsNVIDIAOffloadArch(Arch))
+    return Args.MakeArgStringRef(OffloadArchToString(Arch));
 
-  if (IsAMDGpuArch(Arch)) {
+  if (IsAMDOffloadArch(Arch)) {
     llvm::StringMap<bool> Features;
     auto HIPTriple = getHIPOffloadTargetTriple(C.getDriver(), C.getInputArgs());
     if (!HIPTriple)
@@ -4545,9 +4546,9 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
 
   if (Archs.empty()) {
     if (Kind == Action::OFK_Cuda)
-      Archs.insert(CudaArchToString(CudaArch::CudaDefault));
+      Archs.insert(OffloadArchToString(OffloadArch::CudaDefault));
     else if (Kind == Action::OFK_HIP)
-      Archs.insert(CudaArchToString(CudaArch::HIPDefault));
+      Archs.insert(OffloadArchToString(OffloadArch::HIPDefault));
     else if (Kind == Action::OFK_OpenMP)
       Archs.insert(StringRef());
   } else {
diff --git a/clang/lib/Driver/OffloadBundler.cpp b/clang/lib/Driver/OffloadBundler.cpp
index 191d108e9b739..a4ab846ed2c57 100644
--- a/clang/lib/Driver/OffloadBundler.cpp
+++ b/clang/lib/Driver/OffloadBundler.cpp
@@ -79,7 +79,8 @@ OffloadTargetInfo::OffloadTargetInfo(const StringRef Target,
   auto TargetFeatures = Target.split(':');
   auto TripleOrGPU = TargetFeatures.first.rsplit('-');
 
-  if (clang::StringToCudaArch(TripleOrGPU.second) != clang::CudaArch::UNKNOWN) {
+  if (clang::StringToOffloadArch(TripleOrGPU.second) !=
+      clang::OffloadArch::UNKNOWN) {
     auto KindTriple = TripleOrGPU.first.split('-');
     this->OffloadKind = KindTriple.first;
 
diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
index d17ecb15c8208..1c0fb4babe3a5 100644
--- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -87,7 +87,7 @@ llvm::opt::DerivedArgList *AMDGPUOpenMPToolChain::TranslateArgs(
           llvm::formatv("{0}", llvm::fmt_consume(ArchsOrErr.takeError()));
       getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
           << llvm::Triple::getArchTypeName(getArch()) << ErrMsg << "-march";
-      Arch = CudaArchToString(CudaArch::HIPDefault);
+      Arch = OffloadArchToString(OffloadArch::HIPDefault);
     } else {
       Arch = Args.MakeArgString(ArchsOrErr->front());
     }
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index 2dfc7457b0ac7..08a4633902654 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -223,13 +223,13 @@ CudaInstallationDetector::CudaInstallationDetector(
       // CUDA-9+ uses single libdevice file for all GPU variants.
       std::string FilePath = LibDevicePath + "/libdevice.10.bc";
       if (FS.exists(FilePath)) {
-        for (int Arch = (int)CudaArch::SM_30, E = (int)CudaArch::LAST; Arch < E;
-             ++Arch) {
-          CudaArch GpuArch = static_cast<CudaArch>(Arch);
-          if (!IsNVIDIAGpuArch(GpuArch))
+        for (int Arch = (int)OffloadArch::SM_30, E = (int)OffloadArch::LAST;
+             Arch < E; ++Arch) {
+          OffloadArch OA = static_cast<OffloadArch>(Arch);
+          if (!IsNVIDIAOffloadArch(OA))
             continue;
-          std::string GpuArchName(CudaArchToString(GpuArch));
-          LibDeviceMap[GpuArchName] = FilePath;
+          std::string OffloadArchName(OffloadArchToString(OA));
+          LibDeviceMap[OffloadArchName] = FilePath;
         }
       }
     } else {
@@ -312,17 +312,17 @@ void CudaInstallationDetector::AddCudaIncludeArgs(
 }
 
 void CudaInstallationDetector::CheckCudaVersionSupportsArch(
-    CudaArch Arch) const {
-  if (Arch == CudaArch::UNKNOWN || Version == CudaVersion::UNKNOWN ||
+    OffloadArch Arch) const {
+  if (Arch == OffloadArch::UNKNOWN || Version == CudaVersion::UNKNOWN ||
       ArchsWithBadVersion[(int)Arch])
     return;
 
-  auto MinVersion = MinVersionForCudaArch(Arch);
-  auto MaxVersion = MaxVersionForCudaArch(Arch);
+  auto MinVersion = MinVersionForOffloadArch(Arch);
+  auto MaxVersion = MaxVersionForOffloadArch(Arch);
   if (Version < MinVersion || Version > MaxVersion) {
     ArchsWithBadVersion[(int)Arch] = true;
     D.Diag(diag::err_drv_cuda_version_unsupported)
-        << CudaArchToString(Arch) << CudaVersionToString(MinVersion)
+        << OffloadArchToString(Arch) << CudaVersionToString(MinVersion)
         << CudaVersionToString(MaxVersion) << InstallPath
         << CudaVersionToString(Version);
   }
@@ -401,8 +401,8 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
   }
 
   // Obtain architecture from the action.
-  CudaArch gpu_arch = StringToCudaArch(GPUArchName);
-  assert(gpu_arch != CudaArch::UNKNOWN &&
+  OffloadArch gpu_arch = StringToOffloadArch(GPUArchName);
+  assert(gpu_arch != OffloadArch::UNKNOWN &&
         "Device action expected to have an architecture.");
 
   // Check that our installation's ptxas supports gpu_arch.
@@ -457,7 +457,7 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
     CmdArgs.push_back("-v");
 
   CmdArgs.push_back("--gpu-name");
-  CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
+  CmdArgs.push_back(Args.MakeArgString(OffloadArchToString(gpu_arch)));
   CmdArgs.push_back("--output-file");
   std::string OutputFileName = TC.getInputFilename(Output);
 
@@ -553,7 +553,7 @@ void NVPTX::FatBinary::ConstructJob(Compilation &C, const JobAction &JA,
     const char *gpu_arch_str = A->getOffloadingArch();
     assert(gpu_arch_str &&
            "Device action expected to have associated a GPU architecture!");
-    CudaArch gpu_arch = StringToCudaArch(gpu_arch_str);
+    OffloadArch gpu_arch = StringToOffloadArch(gpu_arch_str);
 
     if (II.getType() == types::TY_PP_Asm &&
         !shouldIncludePTX(Args, gpu_arch_str))
@@ -561,7 +561,7 @@ void NVPTX::FatBinary::ConstructJob(Compilation &C, const JobAction &JA,
     // We need to pass an Arch of the form "sm_XX" for cubin files and
     // "compute_XX" for ptx.
     const char *Arch = (II.getType() == types::TY_PP_Asm)
-                           ? CudaArchToVirtualArchString(gpu_arch)
+                           ? OffloadArchToVirtualArchString(gpu_arch)
                            : gpu_arch_str;
     CmdArgs.push_back(
         Args.MakeArgString(llvm::Twine("--image=profile=") + Arch +
@@ -758,7 +758,7 @@ NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
 
   if (!DAL->hasArg(options::OPT_march_EQ) && OffloadKind != Action::OFK_None) {
     DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
-                      CudaArchToString(CudaArch::CudaDefault));
+                      OffloadArchToString(OffloadArch::CudaDefault));
   } else if (DAL->getLastArgValue(options::OPT_march_EQ) == "generic" &&
              OffloadKind == Action::OFK_None) {
     DAL->eraseArg(options::OPT_march_EQ);
@@ -938,7 +938,7 @@ void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
       !DriverArgs.hasArg(options::OPT_no_cuda_version_check)) {
     StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
     assert(!Arch.empty() && "Must have an explicit GPU arch.");
-    CudaInstallation.CheckCudaVersionSupportsArch(StringToCudaArch(Arch));
+    CudaInstallation.CheckCudaVersionSupportsArch(StringToOffloadArch(Arch));
   }
   CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
 }
@@ -984,7 +984,7 @@ CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
           llvm::formatv("{0}", llvm::fmt_consume(ArchsOrErr.takeError()));
       getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
          << llvm::Triple::getArchTypeName(getArch()) << ErrMsg << "-march";
-      Arch = CudaArchToString(CudaArch::CudaDefault);
+      Arch = OffloadArchToString(OffloadArch::CudaDefault);
     } else {
       Arch = Args.MakeArgString(ArchsOrErr->front());
     }
diff --git a/clang/lib/Driver/ToolChains/Cuda.h b/clang/lib/Driver/ToolChains/Cuda.h
index 43c17ba7c0ba0..7464d88cb350b 100644
--- a/clang/lib/Driver/ToolChains/Cuda.h
+++ b/clang/lib/Driver/ToolChains/Cuda.h
@@ -37,7 +37,7 @@ class CudaInstallationDetector {
 
   // CUDA architectures for which we have raised an error in
   // CheckCudaVersionSupportsArch.
-  mutable std::bitset<(int)CudaArch::LAST> ArchsWithBadVersion;
+  mutable std::bitset<(int)OffloadArch::LAST> ArchsWithBadVersion;
 
 public:
   CudaInstallationDetector(const Driver &D, const llvm::Triple &HostTriple,
@@ -50,7 +50,7 @@ class CudaInstallationDetector {
   ///
   /// If either Version or Arch is unknown, does not emit an error. Emits at
   /// most one error per Arch.
-  void CheckCudaVersionSupportsArch(CudaArch Arch) const;
+  void CheckCudaVersionSupportsArch(OffloadArch Arch) const;
 
   /// Check whether we detected a valid Cuda install.
   bool isValid() const { return IsValid; }
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 7e6c7d776588a..41489789919d0 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -5116,12 +5116,12 @@ bool Sema::CheckRegparmAttr(const ParsedAttr &AL, unsigned &numParams) {
   return false;
 }
 
-// Helper to get CudaArch.
-static CudaArch getCudaArch(const TargetInfo &TI) {
+// Helper to get OffloadArch.
+static OffloadArch getOffloadArch(const TargetInfo &TI) {
   if (!TI.getTriple().isNVPTX())
-    llvm_unreachable("getCudaArch is only valid for NVPTX triple");
+    llvm_unreachable("getOffloadArch is only valid for NVPTX triple");
   auto &TO = TI.getTargetOpts();
-  return StringToCudaArch(TO.CPU);
+  return StringToOffloadArch(TO.CPU);
 }
 
 // Checks whether an argument of launch_bounds attribute is
@@ -5181,10 +5181,10 @@ Sema::CreateLaunchBoundsAttr(const AttributeCommonInfo &CI, Expr *MaxThreads,
 
   if (MaxBlocks) {
     // '.maxclusterrank' ptx directive requires .target sm_90 or higher.
-    auto SM = getCudaArch(Context.getTargetInfo());
-    if (SM == CudaArch::UNKNOWN || SM < CudaArch::SM_90) {
+    auto SM = getOffloadArch(Context.getTargetInfo());
+    if (SM == OffloadArch::UNKNOWN || SM < OffloadArch::SM_90) {
       Diag(MaxBlocks->getBeginLoc(), diag::warn_cuda_maxclusterrank_sm_90)
-          << CudaArchToString(SM) << CI << MaxBlocks->getSourceRange();
+          << OffloadArchToString(SM) << CI << MaxBlocks->getSourceRange();
       // Ignore it by setting MaxBlocks to null;
       MaxBlocks = nullptr;
     } else {
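
For reference, a minimal sketch of how the renamed helpers fit together. This is not part of the patch: the standalone main() and the "sm_90" input are illustrative assumptions, and compiling it requires building against Clang's clangBasic library. Only functions declared in the header changes above are used.

// Hypothetical usage example for the renamed API; not part of the patch.
#include "clang/Basic/Cuda.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  // Parse a CUDA architecture string with the renamed entry point
  // (formerly StringToCudaArch).
  clang::OffloadArch Arch = clang::StringToOffloadArch("sm_90");

  if (clang::IsNVIDIAOffloadArch(Arch)) {
    // Round-trip to the canonical spelling (formerly CudaArchToString) and
    // query the earliest CUDA SDK release that supports this architecture
    // (formerly MinVersionForCudaArch).
    llvm::outs() << clang::OffloadArchToString(Arch) << " requires CUDA >= "
                 << clang::CudaVersionToString(
                        clang::MinVersionForOffloadArch(Arch))
                 << "\n";
  }
  return 0;
}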