@@ -3961,7 +3961,7 @@ class OffloadingActionBuilder final {
3961
3961
ActionList FPGAArchiveInputs;
3962
3962
3963
3963
/// List of GPU architectures to use in this compilation, each paired with the offload target triple it applies to.
3964
- SmallVector<CudaArch , 8 > GpuArchList;
3964
+ SmallVector<std::pair<llvm::Triple, std::string> , 8 > GpuArchList;
3965
3965
3966
3966
// / Build the last steps for CUDA after all BC files have been linked.
3967
3967
JobAction *finalizeNVPTXDependences (Action *Input, const llvm::Triple &TT) {
@@ -3998,13 +3998,17 @@ class OffloadingActionBuilder final {
3998
3998
const Driver::InputList &Inputs)
3999
3999
: DeviceActionBuilder(C, Args, Inputs, Action::OFK_SYCL) {}
4000
4000
4001
- void withBoundArchForToolChain (const ToolChain* TC,
4001
+ void withBoundArchForToolChain (const ToolChain * TC,
4002
4002
llvm::function_ref<void (const char *)> Op) {
4003
- if (TC->getTriple ().isNVPTX ())
4004
- for (CudaArch A : GpuArchList)
4005
- Op (CudaArchToString (A));
4006
- else
4007
- Op (nullptr );
4003
+ for (auto &A : GpuArchList) {
4004
+ if (TC->getTriple () == A.first ) {
4005
+ Op (Args.MakeArgString (A.second .c_str ()));
4006
+ return ;
4007
+ }
4008
+ }
4009
+
4010
+ // No architecture is bound to this toolchain.
4011
+ Op (nullptr );
4008
4012
}
4009
4013
4010
4014
ActionBuilderReturnCode
@@ -4058,8 +4062,8 @@ class OffloadingActionBuilder final {
4058
4062
}
4059
4063
const auto *TC = ToolChains.front ();
4060
4064
const char *BoundArch = nullptr ;
4061
- if (TC->getTriple ().isNVPTX ())
4062
- BoundArch = CudaArchToString ( GpuArchList.front ());
4065
+ if (TC->getTriple ().isNVPTX () || TC-> getTriple (). isAMDGCN () )
4066
+ BoundArch = GpuArchList.front (). second . c_str ( );
4063
4067
DA.add (*DeviceCompilerInput, *TC, BoundArch, Action::OFK_SYCL);
4064
4068
// Clear the input file, it is already a dependence to a host
4065
4069
// action.
@@ -4642,39 +4646,94 @@ class OffloadingActionBuilder final {
4642
4646
}
4643
4647
}
4644
4648
4645
- // / Initialize the GPU architecture list from arguments - this populates `GpuArchList` from
4646
- // / `--cuda-gpu-arch` flags. Only relevant if compiling to CUDA. Return true if any
4647
- // / initialization errors are found.
4649
+ // / Initialize the GPU architecture list from arguments - this populates
4650
+ // / `GpuArchList` from `--offload-arch` flags. Only relevant if compiling to
4651
+ // / CUDA or AMDGCN. Return true if any initialization errors are found.
4652
+ // / FIXME: SPIR-V AOT targets should also use `offload-arch` to better fit
4653
+ // / in the standard model.
4648
4654
bool initializeGpuArchMap () {
4649
4655
const OptTable &Opts = C.getDriver ().getOpts ();
4650
4656
for (auto *A : Args) {
4651
4657
unsigned Index;
4658
+ llvm::Triple *TargetBE = nullptr ;
4652
4659
4653
- if (A->getOption ().matches (options::OPT_Xsycl_backend_EQ))
4660
+ auto GetTripleIt = [&, this ](llvm::StringRef Triple) {
4661
+ llvm::Triple TargetTriple{Triple};
4662
+ auto TripleIt = llvm::find_if (SYCLTripleList, [&](auto &SYCLTriple) {
4663
+ return SYCLTriple == TargetTriple;
4664
+ });
4665
+ return TripleIt != SYCLTripleList.end () ? &*TripleIt : nullptr ;
4666
+ };
4667
+
4668
+ if (A->getOption ().matches (options::OPT_Xsycl_backend_EQ)) {
4669
+ TargetBE = GetTripleIt (A->getValue (0 ));
4654
4670
// Passing device args: -Xsycl-target-backend=<triple> -opt=val.
4655
- if (llvm::Triple (A-> getValue ( 0 )). isNVPTX () )
4671
+ if (TargetBE )
4656
4672
Index = Args.getBaseArgs ().MakeIndex (A->getValue (1 ));
4657
4673
else
4658
4674
continue ;
4659
- else if (A->getOption ().matches (options::OPT_Xsycl_backend))
4675
+ } else if (A->getOption ().matches (options::OPT_Xsycl_backend)) {
4676
+ if (SYCLTripleList.size () > 1 ) {
4677
+ C.getDriver ().Diag (diag::err_drv_Xsycl_target_missing_triple)
4678
+ << A->getSpelling ();
4679
+ continue ;
4680
+ }
4660
4681
// Passing device args: -Xsycl-target-backend -opt=val.
4682
+ TargetBE = &SYCLTripleList.front ();
4661
4683
Index = Args.getBaseArgs ().MakeIndex (A->getValue (0 ));
4662
- else
4684
+ } else
4663
4685
continue ;
4664
4686
4665
4687
A->claim ();
4666
4688
auto ParsedArg = Opts.ParseOneArg (Args, Index);
4689
+
4667
4690
// TODO: Support --no-cuda-gpu-arch, --{,no-}cuda-gpu-arch=all.
4668
4691
if (ParsedArg &&
4669
4692
ParsedArg->getOption ().matches (options::OPT_offload_arch_EQ)) {
4693
+ llvm::StringRef ArchStr = ParsedArg->getValue (0 );
4694
+ if (TargetBE->isNVPTX ()) {
4695
+ // The CudaArch enum also covers AMDGCN architectures, so accept only NVIDIA ones here.
4696
+ CudaArch Arch = StringToCudaArch (ArchStr);
4697
+ if (Arch == CudaArch::UNKNOWN || !IsNVIDIAGpuArch (Arch)) {
4698
+ C.getDriver ().Diag (clang::diag::err_drv_cuda_bad_gpu_arch)
4699
+ << ArchStr;
4700
+ continue ;
4701
+ }
4702
+ ArchStr = CudaArchToString (Arch);
4703
+ } else if (TargetBE->isAMDGCN ()) {
4704
+ llvm::StringMap<bool > Features;
4705
+ auto Arch =
4706
+ parseTargetID (getHIPOffloadTargetTriple (), ArchStr, &Features);
4707
+ if (!Arch) {
4708
+ C.getDriver ().Diag (clang::diag::err_drv_bad_target_id) << ArchStr;
4709
+ continue ;
4710
+ }
4711
+ auto CanId = getCanonicalTargetID (Arch.getValue (), Features);
4712
+ ArchStr = Args.MakeArgStringRef (CanId);
4713
+ }
4670
4714
ParsedArg->claim ();
4671
- GpuArchList.push_back ( StringToCudaArch (ParsedArg-> getValue ( 0 )) );
4715
+ GpuArchList.emplace_back (*TargetBE, ArchStr );
4672
4716
}
4673
4717
}
4674
4718
4675
- // If there are no CUDA architectures provided then default to SM_50.
4676
- if (GpuArchList.empty ()) {
4677
- GpuArchList.push_back (CudaArch::SM_50);
4719
+ // Handle default architectures.
4720
+ for (auto &Triple : SYCLTripleList) {
4721
+ // For NVIDIA use SM_50 as a default
4722
+ if (Triple.isNVPTX () && llvm::none_of (GpuArchList, [&](auto &P) {
4723
+ return P.first .isNVPTX ();
4724
+ })) {
4725
+ llvm::StringRef DefaultArch = CudaArchToString (CudaArch::SM_50);
4726
+ GpuArchList.emplace_back (Triple, DefaultArch);
4727
+ }
4728
+
4729
+ // For AMD require the architecture to be set by the user
4730
+ if (Triple.isAMDGCN () && llvm::none_of (GpuArchList, [&](auto &P) {
4731
+ return P.first .isAMDGCN ();
4732
+ })) {
4733
+ C.getDriver ().Diag (
4734
+ clang::diag::err_drv_sycl_missing_amdgpu_arch);
4735
+ continue ;
4736
+ }
4678
4737
}
4679
4738
4680
4739
return false ;
0 commit comments