Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: [SYCL] Implement SYCL address space attributes handling #968

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion clang/include/clang/AST/Type.h
Original file line number Diff line number Diff line change
Expand Up @@ -486,7 +486,13 @@ class Qualifiers {
/// Returns true if the address space in these qualifiers is equal to or
/// a superset of the address space in the argument qualifiers.
bool isAddressSpaceSupersetOf(Qualifiers other) const {
return isAddressSpaceSupersetOf(getAddressSpace(), other.getAddressSpace());
return isAddressSpaceSupersetOf(getAddressSpace(),
other.getAddressSpace()) ||
(!hasAddressSpace() &&
(other.getAddressSpace() == LangAS::sycl_private ||
other.getAddressSpace() == LangAS::sycl_local ||
other.getAddressSpace() == LangAS::sycl_global ||
other.getAddressSpace() == LangAS::sycl_constant));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, maybe it would be good to add a comment explaining why we allowed casting between "empty" address space and others. At least for casts to sycl_constant address spaces.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@asavonic, can you help with that?

casts to sycl_constant address spaces

I don't think these casts should be allowed, as they are not legal in the OpenCL environment.

Another option would be skipping sycl_constant in this patch.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think these casts should be allowed, as they are not legal in the OpenCL environment.

Agree.

}

/// Determines if these qualifiers compatibly include another set.
Expand Down
6 changes: 6 additions & 0 deletions clang/include/clang/Basic/AddressSpaces.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@ enum class LangAS : unsigned {
cuda_constant,
cuda_shared,

// SYCL specific address spaces.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please remind me why we define these SYCL address spaces, and not use the OpenCL ones?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I experimented in this direction and existing rules for OpenCL address spaces prohibit casts between qualified and unqualified pointers. I can share my patch if you are interested.
If I understand correctly, the logic you implemented is semantically different from the OpenCL semantics, but as long as we produce a valid SPIR-V module it should be fine.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI: the current status of the experiment with using OpenCL address spaces can be found in my private fork.

sycl_global,
sycl_local,
sycl_constant,
sycl_private,

// Pointer size and extension address spaces.
ptr32_sptr,
ptr32_uptr,
Expand Down
22 changes: 20 additions & 2 deletions clang/include/clang/Sema/ParsedAttr.h
Original file line number Diff line number Diff line change
Expand Up @@ -515,8 +515,8 @@ class ParsedAttr final
/// a Spelling enumeration, the value UINT_MAX is returned.
unsigned getSemanticSpelling() const;

/// If this is an OpenCL addr space attribute returns its representation
/// in LangAS, otherwise returns default addr space.
/// If this is an OpenCL address space attribute returns its representation
/// in LangAS, otherwise returns default address space.
LangAS asOpenCLLangAS() const {
switch (getParsedKind()) {
case ParsedAttr::AT_OpenCLConstantAddressSpace:
Expand All @@ -534,6 +534,24 @@ class ParsedAttr final
}
}

/// Maps an OpenCL address space attribute onto the corresponding SYCL
/// address space in LangAS. The OpenCL generic address space attribute and
/// any other attribute kind yield the default address space.
LangAS asSYCLLangAS() const {
  LangAS Result = LangAS::Default;
  switch (getKind()) {
  case ParsedAttr::AT_OpenCLGlobalAddressSpace:
    Result = LangAS::sycl_global;
    break;
  case ParsedAttr::AT_OpenCLLocalAddressSpace:
    Result = LangAS::sycl_local;
    break;
  case ParsedAttr::AT_OpenCLConstantAddressSpace:
    Result = LangAS::sycl_constant;
    break;
  case ParsedAttr::AT_OpenCLPrivateAddressSpace:
    Result = LangAS::sycl_private;
    break;
  case ParsedAttr::AT_OpenCLGenericAddressSpace:
  default:
    // No dedicated SYCL address space here: keep the default.
    break;
  }
  return Result;
}

AttributeCommonInfo::Kind getKind() const { return getParsedKind(); }
};

Expand Down
4 changes: 4 additions & 0 deletions clang/lib/AST/ASTContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -836,6 +836,10 @@ static const LangASMap *getAddressSpaceMap(const TargetInfo &T,
5, // cuda_device
6, // cuda_constant
7, // cuda_shared
1, // sycl_global
3, // sycl_local
2, // sycl_constant
0, // sycl_private
8, // ptr32_sptr
9, // ptr32_uptr
10 // ptr64
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/AST/ItaniumMangle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2285,7 +2285,7 @@ void CXXNameMangler::mangleQualifiers(Qualifiers Quals, const DependentAddressSp
if (Context.getASTContext().addressSpaceMapManglingFor(AS)) {
// <target-addrspace> ::= "AS" <address-space-number>
unsigned TargetAS = Context.getASTContext().getTargetAddressSpace(AS);
if (TargetAS != 0)
if (TargetAS != 0 || (Context.getASTContext().getLangOpts().SYCLIsDevice))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you know why we need this change?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sycl_private address is mapped to 0 for SPIR as well as default address space, so in order to "specialize" templates with the sycl_private qualified types, we have to mangle them differently from the unqualified types.
Test case is here: https://github.com/intel/llvm/pull/968/files#diff-4e74dfb66665848ae647af85422c78d8R41

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sycl_private address is mapped to 0 for SPIR as well as default address space

Default address space is supposed to be akin to generic, i.e. it should be mapped to 4.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, my previous reply is not quite accurate.
This change is needed because pointers qualified with an address space that is mapped to LLVM address space 0 are mangled the same way as unqualified pointers.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, that makes sense.
However, I'm surprised that we don't have a similar code for OpenCL here. opencl_private is also mapped to 0, but it has a distinct mangling:

__attribute__((overloadable))
void foo(__private int *ip) {
  *ip = 0;
}
// mangled as: _Z3fooPU9CLprivatei

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have a similar problem for OpenCL targeting SPIR. You seem to have used some other target in your example. X86?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, the problem I'm talking about is mangling, not the mangled names conflict.
Mangled names conflict is not a problem in OpenCL as OpenCL compiler qualifies all the pointers.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have a similar problem for OpenCL targeting SPIR. You seem to have used some other target in your example. X86?

Right, sorry for the confusion. With -triple spir the function is mangled as _Z3fooPi.

Sorry, the problem I'm talking about is mangling, not the mangled names conflict.
Mangled names conflict is not a problem in OpenCL as OpenCL compiler qualifies all the pointers.

I don't quite understand what the problem is. Can you provide a test case?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The test case is already added to this PR: https://github.com/intel/llvm/pull/968/files#diff-9cbf615457fd73dff7e9840ed855fa9a and the problem is covered in this thread. I just clarified why it affects only the SYCL compiler, but not OpenCL.
Smaller reproducer:

template<typename T>
void tmpl(T t){}

__attribute__((opencl_private)) int *PRIV;
tmpl(PRIV);

int *NoAS;
tmpl(NoAS);

The SYCL compiler produces the same mangled name for the two tmpl instances, whereas the OpenCL compiler adds the generic qualifier to the NoAS declaration and produces different names.
Let me know if it's still unclear.

I think original patch from @erichkeane mapped sycl_private address space to AS 5. This might be an alternative option, but we need LLVM-SPIRV translator to support alternative mappings as well.

ASString = "AS" + llvm::utostr(TargetAS);
} else {
switch (AS) {
Expand Down
4 changes: 4 additions & 0 deletions clang/lib/AST/TypePrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1777,12 +1777,16 @@ std::string Qualifiers::getAddrSpaceAsString(LangAS AS) {
case LangAS::Default:
return "";
case LangAS::opencl_global:
case LangAS::sycl_global:
return "__global";
case LangAS::opencl_local:
case LangAS::sycl_local:
return "__local";
case LangAS::opencl_private:
case LangAS::sycl_private:
return "";
case LangAS::opencl_constant:
case LangAS::sycl_constant:
return "__constant";
case LangAS::opencl_generic:
return "__generic";
Expand Down
12 changes: 2 additions & 10 deletions clang/lib/Basic/Targets.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -561,18 +561,10 @@ TargetInfo *AllocateTarget(const llvm::Triple &Triple,
return new X86_64TargetInfo(Triple, Opts);
}

case llvm::Triple::spir: {
if (Triple.getOS() != llvm::Triple::UnknownOS ||
Triple.getEnvironment() != llvm::Triple::UnknownEnvironment)
return nullptr;
case llvm::Triple::spir:
return new SPIR32TargetInfo(Triple, Opts);
}
case llvm::Triple::spir64: {
if (Triple.getOS() != llvm::Triple::UnknownOS ||
Triple.getEnvironment() != llvm::Triple::UnknownEnvironment)
return nullptr;
case llvm::Triple::spir64:
return new SPIR64TargetInfo(Triple, Opts);
}
case llvm::Triple::wasm32:
if (Triple.getSubArch() != llvm::Triple::NoSubArch ||
Triple.getVendor() != llvm::Triple::UnknownVendor ||
Expand Down
8 changes: 8 additions & 0 deletions clang/lib/Basic/Targets/AMDGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@ const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
Global, // cuda_device
Constant, // cuda_constant
Local, // cuda_shared
Global, // sycl_global
Local, // sycl_local
Constant, // sycl_constant
Private, // sycl_private
Generic, // ptr32_sptr
Generic, // ptr32_uptr
Generic // ptr64
Expand All @@ -63,6 +67,10 @@ const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
Global, // cuda_device
Constant, // cuda_constant
Local, // cuda_shared
Global, // sycl_global
Local, // sycl_local
Constant, // sycl_constant
Private, // sycl_private
Generic, // ptr32_sptr
Generic, // ptr32_uptr
Generic // ptr64
Expand Down
4 changes: 4 additions & 0 deletions clang/lib/Basic/Targets/NVPTX.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ static const unsigned NVPTXAddrSpaceMap[] = {
1, // cuda_device
4, // cuda_constant
3, // cuda_shared
1, // sycl_global
3, // sycl_local
4, // sycl_constant
0, // sycl_private
0, // ptr32_sptr
0, // ptr32_uptr
0 // ptr64
Expand Down
33 changes: 28 additions & 5 deletions clang/lib/Basic/Targets/SPIR.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,29 @@ static const unsigned SPIRAddrSpaceMap[] = {
0, // cuda_device
0, // cuda_constant
0, // cuda_shared
1, // sycl_global
3, // sycl_local
2, // sycl_constant
0, // sycl_private
0, // ptr32_sptr
0, // ptr32_uptr
0 // ptr64
};

static const unsigned SYCLAddrSpaceMap[] = {
4, // Default
1, // opencl_global
3, // opencl_local
2, // opencl_constant
0, // opencl_private
4, // opencl_generic
0, // cuda_device
0, // cuda_constant
0, // cuda_shared
1, // sycl_global
3, // sycl_local
2, // sycl_constant
0, // sycl_private
0, // ptr32_sptr
0, // ptr32_uptr
0 // ptr64
Expand All @@ -40,14 +63,14 @@ class LLVM_LIBRARY_VISIBILITY SPIRTargetInfo : public TargetInfo {
public:
SPIRTargetInfo(const llvm::Triple &Triple, const TargetOptions &)
: TargetInfo(Triple) {
assert(getTriple().getOS() == llvm::Triple::UnknownOS &&
"SPIR target must use unknown OS");
assert(getTriple().getEnvironment() == llvm::Triple::UnknownEnvironment &&
"SPIR target must use unknown environment type");
TLSSupported = false;
VLASupported = false;
LongWidth = LongAlign = 64;
AddrSpaceMap = &SPIRAddrSpaceMap;
if (Triple.getEnvironment() == llvm::Triple::SYCLDevice) {
AddrSpaceMap = &SYCLAddrSpaceMap;
} else {
AddrSpaceMap = &SPIRAddrSpaceMap;
}
UseAddrSpaceMapMangling = true;
HasLegalHalfType = true;
HasFloat16 = true;
Expand Down
4 changes: 4 additions & 0 deletions clang/lib/Basic/Targets/TCE.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ static const unsigned TCEOpenCLAddrSpaceMap[] = {
0, // cuda_device
0, // cuda_constant
0, // cuda_shared
3, // sycl_global
4, // sycl_local
5, // sycl_constant
0, // sycl_private
0, // ptr32_sptr
0, // ptr32_uptr
0, // ptr64
Expand Down
4 changes: 4 additions & 0 deletions clang/lib/Basic/Targets/X86.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ static const unsigned X86AddrSpaceMap[] = {
0, // cuda_device
0, // cuda_constant
0, // cuda_shared
0, // sycl_global
0, // sycl_local
0, // sycl_constant
0, // sycl_private
270, // ptr32_sptr
271, // ptr32_uptr
272 // ptr64
Expand Down
24 changes: 24 additions & 0 deletions clang/lib/CodeGen/CGCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4100,6 +4100,16 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
V->getType()->isIntegerTy())
V = Builder.CreateZExt(V, ArgInfo.getCoerceToType());

if (FirstIRArg < IRFuncTy->getNumParams()) {
const auto *LHSPtrTy = dyn_cast<llvm::PointerType>(V->getType());
const auto *RHSPtrTy =
dyn_cast<llvm::PointerType>(IRFuncTy->getParamType(FirstIRArg));
if (LHSPtrTy && RHSPtrTy &&
LHSPtrTy->getAddressSpace() != RHSPtrTy->getAddressSpace())
V = Builder.CreateAddrSpaceCast(V,
IRFuncTy->getParamType(FirstIRArg));
}

// If the argument doesn't match, perform a bitcast to coerce it. This
// can happen due to trivial type mismatches.
if (FirstIRArg < IRFuncTy->getNumParams() &&
Expand Down Expand Up @@ -4309,6 +4319,20 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
if (!CallArgs.getCleanupsToDeactivate().empty())
deactivateArgCleanupsBeforeCall(*this, CallArgs);

// Addrspace cast to generic if necessary
for (unsigned i = 0; i < IRFuncTy->getNumParams(); ++i) {
if (auto *PtrTy = dyn_cast<llvm::PointerType>(IRCallArgs[i]->getType())) {
auto *ExpectedPtrType =
cast<llvm::PointerType>(IRFuncTy->getParamType(i));
unsigned ValueAS = PtrTy->getAddressSpace();
unsigned ExpectedAS = ExpectedPtrType->getAddressSpace();
if (ValueAS != ExpectedAS) {
IRCallArgs[i] = Builder.CreatePointerBitCastOrAddrSpaceCast(
IRCallArgs[i], ExpectedPtrType);
}
}
}

// Assert that the arguments we computed match up. The IR verifier
// will catch this, but this is a common enough source of problems
// during IRGen changes that it's way better for debugging to catch
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/CodeGen/CGClass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ Address CodeGenFunction::GetAddressOfBaseClass(
EmitTypeCheck(TCK_Upcast, Loc, Value.getPointer(),
DerivedTy, DerivedAlign, SkippedChecks);
}
return Builder.CreateBitCast(Value, BasePtrTy);
return Builder.CreatePointerBitCastOrAddrSpaceCast(Value, BasePtrTy);
}

llvm::BasicBlock *origBB = nullptr;
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/CodeGen/CGDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,7 @@ CodeGenFunction::AddInitializerToStaticVarDecl(const VarDecl &D,
OldGV->getLinkage(), Init, "",
/*InsertBefore*/ OldGV,
OldGV->getThreadLocalMode(),
CGM.getContext().getTargetAddressSpace(D.getType()));
OldGV->getType()->getPointerAddressSpace());
GV->setVisibility(OldGV->getVisibility());
GV->setDSOLocal(OldGV->isDSOLocal());
GV->setComdat(OldGV->getComdat());
Expand Down
8 changes: 5 additions & 3 deletions clang/lib/CodeGen/CGDeclCXX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,13 +161,15 @@ void CodeGenFunction::EmitInvariantStart(llvm::Constant *Addr, CharUnits Size) {
// Grab the llvm.invariant.start intrinsic.
llvm::Intrinsic::ID InvStartID = llvm::Intrinsic::invariant_start;
// Overloaded address space type.
llvm::Type *ObjectPtr[1] = {Int8PtrTy};
llvm::Type *ResTy = llvm::PointerType::getInt8PtrTy(
CGM.getLLVMContext(), Addr->getType()->getPointerAddressSpace());
llvm::Type *ObjectPtr[1] = {ResTy};
llvm::Function *InvariantStart = CGM.getIntrinsic(InvStartID, ObjectPtr);

// Emit a call with the size in bytes of the object.
uint64_t Width = Size.getQuantity();
llvm::Value *Args[2] = { llvm::ConstantInt::getSigned(Int64Ty, Width),
llvm::ConstantExpr::getBitCast(Addr, Int8PtrTy)};
llvm::Value *Args[2] = {llvm::ConstantInt::getSigned(Int64Ty, Width),
llvm::ConstantExpr::getBitCast(Addr, ResTy)};
Builder.CreateCall(InvariantStart, Args);
}

Expand Down
50 changes: 42 additions & 8 deletions clang/lib/CodeGen/CGExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1089,10 +1089,8 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E,
CodeGenFunction::CFITCK_UnrelatedCast,
CE->getBeginLoc());
}
return CE->getCastKind() != CK_AddressSpaceConversion
? Builder.CreateBitCast(Addr, ConvertType(E->getType()))
: Builder.CreateAddrSpaceCast(Addr,
ConvertType(E->getType()));
return Builder.CreatePointerBitCastOrAddrSpaceCast(
Addr, ConvertType(E->getType()));
}
break;

Expand Down Expand Up @@ -1750,6 +1748,17 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr,
return;
}

if (auto *PtrTy = dyn_cast<llvm::PointerType>(Value->getType())) {
auto *ExpectedPtrType =
cast<llvm::PointerType>(Addr.getType()->getElementType());
unsigned ValueAS = PtrTy->getAddressSpace();
unsigned ExpectedAS = ExpectedPtrType->getAddressSpace();
if (ValueAS != ExpectedAS) {
Value =
Builder.CreatePointerBitCastOrAddrSpaceCast(Value, ExpectedPtrType);
}
}

llvm::StoreInst *Store = Builder.CreateStore(Value, Addr, Volatile);
if (isNontemporal) {
llvm::MDNode *Node =
Expand Down Expand Up @@ -4309,10 +4318,35 @@ EmitConditionalOperatorLValue(const AbstractConditionalOperator *expr) {
EmitBlock(contBlock);

if (lhs && rhs) {
llvm::PHINode *phi =
Builder.CreatePHI(lhs->getPointer(*this)->getType(), 2, "cond-lvalue");
phi->addIncoming(lhs->getPointer(*this), lhsBlock);
phi->addIncoming(rhs->getPointer(*this), rhsBlock);
llvm::Value *lhsPtr = lhs->getPointer(*this);
llvm::Value *rhsPtr = rhs->getPointer(*this);
if (rhsPtr->getType() != lhsPtr->getType()) {
if (!getLangOpts().SYCLIsDevice)
llvm_unreachable(
"Unable to find a common address space for two pointers.");

auto CastToAS = [](llvm::Value *V, llvm::BasicBlock *BB, unsigned AS) {
auto *Ty = cast<llvm::PointerType>(V->getType());
if (Ty->getAddressSpace() == AS)
return V;
llvm::IRBuilder<> Builder(BB->getTerminator());
auto *TyAS = llvm::PointerType::get(Ty->getElementType(), AS);
return Builder.CreatePointerBitCastOrAddrSpaceCast(V, TyAS);
};

// Language rules define if it is legal to cast from one address space
// to another, and which address space we should use as a "common
// denominator". In SYCL, generic address space overlaps with all other
// address spaces.
unsigned GenericAS =
getContext().getTargetAddressSpace(LangAS::opencl_generic);

lhsPtr = CastToAS(lhsPtr, lhsBlock, GenericAS);
rhsPtr = CastToAS(rhsPtr, rhsBlock, GenericAS);
}
llvm::PHINode *phi = Builder.CreatePHI(lhsPtr->getType(), 2, "cond-lvalue");
phi->addIncoming(lhsPtr, lhsBlock);
phi->addIncoming(rhsPtr, rhsBlock);
Address result(phi, std::min(lhs->getAlignment(), rhs->getAlignment()));
AlignmentSource alignSource =
std::max(lhs->getBaseInfo().getAlignmentSource(),
Expand Down
Loading