Skip to content

Commit 0228c23

Browse files
[SYCL] Fix using some of math built-ins when ESIMD is included (#14793)
ESIMD headers declare some of `__spirv_ocl_*` built-ins as template functions, but those built-ins are also implicitly declared by the compiler when used. On Windows, redeclarations in headers cause compilation issues, because for some reason they take priority, but template arguments for them cannot be inferred. This commit introduces new tests to cover the affected scenarios and reverts a couple of ESIMD commits to fix the issue: - #14020 is completely reverted - #13383 is partially reverted to preserve new interfaces and tests, but stop declaring `__spirv_ocl_*` built-ins. I suppose that both PRs were made in an attempt to move away from custom ESIMD intrinsics to standard SPIR-V ones, but that should be done without manually declaring the latter. A bigger refactoring might be needed to use auto-declared SPIR-V built-ins in ESIMD because of the presence and usage of single-element vectors in ESIMD (which do not exist in SPIR-V).
1 parent 619185f commit 0228c23

File tree

14 files changed

+163
-51
lines changed

14 files changed

+163
-51
lines changed

llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp

+18
Original file line numberDiff line numberDiff line change
@@ -1234,6 +1234,21 @@ static Instruction *addCastInstIfNeeded(Instruction *OldI, Instruction *NewI,
12341234
return NewI;
12351235
}
12361236

1237+
// Translates the following intrinsics:
1238+
// %res = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
1239+
// %res = call double @llvm.fmuladd.f64(double %a, double %b, double %c)
1240+
// To
1241+
// %mul = fmul <type> %a, <type> %b
1242+
// %res = fadd <type> %mul, <type> %c
1243+
// TODO: Remove when newer GPU driver is used in CI.
1244+
void translateFmuladd(CallInst *CI) {
1245+
assert(CI->getIntrinsicID() == Intrinsic::fmuladd);
1246+
IRBuilder<> Bld(CI);
1247+
auto *Mul = Bld.CreateFMul(CI->getOperand(0), CI->getOperand(1));
1248+
auto *Res = Bld.CreateFAdd(Mul, CI->getOperand(2));
1249+
CI->replaceAllUsesWith(Res);
1250+
}
1251+
12371252
// Translates an LLVM intrinsic to a form digestible by the BE.
12381253
bool translateLLVMIntrinsic(CallInst *CI) {
12391254
Function *F = CI->getCalledFunction();
@@ -1245,6 +1260,9 @@ bool translateLLVMIntrinsic(CallInst *CI) {
12451260
// no translation - it will be simply removed.
12461261
// TODO: make use of 'assume' info in the BE
12471262
break;
1263+
case Intrinsic::fmuladd:
1264+
translateFmuladd(CI);
1265+
break;
12481266
default:
12491267
return false; // "intrinsic wasn't translated, keep the original call"
12501268
}
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; RUN: opt -passes=LowerESIMD -S < %s | FileCheck %s
22

3-
; This test checks that LowerESIMD pass does not lower some llvm intrinsics
4-
; which can now be handled by the VC BE.
3+
; This test checks that LowerESIMD pass correctly lowers some llvm intrinsics
4+
; which can't be handled by the VC BE.
55
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
66
target triple = "spir64-unknown-unknown"
77

@@ -10,15 +10,17 @@ declare double @llvm.fmuladd.f64(double %x, double %y, double %z)
1010

1111
define spir_func float @test_fmuladd_f32(float %x, float %y, float %z) {
1212
%1 = call float @llvm.fmuladd.f32(float %x, float %y, float %z)
13-
; CHECK: %[[A:[0-9a-zA-Z\._]+]] = call float @llvm.fmuladd.f32(float %x, float %y, float %z)
13+
; CHECK: %[[A:[0-9a-zA-Z\._]+]] = fmul float %x, %y
14+
; CHECK: %[[B:[0-9a-zA-Z\._]+]] = fadd float %[[A]], %z
1415
ret float %1
15-
; CHECK: ret float %[[A]]
16+
; CHECK: ret float %[[B]]
1617
}
1718

1819
define spir_func double @test_fmuladd_f64(double %x, double %y, double %z) {
1920
%1 = call double @llvm.fmuladd.f64(double %x, double %y, double %z)
20-
; CHECK: %[[A:[0-9a-zA-Z\._]+]] = call double @llvm.fmuladd.f64(double %x, double %y, double %z)
21+
; CHECK: %[[A:[0-9a-zA-Z\._]+]] = fmul double %x, %y
22+
; CHECK: %[[B:[0-9a-zA-Z\._]+]] = fadd double %[[A]], %z
2123
ret double %1
22-
; CHECK: ret double %[[A]]
24+
; CHECK: ret double %[[B]]
2325
}
2426

sycl/include/sycl/ext/intel/esimd/detail/math_intrin.hpp

+14-19
Original file line numberDiff line numberDiff line change
@@ -72,23 +72,6 @@ template <typename T, int N>
7272
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
7373
__spirv_ocl_native_powr(__ESIMD_raw_vec_t(T, N), __ESIMD_raw_vec_t(T, N));
7474

75-
template <typename T, int N>
76-
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
77-
__spirv_ocl_fabs(__ESIMD_raw_vec_t(T, N)) __ESIMD_INTRIN_END;
78-
79-
template <typename T, int N>
80-
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
81-
__spirv_ocl_s_abs(__ESIMD_raw_vec_t(T, N)) __ESIMD_INTRIN_END;
82-
83-
template <typename T, int N>
84-
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
85-
__spirv_ocl_fmin(__ESIMD_raw_vec_t(T, N),
86-
__ESIMD_raw_vec_t(T, N)) __ESIMD_INTRIN_END;
87-
88-
template <typename T, int N>
89-
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
90-
__spirv_ocl_fmax(__ESIMD_raw_vec_t(T, N),
91-
__ESIMD_raw_vec_t(T, N)) __ESIMD_INTRIN_END;
9275
// saturation intrinsics
9376
template <typename T0, typename T1, int SZ>
9477
__ESIMD_INTRIN __ESIMD_raw_vec_t(T0, SZ)
@@ -118,7 +101,15 @@ template <typename T0, typename T1, int SZ>
118101
__ESIMD_INTRIN __ESIMD_raw_vec_t(T0, SZ)
119102
__esimd_sstrunc_sat(__ESIMD_raw_vec_t(T1, SZ) src) __ESIMD_INTRIN_END;
120103

121-
/// 3 kinds of max, the missing fmax uses spir-v intrinsics above
104+
template <typename T, int SZ>
105+
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, SZ)
106+
__esimd_abs(__ESIMD_raw_vec_t(T, SZ) src0) __ESIMD_INTRIN_END;
107+
108+
/// 3 kinds of max
109+
template <typename T, int SZ>
110+
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, SZ)
111+
__esimd_fmax(__ESIMD_raw_vec_t(T, SZ) src0,
112+
__ESIMD_raw_vec_t(T, SZ) src1) __ESIMD_INTRIN_END;
122113
template <typename T, int SZ>
123114
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, SZ)
124115
__esimd_umax(__ESIMD_raw_vec_t(T, SZ) src0,
@@ -128,7 +119,11 @@ __ESIMD_INTRIN __ESIMD_raw_vec_t(T, SZ)
128119
__esimd_smax(__ESIMD_raw_vec_t(T, SZ) src0,
129120
__ESIMD_raw_vec_t(T, SZ) src1) __ESIMD_INTRIN_END;
130121

131-
/// 3 kinds of min, the missing fmin uses spir-v instrinsics above
122+
/// 3 kinds of min
123+
template <typename T, int SZ>
124+
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, SZ)
125+
__esimd_fmin(__ESIMD_raw_vec_t(T, SZ) src0,
126+
__ESIMD_raw_vec_t(T, SZ) src1) __ESIMD_INTRIN_END;
132127
template <typename T, int SZ>
133128
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, SZ)
134129
__esimd_umin(__ESIMD_raw_vec_t(T, SZ) src0,

sycl/include/sycl/ext/intel/esimd/math.hpp

+5-9
Original file line numberDiff line numberDiff line change
@@ -97,11 +97,7 @@ namespace detail {
9797
template <typename TRes, typename TArg, int SZ>
9898
ESIMD_NODEBUG ESIMD_INLINE simd<TRes, SZ>
9999
__esimd_abs_common_internal(simd<TArg, SZ> src0) {
100-
simd<TArg, SZ> Result;
101-
if constexpr (detail::is_generic_floating_point_v<TArg>)
102-
Result = simd<TArg, SZ>(__spirv_ocl_fabs<TArg, SZ>(src0.data()));
103-
else
104-
Result = simd<TArg, SZ>(__spirv_ocl_s_abs<TArg, SZ>(src0.data()));
100+
simd<TArg, SZ> Result = simd<TArg, SZ>(__esimd_abs<TArg, SZ>(src0.data()));
105101
return convert<TRes>(Result);
106102
}
107103

@@ -185,7 +181,7 @@ __ESIMD_API simd<T, SZ>(max)(simd<T, SZ> src0, simd<T, SZ> src1, Sat sat = {}) {
185181
constexpr bool is_sat = std::is_same_v<Sat, saturation_on_tag>;
186182

187183
if constexpr (std::is_floating_point<T>::value) {
188-
auto Result = __spirv_ocl_fmax<T, SZ>(src0.data(), src1.data());
184+
auto Result = __esimd_fmax<T, SZ>(src0.data(), src1.data());
189185
if constexpr (is_sat)
190186
Result = __esimd_sat<T, T, SZ>(Result);
191187
return simd<T, SZ>(Result);
@@ -270,7 +266,7 @@ __ESIMD_API simd<T, SZ>(min)(simd<T, SZ> src0, simd<T, SZ> src1, Sat sat = {}) {
270266
constexpr bool is_sat = std::is_same_v<Sat, saturation_on_tag>;
271267

272268
if constexpr (std::is_floating_point<T>::value) {
273-
auto Result = __spirv_ocl_fmin<T, SZ>(src0.data(), src1.data());
269+
auto Result = __esimd_fmin<T, SZ>(src0.data(), src1.data());
274270
if constexpr (is_sat)
275271
Result = __esimd_sat<T, T, SZ>(Result);
276272
return simd<T, SZ>(Result);
@@ -1466,7 +1462,7 @@ template <typename T0, typename T1, int SZ> struct esimd_apply_reduced_max {
14661462
template <typename... T>
14671463
simd<T0, SZ> operator()(simd<T1, SZ> v1, simd<T1, SZ> v2) {
14681464
if constexpr (std::is_floating_point<T1>::value) {
1469-
return __spirv_ocl_fmax<T1, SZ>(v1.data(), v2.data());
1465+
return __esimd_fmax<T1, SZ>(v1.data(), v2.data());
14701466
} else if constexpr (std::is_unsigned<T1>::value) {
14711467
return __esimd_umax<T1, SZ>(v1.data(), v2.data());
14721468
} else {
@@ -1479,7 +1475,7 @@ template <typename T0, typename T1, int SZ> struct esimd_apply_reduced_min {
14791475
template <typename... T>
14801476
simd<T0, SZ> operator()(simd<T1, SZ> v1, simd<T1, SZ> v2) {
14811477
if constexpr (std::is_floating_point<T1>::value) {
1482-
return __spirv_ocl_fmin<T1, SZ>(v1.data(), v2.data());
1478+
return __esimd_fmin<T1, SZ>(v1.data(), v2.data());
14831479
} else if constexpr (std::is_unsigned<T1>::value) {
14841480
return __esimd_umin<T1, SZ>(v1.data(), v2.data());
14851481
} else {

sycl/include/sycl/ext/intel/experimental/esimd/detail/math_intrin.hpp

+2-13
Original file line numberDiff line numberDiff line change
@@ -112,19 +112,8 @@ __ESIMD_INTRIN __ESIMD_raw_vec_t(sycl::half, N)
112112

113113
template <typename T, int N>
114114
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
115-
__spirv_ocl_fma(__ESIMD_raw_vec_t(T, N) a, __ESIMD_raw_vec_t(T, N) b,
116-
__ESIMD_raw_vec_t(T, N) c) __ESIMD_INTRIN_END;
117-
template <typename T, int N>
118-
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
119-
__spirv_ocl_popcount(__ESIMD_raw_vec_t(T, N) src0) __ESIMD_INTRIN_END;
120-
121-
template <typename T, int N>
122-
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
123-
__spirv_ocl_ctz(__ESIMD_raw_vec_t(T, N) src0) __ESIMD_INTRIN_END;
124-
125-
template <typename T, int N>
126-
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
127-
__spirv_ocl_clz(__ESIMD_raw_vec_t(T, N) src0) __ESIMD_INTRIN_END;
115+
__esimd_fmadd(__ESIMD_raw_vec_t(T, N) a, __ESIMD_raw_vec_t(T, N) b,
116+
__ESIMD_raw_vec_t(T, N) c) __ESIMD_INTRIN_END;
128117

129118
#undef __ESIMD_raw_vec_t
130119
#undef __ESIMD_cpp_vec_t

sycl/include/sycl/ext/intel/experimental/esimd/math.hpp

+16-4
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,11 @@ template <typename T, int N>
3232
__ESIMD_API std::enable_if_t<std::is_integral_v<T> && sizeof(T) < 8,
3333
__ESIMD_NS::simd<T, N>>
3434
popcount(__ESIMD_NS::simd<T, N> vec) {
35-
return __spirv_ocl_popcount<T, N>(vec.data());
35+
#ifdef __SYCL_DEVICE_ONLY__
36+
return __spirv_ocl_popcount(vec.data());
37+
#else
38+
return vec;
39+
#endif
3640
}
3741

3842
/// Count the number of leading zeros.
@@ -44,7 +48,11 @@ template <typename T, int N>
4448
__ESIMD_API std::enable_if_t<std::is_integral_v<T> && sizeof(T) < 8,
4549
__ESIMD_NS::simd<T, N>>
4650
clz(__ESIMD_NS::simd<T, N> vec) {
47-
return __spirv_ocl_clz<T, N>(vec.data());
51+
#ifdef __SYCL_DEVICE_ONLY__
52+
return __spirv_ocl_clz(vec.data());
53+
#else
54+
return vec;
55+
#endif
4856
}
4957

5058
/// Count the number of trailing zeros.
@@ -55,7 +63,11 @@ template <typename T, int N>
5563
__ESIMD_API std::enable_if_t<std::is_integral_v<T> && sizeof(T) < 8,
5664
__ESIMD_NS::simd<T, N>>
5765
ctz(__ESIMD_NS::simd<T, N> vec) {
58-
return __spirv_ocl_ctz<T, N>(vec.data());
66+
#ifdef __SYCL_DEVICE_ONLY__
67+
return __spirv_ocl_ctz(vec.data());
68+
#else
69+
return vec;
70+
#endif
5971
}
6072

6173
/// @} sycl_esimd_bitmanip
@@ -740,7 +752,7 @@ ESIMD_INLINE __ESIMD_NS::simd<T, N> fma(__ESIMD_NS::simd<T, N> a,
740752
static_assert(__ESIMD_DNS::is_generic_floating_point_v<T>,
741753
"fma only supports floating point types");
742754
using CppT = __ESIMD_DNS::element_type_traits<T>::EnclosingCppT;
743-
auto Ret = __spirv_ocl_fma<__ESIMD_DNS::__raw_t<CppT>, N>(
755+
auto Ret = __esimd_fmadd<__ESIMD_DNS::__raw_t<CppT>, N>(
744756
__ESIMD_DNS::convert_vector<CppT, T, N>(a.data()),
745757
__ESIMD_DNS::convert_vector<CppT, T, N>(b.data()),
746758
__ESIMD_DNS::convert_vector<CppT, T, N>(c.data()));

sycl/test/regression/esimd/abs.cpp

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
// RUN: %clangxx -fsycl -fsyntax-only %s
2+
3+
#include <sycl/sycl.hpp>
4+
5+
#include <sycl/ext/intel/esimd.hpp>
6+
7+
// Compile-only check (this file is built with -fsyntax-only): calling
// sycl::abs on an 8-element int vector must compile while the ESIMD header
// is included alongside <sycl/sycl.hpp>.
SYCL_EXTERNAL sycl::vec<int, 8> call_abs_vec(sycl::vec<int, 8> input) {
8+
return sycl::abs(input);
9+
}
10+
11+
// Compile-only check: the scalar int form of sycl::abs must compile while the
// ESIMD header is included.
SYCL_EXTERNAL int call_abs_scalar(int input) { return sycl::abs(input); }

sycl/test/regression/esimd/clz.cpp

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
// RUN: %clangxx -fsycl -fsyntax-only %s
2+
3+
#include <sycl/sycl.hpp>
4+
5+
#include <sycl/ext/intel/esimd.hpp>
6+
7+
// Compile-only check (this file is built with -fsyntax-only): calling
// sycl::clz on an 8-element int vector must compile while the ESIMD header
// is included alongside <sycl/sycl.hpp>.
SYCL_EXTERNAL sycl::vec<int, 8> call_clz_vec(sycl::vec<int, 8> input) {
8+
return sycl::clz(input);
9+
}
10+
11+
// Compile-only check: the scalar int form of sycl::clz must compile while the
// ESIMD header is included.
SYCL_EXTERNAL int call_clz_scalar(int input) { return sycl::clz(input); }

sycl/test/regression/esimd/ctz.cpp

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
// RUN: %clangxx -fsycl -fsyntax-only %s
2+
3+
#include <sycl/sycl.hpp>
4+
5+
#include <sycl/ext/intel/esimd.hpp>
6+
7+
// Compile-only check (this file is built with -fsyntax-only): calling
// sycl::ctz on an 8-element int vector must compile while the ESIMD header
// is included alongside <sycl/sycl.hpp>.
SYCL_EXTERNAL sycl::vec<int, 8> call_ctz_vec(sycl::vec<int, 8> input) {
8+
return sycl::ctz(input);
9+
}
10+
11+
// Compile-only check: the scalar int form of sycl::ctz must compile while the
// ESIMD header is included.
SYCL_EXTERNAL int call_ctz_scalar(int input) { return sycl::ctz(input); }

sycl/test/regression/esimd/fabs.cpp

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
// RUN: %clangxx -fsycl -fsyntax-only %s
2+
3+
#include <sycl/sycl.hpp>
4+
5+
#include <sycl/ext/intel/esimd.hpp>
6+
7+
// Compile-only check (this file is built with -fsyntax-only): calling
// sycl::fabs on an 8-element float vector must compile while the ESIMD header
// is included alongside <sycl/sycl.hpp>.
SYCL_EXTERNAL sycl::vec<float, 8> call_fabs_vec(sycl::vec<float, 8> input) {
8+
return sycl::fabs(input);
9+
}
10+
11+
// Compile-only check: the scalar float form of sycl::fabs must compile while
// the ESIMD header is included.
SYCL_EXTERNAL float call_fabs_scalar(float input) { return sycl::fabs(input); }

sycl/test/regression/esimd/fma.cpp

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
// RUN: %clangxx -fsycl -fsyntax-only %s
2+
3+
#include <sycl/sycl.hpp>
4+
5+
#include <sycl/ext/intel/esimd.hpp>
6+
7+
// Compile-only check (this file is built with -fsyntax-only): calling
// sycl::fma on three 8-element float vectors must compile while the ESIMD
// header is included alongside <sycl/sycl.hpp>.
SYCL_EXTERNAL sycl::vec<float, 8> call_fma_vec(sycl::vec<float, 8> a,
8+
sycl::vec<float, 8> b,
9+
sycl::vec<float, 8> c) {
10+
return sycl::fma(a, b, c);
11+
}
12+
13+
// Compile-only check: the scalar float form of sycl::fma must compile while
// the ESIMD header is included.
SYCL_EXTERNAL float call_fma_scalar(float a, float b, float c) {
14+
return sycl::fma(a, b, c);
15+
}

sycl/test/regression/esimd/fmax.cpp

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// RUN: %clangxx -fsycl -fsyntax-only %s
2+
3+
#include <sycl/sycl.hpp>
4+
5+
#include <sycl/ext/intel/esimd.hpp>
6+
7+
// Compile-only check (this file is built with -fsyntax-only): calling
// sycl::fmax on two 8-element float vectors must compile while the ESIMD
// header is included alongside <sycl/sycl.hpp>.
SYCL_EXTERNAL sycl::vec<float, 8> call_fmax_vec(sycl::vec<float, 8> a,
8+
sycl::vec<float, 8> b) {
9+
return sycl::fmax(a, b);
10+
}
11+
12+
// Compile-only check: the scalar float form of sycl::fmax must compile while
// the ESIMD header is included.
SYCL_EXTERNAL float call_fmax_scalar(float a, float b) {
13+
return sycl::fmax(a, b);
14+
}

sycl/test/regression/esimd/fmin.cpp

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// RUN: %clangxx -fsycl -fsyntax-only %s
2+
3+
#include <sycl/sycl.hpp>
4+
5+
#include <sycl/ext/intel/esimd.hpp>
6+
7+
// Compile-only check (this file is built with -fsyntax-only): calling
// sycl::fmin on two 8-element float vectors must compile while the ESIMD
// header is included alongside <sycl/sycl.hpp>.
SYCL_EXTERNAL sycl::vec<float, 8> call_fmin_vec(sycl::vec<float, 8> a,
8+
sycl::vec<float, 8> b) {
9+
return sycl::fmin(a, b);
10+
}
11+
12+
// Compile-only check: the scalar float form of sycl::fmin must compile while
// the ESIMD header is included.
SYCL_EXTERNAL float call_fmin_scalar(float a, float b) {
13+
return sycl::fmin(a, b);
14+
}
+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// RUN: %clangxx -fsycl -fsyntax-only %s
2+
3+
#include <sycl/sycl.hpp>
4+
5+
#include <sycl/ext/intel/esimd.hpp>
6+
7+
// Compile-only check (this file is built with -fsyntax-only): calling
// sycl::popcount on an 8-element int vector must compile while the ESIMD
// header is included alongside <sycl/sycl.hpp>.
SYCL_EXTERNAL sycl::vec<int, 8> call_popcount_vec(sycl::vec<int, 8> input) {
8+
return sycl::popcount(input);
9+
}
10+
11+
// Compile-only check: the scalar int form of sycl::popcount must compile
// while the ESIMD header is included.
SYCL_EXTERNAL int call_popcount_scalar(int input) {
12+
return sycl::popcount(input);
13+
}

0 commit comments

Comments
 (0)