Skip to content

Commit be75136

Browse files
authored
Merge pull request #614 from robertknight/simd-ge-le
Define less-than ops in terms of greater-than ops for int types
2 parents 31f0e93 + db7bfae commit be75136

File tree

6 files changed

+16
-64
lines changed

6 files changed

+16
-64
lines changed

rten-simd/src/safe/arch/aarch64.rs

+3-13
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
use std::arch::aarch64::{
22
float32x4_t, int32x4_t, uint32x4_t, vabsq_f32, vaddq_f32, vaddq_s32, vaddvq_f32, vandq_u32,
33
vbslq_f32, vbslq_s32, vceqq_f32, vceqq_s32, vcgeq_f32, vcgeq_s32, vcgtq_f32, vcgtq_s32,
4-
vcleq_f32, vcleq_s32, vcltq_f32, vcltq_s32, vcvtq_s32_f32, vdivq_f32, vdupq_n_f32, vdupq_n_s32,
5-
vfmaq_f32, vld1q_f32, vld1q_s32, vld1q_u32, vmaxq_f32, vminq_f32, vmulq_f32, vmulq_s32,
6-
vnegq_f32, vnegq_s32, vshlq_n_s32, vst1q_f32, vst1q_s32, vsubq_f32, vsubq_s32,
4+
vcleq_f32, vcltq_f32, vcvtq_s32_f32, vdivq_f32, vdupq_n_f32, vdupq_n_s32, vfmaq_f32, vld1q_f32,
5+
vld1q_s32, vld1q_u32, vmaxq_f32, vminq_f32, vmulq_f32, vmulq_s32, vnegq_f32, vnegq_s32,
6+
vshlq_n_s32, vst1q_f32, vst1q_s32, vsubq_f32, vsubq_s32,
77
};
88
use std::mem::transmute;
99

@@ -223,16 +223,6 @@ unsafe impl SimdOps<int32x4_t> for ArmNeonIsa {
223223
unsafe { vdupq_n_s32(x) }
224224
}
225225

226-
#[inline]
227-
fn lt(self, x: int32x4_t, y: int32x4_t) -> uint32x4_t {
228-
unsafe { vcltq_s32(x, y) }
229-
}
230-
231-
#[inline]
232-
fn le(self, x: int32x4_t, y: int32x4_t) -> uint32x4_t {
233-
unsafe { vcleq_s32(x, y) }
234-
}
235-
236226
#[inline]
237227
fn eq(self, x: int32x4_t, y: int32x4_t) -> uint32x4_t {
238228
unsafe { vceqq_s32(x, y) }

rten-simd/src/safe/arch/generic.rs

-12
Original file line numberDiff line numberDiff line change
@@ -119,18 +119,6 @@ macro_rules! simd_ops_x32_common {
119119
$simd(xs)
120120
}
121121

122-
#[inline]
123-
fn lt(self, x: $simd, y: $simd) -> I32x4 {
124-
let xs = array::from_fn(|i| if x.0[i] < y.0[i] { -1 } else { 0 });
125-
I32x4(xs)
126-
}
127-
128-
#[inline]
129-
fn le(self, x: $simd, y: $simd) -> I32x4 {
130-
let xs = array::from_fn(|i| if x.0[i] <= y.0[i] { -1 } else { 0 });
131-
I32x4(xs)
132-
}
133-
134122
#[inline]
135123
fn eq(self, x: $simd, y: $simd) -> I32x4 {
136124
let xs = array::from_fn(|i| if x.0[i] == y.0[i] { -1 } else { 0 });

rten-simd/src/safe/arch/wasm32.rs

+2-13
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
use std::arch::wasm32::{
22
f32x4_abs, f32x4_add, f32x4_div, f32x4_eq, f32x4_extract_lane, f32x4_ge, f32x4_gt, f32x4_le,
33
f32x4_lt, f32x4_max, f32x4_min, f32x4_mul, f32x4_neg, f32x4_splat, f32x4_sub, i32x4_add,
4-
i32x4_eq, i32x4_ge, i32x4_gt, i32x4_le, i32x4_lt, i32x4_mul, i32x4_neg, i32x4_shl,
5-
i32x4_shuffle, i32x4_splat, i32x4_sub, i32x4_trunc_sat_f32x4, v128, v128_and, v128_bitselect,
6-
v128_load, v128_store,
4+
i32x4_eq, i32x4_ge, i32x4_gt, i32x4_mul, i32x4_neg, i32x4_shl, i32x4_shuffle, i32x4_splat,
5+
i32x4_sub, i32x4_trunc_sat_f32x4, v128, v128_and, v128_bitselect, v128_load, v128_store,
76
};
87
use std::mem::transmute;
98

@@ -237,16 +236,6 @@ unsafe impl SimdOps<I32x4> for Wasm32Isa {
237236
I32x4(i32x4_splat(x))
238237
}
239238

240-
#[inline]
241-
fn lt(self, x: I32x4, y: I32x4) -> v128 {
242-
i32x4_lt(x.0, y.0)
243-
}
244-
245-
#[inline]
246-
fn le(self, x: I32x4, y: I32x4) -> v128 {
247-
i32x4_le(x.0, y.0)
248-
}
249-
250239
#[inline]
251240
fn eq(self, x: I32x4, y: I32x4) -> v128 {
252241
i32x4_eq(x.0, y.0)

rten-simd/src/safe/arch/x86_64/avx2.rs

-11
Original file line numberDiff line numberDiff line change
@@ -251,17 +251,6 @@ unsafe impl SimdOps<I32x8> for Avx2Isa {
251251
unsafe { _mm256_set1_epi32(x) }.into()
252252
}
253253

254-
#[inline]
255-
fn lt(self, x: I32x8, y: I32x8) -> I32x8 {
256-
unsafe { _mm256_cmpgt_epi32(y.0, x.0) }.into()
257-
}
258-
259-
#[inline]
260-
fn le(self, x: I32x8, y: I32x8) -> I32x8 {
261-
unsafe { _mm256_or_si256(_mm256_cmpgt_epi32(y.0, x.0), _mm256_cmpeq_epi32(x.0, y.0)) }
262-
.into()
263-
}
264-
265254
#[inline]
266255
fn eq(self, x: I32x8, y: I32x8) -> I32x8 {
267256
unsafe { _mm256_cmpeq_epi32(x.0, y.0) }.into()

rten-simd/src/safe/arch/x86_64/avx512.rs

+3-13
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use std::arch::x86_64::{
66
_mm512_max_ps, _mm512_min_ps, _mm512_mul_ps, _mm512_mullo_epi32, _mm512_reduce_add_ps,
77
_mm512_set1_epi32, _mm512_set1_ps, _mm512_setzero_si512, _mm512_sllv_epi32, _mm512_storeu_ps,
88
_mm512_storeu_si512, _mm512_sub_epi32, _mm512_sub_ps, _mm512_xor_ps, _mm_prefetch, _CMP_EQ_OQ,
9-
_CMP_GE_OQ, _CMP_GT_OQ, _CMP_LE_OQ, _CMP_LT_OQ, _MM_CMPINT_EQ, _MM_CMPINT_LE, _MM_CMPINT_LT,
9+
_CMP_GE_OQ, _CMP_GT_OQ, _CMP_LE_OQ, _CMP_LT_OQ, _MM_CMPINT_EQ, _MM_CMPINT_NLE, _MM_CMPINT_NLT,
1010
_MM_HINT_ET0, _MM_HINT_T0,
1111
};
1212
use std::mem::transmute;
@@ -233,29 +233,19 @@ unsafe impl SimdOps<I32x16> for Avx512Isa {
233233
unsafe { _mm512_set1_epi32(x) }.into()
234234
}
235235

236-
#[inline]
237-
fn lt(self, x: I32x16, y: I32x16) -> __mmask16 {
238-
unsafe { _mm512_cmp_epi32_mask(x.0, y.0, _MM_CMPINT_LT) }
239-
}
240-
241-
#[inline]
242-
fn le(self, x: I32x16, y: I32x16) -> __mmask16 {
243-
unsafe { _mm512_cmp_epi32_mask(x.0, y.0, _MM_CMPINT_LE) }
244-
}
245-
246236
#[inline]
247237
fn eq(self, x: I32x16, y: I32x16) -> __mmask16 {
248238
unsafe { _mm512_cmp_epi32_mask(x.0, y.0, _MM_CMPINT_EQ) }
249239
}
250240

251241
#[inline]
252242
fn ge(self, x: I32x16, y: I32x16) -> __mmask16 {
253-
self.le(y, x)
243+
unsafe { _mm512_cmp_epi32_mask(x.0, y.0, _MM_CMPINT_NLT) }
254244
}
255245

256246
#[inline]
257247
fn gt(self, x: I32x16, y: I32x16) -> __mmask16 {
258-
self.lt(y, x)
248+
unsafe { _mm512_cmp_epi32_mask(x.0, y.0, _MM_CMPINT_NLE) }
259249
}
260250

261251
#[inline]

rten-simd/src/safe/vec.rs

+8-2
Original file line numberDiff line numberDiff line change
@@ -202,10 +202,16 @@ pub unsafe trait SimdOps<S: Simd>: Copy {
202202
}
203203

204204
/// Return a mask indicating whether elements in `x` are less than `y`.
205-
fn lt(self, x: S, y: S) -> S::Mask;
205+
#[inline]
206+
fn lt(self, x: S, y: S) -> S::Mask {
207+
self.gt(y, x)
208+
}
206209

207210
/// Return a mask indicating whether elements in `x` are less than or equal to `y`.
208-
fn le(self, x: S, y: S) -> S::Mask;
211+
#[inline]
212+
fn le(self, x: S, y: S) -> S::Mask {
213+
self.ge(y, x)
214+
}
209215

210216
/// Return a mask indicating whether elements in `x` are equal to `y`.
211217
fn eq(self, x: S, y: S) -> S::Mask;

0 commit comments

Comments
 (0)