Merge pull request #614 from robertknight/simd-ge-le

robertknight · web-flow · commit be75136aef39 · 2025-03-07T08:38:28.000Z
Define less-than ops in terms of greater-than ops for int types
diff --git a/rten-simd/src/safe/arch/aarch64.rs b/rten-simd/src/safe/arch/aarch64.rs
@@ -1,9 +1,9 @@
 use std::arch::aarch64::{
     float32x4_t, int32x4_t, uint32x4_t, vabsq_f32, vaddq_f32, vaddq_s32, vaddvq_f32, vandq_u32,
     vbslq_f32, vbslq_s32, vceqq_f32, vceqq_s32, vcgeq_f32, vcgeq_s32, vcgtq_f32, vcgtq_s32,
-    vcleq_f32, vcleq_s32, vcltq_f32, vcltq_s32, vcvtq_s32_f32, vdivq_f32, vdupq_n_f32, vdupq_n_s32,
-    vfmaq_f32, vld1q_f32, vld1q_s32, vld1q_u32, vmaxq_f32, vminq_f32, vmulq_f32, vmulq_s32,
-    vnegq_f32, vnegq_s32, vshlq_n_s32, vst1q_f32, vst1q_s32, vsubq_f32, vsubq_s32,
+    vcleq_f32, vcltq_f32, vcvtq_s32_f32, vdivq_f32, vdupq_n_f32, vdupq_n_s32, vfmaq_f32, vld1q_f32,
+    vld1q_s32, vld1q_u32, vmaxq_f32, vminq_f32, vmulq_f32, vmulq_s32, vnegq_f32, vnegq_s32,
+    vshlq_n_s32, vst1q_f32, vst1q_s32, vsubq_f32, vsubq_s32,
 };
 use std::mem::transmute;
 
@@ -223,16 +223,6 @@ unsafe impl SimdOps<int32x4_t> for ArmNeonIsa {
         unsafe { vdupq_n_s32(x) }
     }
 
-    #[inline]
-    fn lt(self, x: int32x4_t, y: int32x4_t) -> uint32x4_t {
-        unsafe { vcltq_s32(x, y) }
-    }
-
-    #[inline]
-    fn le(self, x: int32x4_t, y: int32x4_t) -> uint32x4_t {
-        unsafe { vcleq_s32(x, y) }
-    }
-
     #[inline]
     fn eq(self, x: int32x4_t, y: int32x4_t) -> uint32x4_t {
         unsafe { vceqq_s32(x, y) }
diff --git a/rten-simd/src/safe/arch/generic.rs b/rten-simd/src/safe/arch/generic.rs
@@ -119,18 +119,6 @@ macro_rules! simd_ops_x32_common {
             $simd(xs)
         }
 
-        #[inline]
-        fn lt(self, x: $simd, y: $simd) -> I32x4 {
-            let xs = array::from_fn(|i| if x.0[i] < y.0[i] { -1 } else { 0 });
-            I32x4(xs)
-        }
-
-        #[inline]
-        fn le(self, x: $simd, y: $simd) -> I32x4 {
-            let xs = array::from_fn(|i| if x.0[i] <= y.0[i] { -1 } else { 0 });
-            I32x4(xs)
-        }
-
         #[inline]
         fn eq(self, x: $simd, y: $simd) -> I32x4 {
             let xs = array::from_fn(|i| if x.0[i] == y.0[i] { -1 } else { 0 });
diff --git a/rten-simd/src/safe/arch/wasm32.rs b/rten-simd/src/safe/arch/wasm32.rs
@@ -1,9 +1,8 @@
 use std::arch::wasm32::{
     f32x4_abs, f32x4_add, f32x4_div, f32x4_eq, f32x4_extract_lane, f32x4_ge, f32x4_gt, f32x4_le,
     f32x4_lt, f32x4_max, f32x4_min, f32x4_mul, f32x4_neg, f32x4_splat, f32x4_sub, i32x4_add,
-    i32x4_eq, i32x4_ge, i32x4_gt, i32x4_le, i32x4_lt, i32x4_mul, i32x4_neg, i32x4_shl,
-    i32x4_shuffle, i32x4_splat, i32x4_sub, i32x4_trunc_sat_f32x4, v128, v128_and, v128_bitselect,
-    v128_load, v128_store,
+    i32x4_eq, i32x4_ge, i32x4_gt, i32x4_mul, i32x4_neg, i32x4_shl, i32x4_shuffle, i32x4_splat,
+    i32x4_sub, i32x4_trunc_sat_f32x4, v128, v128_and, v128_bitselect, v128_load, v128_store,
 };
 use std::mem::transmute;
 
@@ -237,16 +236,6 @@ unsafe impl SimdOps<I32x4> for Wasm32Isa {
         I32x4(i32x4_splat(x))
     }
 
-    #[inline]
-    fn lt(self, x: I32x4, y: I32x4) -> v128 {
-        i32x4_lt(x.0, y.0)
-    }
-
-    #[inline]
-    fn le(self, x: I32x4, y: I32x4) -> v128 {
-        i32x4_le(x.0, y.0)
-    }
-
     #[inline]
     fn eq(self, x: I32x4, y: I32x4) -> v128 {
         i32x4_eq(x.0, y.0)
diff --git a/rten-simd/src/safe/arch/x86_64/avx2.rs b/rten-simd/src/safe/arch/x86_64/avx2.rs
@@ -251,17 +251,6 @@ unsafe impl SimdOps<I32x8> for Avx2Isa {
         unsafe { _mm256_set1_epi32(x) }.into()
     }
 
-    #[inline]
-    fn lt(self, x: I32x8, y: I32x8) -> I32x8 {
-        unsafe { _mm256_cmpgt_epi32(y.0, x.0) }.into()
-    }
-
-    #[inline]
-    fn le(self, x: I32x8, y: I32x8) -> I32x8 {
-        unsafe { _mm256_or_si256(_mm256_cmpgt_epi32(y.0, x.0), _mm256_cmpeq_epi32(x.0, y.0)) }
-            .into()
-    }
-
     #[inline]
     fn eq(self, x: I32x8, y: I32x8) -> I32x8 {
         unsafe { _mm256_cmpeq_epi32(x.0, y.0) }.into()
diff --git a/rten-simd/src/safe/arch/x86_64/avx512.rs b/rten-simd/src/safe/arch/x86_64/avx512.rs
@@ -6,7 +6,7 @@ use std::arch::x86_64::{
     _mm512_max_ps, _mm512_min_ps, _mm512_mul_ps, _mm512_mullo_epi32, _mm512_reduce_add_ps,
     _mm512_set1_epi32, _mm512_set1_ps, _mm512_setzero_si512, _mm512_sllv_epi32, _mm512_storeu_ps,
     _mm512_storeu_si512, _mm512_sub_epi32, _mm512_sub_ps, _mm512_xor_ps, _mm_prefetch, _CMP_EQ_OQ,
-    _CMP_GE_OQ, _CMP_GT_OQ, _CMP_LE_OQ, _CMP_LT_OQ, _MM_CMPINT_EQ, _MM_CMPINT_LE, _MM_CMPINT_LT,
+    _CMP_GE_OQ, _CMP_GT_OQ, _CMP_LE_OQ, _CMP_LT_OQ, _MM_CMPINT_EQ, _MM_CMPINT_NLE, _MM_CMPINT_NLT,
     _MM_HINT_ET0, _MM_HINT_T0,
 };
 use std::mem::transmute;
@@ -233,29 +233,19 @@ unsafe impl SimdOps<I32x16> for Avx512Isa {
         unsafe { _mm512_set1_epi32(x) }.into()
     }
 
-    #[inline]
-    fn lt(self, x: I32x16, y: I32x16) -> __mmask16 {
-        unsafe { _mm512_cmp_epi32_mask(x.0, y.0, _MM_CMPINT_LT) }
-    }
-
-    #[inline]
-    fn le(self, x: I32x16, y: I32x16) -> __mmask16 {
-        unsafe { _mm512_cmp_epi32_mask(x.0, y.0, _MM_CMPINT_LE) }
-    }
-
     #[inline]
     fn eq(self, x: I32x16, y: I32x16) -> __mmask16 {
         unsafe { _mm512_cmp_epi32_mask(x.0, y.0, _MM_CMPINT_EQ) }
     }
 
     #[inline]
     fn ge(self, x: I32x16, y: I32x16) -> __mmask16 {
-        self.le(y, x)
+        unsafe { _mm512_cmp_epi32_mask(x.0, y.0, _MM_CMPINT_NLT) }
     }
 
     #[inline]
     fn gt(self, x: I32x16, y: I32x16) -> __mmask16 {
-        self.lt(y, x)
+        unsafe { _mm512_cmp_epi32_mask(x.0, y.0, _MM_CMPINT_NLE) }
     }
 
     #[inline]
diff --git a/rten-simd/src/safe/vec.rs b/rten-simd/src/safe/vec.rs
@@ -202,10 +202,16 @@ pub unsafe trait SimdOps<S: Simd>: Copy {
     }
 
     /// Return a mask indicating whether elements in `x` are less than `y`.
-    fn lt(self, x: S, y: S) -> S::Mask;
+    #[inline]
+    fn lt(self, x: S, y: S) -> S::Mask {
+        self.gt(y, x)
+    }
 
     /// Return a mask indicating whether elements in `x` are less or equal to `y`.
-    fn le(self, x: S, y: S) -> S::Mask;
+    #[inline]
+    fn le(self, x: S, y: S) -> S::Mask {
+        self.ge(y, x)
+    }
 
     /// Return a mask indicating whether elements in `x` are equal to `y`.
     fn eq(self, x: S, y: S) -> S::Mask;