 use std::arch::aarch64::{
-    float32x4_t, int16x8_t, int32x4_t, uint16x8_t, uint32x4_t, vabsq_f32, vaddq_f32, vaddq_s16,
-    vaddq_s32, vaddvq_f32, vandq_u16, vandq_u32, vbslq_f32, vbslq_s16, vbslq_s32, vceqq_f32,
-    vceqq_s16, vceqq_s32, vcgeq_f32, vcgeq_s16, vcgeq_s32, vcgtq_f32, vcgtq_s16, vcgtq_s32,
-    vcleq_f32, vcleq_s16, vcltq_f32, vcltq_s16, vcvtnq_s32_f32, vcvtq_s32_f32, vdivq_f32,
-    vdupq_n_f32, vdupq_n_s16, vdupq_n_s32, vfmaq_f32, vld1q_f32, vld1q_s16, vld1q_s32, vld1q_u16,
-    vld1q_u32, vmaxq_f32, vminq_f32, vmulq_f32, vmulq_s16, vmulq_s32, vnegq_f32, vnegq_s16,
-    vnegq_s32, vshlq_n_s16, vshlq_n_s32, vst1q_f32, vst1q_s16, vst1q_s32, vsubq_f32, vsubq_s16,
-    vsubq_s32,
+    float32x4_t, int16x8_t, int32x4_t, int8x16_t, uint16x8_t, uint32x4_t, uint8x16_t, vabsq_f32,
+    vaddq_f32, vaddq_s16, vaddq_s32, vaddq_s8, vaddvq_f32, vandq_u16, vandq_u32, vandq_u8,
+    vbslq_f32, vbslq_s16, vbslq_s32, vbslq_s8, vceqq_f32, vceqq_s16, vceqq_s32, vceqq_s8,
+    vcgeq_f32, vcgeq_s16, vcgeq_s32, vcgeq_s8, vcgtq_f32, vcgtq_s16, vcgtq_s32, vcgtq_s8,
+    vcleq_f32, vcleq_s16, vcleq_s8, vcltq_f32, vcltq_s16, vcltq_s8, vcvtnq_s32_f32, vcvtq_s32_f32,
+    vdivq_f32, vdupq_n_f32, vdupq_n_s16, vdupq_n_s32, vdupq_n_s8, vfmaq_f32, vld1q_f32, vld1q_s16,
+    vld1q_s32, vld1q_s8, vld1q_u16, vld1q_u32, vld1q_u8, vmaxq_f32, vminq_f32, vmulq_f32,
+    vmulq_s16, vmulq_s32, vmulq_s8, vnegq_f32, vnegq_s16, vnegq_s32, vnegq_s8, vshlq_n_s16,
+    vshlq_n_s32, vshlq_n_s8, vst1q_f32, vst1q_s16, vst1q_s32, vst1q_s8, vsubq_f32, vsubq_s16,
+    vsubq_s32, vsubq_s8,
 };
 use std::mem::transmute;
@@ -28,6 +30,7 @@ unsafe impl Isa for ArmNeonIsa {
     type F32 = float32x4_t;
     type I32 = int32x4_t;
     type I16 = int16x8_t;
+    type I8 = int8x16_t;
     type Bits = int32x4_t;

     fn f32(self) -> impl SimdFloatOps<Self::F32, Int = Self::I32> {
@@ -41,6 +44,10 @@ unsafe impl Isa for ArmNeonIsa {
     fn i16(self) -> impl SimdIntOps<Self::I16> {
         self
     }
+
+    fn i8(self) -> impl SimdIntOps<Self::I8> {
+        self
+    }
 }

 macro_rules! simd_ops_common {
@@ -363,6 +370,88 @@ impl SimdIntOps<int16x8_t> for ArmNeonIsa {
     }
 }

+unsafe impl SimdOps<int8x16_t> for ArmNeonIsa {
+    simd_ops_common!(int8x16_t, uint8x16_t);
+
+    #[inline]
+    fn add(self, x: int8x16_t, y: int8x16_t) -> int8x16_t {
+        unsafe { vaddq_s8(x, y) }
+    }
+
+    #[inline]
+    fn sub(self, x: int8x16_t, y: int8x16_t) -> int8x16_t {
+        unsafe { vsubq_s8(x, y) }
+    }
+
+    #[inline]
+    fn mul(self, x: int8x16_t, y: int8x16_t) -> int8x16_t {
+        unsafe { vmulq_s8(x, y) }
+    }
+
+    #[inline]
+    fn splat(self, x: i8) -> int8x16_t {
+        unsafe { vdupq_n_s8(x) }
+    }
+
+    #[inline]
+    fn lt(self, x: int8x16_t, y: int8x16_t) -> uint8x16_t {
+        unsafe { vcltq_s8(x, y) }
+    }
+
+    #[inline]
+    fn le(self, x: int8x16_t, y: int8x16_t) -> uint8x16_t {
+        unsafe { vcleq_s8(x, y) }
+    }
+
+    #[inline]
+    fn eq(self, x: int8x16_t, y: int8x16_t) -> uint8x16_t {
+        unsafe { vceqq_s8(x, y) }
+    }
+
+    #[inline]
+    fn ge(self, x: int8x16_t, y: int8x16_t) -> uint8x16_t {
+        unsafe { vcgeq_s8(x, y) }
+    }
+
+    #[inline]
+    fn gt(self, x: int8x16_t, y: int8x16_t) -> uint8x16_t {
+        unsafe { vcgtq_s8(x, y) }
+    }
+
+    #[inline]
+    unsafe fn load_ptr(self, ptr: *const i8) -> int8x16_t {
+        unsafe { vld1q_s8(ptr) }
+    }
+
+    #[inline]
+    fn first_n_mask(self, n: usize) -> uint8x16_t {
+        let mask: [u8; 16] = std::array::from_fn(|i| if i < n { u8::MAX } else { 0 });
+        unsafe { vld1q_u8(mask.as_ptr()) }
+    }
+
+    #[inline]
+    fn select(self, x: int8x16_t, y: int8x16_t, mask: <int8x16_t as Simd>::Mask) -> int8x16_t {
+        unsafe { vbslq_s8(mask, x, y) }
+    }
+
+    #[inline]
+    unsafe fn store_ptr(self, x: int8x16_t, ptr: *mut i8) {
+        unsafe { vst1q_s8(ptr, x) }
+    }
+}
+
+impl SimdIntOps<int8x16_t> for ArmNeonIsa {
+    #[inline]
+    fn neg(self, x: int8x16_t) -> int8x16_t {
+        unsafe { vnegq_s8(x) }
+    }
+
+    #[inline]
+    fn shift_left<const SHIFT: i32>(self, x: int8x16_t) -> int8x16_t {
+        unsafe { vshlq_n_s8::<SHIFT>(x) }
+    }
+}
+
 macro_rules! impl_mask {
     ($mask:ty, $elem:ty, $len:expr) => {
         impl Mask for $mask {
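
Note: the first_n_mask/select pair added above is a common NEON pattern for handling a partial final vector: build a per-lane byte mask, then blend. A standalone sketch of that pattern using only the raw aarch64 intrinsics (illustrative only, not part of this commit):

#[cfg(target_arch = "aarch64")]
unsafe fn store_first_n(src: std::arch::aarch64::int8x16_t, dst: *mut i8, n: usize) {
    use std::arch::aarch64::{vbslq_s8, vld1q_s8, vld1q_u8, vst1q_s8};
    // 0xFF in the first n lanes, 0 elsewhere (the same shape that first_n_mask produces).
    let mask: [u8; 16] = std::array::from_fn(|i| if i < n { u8::MAX } else { 0 });
    let mask = unsafe { vld1q_u8(mask.as_ptr()) };
    // Blend `src` into the existing contents and write back all 16 bytes,
    // so `dst` must be valid for a full 16-byte read and write.
    let old = unsafe { vld1q_s8(dst) };
    unsafe { vst1q_s8(dst, vbslq_s8(mask, src, old)) };
}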
@@ -379,6 +468,7 @@ macro_rules! impl_mask {

 impl_mask!(uint32x4_t, u32, 4);
 impl_mask!(uint16x8_t, u16, 8);
+impl_mask!(uint8x16_t, u8, 16);

 unsafe impl MaskOps<uint32x4_t> for ArmNeonIsa {
     #[inline]
@@ -394,6 +484,13 @@ unsafe impl MaskOps<uint16x8_t> for ArmNeonIsa {
     }
 }

+unsafe impl MaskOps<uint8x16_t> for ArmNeonIsa {
+    #[inline]
+    fn and(self, x: uint8x16_t, y: uint8x16_t) -> uint8x16_t {
+        unsafe { vandq_u8(x, y) }
+    }
+}
+
 macro_rules! simd_common {
     ($mask:ty, $len:expr) => {
         type Array = [Self::Elem; $len];
@@ -436,3 +533,4 @@ macro_rules! impl_simd {
 impl_simd!(float32x4_t, f32, 4, uint32x4_t);
 impl_simd!(int32x4_t, i32, 4, uint32x4_t);
 impl_simd!(int16x8_t, i16, 8, uint16x8_t);
+impl_simd!(int8x16_t, i8, 16, uint8x16_t);
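
Note: a minimal sketch of how the new i8 lane might be driven through these impls. The function below is hypothetical and assumes it lives in this module (so ArmNeonIsa, SimdOps and SimdIntOps are in scope) and that ArmNeonIsa is Copy, as its by-value self methods suggest; fully qualified calls are used to pin the element type, since ArmNeonIsa implements these traits for several element widths.

// Hypothetical caller (not part of this commit): double 16 i8 lanes in place.
unsafe fn double_i8x16(isa: ArmNeonIsa, ptr: *mut i8) {
    let v = unsafe { SimdOps::<int8x16_t>::load_ptr(isa, ptr) };
    let doubled = SimdIntOps::<int8x16_t>::shift_left::<1>(isa, v); // x << 1
    unsafe { SimdOps::<int8x16_t>::store_ptr(isa, doubled, ptr) };
}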