@@ -18,6 +18,20 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
             // Spin loop hint
         }
 
+        // Used by is_x86_feature_detected!();
+        "llvm.x86.xgetbv" => {
+            // FIXME use the actual xgetbv instruction
+            intrinsic_args!(fx, args => (v); intrinsic);
+
+            let v = v.load_scalar(fx);
+
+            // As of writing only XCR0 exists
+            fx.bcx.ins().trapnz(v, TrapCode::UnreachableCodeReached);
+
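+            // XCR0 bit 0 = x87 state, bit 1 = SSE state, bit 2 = AVX state; reporting
+            // only bit 0 keeps runtime detection of SSE/AVX OS support conservative.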
+            let res = fx.bcx.ins().iconst(types::I64, 1 /* bit 0 must be set */);
+            ret.write_cvalue(fx, CValue::by_val(res, fx.layout_of(fx.tcx.types.i64)));
+        }
+
         // Used by `_mm_movemask_epi8` and `_mm256_movemask_epi8`
         "llvm.x86.sse2.pmovmskb.128"
         | "llvm.x86.avx2.pmovmskb"
@@ -53,7 +67,7 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
             let res = CValue::by_val(res, fx.layout_of(fx.tcx.types.i32));
             ret.write_cvalue(fx, res);
         }
-        "llvm.x86.sse2.cmp.ps" | "llvm.x86.sse2.cmp.pd" => {
+        "llvm.x86.sse.cmp.ps" | "llvm.x86.sse2.cmp.pd" => {
             let (x, y, kind) = match args {
                 [x, y, kind] => (x, y, kind),
                 _ => bug!("wrong number of args for intrinsic {intrinsic}"),
@@ -66,18 +80,95 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
             let flt_cc = match kind
                 .try_to_bits(Size::from_bytes(1))
                 .unwrap_or_else(|| panic!("kind not scalar: {:?}", kind))
+                .try_into()
+                .unwrap()
             {
-                0 => FloatCC::Equal,
-                1 => FloatCC::LessThan,
-                2 => FloatCC::LessThanOrEqual,
-                7 => FloatCC::Ordered,
-                3 => FloatCC::Unordered,
-                4 => FloatCC::NotEqual,
-                5 => FloatCC::UnorderedOrGreaterThanOrEqual,
-                6 => FloatCC::UnorderedOrGreaterThan,
+                _CMP_EQ_OQ | _CMP_EQ_OS => FloatCC::Equal,
+                _CMP_LT_OS | _CMP_LT_OQ => FloatCC::LessThan,
+                _CMP_LE_OS | _CMP_LE_OQ => FloatCC::LessThanOrEqual,
+                _CMP_UNORD_Q | _CMP_UNORD_S => FloatCC::Unordered,
+                _CMP_NEQ_UQ | _CMP_NEQ_US => FloatCC::NotEqual,
+                _CMP_NLT_US | _CMP_NLT_UQ => FloatCC::UnorderedOrGreaterThanOrEqual,
+                _CMP_NLE_US | _CMP_NLE_UQ => FloatCC::UnorderedOrGreaterThan,
+                _CMP_ORD_Q | _CMP_ORD_S => FloatCC::Ordered,
+                _CMP_EQ_UQ | _CMP_EQ_US => FloatCC::UnorderedOrEqual,
+                _CMP_NGE_US | _CMP_NGE_UQ => FloatCC::UnorderedOrLessThan,
+                _CMP_NGT_US | _CMP_NGT_UQ => FloatCC::UnorderedOrLessThanOrEqual,
+                _CMP_FALSE_OQ | _CMP_FALSE_OS => todo!(),
+                _CMP_NEQ_OQ | _CMP_NEQ_OS => FloatCC::OrderedNotEqual,
+                _CMP_GE_OS | _CMP_GE_OQ => FloatCC::GreaterThanOrEqual,
+                _CMP_GT_OS | _CMP_GT_OQ => FloatCC::GreaterThan,
+                _CMP_TRUE_UQ | _CMP_TRUE_US => todo!(),
+
                 kind => unreachable!("kind {:?}", kind),
             };
 
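+            // `kind` is the 5-bit comparison-predicate immediate of the `_mm_cmp_*`
+            // family; the `_CMP_*` constants below are block-scoped items, so the
+            // match above may refer to them before their declaration.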
+            // Copied from stdarch
+            /// Equal (ordered, non-signaling)
+            const _CMP_EQ_OQ: i32 = 0x00;
+            /// Less-than (ordered, signaling)
+            const _CMP_LT_OS: i32 = 0x01;
+            /// Less-than-or-equal (ordered, signaling)
+            const _CMP_LE_OS: i32 = 0x02;
+            /// Unordered (non-signaling)
+            const _CMP_UNORD_Q: i32 = 0x03;
+            /// Not-equal (unordered, non-signaling)
+            const _CMP_NEQ_UQ: i32 = 0x04;
+            /// Not-less-than (unordered, signaling)
+            const _CMP_NLT_US: i32 = 0x05;
+            /// Not-less-than-or-equal (unordered, signaling)
+            const _CMP_NLE_US: i32 = 0x06;
+            /// Ordered (non-signaling)
+            const _CMP_ORD_Q: i32 = 0x07;
+            /// Equal (unordered, non-signaling)
+            const _CMP_EQ_UQ: i32 = 0x08;
+            /// Not-greater-than-or-equal (unordered, signaling)
+            const _CMP_NGE_US: i32 = 0x09;
+            /// Not-greater-than (unordered, signaling)
+            const _CMP_NGT_US: i32 = 0x0a;
+            /// False (ordered, non-signaling)
+            const _CMP_FALSE_OQ: i32 = 0x0b;
+            /// Not-equal (ordered, non-signaling)
+            const _CMP_NEQ_OQ: i32 = 0x0c;
+            /// Greater-than-or-equal (ordered, signaling)
+            const _CMP_GE_OS: i32 = 0x0d;
+            /// Greater-than (ordered, signaling)
+            const _CMP_GT_OS: i32 = 0x0e;
+            /// True (unordered, non-signaling)
+            const _CMP_TRUE_UQ: i32 = 0x0f;
+            /// Equal (ordered, signaling)
+            const _CMP_EQ_OS: i32 = 0x10;
+            /// Less-than (ordered, non-signaling)
+            const _CMP_LT_OQ: i32 = 0x11;
+            /// Less-than-or-equal (ordered, non-signaling)
+            const _CMP_LE_OQ: i32 = 0x12;
+            /// Unordered (signaling)
+            const _CMP_UNORD_S: i32 = 0x13;
+            /// Not-equal (unordered, signaling)
+            const _CMP_NEQ_US: i32 = 0x14;
+            /// Not-less-than (unordered, non-signaling)
+            const _CMP_NLT_UQ: i32 = 0x15;
+            /// Not-less-than-or-equal (unordered, non-signaling)
+            const _CMP_NLE_UQ: i32 = 0x16;
+            /// Ordered (signaling)
+            const _CMP_ORD_S: i32 = 0x17;
+            /// Equal (unordered, signaling)
+            const _CMP_EQ_US: i32 = 0x18;
+            /// Not-greater-than-or-equal (unordered, non-signaling)
+            const _CMP_NGE_UQ: i32 = 0x19;
+            /// Not-greater-than (unordered, non-signaling)
+            const _CMP_NGT_UQ: i32 = 0x1a;
+            /// False (ordered, signaling)
+            const _CMP_FALSE_OS: i32 = 0x1b;
+            /// Not-equal (ordered, signaling)
+            const _CMP_NEQ_OS: i32 = 0x1c;
+            /// Greater-than-or-equal (ordered, non-signaling)
+            const _CMP_GE_OQ: i32 = 0x1d;
+            /// Greater-than (ordered, non-signaling)
+            const _CMP_GT_OQ: i32 = 0x1e;
+            /// True (unordered, signaling)
+            const _CMP_TRUE_US: i32 = 0x1f;
+
             simd_pair_for_each_lane(fx, x, y, ret, &|fx, lane_ty, res_lane_ty, x_lane, y_lane| {
                 let res_lane = match lane_ty.kind() {
                     ty::Float(_) => fx.bcx.ins().fcmp(flt_cc, x_lane, y_lane),
@@ -103,6 +194,23 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
                 _ => fx.bcx.ins().iconst(types::I32, 0),
             });
         }
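+        // Used by `_mm_srai_epi32`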
+ "llvm.x86.sse2.psrai.d" => {
198
+ let ( a, imm8) = match args {
199
+ [ a, imm8] => ( a, imm8) ,
200
+ _ => bug ! ( "wrong number of args for intrinsic {intrinsic}" ) ,
201
+ } ;
202
+ let a = codegen_operand ( fx, a) ;
203
+ let imm8 = crate :: constant:: mir_operand_get_const_val ( fx, imm8)
204
+ . expect ( "llvm.x86.sse2.psrai.d imm8 not const" ) ;
205
+
206
+ simd_for_each_lane ( fx, a, ret, & |fx, _lane_ty, _res_lane_ty, lane| match imm8
207
+ . try_to_bits ( Size :: from_bytes ( 4 ) )
208
+ . unwrap_or_else ( || panic ! ( "imm8 not scalar: {:?}" , imm8) )
209
+ {
210
+ imm8 if imm8 < 32 => fx. bcx . ins ( ) . sshr_imm ( lane, i64:: from ( imm8 as u8 ) ) ,
211
+ _ => fx. bcx . ins ( ) . iconst ( types:: I32 , 0 ) ,
212
+ } ) ;
213
+ }
106
214
"llvm.x86.sse2.pslli.d" => {
107
215
let ( a, imm8) = match args {
108
216
[ a, imm8] => ( a, imm8) ,
@@ -137,6 +245,23 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
                 _ => fx.bcx.ins().iconst(types::I32, 0),
             });
         }
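+        // Used by `_mm_srai_epi16`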
+ "llvm.x86.sse2.psrai.w" => {
249
+ let ( a, imm8) = match args {
250
+ [ a, imm8] => ( a, imm8) ,
251
+ _ => bug ! ( "wrong number of args for intrinsic {intrinsic}" ) ,
252
+ } ;
253
+ let a = codegen_operand ( fx, a) ;
254
+ let imm8 = crate :: constant:: mir_operand_get_const_val ( fx, imm8)
255
+ . expect ( "llvm.x86.sse2.psrai.d imm8 not const" ) ;
256
+
257
+ simd_for_each_lane ( fx, a, ret, & |fx, _lane_ty, _res_lane_ty, lane| match imm8
258
+ . try_to_bits ( Size :: from_bytes ( 4 ) )
259
+ . unwrap_or_else ( || panic ! ( "imm8 not scalar: {:?}" , imm8) )
260
+ {
261
+ imm8 if imm8 < 16 => fx. bcx . ins ( ) . sshr_imm ( lane, i64:: from ( imm8 as u8 ) ) ,
262
+ _ => fx. bcx . ins ( ) . iconst ( types:: I32 , 0 ) ,
263
+ } ) ;
264
+ }
140
265
"llvm.x86.sse2.pslli.w" => {
141
266
let ( a, imm8) = match args {
142
267
[ a, imm8] => ( a, imm8) ,
@@ -171,6 +296,57 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
                 _ => fx.bcx.ins().iconst(types::I32, 0),
             });
         }
+ "llvm.x86.avx.psrai.d" => {
300
+ let ( a, imm8) = match args {
301
+ [ a, imm8] => ( a, imm8) ,
302
+ _ => bug ! ( "wrong number of args for intrinsic {intrinsic}" ) ,
303
+ } ;
304
+ let a = codegen_operand ( fx, a) ;
305
+ let imm8 = crate :: constant:: mir_operand_get_const_val ( fx, imm8)
306
+ . expect ( "llvm.x86.avx.psrai.d imm8 not const" ) ;
307
+
308
+ simd_for_each_lane ( fx, a, ret, & |fx, _lane_ty, _res_lane_ty, lane| match imm8
309
+ . try_to_bits ( Size :: from_bytes ( 4 ) )
310
+ . unwrap_or_else ( || panic ! ( "imm8 not scalar: {:?}" , imm8) )
311
+ {
312
+ imm8 if imm8 < 32 => fx. bcx . ins ( ) . sshr_imm ( lane, i64:: from ( imm8 as u8 ) ) ,
313
+ _ => fx. bcx . ins ( ) . iconst ( types:: I32 , 0 ) ,
314
+ } ) ;
315
+ }
316
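+        // Used by `_mm_srli_epi64`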
+ "llvm.x86.sse2.psrli.q" => {
317
+ let ( a, imm8) = match args {
318
+ [ a, imm8] => ( a, imm8) ,
319
+ _ => bug ! ( "wrong number of args for intrinsic {intrinsic}" ) ,
320
+ } ;
321
+ let a = codegen_operand ( fx, a) ;
322
+ let imm8 = crate :: constant:: mir_operand_get_const_val ( fx, imm8)
323
+ . expect ( "llvm.x86.avx.psrli.q imm8 not const" ) ;
324
+
325
+ simd_for_each_lane ( fx, a, ret, & |fx, _lane_ty, _res_lane_ty, lane| match imm8
326
+ . try_to_bits ( Size :: from_bytes ( 4 ) )
327
+ . unwrap_or_else ( || panic ! ( "imm8 not scalar: {:?}" , imm8) )
328
+ {
329
+ imm8 if imm8 < 64 => fx. bcx . ins ( ) . ushr_imm ( lane, i64:: from ( imm8 as u8 ) ) ,
330
+ _ => fx. bcx . ins ( ) . iconst ( types:: I32 , 0 ) ,
331
+ } ) ;
332
+ }
333
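+        // Used by `_mm_slli_epi64`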
+ "llvm.x86.sse2.pslli.q" => {
334
+ let ( a, imm8) = match args {
335
+ [ a, imm8] => ( a, imm8) ,
336
+ _ => bug ! ( "wrong number of args for intrinsic {intrinsic}" ) ,
337
+ } ;
338
+ let a = codegen_operand ( fx, a) ;
339
+ let imm8 = crate :: constant:: mir_operand_get_const_val ( fx, imm8)
340
+ . expect ( "llvm.x86.avx.pslli.q imm8 not const" ) ;
341
+
342
+ simd_for_each_lane ( fx, a, ret, & |fx, _lane_ty, _res_lane_ty, lane| match imm8
343
+ . try_to_bits ( Size :: from_bytes ( 4 ) )
344
+ . unwrap_or_else ( || panic ! ( "imm8 not scalar: {:?}" , imm8) )
345
+ {
346
+ imm8 if imm8 < 64 => fx. bcx . ins ( ) . ishl_imm ( lane, i64:: from ( imm8 as u8 ) ) ,
347
+ _ => fx. bcx . ins ( ) . iconst ( types:: I32 , 0 ) ,
348
+ } ) ;
349
+ }
         "llvm.x86.avx.pslli.d" => {
             let (a, imm8) = match args {
                 [a, imm8] => (a, imm8),
@@ -205,6 +381,23 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
                 _ => fx.bcx.ins().iconst(types::I32, 0),
             });
         }
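+        // Used by `_mm256_srai_epi16`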
+ "llvm.x86.avx2.psrai.w" => {
385
+ let ( a, imm8) = match args {
386
+ [ a, imm8] => ( a, imm8) ,
387
+ _ => bug ! ( "wrong number of args for intrinsic {intrinsic}" ) ,
388
+ } ;
389
+ let a = codegen_operand ( fx, a) ;
390
+ let imm8 = crate :: constant:: mir_operand_get_const_val ( fx, imm8)
391
+ . expect ( "llvm.x86.avx.psrai.w imm8 not const" ) ;
392
+
393
+ simd_for_each_lane ( fx, a, ret, & |fx, _lane_ty, _res_lane_ty, lane| match imm8
394
+ . try_to_bits ( Size :: from_bytes ( 4 ) )
395
+ . unwrap_or_else ( || panic ! ( "imm8 not scalar: {:?}" , imm8) )
396
+ {
397
+ imm8 if imm8 < 16 => fx. bcx . ins ( ) . sshr_imm ( lane, i64:: from ( imm8 as u8 ) ) ,
398
+ _ => fx. bcx . ins ( ) . iconst ( types:: I32 , 0 ) ,
399
+ } ) ;
400
+ }
         "llvm.x86.avx2.pslli.w" => {
             let (a, imm8) = match args {
                 [a, imm8] => (a, imm8),
@@ -313,25 +506,53 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
             ret.place_lane(fx, 2).to_ptr().store(fx, res_2, MemFlags::trusted());
             ret.place_lane(fx, 3).to_ptr().store(fx, res_3, MemFlags::trusted());
         }
-        "llvm.x86.sse2.storeu.dq" => {
+        "llvm.x86.sse2.storeu.dq" | "llvm.x86.sse2.storeu.pd" => {
             intrinsic_args!(fx, args => (mem_addr, a); intrinsic);
             let mem_addr = mem_addr.load_scalar(fx);
 
             // FIXME correctly handle the unalignment
             let dest = CPlace::for_ptr(Pointer::new(mem_addr), a.layout());
             dest.write_cvalue(fx, a);
         }
-        "llvm.x86.addcarry.64" => {
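+        // Used by `_mm_abs_epi8`, `_mm_abs_epi16` and `_mm_abs_epi32`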
+ "llvm.x86.ssse3.pabs.b.128" | "llvm.x86.ssse3.pabs.w.128" | "llvm.x86.ssse3.pabs.d.128" => {
518
+ let a = match args {
519
+ [ a] => a,
520
+ _ => bug ! ( "wrong number of args for intrinsic {intrinsic}" ) ,
521
+ } ;
522
+ let a = codegen_operand ( fx, a) ;
523
+
524
+ simd_for_each_lane ( fx, a, ret, & |fx, _lane_ty, _res_lane_ty, lane| {
525
+ fx. bcx . ins ( ) . iabs ( lane)
526
+ } ) ;
527
+ }
528
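+        // Used by `_addcarry_u32` and `_addcarry_u64`; returns a (carry_out, sum)
+        // pair matching the LLVM intrinsic's `{ i8, iN }` return type.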
+ "llvm.x86.addcarry.32" | "llvm.x86.addcarry.64" => {
325
529
intrinsic_args ! ( fx, args => ( c_in, a, b) ; intrinsic) ;
326
530
let c_in = c_in. load_scalar ( fx) ;
327
531
328
- llvm_add_sub ( fx, BinOp :: Add , ret, c_in, a, b) ;
532
+ let ( cb_out, c) = llvm_add_sub ( fx, BinOp :: Add , c_in, a, b) ;
533
+
534
+ let layout = fx. layout_of ( fx. tcx . mk_tup ( & [ fx. tcx . types . u8 , a. layout ( ) . ty ] ) ) ;
535
+ let val = CValue :: by_val_pair ( cb_out, c, layout) ;
536
+ ret. write_cvalue ( fx, val) ;
329
537
}
330
- "llvm.x86.subborrow.64" => {
538
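+        // Used by `_addcarryx_u32` and `_addcarryx_u64`; unlike addcarry above, the
+        // sum is written through the `out` pointer and only the carry byte is returned.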
+ "llvm.x86.addcarryx.u32" | "llvm.x86.addcarryx.u64" => {
539
+ intrinsic_args ! ( fx, args => ( c_in, a, b, out) ; intrinsic) ;
540
+ let c_in = c_in. load_scalar ( fx) ;
541
+
542
+ let ( cb_out, c) = llvm_add_sub ( fx, BinOp :: Add , c_in, a, b) ;
543
+
544
+ Pointer :: new ( out. load_scalar ( fx) ) . store ( fx, c, MemFlags :: trusted ( ) ) ;
545
+ ret. write_cvalue ( fx, CValue :: by_val ( cb_out, fx. layout_of ( fx. tcx . types . u8 ) ) ) ;
546
+ }
547
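+        // Used by `_subborrow_u32` and `_subborrow_u64`; returns a (borrow_out,
+        // difference) pair.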
+ "llvm.x86.subborrow.32" | "llvm.x86.subborrow.64" => {
331
548
intrinsic_args ! ( fx, args => ( b_in, a, b) ; intrinsic) ;
332
549
let b_in = b_in. load_scalar ( fx) ;
333
550
334
- llvm_add_sub ( fx, BinOp :: Sub , ret, b_in, a, b) ;
551
+ let ( cb_out, c) = llvm_add_sub ( fx, BinOp :: Sub , b_in, a, b) ;
552
+
553
+ let layout = fx. layout_of ( fx. tcx . mk_tup ( & [ fx. tcx . types . u8 , a. layout ( ) . ty ] ) ) ;
554
+ let val = CValue :: by_val_pair ( cb_out, c, layout) ;
555
+ ret. write_cvalue ( fx, val) ;
335
556
}
336
557
_ => {
337
558
fx. tcx
@@ -356,37 +577,26 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
 fn llvm_add_sub<'tcx>(
     fx: &mut FunctionCx<'_, '_, 'tcx>,
     bin_op: BinOp,
-    ret: CPlace<'tcx>,
     cb_in: Value,
     a: CValue<'tcx>,
     b: CValue<'tcx>,
-) {
-    assert_eq!(
-        a.layout().ty,
-        fx.tcx.types.u64,
-        "llvm.x86.addcarry.64/llvm.x86.subborrow.64 second operand must be u64"
-    );
-    assert_eq!(
-        b.layout().ty,
-        fx.tcx.types.u64,
-        "llvm.x86.addcarry.64/llvm.x86.subborrow.64 third operand must be u64"
-    );
+) -> (Value, Value) {
+    assert_eq!(a.layout().ty, b.layout().ty);
 
     // c + carry -> c + first intermediate carry or borrow respectively
     let int0 = crate::num::codegen_checked_int_binop(fx, bin_op, a, b);
     let c = int0.value_field(fx, FieldIdx::new(0));
     let cb0 = int0.value_field(fx, FieldIdx::new(1)).load_scalar(fx);
 
     // c + carry -> c + second intermediate carry or borrow respectively
-    let cb_in_as_u64 = fx.bcx.ins().uextend(types::I64, cb_in);
-    let cb_in_as_u64 = CValue::by_val(cb_in_as_u64, fx.layout_of(fx.tcx.types.u64));
-    let int1 = crate::num::codegen_checked_int_binop(fx, bin_op, c, cb_in_as_u64);
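+    // Widen the u8 carry-in to the operand's own integer type so the second
+    // checked add/sub works for both the 32-bit and 64-bit variants.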
+    let clif_ty = fx.clif_type(a.layout().ty).unwrap();
+    let cb_in_as_int = fx.bcx.ins().uextend(clif_ty, cb_in);
+    let cb_in_as_int = CValue::by_val(cb_in_as_int, fx.layout_of(a.layout().ty));
+    let int1 = crate::num::codegen_checked_int_binop(fx, bin_op, c, cb_in_as_int);
     let (c, cb1) = int1.load_scalar_pair(fx);
 
     // carry0 | carry1 -> carry or borrow respectively
     let cb_out = fx.bcx.ins().bor(cb0, cb1);
 
-    let layout = fx.layout_of(fx.tcx.mk_tup(&[fx.tcx.types.u8, fx.tcx.types.u64]));
-    let val = CValue::by_val_pair(cb_out, c, layout);
-    ret.write_cvalue(fx, val);
+    (cb_out, c)
 }