Add intrinsics for bigint helper methods #131566

Closed

Changes from all commits
6 changes: 6 additions & 0 deletions compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
@@ -192,6 +192,12 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
| sym::prefetch_write_instruction => {
unimplemented!();
}
#[cfg(not(bootstrap))]
sym::add_with_carry
| sym::sub_with_carry
| sym::mul_double
| sym::mul_double_add
| sym::mul_double_add2 => unimplemented!(),
sym::ctlz
| sym::ctlz_nonzero
| sym::cttz
77 changes: 76 additions & 1 deletion compiler/rustc_codegen_llvm/src/intrinsic.rs
@@ -347,7 +347,12 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
| sym::rotate_left
| sym::rotate_right
| sym::saturating_add
| sym::saturating_sub => {
| sym::saturating_sub
| sym::add_with_carry
| sym::sub_with_carry
| sym::mul_double
| sym::mul_double_add
| sym::mul_double_add2 => {
let ty = arg_tys[0];
match int_type_width_signed(ty, self) {
Some((width, signed)) => match name {
@@ -417,6 +422,76 @@
);
self.call_intrinsic(llvm_name, &[lhs, rhs])
}
sym::add_with_carry | sym::sub_with_carry => {
let llty = self.type_ix(width);
let is_add = name == sym::add_with_carry;
let lhs = args[0].immediate();
let rhs = args[1].immediate();

// sign-extending the carry would treat it as -1, not 1
let carry = self.intcast(args[2].immediate(), llty, false);

let llvm_name = &format!(
"llvm.{}{}.with.overflow.i{}",
if signed { 's' } else { 'u' },
if is_add { "add" } else { "sub" },
width,
);

let ret = self.call_intrinsic(llvm_name, &[lhs, rhs]);
let agg = self.extract_value(ret, 0);
let overflow1 = self.extract_value(ret, 1);

let ret = self.call_intrinsic(llvm_name, &[agg, carry]);
let agg = self.extract_value(ret, 0);
let overflow2 = self.extract_value(ret, 1);

let overflow = if signed {
self.icmp(IntPredicate::IntNE, overflow1, overflow2)
} else {
self.or(overflow1, overflow2)
};

let holder = self.const_struct(
&[self.const_undef(llty), self.const_undef(self.type_i1())],
false,
);
let holder = self.insert_value(holder, agg, 0);
let holder = self.insert_value(holder, overflow, 1);
holder
}
sym::mul_double | sym::mul_double_add | sym::mul_double_add2 => {
let single_ty = self.type_ix(width);
let double_ty = self.type_ix(width * 2);
let lhs = self.intcast(args[0].immediate(), double_ty, signed);
let rhs = self.intcast(args[1].immediate(), double_ty, signed);
let mut ret = self.mul(lhs, rhs);
if name == sym::mul_double_add || name == sym::mul_double_add2 {
let carry = self.intcast(args[2].immediate(), double_ty, signed);
ret = self.add(ret, carry)
}
if name == sym::mul_double_add2 {
let carry2 = self.intcast(args[3].immediate(), double_ty, signed);
ret = self.add(ret, carry2);
}

// note: insignificant part is always treated as unsigned, even if we
// coerce it to signed in the final result to make the intrinsic
// signature simpler
let lo = self.intcast(ret, single_ty, signed);

let bits = self.const_uint(double_ty, width);
let hi = self.ashr(ret, bits);
let hi = self.intcast(hi, single_ty, signed);

let holder = self.const_struct(
&[self.const_undef(single_ty), self.const_undef(single_ty)],
false,
);
let holder = self.insert_value(holder, lo, 0);
let holder = self.insert_value(holder, hi, 1);
holder
}
_ => bug!(),
},
None => {
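An aside on the mul_double lowering above: it is just a widening multiply followed by a high/low split. A plain-Rust sketch of the unsigned case at u8 width (illustrative only, not part of the diff):

fn mul_double_u8(a: u8, b: u8) -> (u8, u8) {
    // widen both operands and multiply in double width; the product of two
    // 8-bit values always fits in 16 bits, so this cannot overflow
    let wide = (a as u16) * (b as u16);
    // split into (low half, high half), matching the intrinsic's return order
    (wide as u8, (wide >> 8) as u8)
}

fn main() {
    // 200 * 200 = 40000 = 0x9C40, so lo = 0x40 and hi = 0x9C
    assert_eq!(mul_double_u8(200, 200), (0x40, 0x9C));
}

The signed variants differ only in using sign extension for the widening casts, as the intcast calls above show.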
128 changes: 128 additions & 0 deletions compiler/rustc_const_eval/src/interpret/intrinsics.rs
@@ -178,6 +178,34 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
)?;
self.write_scalar(val, dest)?;
}
sym::add_with_carry | sym::sub_with_carry => {
let l = self.read_immediate(&args[0])?;
let r = self.read_immediate(&args[1])?;
let c = self.read_immediate(&args[2])?;
let (val, overflowed) = self.carrying_arith(
if intrinsic_name == sym::add_with_carry { BinOp::Add } else { BinOp::Sub },
&l,
&r,
&c,
)?;
self.write_scalar_pair(val, overflowed, dest)?;
}
sym::mul_double | sym::mul_double_add | sym::mul_double_add2 => {
let l = self.read_immediate(&args[0])?;
let r = self.read_immediate(&args[1])?;
let c1 = if intrinsic_name != sym::mul_double {
Some(self.read_immediate(&args[2])?)
} else {
None
};
let c2 = if intrinsic_name == sym::mul_double_add2 {
Some(self.read_immediate(&args[3])?)
} else {
None
};
let (lo, hi) = self.mul_double_add2(&l, &r, c1.as_ref(), c2.as_ref())?;
self.write_scalar_pair(lo, hi, dest)?;
}
sym::discriminant_value => {
let place = self.deref_pointer(&args[0])?;
let variant = self.read_discriminant(&place)?;
@@ -573,6 +601,106 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
})
}

pub fn carrying_arith(
Member: Please add a doc comment to both new methods

&self,
mir_op: BinOp,
l: &ImmTy<'tcx, M::Provenance>,
r: &ImmTy<'tcx, M::Provenance>,
c: &ImmTy<'tcx, M::Provenance>,
) -> InterpResult<'tcx, (Scalar<M::Provenance>, Scalar<M::Provenance>)> {
assert_eq!(l.layout.ty, r.layout.ty);
assert_matches!(l.layout.ty.kind(), ty::Int(..) | ty::Uint(..));
assert_matches!(c.layout.ty.kind(), ty::Bool);
assert_matches!(mir_op, BinOp::Add | BinOp::Sub);

let mir_op = mir_op.wrapping_to_overflowing().unwrap();

let (val, overflowed1) = self.binary_op(mir_op, l, r)?.to_scalar_pair();

let val = ImmTy::from_scalar(val, l.layout);
let c = ImmTy::from_scalar(c.to_scalar(), l.layout);

let (val, overflowed2) = self.binary_op(mir_op, &val, &c)?.to_scalar_pair();

let overflowed1 = overflowed1.to_bool()?;
let overflowed2 = overflowed2.to_bool()?;

let overflowed = Scalar::from_bool(if l.layout.abi.is_signed() {
overflowed1 != overflowed2
} else {
overflowed1 | overflowed2
});
Comment on lines +628 to +632

Member: Please add a comment explaining the logic here

Contributor Author: I definitely should just update the intrinsic docs to clarify the behaviour here, although the standard library has a bit of a weird relationship with how it documents intrinsics.

Most of them rely on having stabilised versions that they can just point to, since those will have the proper docs. The documentation here is split between iN::carrying_add, uN::carrying_add, iN::borrowing_sub, and uN::borrowing_sub, but the bottom line is that signed methods are merely checking for overflow, whereas unsigned methods want to actually return a new carry bit that can be chained along. That's what we're testing for in the methods and I'm just duplicating that here.

Not sure what the best solution for documentation would be here; open to ideas. I could just link those docs here, for now.

Member: I just don't understand why the signed thing overflowed if exactly one of the sub-operations overflowed. Like I could probably think about it for a minute and figure it out, but there should really be comments explaining that.

Your answer confused me even more: in which sense is the result different for signed vs unsigned? That should also be documented...
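For anyone following along, here is a standalone sketch of the semantics in question, using only stable overflowing_add on u8/i8 (illustrative; the authoritative behaviour is documented on uN::carrying_add and iN::carrying_add):

fn carrying_add_u8(lhs: u8, rhs: u8, carry: bool) -> (u8, bool) {
    let (a, o1) = lhs.overflowing_add(rhs);
    // `carry as u8` zero-extends the flag to 1, never -1
    let (b, o2) = a.overflowing_add(carry as u8);
    // unsigned: at most one step can wrap, and either wrap means a
    // carry-out, so the flags are OR-ed
    (b, o1 | o2)
}

fn carrying_add_i8(lhs: i8, rhs: i8, carry: bool) -> (i8, bool) {
    let (a, o1) = lhs.overflowing_add(rhs);
    let (b, o2) = a.overflowing_add(carry as i8);
    // signed: two wraps in opposite directions cancel each other out, so
    // overflow happened iff exactly one step wrapped, hence the `!=` (XOR)
    (b, o1 != o2)
}

fn main() {
    // unsigned: the carry-out is meant to chain into the next limb
    assert_eq!(carrying_add_u8(u8::MAX, 0, true), (0, true));
    // signed: (-1) + (-128) wraps to 127, then adding the carry wraps back
    // to -128; the true sum -128 fits in i8, so no overflow is reported
    assert_eq!(carrying_add_i8(-1, -128, true), (-128, false));
}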


interp_ok((val, overflowed))
}

pub fn mul_double_add2(
&self,
l: &ImmTy<'tcx, M::Provenance>,
r: &ImmTy<'tcx, M::Provenance>,
c1: Option<&ImmTy<'tcx, M::Provenance>>,
c2: Option<&ImmTy<'tcx, M::Provenance>>,
) -> InterpResult<'tcx, (Scalar<M::Provenance>, Scalar<M::Provenance>)> {
assert_eq!(l.layout.ty, r.layout.ty);
assert_matches!(l.layout.ty.kind(), ty::Int(..) | ty::Uint(..));

let is_signed = l.layout.abi.is_signed();
let size = l.layout.size;
let bits = size.bits();
let l = l.to_scalar_int()?;
let r = r.to_scalar_int()?;

interp_ok(if is_signed {
let l = l.to_int(size);
let r = r.to_int(size);
let c1 = c1.map_or(interp_ok(0), |c1| interp_ok(c1.to_scalar_int()?.to_int(size)))?;
let c2 = c2.map_or(interp_ok(0), |c2| interp_ok(c2.to_scalar_int()?.to_int(size)))?;
if bits == 128 {
#[cfg(bootstrap)]
{
let _ = (l, r, c1, c2);
unimplemented!()
}
#[cfg(not(bootstrap))]
{
let (lo, hi) = l.carrying2_mul(r, c1, c2);
let lo = Scalar::from_uint(lo, size);
let hi = Scalar::from_int(hi, size);
(lo, hi)
}
} else {
let prod = l * r + c1 + c2;
let lo = Scalar::from_int(prod, size);
let hi = Scalar::from_int(prod >> size.bits(), size);
(lo, hi)
}
} else {
let l = l.to_uint(size);
let r = r.to_uint(size);
let c1 = c1.map_or(interp_ok(0), |c1| interp_ok(c1.to_scalar_int()?.to_uint(size)))?;
let c2 = c2.map_or(interp_ok(0), |c2| interp_ok(c2.to_scalar_int()?.to_uint(size)))?;
if bits == 128 {
#[cfg(bootstrap)]
{
let _ = (l, r, c1, c2);
unimplemented!()
}
#[cfg(not(bootstrap))]
{
let (lo, hi) = l.carrying2_mul(r, c1, c2);
Member: This is unfortunate... for basic arithmetic like this, it'd be better if we had our own implementation rather than having to use the host operation. How hard would that be?

Contributor Author: So, my thought process here is that the ideal solution is to replace the current hard-coded i128/u128 version of the code with a bigint implementation and do the multiplication directly in all cases. That would be the most ideal, and it would support a potential future with integers larger than 128 bits. It would also likely use methods like this one to perform bigint multiplication.

However, without that, my thought process was that I could either manually code a version of carrying2_mul here that would perform worse and require extra scrutiny, or just use the version that's already been implemented and tested.

I'll defer to whatever you think is the better option, but that at least explains my reasoning.

Member: I don't know how hard this is to implement directly; is there some code somewhere that would give me an idea?

It's also not great to have a completely different codepath for u128 and the rest; that makes proper testing more tricky.

Contributor Author: So, just sharing a few of the implementations mentioned on the tracking issue:

Note also that, regardless of what we do, the result of double-wide multiplication of 128-bit integers is going to be two 128-bit integers, and it's only in the case of 128-bit integers that we need to scoop out the extra data from the more-significant 128 bits. So, effectively, even if I had the same path for all integers using the 128-bit double-wide mul, we'd still be special-casing 128 bits by only looking at the higher-order word in the 128-bit case.

Member (quoting "with a bigint implementation"): FWIW I'd love that for all our arithmetic.^^ It's probably too slow though. And using it only sometimes seems odd.

Member (quoting "However, without that, my thought process was that I could either manually code a version of carrying2_mul here..."): So, my thought process here is that we typically want to be independent from possible bugs in the standard library, and provide our own reference implementation. But we haven't done that in numeric_intrinsic, so it'd be odd to use a higher standard here.
So fair, please stick with the current implementation, just with more comments.
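For a concrete sense of the manual alternative discussed here, a portable double-wide u128 multiply can be built from 64-bit halves. A sketch of the schoolbook split (illustrative only; the PR deliberately reuses the tested std implementation instead):

fn widening_mul_u128(a: u128, b: u128) -> (u128, u128) {
    let (al, ah) = (a as u64 as u128, a >> 64);
    let (bl, bh) = (b as u64 as u128, b >> 64);
    let ll = al * bl; // each partial product is at most (2^64 - 1)^2,
    let lh = al * bh; // so all four fit in u128 without overflow
    let hl = ah * bl;
    let hh = ah * bh;
    // combine the two middle products, tracking the carry out of 2^128
    let (mid, c1) = lh.overflowing_add(hl);
    // low half: ll plus the low 64 bits of mid shifted into place
    let (lo, c2) = ll.overflowing_add(mid << 64);
    // high half: the true quotient by 2^128 is below 2^128, so since every
    // term is nonnegative, none of these additions can wrap
    let hi = hh + (mid >> 64) + ((c1 as u128) << 64) + c2 as u128;
    (lo, hi)
}

fn main() {
    // (2^128 - 1)^2 = 2^256 - 2^129 + 1, i.e. hi = 2^128 - 2 and lo = 1
    assert_eq!(widening_mul_u128(u128::MAX, u128::MAX), (1, u128::MAX - 1));
}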

let lo = Scalar::from_uint(lo, size);
let hi = Scalar::from_uint(hi, size);
(lo, hi)
}
} else {
let prod = l * r + c1 + c2;
Member: Please add a comment explaining why this does not cause overflow. Also please use strict_ operations.

Contributor Author: Will convert to strict, although I thought that the lack of overflow was evident from the fact that the 128-bit case was covered separately: all other integers are at most 64 bits, where this operation cannot overflow.

Member: You are multiplying two 64-bit numbers here, which results in a 128-bit number. Then you add more stuff. u64::MAX * u64::MAX is fairly close to u128::MAX, and then we add stuff... why can't this overflow? I have no intuition for this, so it definitely needs comments.

Contributor Author: It turns out that u64::MAX * u64::MAX + u64::MAX + u64::MAX is actually u128::MAX, which is the principle behind allowing up to two carries for double-wide multiplication.

But yes, I'll add comments.
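That identity is quick to check with 64-bit operands widened to u128 (a standalone snippet, not part of the diff):

fn main() {
    let m = u64::MAX as u128; // 2^64 - 1
    // (2^64 - 1)^2 + 2*(2^64 - 1) = 2^128 - 2^65 + 1 + 2^65 - 2 = 2^128 - 1,
    // so a double-wide multiply can absorb up to two max-value carries
    // without overflowing the double-wide result
    assert_eq!(m * m + m + m, u128::MAX);
}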

let lo = Scalar::from_uint(prod, size);
let hi = Scalar::from_uint(prod >> size.bits(), size);
(lo, hi)
}
})
}

/// Offsets a pointer by some multiple of its type, returning an error if the pointer leaves its
/// allocation.
pub fn ptr_offset_inbounds(
11 changes: 11 additions & 0 deletions compiler/rustc_const_eval/src/interpret/place.rs
@@ -626,6 +626,17 @@
self.write_immediate(Immediate::Scalar(val.into()), dest)
}

/// Write a scalar pair to a place
#[inline(always)]
pub fn write_scalar_pair(
Member: I don't think we want this helper. We have immediates specifically to represent these pairs. Also, this encourages having scalar pairs without their type, which is dangerous. I think we actually want to change write_immediate to take an ImmTy instead of an Immediate, but that's a larger change... but this helper moves us in the wrong direction IMO.

Contributor Author: I like the idea of having typed immediates everywhere; I just know that this isn't what the code is doing right now, and that's why I added this method, since it's either this or import Immediate directly into the intrinsics module and construct one myself.

Member (quoting "or import Immediate directly into the intrinsics module and construct one myself"): Yes, please do that.

Contributor Author: I had assumed that the presence of write_scalar was to avoid that, but I'll keep that in mind. Perhaps write_scalar should also be removed if the goal is to have typed immediates everywhere?

Member: Scalars are much less at risk of odd effects due to bad types, since there are no field offsets / padding being computed.

Contributor Author: Ah, that makes sense. I wasn't aware that this padding/offset was even meaningful, since I thought that the reason for having a "pair" primitive was explicitly to avoid this.

&mut self,
val1: impl Into<Scalar<M::Provenance>>,
val2: impl Into<Scalar<M::Provenance>>,
dest: &impl Writeable<'tcx, M::Provenance>,
) -> InterpResult<'tcx> {
self.write_immediate(Immediate::ScalarPair(val1.into(), val2.into()), dest)
}

/// Write a pointer to a place
#[inline(always)]
pub fn write_pointer(
1 change: 1 addition & 0 deletions compiler/rustc_const_eval/src/lib.rs
@@ -5,6 +5,7 @@
#![cfg_attr(not(bootstrap), warn(unqualified_local_imports))]
#![doc(rust_logo)]
#![feature(assert_matches)]
#![feature(bigint_helper_methods)]
#![feature(box_patterns)]
#![feature(decl_macro)]
#![feature(if_let_guard)]
27 changes: 27 additions & 0 deletions compiler/rustc_hir_analysis/src/check/intrinsic.rs
@@ -103,6 +103,11 @@ pub fn intrinsic_operation_unsafety(tcx: TyCtxt<'_>, intrinsic_id: LocalDefId) -
| sym::add_with_overflow
| sym::sub_with_overflow
| sym::mul_with_overflow
| sym::add_with_carry
| sym::sub_with_carry
| sym::mul_double
| sym::mul_double_add
| sym::mul_double_add2
| sym::wrapping_add
| sym::wrapping_sub
| sym::wrapping_mul
@@ -433,6 +438,28 @@ pub fn check_intrinsic_type(
(1, 0, vec![param(0), param(0)], Ty::new_tup(tcx, &[param(0), tcx.types.bool]))
}

sym::add_with_carry | sym::sub_with_carry => (
1,
0,
vec![param(0), param(0), tcx.types.bool],
Ty::new_tup(tcx, &[param(0), tcx.types.bool]),
),

sym::mul_double => {
(1, 0, vec![param(0), param(0)], Ty::new_tup(tcx, &[param(0), param(0)]))
}

sym::mul_double_add => {
(1, 0, vec![param(0), param(0), param(0)], Ty::new_tup(tcx, &[param(0), param(0)]))
}

sym::mul_double_add2 => (
1,
0,
vec![param(0), param(0), param(0), param(0)],
Ty::new_tup(tcx, &[param(0), param(0)]),
),

sym::ptr_guaranteed_cmp => (
1,
0,
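For reference, the (type params, const params, argument types, return type) tuples checked above correspond to Rust-level signatures of roughly this shape. This is a sketch written as ordinary generics, not the actual core::intrinsics declarations, and the parameter names are assumed:

#![allow(dead_code, unused_variables)]

fn add_with_carry<T>(a: T, b: T, carry: bool) -> (T, bool) { unimplemented!() }
fn sub_with_carry<T>(a: T, b: T, carry: bool) -> (T, bool) { unimplemented!() }
fn mul_double<T>(a: T, b: T) -> (T, T) { unimplemented!() }
fn mul_double_add<T>(a: T, b: T, carry: T) -> (T, T) { unimplemented!() }
fn mul_double_add2<T>(a: T, b: T, c1: T, c2: T) -> (T, T) { unimplemented!() }

fn main() {}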
5 changes: 5 additions & 0 deletions compiler/rustc_span/src/symbol.rs
@@ -377,6 +377,7 @@ symbols! {
abort,
add,
add_assign,
add_with_carry,
add_with_overflow,
address,
adt_const_params,
@@ -1276,6 +1277,9 @@
move_size_limit,
mul,
mul_assign,
mul_double,
mul_double_add,
mul_double_add2,
mul_with_overflow,
multiple_supertrait_upcastable,
must_not_suspend,
@@ -1918,6 +1922,7 @@
structural_peq,
sub,
sub_assign,
sub_with_carry,
sub_with_overflow,
suggestion,
surface_async_drop_in_place,