Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a compiler intrinsic to back bigint_helper_methods #133663

Merged
merged 2 commits into from
Dec 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions compiler/rustc_codegen_llvm/src/intrinsic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,37 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
self.const_i32(cache_type),
])
}
sym::carrying_mul_add => {
let (size, signed) = fn_args.type_at(0).int_size_and_signed(self.tcx);

let wide_llty = self.type_ix(size.bits() * 2);
let args = args.as_array().unwrap();
let [a, b, c, d] = args.map(|a| self.intcast(a.immediate(), wide_llty, signed));

let wide = if signed {
let prod = self.unchecked_smul(a, b);
let acc = self.unchecked_sadd(prod, c);
self.unchecked_sadd(acc, d)
} else {
let prod = self.unchecked_umul(a, b);
let acc = self.unchecked_uadd(prod, c);
self.unchecked_uadd(acc, d)
};

let narrow_llty = self.type_ix(size.bits());
let low = self.trunc(wide, narrow_llty);
let bits_const = self.const_uint(wide_llty, size.bits());
// No need for ashr when signed; LLVM changes it to lshr anyway.
let high = self.lshr(wide, bits_const);
// FIXME: could be `trunc nuw`, even for signed.
let high = self.trunc(high, narrow_llty);

let pair_llty = self.type_struct(&[narrow_llty, narrow_llty], false);
let pair = self.const_poison(pair_llty);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just comparing to my version, this doesn't really matter that much, but const_undef feels slightly more accurate here just semantically. Not actually sure if this affects codegen though.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

poison is always preferred where possible, because it doesn't have the same complications as undef. (undef << 1) & 1 is guaranteed to be 0, but (poison << 1) & 1 is still poison. You'll also see that both what we emit for tuples and what the optimizer does for tuples both use poison for this: https://rust.godbolt.org/z/WTnW4GPE6

let pair = self.insert_value(pair, low, 0);
let pair = self.insert_value(pair, high, 1);
pair
}
sym::ctlz
| sym::ctlz_nonzero
| sym::cttz
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_codegen_llvm/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#![feature(iter_intersperse)]
#![feature(let_chains)]
#![feature(rustdoc_internals)]
#![feature(slice_as_array)]
#![feature(try_blocks)]
#![warn(unreachable_pub)]
// tidy-alphabetical-end
Expand Down
5 changes: 5 additions & 0 deletions compiler/rustc_hir_analysis/src/check/intrinsic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ pub fn intrinsic_operation_unsafety(tcx: TyCtxt<'_>, intrinsic_id: LocalDefId) -
| sym::add_with_overflow
| sym::sub_with_overflow
| sym::mul_with_overflow
| sym::carrying_mul_add
| sym::wrapping_add
| sym::wrapping_sub
| sym::wrapping_mul
Expand Down Expand Up @@ -436,6 +437,10 @@ pub fn check_intrinsic_type(
(1, 0, vec![param(0), param(0)], Ty::new_tup(tcx, &[param(0), tcx.types.bool]))
}

sym::carrying_mul_add => {
(2, 0, vec![param(0); 4], Ty::new_tup(tcx, &[param(1), param(0)]))
}

sym::ptr_guaranteed_cmp => (
1,
0,
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_span/src/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -555,6 +555,7 @@ symbols! {
call_ref_future,
caller_location,
capture_disjoint_fields,
carrying_mul_add,
catch_unwind,
cause,
cdylib,
Expand Down
111 changes: 111 additions & 0 deletions library/core/src/intrinsics/fallback.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
#![unstable(
feature = "core_intrinsics_fallbacks",
reason = "The fallbacks will never be stable, as they exist only to be called \
by the fallback MIR, but they're exported so they can be tested on \
platforms where the fallback MIR isn't actually used",
issue = "none"
)]
#![allow(missing_docs)]

#[const_trait]
pub trait CarryingMulAdd: Copy + 'static {
type Unsigned: Copy + 'static;
fn carrying_mul_add(
self,
multiplicand: Self,
addend: Self,
carry: Self,
) -> (Self::Unsigned, Self);
}

macro_rules! impl_carrying_mul_add_by_widening {
($($t:ident $u:ident $w:ident,)+) => {$(
#[rustc_const_unstable(feature = "core_intrinsics_fallbacks", issue = "none")]
impl const CarryingMulAdd for $t {
type Unsigned = $u;
#[inline]
fn carrying_mul_add(self, a: Self, b: Self, c: Self) -> ($u, $t) {
let wide = (self as $w) * (a as $w) + (b as $w) + (c as $w);
(wide as _, (wide >> Self::BITS) as _)
}
}
)+};
}
impl_carrying_mul_add_by_widening! {
u8 u8 u16,
u16 u16 u32,
u32 u32 u64,
u64 u64 u128,
usize usize UDoubleSize,
i8 u8 i16,
i16 u16 i32,
i32 u32 i64,
i64 u64 i128,
isize usize UDoubleSize,
}

#[cfg(target_pointer_width = "16")]
type UDoubleSize = u32;
#[cfg(target_pointer_width = "32")]
type UDoubleSize = u64;
#[cfg(target_pointer_width = "64")]
type UDoubleSize = u128;

#[inline]
const fn wide_mul_u128(a: u128, b: u128) -> (u128, u128) {
#[inline]
const fn to_low_high(x: u128) -> [u128; 2] {
const MASK: u128 = u64::MAX as _;
[x & MASK, x >> 64]
}
#[inline]
const fn from_low_high(x: [u128; 2]) -> u128 {
x[0] | (x[1] << 64)
}
#[inline]
const fn scalar_mul(low_high: [u128; 2], k: u128) -> [u128; 3] {
let [x, c] = to_low_high(k * low_high[0]);
let [y, z] = to_low_high(k * low_high[1] + c);
[x, y, z]
}
let a = to_low_high(a);
let b = to_low_high(b);
let low = scalar_mul(a, b[0]);
let high = scalar_mul(a, b[1]);
let r0 = low[0];
let [r1, c] = to_low_high(low[1] + high[0]);
let [r2, c] = to_low_high(low[2] + high[1] + c);
let r3 = high[2] + c;
(from_low_high([r0, r1]), from_low_high([r2, r3]))
}

#[rustc_const_unstable(feature = "core_intrinsics_fallbacks", issue = "none")]
impl const CarryingMulAdd for u128 {
type Unsigned = u128;
#[inline]
fn carrying_mul_add(self, b: u128, c: u128, d: u128) -> (u128, u128) {
let (low, mut high) = wide_mul_u128(self, b);
let (low, carry) = u128::overflowing_add(low, c);
high += carry as u128;
let (low, carry) = u128::overflowing_add(low, d);
high += carry as u128;
(low, high)
}
}

#[rustc_const_unstable(feature = "core_intrinsics_fallbacks", issue = "none")]
impl const CarryingMulAdd for i128 {
type Unsigned = u128;
#[inline]
fn carrying_mul_add(self, b: i128, c: i128, d: i128) -> (u128, i128) {
let (low, high) = wide_mul_u128(self as u128, b as u128);
let mut high = high as i128;
high = high.wrapping_add(i128::wrapping_mul(self >> 127, b));
high = high.wrapping_add(i128::wrapping_mul(self, b >> 127));
let (low, carry) = u128::overflowing_add(low, c as u128);
high = high.wrapping_add((carry as i128) + (c >> 127));
let (low, carry) = u128::overflowing_add(low, d as u128);
high = high.wrapping_add((carry as i128) + (d >> 127));
(low, high)
}
}
29 changes: 29 additions & 0 deletions library/core/src/intrinsics/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ use crate::marker::{DiscriminantKind, Tuple};
use crate::mem::SizedTypeProperties;
use crate::{ptr, ub_checks};

pub mod fallback;
pub mod mir;
pub mod simd;

Expand Down Expand Up @@ -3305,6 +3306,34 @@ pub const fn mul_with_overflow<T: Copy>(_x: T, _y: T) -> (T, bool) {
unimplemented!()
}

/// Performs full-width multiplication and addition with a carry:
/// `multiplier * multiplicand + addend + carry`.
///
/// This is possible without any overflow. For `uN`:
/// MAX * MAX + MAX + MAX
/// => (2ⁿ-1) × (2ⁿ-1) + (2ⁿ-1) + (2ⁿ-1)
/// => (2²ⁿ - 2ⁿ⁺¹ + 1) + (2ⁿ⁺¹ - 2)
/// => 2²ⁿ - 1
///
/// For `iN`, the upper bound is MIN * MIN + MAX + MAX => 2²ⁿ⁻² + 2ⁿ - 2,
Copy link
Member Author

@scottmcm scottmcm Dec 8, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pop Quiz: after a double-width isize::MIN * isize::MIN (which is the largest possible positive result from a multiplication), how many more isize::MAXs can you add to the result before it would need to carry to triple-width?

Answer: isize::MAX + 2 of them!

(The carrying_mul_add really feels so much nicer for unsigned types, since uN::MAX * uN::MAX + uN::MAX + uN::MAX == u2N::MAX -- no more space available after that.)

/// and the lower bound is MAX * MIN + MIN + MIN => -2²ⁿ⁻² - 2ⁿ + 2ⁿ⁺¹.
///
/// This currently supports unsigned integers *only*, no signed ones.
/// The stabilized versions of this intrinsic are available on integers.
#[unstable(feature = "core_intrinsics", issue = "none")]
#[rustc_const_unstable(feature = "const_carrying_mul_add", issue = "85532")]
#[rustc_nounwind]
#[cfg_attr(not(bootstrap), rustc_intrinsic)]
#[cfg_attr(not(bootstrap), miri::intrinsic_fallback_is_spec)]
pub const fn carrying_mul_add<T: ~const fallback::CarryingMulAdd<Unsigned = U>, U>(
multiplier: T,
multiplicand: T,
addend: T,
carry: T,
) -> (U, T) {
multiplier.carrying_mul_add(multiplicand, addend, carry)
}

/// Performs an exact division, resulting in undefined behavior where
/// `x % y != 0` or `y == 0` or `x == T::MIN && y == -1`
///
Expand Down
1 change: 1 addition & 0 deletions library/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@
#![cfg_attr(bootstrap, feature(do_not_recommend))]
#![feature(array_ptr_get)]
#![feature(asm_experimental_arch)]
#![feature(const_carrying_mul_add)]
#![feature(const_eval_select)]
#![feature(const_typed_swap)]
#![feature(core_intrinsics)]
Expand Down
Loading
Loading