Merge pull request #26 from Vurich/optimizations-1
Optimize binary operations on bigints
This commit is contained in:
commit
b8baf9dab6
|
@ -15,6 +15,7 @@ rustc_version = "0.2"
|
|||
rustc-hex = { version = "1.0", optional = true }
|
||||
heapsize = { version = "0.4", optional = true }
|
||||
byteorder = { version = "1", default-features = false }
|
||||
crunchy = "0.1.5"
|
||||
|
||||
[features]
|
||||
heapsizeof = ["heapsize", "std"]
|
||||
|
@ -22,4 +23,4 @@ std = ["rustc-hex"]
|
|||
|
||||
[[example]]
|
||||
name = "modular"
|
||||
required-features = ["std"]
|
||||
required-features = ["std"]
|
||||
|
|
|
@ -68,22 +68,30 @@ fn u512_add(b: &mut Bencher) {
|
|||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn u512_mul(b: &mut Bencher) {
|
||||
b.iter(|| {
|
||||
let n = black_box(10000);
|
||||
let one = black_box(U512::one());
|
||||
(1..n).fold(one, |old, new| { old.overflowing_mul(U512::from(black_box(new))).0 })
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn u256_mul(b: &mut Bencher) {
|
||||
b.iter(|| {
|
||||
let n = black_box(10000);
|
||||
let one = black_box(U256::one());
|
||||
(0..n).fold(one, |old, new| { old.overflowing_mul(U256::from(black_box(new))).0 })
|
||||
(1..n).fold(one, |old, new| { old.overflowing_mul(U256::from(black_box(new))).0 })
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
#[bench]
|
||||
fn u256_full_mul(b: &mut Bencher) {
|
||||
b.iter(|| {
|
||||
let n = black_box(10000);
|
||||
let one = black_box(U256::one());
|
||||
(0..n).fold(one,
|
||||
(1..n).fold(one,
|
||||
|old, new| {
|
||||
let new = black_box(new);
|
||||
let U512(ref u512words) = old.full_mul(U256([new, new, new, new]));
|
||||
|
@ -97,7 +105,7 @@ fn u256_full_mul(b: &mut Bencher) {
|
|||
fn u128_mul(b: &mut Bencher) {
|
||||
b.iter(|| {
|
||||
let n = black_box(10000);
|
||||
(0..n).fold(U128([12345u64, 0u64]), |old, new| { old.overflowing_mul(U128::from(new)).0 })
|
||||
(1..n).fold(U128([12345u64, 0u64]), |old, new| { old.overflowing_mul(U128::from(new)).0 })
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
@ -17,6 +17,9 @@ extern crate byteorder;
|
|||
#[cfg(feature="std")]
|
||||
extern crate rustc_hex;
|
||||
|
||||
#[macro_use]
|
||||
extern crate crunchy;
|
||||
|
||||
#[cfg(feature="heapsizeof")]
|
||||
#[macro_use]
|
||||
extern crate heapsize;
|
||||
|
|
242
src/uint.rs
242
src/uint.rs
|
@ -56,34 +56,26 @@ macro_rules! impl_map_from {
|
|||
|
||||
#[cfg(not(all(asm_available, target_arch="x86_64")))]
|
||||
macro_rules! uint_overflowing_add {
|
||||
($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => ({
|
||||
($name:ident, $n_words:tt, $self_expr: expr, $other: expr) => ({
|
||||
uint_overflowing_add_reg!($name, $n_words, $self_expr, $other)
|
||||
})
|
||||
}
|
||||
|
||||
macro_rules! uint_overflowing_add_reg {
|
||||
($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => ({
|
||||
let $name(ref me) = $self_expr;
|
||||
let $name(ref you) = $other;
|
||||
|
||||
let mut ret = [0u64; $n_words];
|
||||
let mut carry = 0u64;
|
||||
|
||||
for i in 0..$n_words {
|
||||
let (res1, overflow1) = me[i].overflowing_add(you[i]);
|
||||
let (res2, overflow2) = res1.overflowing_add(carry);
|
||||
|
||||
ret[i] = res2;
|
||||
carry = overflow1 as u64 + overflow2 as u64;
|
||||
}
|
||||
|
||||
($name(ret), carry > 0)
|
||||
($name:ident, $n_words:tt, $self_expr: expr, $other: expr) => ({
|
||||
uint_overflowing_binop!(
|
||||
$name,
|
||||
$n_words,
|
||||
$self_expr,
|
||||
$other,
|
||||
u64::overflowing_add
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(all(asm_available, target_arch="x86_64"))]
|
||||
macro_rules! uint_overflowing_add {
|
||||
(U256, $n_words: expr, $self_expr: expr, $other: expr) => ({
|
||||
(U256, $n_words:tt, $self_expr: expr, $other: expr) => ({
|
||||
let mut result: [u64; $n_words] = unsafe { ::core::mem::uninitialized() };
|
||||
let self_t: &[u64; $n_words] = &$self_expr.0;
|
||||
let other_t: &[u64; $n_words] = &$other.0;
|
||||
|
@ -106,7 +98,7 @@ macro_rules! uint_overflowing_add {
|
|||
}
|
||||
(U256(result), overflow != 0)
|
||||
});
|
||||
(U512, $n_words: expr, $self_expr: expr, $other: expr) => ({
|
||||
(U512, $n_words:tt, $self_expr: expr, $other: expr) => ({
|
||||
let mut result: [u64; $n_words] = unsafe { ::core::mem::uninitialized() };
|
||||
let self_t: &[u64; $n_words] = &$self_expr.0;
|
||||
let other_t: &[u64; $n_words] = &$other.0;
|
||||
|
@ -152,42 +144,76 @@ macro_rules! uint_overflowing_add {
|
|||
(U512(result), overflow != 0)
|
||||
});
|
||||
|
||||
($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => (
|
||||
($name:ident, $n_words:tt, $self_expr: expr, $other: expr) => (
|
||||
uint_overflowing_add_reg!($name, $n_words, $self_expr, $other)
|
||||
)
|
||||
}
|
||||
|
||||
#[cfg(not(all(asm_available, target_arch="x86_64")))]
|
||||
macro_rules! uint_overflowing_sub {
|
||||
($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({
|
||||
($name:ident, $n_words:tt, $self_expr: expr, $other: expr) => ({
|
||||
uint_overflowing_sub_reg!($name, $n_words, $self_expr, $other)
|
||||
})
|
||||
}
|
||||
|
||||
macro_rules! uint_overflowing_sub_reg {
|
||||
($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({
|
||||
macro_rules! uint_overflowing_binop {
|
||||
($name:ident, $n_words:tt, $self_expr: expr, $other: expr, $fn:expr) => ({
|
||||
let $name(ref me) = $self_expr;
|
||||
let $name(ref you) = $other;
|
||||
|
||||
let mut ret = [0u64; $n_words];
|
||||
let mut ret = unsafe { ::core::mem::uninitialized() };
|
||||
let ret_ptr = &mut ret as *mut [u64; $n_words] as *mut u64;
|
||||
let mut carry = 0u64;
|
||||
|
||||
for i in 0..$n_words {
|
||||
let (res1, overflow1) = me[i].overflowing_sub(you[i]);
|
||||
let (res2, overflow2) = res1.overflowing_sub(carry);
|
||||
unroll! {
|
||||
for i in 0..$n_words {
|
||||
use ::core::ptr;
|
||||
|
||||
ret[i] = res2;
|
||||
carry = overflow1 as u64 + overflow2 as u64;
|
||||
if carry != 0 {
|
||||
let (res1, overflow1) = ($fn)(me[i], you[i]);
|
||||
let (res2, overflow2) = ($fn)(res1, carry);
|
||||
|
||||
unsafe {
|
||||
ptr::write(
|
||||
ret_ptr.offset(i as _),
|
||||
res2
|
||||
);
|
||||
}
|
||||
carry = (overflow1 as u8 + overflow2 as u8) as u64;
|
||||
} else {
|
||||
let (res, overflow) = ($fn)(me[i], you[i]);
|
||||
|
||||
unsafe {
|
||||
ptr::write(
|
||||
ret_ptr.offset(i as _),
|
||||
res
|
||||
);
|
||||
}
|
||||
|
||||
carry = overflow as u64;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
($name(ret), carry > 0)
|
||||
})
|
||||
}
|
||||
|
||||
macro_rules! uint_overflowing_sub_reg {
|
||||
($name:ident, $n_words:tt, $self_expr: expr, $other: expr) => ({
|
||||
uint_overflowing_binop!(
|
||||
$name,
|
||||
$n_words,
|
||||
$self_expr,
|
||||
$other,
|
||||
u64::overflowing_sub
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(all(asm_available, target_arch="x86_64"))]
|
||||
macro_rules! uint_overflowing_sub {
|
||||
(U256, $n_words: expr, $self_expr: expr, $other: expr) => ({
|
||||
(U256, $n_words:tt, $self_expr: expr, $other: expr) => ({
|
||||
let mut result: [u64; $n_words] = unsafe { ::core::mem::uninitialized() };
|
||||
let self_t: &[u64; $n_words] = &$self_expr.0;
|
||||
let other_t: &[u64; $n_words] = &$other.0;
|
||||
|
@ -209,7 +235,7 @@ macro_rules! uint_overflowing_sub {
|
|||
}
|
||||
(U256(result), overflow != 0)
|
||||
});
|
||||
(U512, $n_words: expr, $self_expr: expr, $other: expr) => ({
|
||||
(U512, $n_words:tt, $self_expr: expr, $other: expr) => ({
|
||||
let mut result: [u64; $n_words] = unsafe { ::core::mem::uninitialized() };
|
||||
let self_t: &[u64; $n_words] = &$self_expr.0;
|
||||
let other_t: &[u64; $n_words] = &$other.0;
|
||||
|
@ -254,7 +280,7 @@ macro_rules! uint_overflowing_sub {
|
|||
}
|
||||
(U512(result), overflow != 0)
|
||||
});
|
||||
($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({
|
||||
($name:ident, $n_words:tt, $self_expr: expr, $other: expr) => ({
|
||||
uint_overflowing_sub_reg!($name, $n_words, $self_expr, $other)
|
||||
})
|
||||
}
|
||||
|
@ -374,76 +400,82 @@ macro_rules! uint_overflowing_mul {
|
|||
}
|
||||
(U256(result), overflow > 0)
|
||||
});
|
||||
($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => (
|
||||
($name:ident, $n_words:tt, $self_expr: expr, $other: expr) => (
|
||||
uint_overflowing_mul_reg!($name, $n_words, $self_expr, $other)
|
||||
)
|
||||
}
|
||||
|
||||
#[cfg(not(all(asm_available, target_arch="x86_64")))]
|
||||
macro_rules! uint_overflowing_mul {
|
||||
($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({
|
||||
($name:ident, $n_words:tt, $self_expr: expr, $other: expr) => ({
|
||||
uint_overflowing_mul_reg!($name, $n_words, $self_expr, $other)
|
||||
})
|
||||
}
|
||||
|
||||
macro_rules! uint_overflowing_mul_reg {
|
||||
($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({
|
||||
macro_rules! uint_full_mul_reg {
|
||||
($name:ident, $n_words:tt, $self_expr:expr, $other:expr) => ({{
|
||||
#![allow(unused_assignments)]
|
||||
|
||||
let $name(ref me) = $self_expr;
|
||||
let $name(ref you) = $other;
|
||||
let mut ret = [0u64; 2*$n_words];
|
||||
|
||||
let mut i = 0;
|
||||
for _ in 0..$n_words {
|
||||
if you[i] == 0 {
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
unroll! {
|
||||
for i in 0..$n_words {
|
||||
let mut carry = 0u64;
|
||||
let (b_u, b_l) = split(you[i]);
|
||||
|
||||
let mut carry2 = 0u64;
|
||||
let (b_u, b_l) = split(you[i]);
|
||||
unroll! {
|
||||
for j in 0..$n_words {
|
||||
if me[j] != 0 || carry != 0 {
|
||||
let a = split(me[j]);
|
||||
|
||||
let mut j = 0;
|
||||
for _ in 0..$n_words {
|
||||
if me[j] == 0 && carry2 == 0 {
|
||||
j += 1;
|
||||
continue;
|
||||
// multiply parts
|
||||
let (c_l, overflow_l) = mul_u32(a, b_l, ret[i + j]);
|
||||
let (c_u, overflow_u) = mul_u32(a, b_u, c_l >> 32);
|
||||
ret[i + j] = (c_l & 0xFFFFFFFF) + (c_u << 32);
|
||||
|
||||
// No overflow here
|
||||
let res = (c_u >> 32) + (overflow_u << 32);
|
||||
// possible overflows
|
||||
let (res, o1) = res.overflowing_add(overflow_l + carry);
|
||||
let (res, o2) = res.overflowing_add(ret[i + j + 1]);
|
||||
ret[i + j + 1] = res;
|
||||
|
||||
// Only single overflow possible there
|
||||
carry = (o1 | o2) as u64;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let a = split(me[j]);
|
||||
|
||||
// multiply parts
|
||||
let (c_l, overflow_l) = mul_u32(a, b_l, ret[i + j]);
|
||||
let (c_u, overflow_u) = mul_u32(a, b_u, c_l >> 32);
|
||||
ret[i + j] = (c_l & 0xFFFFFFFF) + (c_u << 32);
|
||||
|
||||
// No overflow here
|
||||
let res = (c_u >> 32) + (overflow_u << 32);
|
||||
// possible overflows
|
||||
let (res, o1) = res.overflowing_add(overflow_l);
|
||||
let (res, o2) = res.overflowing_add(carry2);
|
||||
let (res, o3) = res.overflowing_add(ret[i + j + 1]);
|
||||
ret[i + j + 1] = res;
|
||||
|
||||
// Only single overflow possible there
|
||||
carry2 = (o1 | o2 | o3) as u64;
|
||||
j += 1;
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
|
||||
let mut res = [0u64; $n_words];
|
||||
let mut overflow = false;
|
||||
res.copy_from_slice(&ret[0..$n_words]);
|
||||
|
||||
unsafe {
|
||||
let mut ret_ptr = ret.as_ptr().offset($n_words);
|
||||
for _ in $n_words..2*$n_words {
|
||||
overflow |= *ret_ptr != 0;
|
||||
ret_ptr = ret_ptr.offset(1);
|
||||
}
|
||||
}
|
||||
|
||||
($name(res), overflow)
|
||||
ret
|
||||
}})
|
||||
}
|
||||
|
||||
macro_rules! uint_overflowing_mul_reg {
|
||||
($name:ident, $n_words:tt, $self_expr: expr, $other: expr) => ({
|
||||
let ret: [u64; $n_words * 2] = uint_full_mul_reg!($name, $n_words, $self_expr, $other);
|
||||
|
||||
// The safety of this is enforced by the compiler
|
||||
let ret: [[u64; $n_words]; 2] = unsafe { mem::transmute(ret) };
|
||||
|
||||
// The compiler WILL NOT inline this if you remove this annotation.
|
||||
#[inline(always)]
|
||||
fn any_nonzero(arr: &[u64; $n_words]) -> bool {
|
||||
unroll! {
|
||||
for i in 0..$n_words {
|
||||
if arr[i] != 0 {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
($name(ret[0]), any_nonzero(&ret[1]))
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -489,7 +521,7 @@ fn split(a: u64) -> (u64, u64) {
|
|||
}
|
||||
|
||||
macro_rules! construct_uint {
|
||||
($name:ident, $n_words:expr) => (
|
||||
($name:ident, $n_words:tt) => (
|
||||
/// Little-endian large integer type
|
||||
#[repr(C)]
|
||||
#[derive(Copy, Clone, Eq, PartialEq, Hash)]
|
||||
|
@ -1161,9 +1193,9 @@ macro_rules! construct_uint {
|
|||
);
|
||||
}
|
||||
|
||||
construct_uint!(U512, 8);
|
||||
construct_uint!(U256, 4);
|
||||
construct_uint!(U128, 2);
|
||||
construct_uint!(U256, 4);
|
||||
construct_uint!(U512, 8);
|
||||
|
||||
impl U256 {
|
||||
/// Multiplies two 256-bit integers to produce full 512-bit integer
|
||||
|
@ -1307,46 +1339,10 @@ impl U256 {
|
|||
|
||||
/// Multiplies two 256-bit integers to produce full 512-bit integer
|
||||
/// No overflow possible
|
||||
#[inline(always)]
|
||||
#[cfg(not(all(asm_available, target_arch="x86_64")))]
|
||||
pub fn full_mul(self, other: U256) -> U512 {
|
||||
let U256(ref me) = self;
|
||||
let U256(ref you) = other;
|
||||
let mut ret = [0u64; 8];
|
||||
|
||||
for i in 0..4 {
|
||||
if you[i] == 0 {
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut carry2 = 0u64;
|
||||
let (b_u, b_l) = split(you[i]);
|
||||
|
||||
for j in 0..4 {
|
||||
if me[j] == 0 && carry2 == 0 {
|
||||
continue;
|
||||
}
|
||||
|
||||
let a = split(me[j]);
|
||||
|
||||
// multiply parts
|
||||
let (c_l, overflow_l) = mul_u32(a, b_l, ret[i + j]);
|
||||
let (c_u, overflow_u) = mul_u32(a, b_u, c_l >> 32);
|
||||
ret[i + j] = (c_l & 0xFFFFFFFF) + (c_u << 32);
|
||||
|
||||
// No overflow here
|
||||
let res = (c_u >> 32) + (overflow_u << 32);
|
||||
// possible overflows
|
||||
let (res, o1) = res.overflowing_add(overflow_l);
|
||||
let (res, o2) = res.overflowing_add(carry2);
|
||||
let (res, o3) = res.overflowing_add(ret[i + j + 1]);
|
||||
ret[i + j + 1] = res;
|
||||
|
||||
// Only single overflow possible there
|
||||
carry2 = (o1 | o2 | o3) as u64;
|
||||
}
|
||||
}
|
||||
|
||||
U512(ret)
|
||||
U512(uint_full_mul_reg!(U256, 4, self, other))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue