Merge pull request #26 from Vurich/optimizations-1

Optimize binary operations on bigints
This commit is contained in:
Nikolay Volf 2017-08-18 18:26:22 +03:00 committed by GitHub
commit b8baf9dab6
5 changed files with 136 additions and 128 deletions

View File

@ -15,6 +15,7 @@ rustc_version = "0.2"
rustc-hex = { version = "1.0", optional = true }
heapsize = { version = "0.4", optional = true }
byteorder = { version = "1", default-features = false }
crunchy = "0.1.5"
[features]
heapsizeof = ["heapsize", "std"]
@ -22,4 +23,4 @@ std = ["rustc-hex"]
[[example]]
name = "modular"
required-features = ["std"]
required-features = ["std"]

View File

@ -68,22 +68,30 @@ fn u512_add(b: &mut Bencher) {
});
}
#[bench]
fn u512_mul(b: &mut Bencher) {
b.iter(|| {
let n = black_box(10000);
let one = black_box(U512::one());
(1..n).fold(one, |old, new| { old.overflowing_mul(U512::from(black_box(new))).0 })
});
}
#[bench]
fn u256_mul(b: &mut Bencher) {
b.iter(|| {
let n = black_box(10000);
let one = black_box(U256::one());
(0..n).fold(one, |old, new| { old.overflowing_mul(U256::from(black_box(new))).0 })
(1..n).fold(one, |old, new| { old.overflowing_mul(U256::from(black_box(new))).0 })
});
}
#[bench]
fn u256_full_mul(b: &mut Bencher) {
b.iter(|| {
let n = black_box(10000);
let one = black_box(U256::one());
(0..n).fold(one,
(1..n).fold(one,
|old, new| {
let new = black_box(new);
let U512(ref u512words) = old.full_mul(U256([new, new, new, new]));
@ -97,7 +105,7 @@ fn u256_full_mul(b: &mut Bencher) {
fn u128_mul(b: &mut Bencher) {
b.iter(|| {
let n = black_box(10000);
(0..n).fold(U128([12345u64, 0u64]), |old, new| { old.overflowing_mul(U128::from(new)).0 })
(1..n).fold(U128([12345u64, 0u64]), |old, new| { old.overflowing_mul(U128::from(new)).0 })
});
}

0
src/int.rs Normal file
View File

View File

@ -17,6 +17,9 @@ extern crate byteorder;
#[cfg(feature="std")]
extern crate rustc_hex;
#[macro_use]
extern crate crunchy;
#[cfg(feature="heapsizeof")]
#[macro_use]
extern crate heapsize;

View File

@ -56,34 +56,26 @@ macro_rules! impl_map_from {
#[cfg(not(all(asm_available, target_arch="x86_64")))]
macro_rules! uint_overflowing_add {
($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => ({
($name:ident, $n_words:tt, $self_expr: expr, $other: expr) => ({
uint_overflowing_add_reg!($name, $n_words, $self_expr, $other)
})
}
macro_rules! uint_overflowing_add_reg {
($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => ({
let $name(ref me) = $self_expr;
let $name(ref you) = $other;
let mut ret = [0u64; $n_words];
let mut carry = 0u64;
for i in 0..$n_words {
let (res1, overflow1) = me[i].overflowing_add(you[i]);
let (res2, overflow2) = res1.overflowing_add(carry);
ret[i] = res2;
carry = overflow1 as u64 + overflow2 as u64;
}
($name(ret), carry > 0)
($name:ident, $n_words:tt, $self_expr: expr, $other: expr) => ({
uint_overflowing_binop!(
$name,
$n_words,
$self_expr,
$other,
u64::overflowing_add
)
})
}
#[cfg(all(asm_available, target_arch="x86_64"))]
macro_rules! uint_overflowing_add {
(U256, $n_words: expr, $self_expr: expr, $other: expr) => ({
(U256, $n_words:tt, $self_expr: expr, $other: expr) => ({
let mut result: [u64; $n_words] = unsafe { ::core::mem::uninitialized() };
let self_t: &[u64; $n_words] = &$self_expr.0;
let other_t: &[u64; $n_words] = &$other.0;
@ -106,7 +98,7 @@ macro_rules! uint_overflowing_add {
}
(U256(result), overflow != 0)
});
(U512, $n_words: expr, $self_expr: expr, $other: expr) => ({
(U512, $n_words:tt, $self_expr: expr, $other: expr) => ({
let mut result: [u64; $n_words] = unsafe { ::core::mem::uninitialized() };
let self_t: &[u64; $n_words] = &$self_expr.0;
let other_t: &[u64; $n_words] = &$other.0;
@ -152,42 +144,76 @@ macro_rules! uint_overflowing_add {
(U512(result), overflow != 0)
});
($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => (
($name:ident, $n_words:tt, $self_expr: expr, $other: expr) => (
uint_overflowing_add_reg!($name, $n_words, $self_expr, $other)
)
}
#[cfg(not(all(asm_available, target_arch="x86_64")))]
macro_rules! uint_overflowing_sub {
($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({
($name:ident, $n_words:tt, $self_expr: expr, $other: expr) => ({
uint_overflowing_sub_reg!($name, $n_words, $self_expr, $other)
})
}
macro_rules! uint_overflowing_sub_reg {
($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({
macro_rules! uint_overflowing_binop {
($name:ident, $n_words:tt, $self_expr: expr, $other: expr, $fn:expr) => ({
let $name(ref me) = $self_expr;
let $name(ref you) = $other;
let mut ret = [0u64; $n_words];
let mut ret = unsafe { ::core::mem::uninitialized() };
let ret_ptr = &mut ret as *mut [u64; $n_words] as *mut u64;
let mut carry = 0u64;
for i in 0..$n_words {
let (res1, overflow1) = me[i].overflowing_sub(you[i]);
let (res2, overflow2) = res1.overflowing_sub(carry);
unroll! {
for i in 0..$n_words {
use ::core::ptr;
ret[i] = res2;
carry = overflow1 as u64 + overflow2 as u64;
if carry != 0 {
let (res1, overflow1) = ($fn)(me[i], you[i]);
let (res2, overflow2) = ($fn)(res1, carry);
unsafe {
ptr::write(
ret_ptr.offset(i as _),
res2
);
}
carry = (overflow1 as u8 + overflow2 as u8) as u64;
} else {
let (res, overflow) = ($fn)(me[i], you[i]);
unsafe {
ptr::write(
ret_ptr.offset(i as _),
res
);
}
carry = overflow as u64;
}
}
}
($name(ret), carry > 0)
})
}
macro_rules! uint_overflowing_sub_reg {
($name:ident, $n_words:tt, $self_expr: expr, $other: expr) => ({
uint_overflowing_binop!(
$name,
$n_words,
$self_expr,
$other,
u64::overflowing_sub
)
})
}
#[cfg(all(asm_available, target_arch="x86_64"))]
macro_rules! uint_overflowing_sub {
(U256, $n_words: expr, $self_expr: expr, $other: expr) => ({
(U256, $n_words:tt, $self_expr: expr, $other: expr) => ({
let mut result: [u64; $n_words] = unsafe { ::core::mem::uninitialized() };
let self_t: &[u64; $n_words] = &$self_expr.0;
let other_t: &[u64; $n_words] = &$other.0;
@ -209,7 +235,7 @@ macro_rules! uint_overflowing_sub {
}
(U256(result), overflow != 0)
});
(U512, $n_words: expr, $self_expr: expr, $other: expr) => ({
(U512, $n_words:tt, $self_expr: expr, $other: expr) => ({
let mut result: [u64; $n_words] = unsafe { ::core::mem::uninitialized() };
let self_t: &[u64; $n_words] = &$self_expr.0;
let other_t: &[u64; $n_words] = &$other.0;
@ -254,7 +280,7 @@ macro_rules! uint_overflowing_sub {
}
(U512(result), overflow != 0)
});
($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({
($name:ident, $n_words:tt, $self_expr: expr, $other: expr) => ({
uint_overflowing_sub_reg!($name, $n_words, $self_expr, $other)
})
}
@ -374,76 +400,82 @@ macro_rules! uint_overflowing_mul {
}
(U256(result), overflow > 0)
});
($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => (
($name:ident, $n_words:tt, $self_expr: expr, $other: expr) => (
uint_overflowing_mul_reg!($name, $n_words, $self_expr, $other)
)
}
#[cfg(not(all(asm_available, target_arch="x86_64")))]
macro_rules! uint_overflowing_mul {
($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({
($name:ident, $n_words:tt, $self_expr: expr, $other: expr) => ({
uint_overflowing_mul_reg!($name, $n_words, $self_expr, $other)
})
}
macro_rules! uint_overflowing_mul_reg {
($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({
macro_rules! uint_full_mul_reg {
($name:ident, $n_words:tt, $self_expr:expr, $other:expr) => ({{
#![allow(unused_assignments)]
let $name(ref me) = $self_expr;
let $name(ref you) = $other;
let mut ret = [0u64; 2*$n_words];
let mut i = 0;
for _ in 0..$n_words {
if you[i] == 0 {
i += 1;
continue;
}
unroll! {
for i in 0..$n_words {
let mut carry = 0u64;
let (b_u, b_l) = split(you[i]);
let mut carry2 = 0u64;
let (b_u, b_l) = split(you[i]);
unroll! {
for j in 0..$n_words {
if me[j] != 0 || carry != 0 {
let a = split(me[j]);
let mut j = 0;
for _ in 0..$n_words {
if me[j] == 0 && carry2 == 0 {
j += 1;
continue;
// multiply parts
let (c_l, overflow_l) = mul_u32(a, b_l, ret[i + j]);
let (c_u, overflow_u) = mul_u32(a, b_u, c_l >> 32);
ret[i + j] = (c_l & 0xFFFFFFFF) + (c_u << 32);
// No overflow here
let res = (c_u >> 32) + (overflow_u << 32);
// possible overflows
let (res, o1) = res.overflowing_add(overflow_l + carry);
let (res, o2) = res.overflowing_add(ret[i + j + 1]);
ret[i + j + 1] = res;
// Only single overflow possible there
carry = (o1 | o2) as u64;
}
}
}
let a = split(me[j]);
// multiply parts
let (c_l, overflow_l) = mul_u32(a, b_l, ret[i + j]);
let (c_u, overflow_u) = mul_u32(a, b_u, c_l >> 32);
ret[i + j] = (c_l & 0xFFFFFFFF) + (c_u << 32);
// No overflow here
let res = (c_u >> 32) + (overflow_u << 32);
// possible overflows
let (res, o1) = res.overflowing_add(overflow_l);
let (res, o2) = res.overflowing_add(carry2);
let (res, o3) = res.overflowing_add(ret[i + j + 1]);
ret[i + j + 1] = res;
// Only single overflow possible there
carry2 = (o1 | o2 | o3) as u64;
j += 1;
}
i += 1;
}
let mut res = [0u64; $n_words];
let mut overflow = false;
res.copy_from_slice(&ret[0..$n_words]);
unsafe {
let mut ret_ptr = ret.as_ptr().offset($n_words);
for _ in $n_words..2*$n_words {
overflow |= *ret_ptr != 0;
ret_ptr = ret_ptr.offset(1);
}
}
($name(res), overflow)
ret
}})
}
macro_rules! uint_overflowing_mul_reg {
($name:ident, $n_words:tt, $self_expr: expr, $other: expr) => ({
let ret: [u64; $n_words * 2] = uint_full_mul_reg!($name, $n_words, $self_expr, $other);
// The safety of this is enforced by the compiler
let ret: [[u64; $n_words]; 2] = unsafe { mem::transmute(ret) };
// The compiler WILL NOT inline this if you remove this annotation.
#[inline(always)]
fn any_nonzero(arr: &[u64; $n_words]) -> bool {
unroll! {
for i in 0..$n_words {
if arr[i] != 0 {
return true;
}
}
}
false
}
($name(ret[0]), any_nonzero(&ret[1]))
})
}
@ -489,7 +521,7 @@ fn split(a: u64) -> (u64, u64) {
}
macro_rules! construct_uint {
($name:ident, $n_words:expr) => (
($name:ident, $n_words:tt) => (
/// Little-endian large integer type
#[repr(C)]
#[derive(Copy, Clone, Eq, PartialEq, Hash)]
@ -1161,9 +1193,9 @@ macro_rules! construct_uint {
);
}
construct_uint!(U512, 8);
construct_uint!(U256, 4);
construct_uint!(U128, 2);
construct_uint!(U256, 4);
construct_uint!(U512, 8);
impl U256 {
/// Multiplies two 256-bit integers to produce full 512-bit integer
@ -1307,46 +1339,10 @@ impl U256 {
/// Multiplies two 256-bit integers to produce full 512-bit integer
/// No overflow possible
#[inline(always)]
#[cfg(not(all(asm_available, target_arch="x86_64")))]
pub fn full_mul(self, other: U256) -> U512 {
let U256(ref me) = self;
let U256(ref you) = other;
let mut ret = [0u64; 8];
for i in 0..4 {
if you[i] == 0 {
continue;
}
let mut carry2 = 0u64;
let (b_u, b_l) = split(you[i]);
for j in 0..4 {
if me[j] == 0 && carry2 == 0 {
continue;
}
let a = split(me[j]);
// multiply parts
let (c_l, overflow_l) = mul_u32(a, b_l, ret[i + j]);
let (c_u, overflow_u) = mul_u32(a, b_u, c_l >> 32);
ret[i + j] = (c_l & 0xFFFFFFFF) + (c_u << 32);
// No overflow here
let res = (c_u >> 32) + (overflow_u << 32);
// possible overflows
let (res, o1) = res.overflowing_add(overflow_l);
let (res, o2) = res.overflowing_add(carry2);
let (res, o3) = res.overflowing_add(ret[i + j + 1]);
ret[i + j + 1] = res;
// Only single overflow possible there
carry2 = (o1 | o2 | o3) as u64;
}
}
U512(ret)
U512(uint_full_mul_reg!(U256, 4, self, other))
}
}