arithmetic::best_multiexp parallelize bucket arithmetic

2023-09-06 12:39:59 +09:00 · 2023-09-06 12:39:59 +09:00 · e00f0d1233
parent af1713f1d3
commit e00f0d1233
1 changed files with 109 additions and 100 deletions
--- a/halo2_proofs/src/arithmetic.rs
+++ b/halo2_proofs/src/arithmetic.rs
@ -7,7 +7,7 @@ use group::{
    ff::{BatchInvert, PrimeField},
    Group as _, GroupOpsOwned, ScalarMulOwned,
 };
-
+use maybe_rayon::prelude::*;
 pub use pasta_curves::arithmetic::*;

 /// This represents an element of a group with basic operations that can be
@ -25,96 +25,57 @@ where
 {
 }

-fn multiexp_serial<C: CurveAffine>(coeffs: &[C::Scalar], bases: &[C], acc: &mut C::Curve) {
-    let coeffs: Vec<_> = coeffs.iter().map(|a| a.to_repr()).collect();
+/// Accumulator for the points that land in one bucket of a multiexp window.
+///
+/// A bucket starts as `None`, holds a single affine point after the first
+/// `add_assign`, and becomes a projective (`C::Curve`) sum from the second
+/// `add_assign` onwards.
+#[derive(Clone, Copy)]
+enum Bucket<C: CurveAffine> {
+    /// Nothing has been added yet.
+    None,
+    /// Exactly one point has been added; it is kept in affine form.
+    Affine(C),
+    /// Running sum of two or more added points.
+    Projective(C::Curve),
+}

-    let c = if bases.len() < 4 {
-        1
-    } else if bases.len() < 32 {
-        3
-    } else {
-        (f64::from(bases.len() as u32)).ln().ceil() as usize
-    };
-
-    fn get_at<F: PrimeField>(segment: usize, c: usize, bytes: &F::Repr) -> usize {
-        let skip_bits = segment * c;
-        let skip_bytes = skip_bits / 8;
-
-        if skip_bytes >= 32 {
-            return 0;
+impl<C: CurveAffine> Bucket<C> {
+    /// Adds the affine point `other` into this bucket in place.
+    ///
+    /// `None` becomes `Affine(other)`, `Affine(a)` becomes
+    /// `Projective(a + other)`, and `Projective(a)` accumulates `other`.
+    fn add_assign(&mut self, other: &C) {
+        *self = match *self {
+            Bucket::None => Bucket::Affine(*other),
+            Bucket::Affine(a) => Bucket::Projective(a + *other),
+            Bucket::Projective(mut a) => {
+                a += *other;
+                Bucket::Projective(a)
+            }
        }
-
-        let mut v = [0; 8];
-        for (v, o) in v.iter_mut().zip(bytes.as_ref()[skip_bytes..].iter()) {
-            *v = *o;
-        }
-
-        let mut tmp = u64::from_le_bytes(v);
-        tmp >>= skip_bits - (skip_bytes * 8);
-        tmp %= 1 << c;
-
-        tmp as usize
    }

-    let segments = (256 / c) + 1;
-
-    for current_segment in (0..segments).rev() {
-        for _ in 0..c {
-            *acc = acc.double();
-        }
-
-        #[derive(Clone, Copy)]
-        enum Bucket<C: CurveAffine> {
-            None,
-            Affine(C),
-            Projective(C::Curve),
-        }
-
-        impl<C: CurveAffine> Bucket<C> {
-            fn add_assign(&mut self, other: &C) {
-                *self = match *self {
-                    Bucket::None => Bucket::Affine(*other),
-                    Bucket::Affine(a) => Bucket::Projective(a + *other),
-                    Bucket::Projective(mut a) => {
-                        a += *other;
-                        Bucket::Projective(a)
-                    }
-                }
+    /// Returns `other` plus the contents of this bucket.
+    ///
+    /// An empty bucket returns `other` unchanged.
+    fn add(self, mut other: C::Curve) -> C::Curve {
+        match self {
+            Bucket::None => other,
+            Bucket::Affine(a) => {
+                other += a;
+                other
            }
-
-            fn add(self, mut other: C::Curve) -> C::Curve {
-                match self {
-                    Bucket::None => other,
-                    Bucket::Affine(a) => {
-                        other += a;
-                        other
-                    }
-                    Bucket::Projective(a) => other + &a,
-                }
-            }
-        }
-
-        let mut buckets: Vec<Bucket<C>> = vec![Bucket::None; (1 << c) - 1];
-
-        for (coeff, base) in coeffs.iter().zip(bases.iter()) {
-            let coeff = get_at::<C::Scalar>(current_segment, c, coeff);
-            if coeff != 0 {
-                buckets[coeff - 1].add_assign(base);
-            }
-        }
-
-        // Summation by parts
-        // e.g. 3a + 2b + 1c = a +
-        //                    (a) + b +
-        //                    ((a) + b) + c
-        let mut running_sum = C::Curve::identity();
-        for exp in buckets.into_iter().rev() {
-            running_sum = exp.add(running_sum);
-            *acc += &running_sum;
+            Bucket::Projective(a) => other + &a,
        }
    }
 }

+/// Returns the `segment`-th `c`-bit window of `bytes`, reading `bytes` as a
+/// little-endian integer. The result lies in `0..(1 << c)`.
+///
+/// A window whose first byte is at index 32 or beyond yields 0.
+fn get_at<F: PrimeField>(segment: usize, c: usize, bytes: &F::Repr) -> usize {
+    let skip_bits = segment * c;
+    let skip_bytes = skip_bits / 8;
+
+    // NOTE(review): the 32-byte limit is hard-coded; confirm it matches the
+    // representation length of every `F` this is used with.
+    if skip_bytes >= 32 {
+        return 0;
+    }
+
+    // Copy up to 8 bytes starting at the window's first byte. `zip` stops at
+    // the end of the representation, leaving any remaining bytes of `v` zero.
+    let mut v = [0; 8];
+    for (v, o) in v.iter_mut().zip(bytes.as_ref()[skip_bytes..].iter()) {
+        *v = *o;
+    }
+
+    let mut tmp = u64::from_le_bytes(v);
+    // Drop the bits below the window start, then keep only the low `c` bits.
+    tmp >>= skip_bits - (skip_bytes * 8);
+    tmp %= 1 << c;
+
+    tmp as usize
+}
+
 /// Performs a small multi-exponentiation operation.
 /// Uses the double-and-add algorithm with doublings shared across points.
 pub fn small_multiexp<C: CurveAffine>(coeffs: &[C::Scalar], bases: &[C]) -> C::Curve {
@ -147,29 +108,77 @@ pub fn small_multiexp<C: CurveAffine>(coeffs: &[C::Scalar], bases: &[C]) -> C::C
 pub fn best_multiexp<C: CurveAffine>(coeffs: &[C::Scalar], bases: &[C]) -> C::Curve {
    assert_eq!(coeffs.len(), bases.len());

+    // Window width in bits: 1 for fewer than 4 bases, 3 for fewer than 32,
+    // otherwise ceil(ln(number of bases)).
+    let c = if bases.len() < 4 {
+        1
+    } else if bases.len() < 32 {
+        3
+    } else {
+        (f64::from(bases.len() as u32)).ln().ceil() as usize
+    };
+
+    // One bucket vector per window: (256 / c) + 1 windows, each holding
+    // (1 << c) - 1 buckets (a window digit of 0 needs no bucket).
+    let mut multi_buckets: Vec<Vec<Bucket<C>>> =
+        vec![vec![Bucket::None; (1 << c) - 1]; (256 / c) + 1];
+
    let num_threads = multicore::current_num_threads();
    if coeffs.len() > num_threads {
-        let chunk = coeffs.len() / num_threads;
-        let num_chunks = coeffs.chunks(chunk).len();
-        let mut results = vec![C::Curve::identity(); num_chunks];
-        multicore::scope(|scope| {
-            let chunk = coeffs.len() / num_threads;
+        // Parallel path: each window is processed independently and scaled to
+        // its final weight before the results are summed.
+        multi_buckets
+            .par_iter_mut()
+            .enumerate()
+            .rev()
+            .map(|(i, buckets)| {
+                // Read window `i` of every coefficient and add the matching base
+                // to the bucket for that digit; a digit of 0 contributes nothing.
+                // NOTE(review): `to_repr()` is recomputed for every window here;
+                // the removed code converted each coefficient once up front.
+                for (coeff, base) in coeffs.iter().zip(bases.iter()) {
+                    let seg = get_at::<C::Scalar>(i, c, &coeff.to_repr());
+                    if seg != 0 {
+                        buckets[seg - 1].add_assign(base);
+                    }
+                }

-            for ((coeffs, bases), acc) in coeffs
-                .chunks(chunk)
-                .zip(bases.chunks(chunk))
-                .zip(results.iter_mut())
-            {
-                scope.spawn(move |_| {
-                    multiexp_serial(coeffs, bases, acc);
+                // Summation by parts
+                // e.g. 3a + 2b + 1c = a +
+                //                    (a) + b +
+                //                    ((a) + b) + c
+                let mut acc = C::Curve::identity();
+                let mut sum = C::Curve::identity();
+                buckets.iter().rev().for_each(|b| {
+                    sum = b.add(sum);
+                    acc += sum;
                });
-            }
-        });
-        results.iter().fold(C::Curve::identity(), |a, b| a + b)
+                // Scale this window's sum by 2^(c * i) so the per-window results
+                // can simply be added together.
+                // NOTE(review): this costs c * i doublings per window, whereas
+                // the serial path shares c doublings per step across windows.
+                (0..c * i).for_each(|_| acc = acc.double());
+                acc
+            })
+            .reduce(|| C::Curve::identity(), |a, b| a + b)
    } else {
-        let mut acc = C::Curve::identity();
-        multiexp_serial(coeffs, bases, &mut acc);
-        acc
+        // Serial path: windows are visited from the highest index down
+        // (`.rev()`) so the fold below can combine them with shared doublings.
+        multi_buckets
+            .iter_mut()
+            .enumerate()
+            .rev()
+            .map(|(i, buckets)| {
+                // Read window `i` of every coefficient and add the matching base
+                // to the bucket for that digit; a digit of 0 contributes nothing.
+                // NOTE(review): `to_repr()` is recomputed for every window here;
+                // the removed code converted each coefficient once up front.
+                for (coeff, base) in coeffs.iter().zip(bases.iter()) {
+                    let seg = get_at::<C::Scalar>(i, c, &coeff.to_repr());
+                    if seg != 0 {
+                        buckets[seg - 1].add_assign(base);
+                    }
+                }
+
+                // Summation by parts
+                // e.g. 3a + 2b + 1c = a +
+                //                    (a) + b +
+                //                    ((a) + b) + c
+                let mut acc = C::Curve::identity();
+                let mut sum = C::Curve::identity();
+                buckets.iter().rev().for_each(|b| {
+                    sum = b.add(sum);
+                    acc += sum;
+                });
+                acc
+            })
+            .fold(C::Curve::identity(), |mut sum, bucket| {
+                // Shift the running total up by one window (c doublings) before
+                // adding the next lower window's sum.
+                (0..c).for_each(|_| sum = sum.double());
+                sum + bucket
+            })
    }
 }