2019-06-01 07:55:43 -07:00
|
|
|
//! The `weighted_shuffle` module provides functions that order indexes by a seeded, weighted shuffle.
|
|
|
|
|
|
|
|
use itertools::Itertools;
|
|
|
|
use num_traits::{FromPrimitive, ToPrimitive};
|
2019-10-29 21:02:11 -07:00
|
|
|
use rand::{Rng, SeedableRng};
|
2019-06-01 07:55:43 -07:00
|
|
|
use rand_chacha::ChaChaRng;
|
|
|
|
use std::iter;
|
|
|
|
use std::ops::Div;
|
|
|
|
|
2019-07-17 12:44:28 -07:00
|
|
|
/// Returns a list of indexes shuffled based on the input weights
|
|
|
|
/// Note - The sum of all weights must not exceed `u64::MAX`
|
2019-10-29 21:02:11 -07:00
|
|
|
pub fn weighted_shuffle<T>(weights: Vec<T>, seed: [u8; 32]) -> Vec<usize>
|
2019-06-01 07:55:43 -07:00
|
|
|
where
|
|
|
|
T: Copy + PartialOrd + iter::Sum + Div<T, Output = T> + FromPrimitive + ToPrimitive,
|
|
|
|
{
|
|
|
|
let total_weight: T = weights.clone().into_iter().sum();
|
2019-10-29 21:02:11 -07:00
|
|
|
let mut rng = ChaChaRng::from_seed(seed);
|
2019-06-01 07:55:43 -07:00
|
|
|
weights
|
|
|
|
.into_iter()
|
|
|
|
.enumerate()
|
|
|
|
.map(|(i, v)| {
|
2019-10-29 17:04:11 -07:00
|
|
|
// This generates an "inverse" weight but it avoids floating point math
|
2019-07-17 12:44:28 -07:00
|
|
|
let x = (total_weight / v)
|
|
|
|
.to_u64()
|
|
|
|
.expect("values > u64::max are not supported");
|
2019-06-01 07:55:43 -07:00
|
|
|
(
|
|
|
|
i,
|
2019-07-17 12:44:28 -07:00
|
|
|
// capture the u64 into u128s to prevent overflow
|
2019-10-29 17:04:11 -07:00
|
|
|
rng.gen_range(1, u128::from(std::u16::MAX)) * u128::from(x),
|
2019-06-01 07:55:43 -07:00
|
|
|
)
|
|
|
|
})
|
2019-10-29 17:04:11 -07:00
|
|
|
// sort in ascending order
|
2019-06-01 07:55:43 -07:00
|
|
|
.sorted_by(|(_, l_val), (_, r_val)| l_val.cmp(r_val))
|
|
|
|
.map(|x| x.0)
|
|
|
|
.collect()
|
|
|
|
}
|
|
|
|
|
2019-10-01 09:38:29 -07:00
|
|
|
/// Returns the highest index after computing a weighted shuffle.
|
|
|
|
/// Saves doing any sorting for O(n) max calculation.
|
2019-10-29 21:02:11 -07:00
|
|
|
pub fn weighted_best(weights_and_indexes: &[(u64, usize)], seed: [u8; 32]) -> usize {
|
2019-10-29 17:04:11 -07:00
|
|
|
if weights_and_indexes.is_empty() {
|
2019-10-01 09:38:29 -07:00
|
|
|
return 0;
|
|
|
|
}
|
2019-10-29 21:02:11 -07:00
|
|
|
let mut rng = ChaChaRng::from_seed(seed);
|
2019-10-29 17:04:11 -07:00
|
|
|
let total_weight: u64 = weights_and_indexes.iter().map(|x| x.0).sum();
|
|
|
|
let mut lowest_weight = std::u128::MAX;
|
2019-10-01 09:38:29 -07:00
|
|
|
let mut best_index = 0;
|
2019-10-29 17:04:11 -07:00
|
|
|
for v in weights_and_indexes {
|
|
|
|
// This generates an "inverse" weight but it avoids floating point math
|
2019-10-01 09:38:29 -07:00
|
|
|
let x = (total_weight / v.0)
|
|
|
|
.to_u64()
|
|
|
|
.expect("values > u64::max are not supported");
|
|
|
|
// capture the u64 into u128s to prevent overflow
|
2019-10-29 17:04:11 -07:00
|
|
|
let computed_weight = rng.gen_range(1, u128::from(std::u16::MAX)) * u128::from(x);
|
|
|
|
// The highest input weight maps to the lowest computed weight
|
|
|
|
if computed_weight < lowest_weight {
|
|
|
|
lowest_weight = computed_weight;
|
2019-10-01 09:38:29 -07:00
|
|
|
best_index = v.1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
best_index
|
|
|
|
}
|
|
|
|
|
2019-06-01 07:55:43 -07:00
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `shuffle` contains every index in `0..len` exactly once.
    fn assert_is_permutation(shuffle: &[usize], len: usize) {
        let mut seen = vec![false; len];
        for &index in shuffle {
            assert!(!seen[index], "index {} appears more than once", index);
            seen[index] = true;
        }
        assert_eq!(shuffle.len(), len);
    }

    // The shuffle of a small weight list must visit every index exactly once.
    #[test]
    fn test_weighted_shuffle_iterator() {
        let shuffle = weighted_shuffle(vec![50, 10, 2, 1, 1, 1], [0x5a; 32]);
        assert_is_permutation(&shuffle, 6);
    }

    // Same permutation check with 100 distinct weights (1 through 100).
    #[test]
    fn test_weighted_shuffle_iterator_large() {
        let test_weights: Vec<u64> = (1..=100).collect();
        let shuffle = weighted_shuffle(test_weights, [0xa5; 32]);
        assert_is_permutation(&shuffle, 100);
    }

    // Identical weights and seed must produce the identical order.
    #[test]
    fn test_weighted_shuffle_compare() {
        let shuffle = weighted_shuffle(vec![50, 10, 2, 1, 1, 1], [0x5a; 32]);
        let shuffle1 = weighted_shuffle(vec![50, 10, 2, 1, 1, 1], [0x5a; 32]);
        assert_eq!(shuffle1, shuffle);
    }

    // A very lopsided weight list must neither overflow nor misplace the
    // light entry.
    #[test]
    fn test_weighted_shuffle_imbalanced() {
        let heavy = u64::from(std::u32::MAX);
        let weights = vec![heavy, heavy, heavy, 1];
        let light_index = weights.len() - 1;
        let shuffle = weighted_shuffle(weights, [0x5a; 32]);
        assert_is_permutation(&shuffle, 4);
        // Each heavy entry has inverse weight 3, so its key is below 3 * 2^16.
        // The light entry has inverse weight 3 * 2^32 - 2, so its key is always
        // larger and it must come last whatever the seed is.
        assert_eq!(shuffle.last(), Some(&light_index));
    }

    // With this seed the heaviest entry (index 2) is the one selected.
    #[test]
    fn test_weighted_best() {
        let weights_and_indexes: Vec<(u64, usize)> = [100u64, 1000, 10_000, 10]
            .iter()
            .enumerate()
            .map(|(i, &weight)| (weight, i))
            .collect();
        let best_index = weighted_best(&weights_and_indexes, [0x5b; 32]);
        assert_eq!(best_index, 2);
    }
}
|