2018-06-28 03:49:11 -07:00
|
|
|
|
//! # Broadcast
|
|
|
|
|
//!
|
|
|
|
|
//! The Reliable Broadcast Protocol assumes a network of `N` nodes that send signed messages to
|
|
|
|
|
//! each other, with at most `f` of them faulty, where `3 * f < N`. Handling the networking and
|
2018-06-28 12:46:51 -07:00
|
|
|
|
//! signing is the responsibility of this crate's user; a message is only handed to the Broadcast
|
|
|
|
|
//! instance after it has been verified to be "from node i". One of the nodes is the "proposer"
|
|
|
|
|
//! who sends a value. It needs to be determined beforehand, and all nodes need to know and agree
|
|
|
|
|
//! who it is. Under the above conditions, the protocol guarantees that either all or none
|
2018-06-28 03:49:11 -07:00
|
|
|
|
//! of the correct nodes output a value, and that if the proposer is correct, all correct nodes
|
|
|
|
|
//! output the proposed value.
|
|
|
|
|
//!
|
|
|
|
|
//! ## How it works
|
|
|
|
|
//!
|
|
|
|
|
//! * The proposer uses a Reed-Solomon code to split the value into `N` chunks, `f + 1` of which
|
|
|
|
|
//! suffice to reconstruct the value. These chunks are put into a Merkle tree, so that with the
|
|
|
|
|
//! tree's root hash `h`, branch `bi` and chunk `si`, the `i`-th chunk `si` can be verified by
|
|
|
|
|
//! anyone as belonging to the Merkle tree with root hash `h`. These values are "proof" number `i`:
|
|
|
|
|
//! `pi = (h, bi, si)`.
|
|
|
|
|
//! * The proposer sends `Value(pi)` to node `i`. It translates to: "I am the proposer, and `pi`
|
|
|
|
|
//! contains the `i`-th share of my value."
|
|
|
|
|
//! * Every (correct) node that receives `Value(pi)` from the proposer sends it on to everyone else
|
|
|
|
|
//! as `Echo(pi)`. An `Echo` translates to: "I have received `pi` directly from the proposer." If
|
2018-06-28 12:46:51 -07:00
|
|
|
|
//! the proposer sends another `Value` message it is ignored.
|
|
|
|
|
//! * So every node that receives at least `f + 1` `Echo` messages with the same root hash can
|
|
|
|
|
//! decode a value.
|
2018-06-28 03:49:11 -07:00
|
|
|
|
//! * Every node that has received `N - f` `Echo`s with the same root hash from different nodes
|
|
|
|
|
//! knows that at least `f + 1` _correct_ nodes have sent an `Echo` with that hash to everyone, and
|
|
|
|
|
//! therefore everyone will eventually receive at least `f + 1` of them. So upon receiving `N - f`
|
2018-06-28 12:46:51 -07:00
|
|
|
|
//! `Echo`s, they send a `Ready(h)` to everyone. It translates to: "I know that everyone will
|
|
|
|
|
//! eventually be able to decode the value with root hash `h`." Moreover, since every correct node
|
|
|
|
|
//! only sends one kind of `Echo` message, there is no danger of receiving `N - f` `Echo`s with two
|
|
|
|
|
//! different root hashes.
|
2018-06-28 03:49:11 -07:00
|
|
|
|
//! * Even without enough `Echo` messages, if a node receives `f + 1` `Ready` messages, it knows
|
|
|
|
|
//! that at least one _correct_ node has sent `Ready`. It therefore also knows that everyone will
|
|
|
|
|
//! be able to decode eventually, and multicasts `Ready` itself.
|
|
|
|
|
//! * If a node has received `2 * f + 1` `Ready`s (with matching root hash) from different nodes,
|
|
|
|
|
//! it knows that at least `f + 1` _correct_ nodes have sent it. Therefore, every correct node will
|
|
|
|
|
//! eventually receive `f + 1`, and multicast it itself. Therefore, every correct node will
|
|
|
|
|
//! eventually receive `2 * f + 1` `Ready`s, too. _And_ we know at this point that every correct
|
|
|
|
|
//! node will eventually be able to decode (i.e. receive at least `f + 1` `Echo` messages).
|
|
|
|
|
//! * So a node with `2 * f + 1` `Ready`s and `f + 1` `Echos` will decode and _output_ the value,
|
|
|
|
|
//! knowing that every other correct node will eventually do the same.
|
|
|
|
|
|
2018-05-29 05:17:30 -07:00
|
|
|
|
use std::collections::{BTreeMap, VecDeque};
|
|
|
|
|
use std::fmt::{self, Debug};
|
|
|
|
|
use std::iter::once;
|
|
|
|
|
use std::rc::Rc;
|
|
|
|
|
|
2018-05-30 06:33:33 -07:00
|
|
|
|
use byteorder::{BigEndian, ByteOrder};
|
2018-05-17 07:50:47 -07:00
|
|
|
|
use merkle::{MerkleTree, Proof};
|
2018-04-06 09:39:15 -07:00
|
|
|
|
use reed_solomon_erasure as rse;
|
2018-03-27 13:59:38 -07:00
|
|
|
|
use reed_solomon_erasure::ReedSolomon;
|
2018-05-14 09:30:07 -07:00
|
|
|
|
use ring::digest;
|
2018-04-05 05:09:46 -07:00
|
|
|
|
|
2018-05-30 06:33:33 -07:00
|
|
|
|
use fmt::{HexBytes, HexList, HexProof};
|
2018-05-29 05:17:30 -07:00
|
|
|
|
use messaging::{DistAlgorithm, NetworkInfo, Target, TargetedMessage};
|
2018-05-02 22:47:07 -07:00
|
|
|
|
|
2018-05-20 04:51:33 -07:00
|
|
|
|
error_chain!{
|
|
|
|
|
types {
|
|
|
|
|
Error, ErrorKind, ResultExt, BroadcastResult;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
foreign_links {
|
|
|
|
|
ReedSolomon(rse::Error);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
errors {
|
|
|
|
|
InstanceCannotPropose
|
|
|
|
|
NotImplemented
|
|
|
|
|
ProofConstructionFailed
|
|
|
|
|
RootHashMismatch
|
|
|
|
|
Threading
|
|
|
|
|
UnknownSender
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-05-08 07:20:32 -07:00
|
|
|
|
/// The three kinds of message sent during the reliable broadcast stage of the
|
|
|
|
|
/// consensus algorithm.
|
2018-06-20 01:21:52 -07:00
|
|
|
|
#[derive(Serialize, Deserialize, Clone, PartialEq)]
|
2018-05-10 08:50:07 -07:00
|
|
|
|
pub enum BroadcastMessage {
|
|
|
|
|
Value(Proof<Vec<u8>>),
|
|
|
|
|
Echo(Proof<Vec<u8>>),
|
2018-05-08 07:20:32 -07:00
|
|
|
|
Ready(Vec<u8>),
|
|
|
|
|
}
|
|
|
|
|
|
2018-05-17 07:50:47 -07:00
|
|
|
|
impl Debug for BroadcastMessage {
|
2018-05-08 07:20:32 -07:00
|
|
|
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
|
|
|
match *self {
|
|
|
|
|
BroadcastMessage::Value(ref v) => write!(f, "Value({:?})", HexProof(&v)),
|
|
|
|
|
BroadcastMessage::Echo(ref v) => write!(f, "Echo({:?})", HexProof(&v)),
|
|
|
|
|
BroadcastMessage::Ready(ref bytes) => write!(f, "Ready({:?})", HexBytes(bytes)),
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-04-27 05:19:39 -07:00
|
|
|
|
/// Reliable Broadcast algorithm instance.
|
2018-06-18 07:37:07 -07:00
|
|
|
|
pub struct Broadcast<NodeUid> {
|
2018-05-29 05:17:30 -07:00
|
|
|
|
/// Shared network data.
|
2018-06-18 07:14:17 -07:00
|
|
|
|
netinfo: Rc<NetworkInfo<NodeUid>>,
|
2018-05-03 01:07:37 -07:00
|
|
|
|
/// The UID of the sending node.
|
2018-06-18 07:14:17 -07:00
|
|
|
|
proposer_id: NodeUid,
|
2018-04-25 12:41:46 -07:00
|
|
|
|
data_shard_num: usize,
|
2018-05-14 07:16:57 -07:00
|
|
|
|
coding: Coding,
|
2018-05-14 00:35:34 -07:00
|
|
|
|
/// Whether we have already multicast `Echo`.
|
|
|
|
|
echo_sent: bool,
|
|
|
|
|
/// Whether we have already multicast `Ready`.
|
|
|
|
|
ready_sent: bool,
|
|
|
|
|
/// Whether we have already output a value.
|
2018-05-15 07:05:55 -07:00
|
|
|
|
decided: bool,
|
2018-05-14 00:35:34 -07:00
|
|
|
|
/// The proofs we have received via `Echo` messages, by sender ID.
|
2018-06-18 07:14:17 -07:00
|
|
|
|
echos: BTreeMap<NodeUid, Proof<Vec<u8>>>,
|
2018-05-14 00:35:34 -07:00
|
|
|
|
/// The root hashes we received via `Ready` messages, by sender ID.
|
2018-06-18 07:14:17 -07:00
|
|
|
|
readys: BTreeMap<NodeUid, Vec<u8>>,
|
2018-05-14 05:35:06 -07:00
|
|
|
|
/// The outgoing message queue.
|
2018-06-18 07:14:17 -07:00
|
|
|
|
messages: VecDeque<TargetedMessage<BroadcastMessage, NodeUid>>,
|
2018-05-14 05:35:06 -07:00
|
|
|
|
/// The output, if any.
|
|
|
|
|
output: Option<Vec<u8>>,
|
2018-04-24 03:29:13 -07:00
|
|
|
|
}
|
|
|
|
|
|
2018-06-18 07:37:07 -07:00
|
|
|
|
impl<NodeUid: Debug + Clone + Ord> DistAlgorithm for Broadcast<NodeUid> {
|
2018-06-18 07:14:17 -07:00
|
|
|
|
type NodeUid = NodeUid;
|
2018-05-14 05:55:53 -07:00
|
|
|
|
// TODO: Allow anything serializable and deserializable, i.e. make this a type parameter
|
|
|
|
|
// T: Serialize + DeserializeOwned
|
|
|
|
|
type Input = Vec<u8>;
|
2018-05-14 05:35:06 -07:00
|
|
|
|
type Output = Self::Input;
|
|
|
|
|
type Message = BroadcastMessage;
|
|
|
|
|
type Error = Error;
|
|
|
|
|
|
2018-05-20 04:51:33 -07:00
|
|
|
|
fn input(&mut self, input: Self::Input) -> BroadcastResult<()> {
|
2018-05-29 05:17:30 -07:00
|
|
|
|
if *self.netinfo.our_uid() != self.proposer_id {
|
2018-05-20 04:51:33 -07:00
|
|
|
|
return Err(ErrorKind::InstanceCannotPropose.into());
|
2018-05-14 05:35:06 -07:00
|
|
|
|
}
|
|
|
|
|
// Split the value into chunks/shards, encode them with erasure codes.
|
|
|
|
|
// Assemble a Merkle tree from data and parity shards. Take all proofs
|
|
|
|
|
// from this tree and send them, each to its own node.
|
|
|
|
|
let proof = self.send_shards(input)?;
|
2018-05-29 05:17:30 -07:00
|
|
|
|
let our_uid = &self.netinfo.our_uid().clone();
|
|
|
|
|
self.handle_value(our_uid, proof)
|
2018-05-14 05:35:06 -07:00
|
|
|
|
}
|
|
|
|
|
|
2018-06-18 07:14:17 -07:00
|
|
|
|
fn handle_message(
|
|
|
|
|
&mut self,
|
|
|
|
|
sender_id: &NodeUid,
|
|
|
|
|
message: Self::Message,
|
|
|
|
|
) -> BroadcastResult<()> {
|
2018-05-29 05:17:30 -07:00
|
|
|
|
if !self.netinfo.all_uids().contains(sender_id) {
|
2018-05-20 04:51:33 -07:00
|
|
|
|
return Err(ErrorKind::UnknownSender.into());
|
2018-05-14 05:35:06 -07:00
|
|
|
|
}
|
|
|
|
|
match message {
|
|
|
|
|
BroadcastMessage::Value(p) => self.handle_value(sender_id, p),
|
|
|
|
|
BroadcastMessage::Echo(p) => self.handle_echo(sender_id, p),
|
|
|
|
|
BroadcastMessage::Ready(ref hash) => self.handle_ready(sender_id, hash),
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-06-18 07:14:17 -07:00
|
|
|
|
fn next_message(&mut self) -> Option<TargetedMessage<Self::Message, NodeUid>> {
|
2018-05-14 05:35:06 -07:00
|
|
|
|
self.messages.pop_front()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn next_output(&mut self) -> Option<Self::Output> {
|
|
|
|
|
self.output.take()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn terminated(&self) -> bool {
|
2018-05-15 07:05:55 -07:00
|
|
|
|
self.decided
|
2018-05-14 05:35:06 -07:00
|
|
|
|
}
|
|
|
|
|
|
2018-06-18 07:14:17 -07:00
|
|
|
|
fn our_id(&self) -> &NodeUid {
|
2018-05-29 05:17:30 -07:00
|
|
|
|
self.netinfo.our_uid()
|
2018-05-14 05:35:06 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-06-18 07:37:07 -07:00
|
|
|
|
impl<NodeUid: Debug + Clone + Ord> Broadcast<NodeUid> {
|
2018-05-03 01:07:37 -07:00
|
|
|
|
/// Creates a new broadcast instance to be used by node `our_id` which expects a value proposal
|
|
|
|
|
/// from node `proposer_id`.
|
2018-06-18 07:14:17 -07:00
|
|
|
|
pub fn new(netinfo: Rc<NetworkInfo<NodeUid>>, proposer_id: NodeUid) -> BroadcastResult<Self> {
|
2018-05-29 05:17:30 -07:00
|
|
|
|
let parity_shard_num = 2 * netinfo.num_faulty();
|
|
|
|
|
let data_shard_num = netinfo.num_nodes() - parity_shard_num;
|
2018-05-14 07:16:57 -07:00
|
|
|
|
let coding = Coding::new(data_shard_num, parity_shard_num)?;
|
2018-05-01 09:32:01 -07:00
|
|
|
|
|
|
|
|
|
Ok(Broadcast {
|
2018-05-29 05:17:30 -07:00
|
|
|
|
netinfo,
|
2018-05-03 01:07:37 -07:00
|
|
|
|
proposer_id,
|
2018-05-01 09:32:01 -07:00
|
|
|
|
data_shard_num,
|
|
|
|
|
coding,
|
2018-05-14 00:35:34 -07:00
|
|
|
|
echo_sent: false,
|
|
|
|
|
ready_sent: false,
|
2018-05-15 07:05:55 -07:00
|
|
|
|
decided: false,
|
2018-05-14 00:35:34 -07:00
|
|
|
|
echos: BTreeMap::new(),
|
|
|
|
|
readys: BTreeMap::new(),
|
2018-05-14 05:35:06 -07:00
|
|
|
|
messages: VecDeque::new(),
|
|
|
|
|
output: None,
|
2018-05-01 07:28:31 -07:00
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
2018-04-27 05:19:39 -07:00
|
|
|
|
/// Breaks the input value into shards of equal length and encodes them --
|
|
|
|
|
/// and some extra parity shards -- with a Reed-Solomon erasure coding
|
|
|
|
|
/// scheme. The returned value contains the shard assigned to this
|
|
|
|
|
/// node. That shard doesn't need to be sent anywhere. It gets recorded in
|
|
|
|
|
/// the broadcast instance.
|
2018-05-20 04:51:33 -07:00
|
|
|
|
fn send_shards(&mut self, mut value: Vec<u8>) -> BroadcastResult<Proof<Vec<u8>>> {
|
2018-04-27 05:19:39 -07:00
|
|
|
|
let data_shard_num = self.coding.data_shard_count();
|
|
|
|
|
let parity_shard_num = self.coding.parity_shard_count();
|
|
|
|
|
|
2018-04-30 08:55:51 -07:00
|
|
|
|
debug!(
|
|
|
|
|
"Data shards: {}, parity shards: {}",
|
|
|
|
|
self.data_shard_num, parity_shard_num
|
|
|
|
|
);
|
2018-04-27 05:19:39 -07:00
|
|
|
|
// Insert the length of `v` so it can be decoded without the padding.
|
2018-05-30 06:33:33 -07:00
|
|
|
|
let payload_len = value.len() as u32;
|
|
|
|
|
value.splice(0..0, 0..4); // Insert four bytes at the beginning.
|
|
|
|
|
BigEndian::write_u32(&mut value[..4], payload_len); // Write the size.
|
2018-04-27 05:19:39 -07:00
|
|
|
|
let value_len = value.len();
|
|
|
|
|
// Size of a Merkle tree leaf value, in bytes.
|
|
|
|
|
let shard_len = if value_len % data_shard_num > 0 {
|
|
|
|
|
value_len / data_shard_num + 1
|
2018-04-30 08:55:51 -07:00
|
|
|
|
} else {
|
2018-04-27 05:19:39 -07:00
|
|
|
|
value_len / data_shard_num
|
|
|
|
|
};
|
|
|
|
|
// Pad the last data shard with zeros. Fill the parity shards with
|
|
|
|
|
// zeros.
|
|
|
|
|
value.resize(shard_len * (data_shard_num + parity_shard_num), 0);
|
|
|
|
|
|
|
|
|
|
debug!("value_len {}, shard_len {}", value_len, shard_len);
|
|
|
|
|
|
|
|
|
|
// Divide the vector into chunks/shards.
|
|
|
|
|
let shards_iter = value.chunks_mut(shard_len);
|
|
|
|
|
// Convert the iterator over slices into a vector of slices.
|
2018-05-04 00:58:21 -07:00
|
|
|
|
let mut shards: Vec<&mut [u8]> = shards_iter.collect();
|
2018-04-27 05:19:39 -07:00
|
|
|
|
|
2018-05-08 07:20:32 -07:00
|
|
|
|
debug!("Shards before encoding: {:?}", HexList(&shards));
|
2018-04-27 05:19:39 -07:00
|
|
|
|
|
|
|
|
|
// Construct the parity chunks/shards
|
2018-05-30 06:33:33 -07:00
|
|
|
|
self.coding
|
|
|
|
|
.encode(&mut shards)
|
|
|
|
|
.expect("the size and number of shards is correct");
|
2018-04-27 05:19:39 -07:00
|
|
|
|
|
2018-05-08 07:20:32 -07:00
|
|
|
|
debug!("Shards: {:?}", HexList(&shards));
|
2018-04-27 05:19:39 -07:00
|
|
|
|
|
2018-05-08 07:20:32 -07:00
|
|
|
|
// TODO: `MerkleTree` generates the wrong proof if a leaf occurs more than once, so we
|
|
|
|
|
// prepend an "index byte" to each shard. Consider using the `merkle_light` crate instead.
|
2018-05-10 08:50:07 -07:00
|
|
|
|
let shards_t: Vec<Vec<u8>> = shards
|
2018-05-08 07:20:32 -07:00
|
|
|
|
.into_iter()
|
|
|
|
|
.enumerate()
|
2018-05-29 05:17:30 -07:00
|
|
|
|
.map(|(i, s)| once(i as u8).chain(s.iter().cloned()).collect())
|
2018-05-08 07:20:32 -07:00
|
|
|
|
.collect();
|
2018-04-27 05:19:39 -07:00
|
|
|
|
|
|
|
|
|
// Convert the Merkle tree into a partial binary tree for later
|
|
|
|
|
// deconstruction into compound branches.
|
2018-05-14 09:30:07 -07:00
|
|
|
|
let mtree = MerkleTree::from_vec(&digest::SHA256, shards_t);
|
2018-04-27 05:19:39 -07:00
|
|
|
|
|
|
|
|
|
// Default result in case of `gen_proof` error.
|
2018-05-20 04:51:33 -07:00
|
|
|
|
let mut result = Err(ErrorKind::ProofConstructionFailed.into());
|
2018-05-29 05:17:30 -07:00
|
|
|
|
assert_eq!(self.netinfo.num_nodes(), mtree.iter().count());
|
2018-04-27 05:19:39 -07:00
|
|
|
|
|
|
|
|
|
// Send each proof to a node.
|
2018-06-14 02:05:05 -07:00
|
|
|
|
for (leaf_value, uid) in mtree.iter().zip(self.netinfo.all_uids()) {
|
2018-05-04 00:58:21 -07:00
|
|
|
|
let proof = mtree
|
|
|
|
|
.gen_proof(leaf_value.to_vec())
|
2018-05-20 04:51:33 -07:00
|
|
|
|
.ok_or(ErrorKind::ProofConstructionFailed)?;
|
2018-05-29 05:17:30 -07:00
|
|
|
|
if *uid == *self.netinfo.our_uid() {
|
2018-05-04 00:58:21 -07:00
|
|
|
|
// The proof is addressed to this node.
|
|
|
|
|
result = Ok(proof);
|
|
|
|
|
} else {
|
|
|
|
|
// Rest of the proofs are sent to remote nodes.
|
2018-05-14 05:35:06 -07:00
|
|
|
|
let msg = Target::Node(uid.clone()).message(BroadcastMessage::Value(proof));
|
|
|
|
|
self.messages.push_back(msg);
|
2018-04-27 05:19:39 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-05-14 05:35:06 -07:00
|
|
|
|
result
|
2018-05-02 22:47:07 -07:00
|
|
|
|
}
|
2018-04-25 12:41:46 -07:00
|
|
|
|
|
2018-05-02 22:47:07 -07:00
|
|
|
|
/// Handles a received echo and verifies the proof it contains.
|
2018-06-18 07:14:17 -07:00
|
|
|
|
fn handle_value(&mut self, sender_id: &NodeUid, p: Proof<Vec<u8>>) -> BroadcastResult<()> {
|
2018-05-08 07:20:32 -07:00
|
|
|
|
// If the sender is not the proposer, this is not the first `Value` or the proof is invalid,
|
|
|
|
|
// ignore.
|
2018-05-03 01:07:37 -07:00
|
|
|
|
if *sender_id != self.proposer_id {
|
2018-05-08 07:20:32 -07:00
|
|
|
|
info!(
|
|
|
|
|
"Node {:?} received Value from {:?} instead of {:?}.",
|
2018-05-29 05:17:30 -07:00
|
|
|
|
self.netinfo.our_uid(),
|
|
|
|
|
sender_id,
|
|
|
|
|
self.proposer_id
|
2018-05-08 07:20:32 -07:00
|
|
|
|
);
|
2018-05-14 05:35:06 -07:00
|
|
|
|
return Ok(());
|
2018-05-03 01:07:37 -07:00
|
|
|
|
}
|
2018-05-14 00:35:34 -07:00
|
|
|
|
if self.echo_sent {
|
2018-05-29 05:17:30 -07:00
|
|
|
|
info!(
|
|
|
|
|
"Node {:?} received multiple Values.",
|
|
|
|
|
self.netinfo.our_uid()
|
|
|
|
|
);
|
2018-05-14 05:35:06 -07:00
|
|
|
|
return Ok(());
|
2018-05-02 22:47:07 -07:00
|
|
|
|
}
|
2018-05-29 05:17:30 -07:00
|
|
|
|
if !self.validate_proof(&p, &self.netinfo.our_uid()) {
|
2018-05-14 05:35:06 -07:00
|
|
|
|
return Ok(());
|
2018-05-02 22:47:07 -07:00
|
|
|
|
}
|
2018-04-25 13:00:22 -07:00
|
|
|
|
|
2018-05-08 07:20:32 -07:00
|
|
|
|
// Otherwise multicast the proof in an `Echo` message, and handle it ourselves.
|
2018-06-26 05:50:06 -07:00
|
|
|
|
self.send_echo(p)
|
2018-05-02 22:47:07 -07:00
|
|
|
|
}
|
|
|
|
|
|
2018-05-08 07:20:32 -07:00
|
|
|
|
/// Handles a received `Echo` message.
|
2018-06-18 07:14:17 -07:00
|
|
|
|
fn handle_echo(&mut self, sender_id: &NodeUid, p: Proof<Vec<u8>>) -> BroadcastResult<()> {
|
2018-05-08 07:20:32 -07:00
|
|
|
|
// If the proof is invalid or the sender has already sent `Echo`, ignore.
|
2018-05-14 00:35:34 -07:00
|
|
|
|
if self.echos.contains_key(sender_id) {
|
2018-05-08 07:20:32 -07:00
|
|
|
|
info!(
|
|
|
|
|
"Node {:?} received multiple Echos from {:?}.",
|
2018-05-29 05:17:30 -07:00
|
|
|
|
self.netinfo.our_uid(),
|
|
|
|
|
sender_id,
|
2018-05-03 01:07:37 -07:00
|
|
|
|
);
|
2018-05-14 05:35:06 -07:00
|
|
|
|
return Ok(());
|
2018-05-02 22:47:07 -07:00
|
|
|
|
}
|
2018-05-08 07:20:32 -07:00
|
|
|
|
if !self.validate_proof(&p, sender_id) {
|
2018-05-14 05:35:06 -07:00
|
|
|
|
return Ok(());
|
2018-05-02 22:47:07 -07:00
|
|
|
|
}
|
|
|
|
|
|
2018-05-08 07:20:32 -07:00
|
|
|
|
let hash = p.root_hash.clone();
|
2018-05-02 22:47:07 -07:00
|
|
|
|
|
2018-05-08 07:20:32 -07:00
|
|
|
|
// Save the proof for reconstructing the tree later.
|
2018-05-14 00:35:34 -07:00
|
|
|
|
self.echos.insert(sender_id.clone(), p);
|
2018-05-08 07:20:32 -07:00
|
|
|
|
|
2018-05-29 05:17:30 -07:00
|
|
|
|
if self.ready_sent
|
|
|
|
|
|| self.count_echos(&hash) < self.netinfo.num_nodes() - self.netinfo.num_faulty()
|
|
|
|
|
{
|
2018-05-14 05:35:06 -07:00
|
|
|
|
return self.compute_output(&hash);
|
2018-05-02 22:47:07 -07:00
|
|
|
|
}
|
|
|
|
|
|
2018-05-08 07:20:32 -07:00
|
|
|
|
// Upon receiving `N - f` `Echo`s with this root hash, multicast `Ready`.
|
2018-06-26 05:50:06 -07:00
|
|
|
|
self.send_ready(&hash)
|
2018-05-02 22:47:07 -07:00
|
|
|
|
}
|
|
|
|
|
|
2018-05-08 07:20:32 -07:00
|
|
|
|
/// Handles a received `Ready` message.
|
2018-06-18 07:14:17 -07:00
|
|
|
|
fn handle_ready(&mut self, sender_id: &NodeUid, hash: &[u8]) -> BroadcastResult<()> {
|
2018-05-08 07:20:32 -07:00
|
|
|
|
// If the sender has already sent a `Ready` before, ignore.
|
2018-05-14 00:35:34 -07:00
|
|
|
|
if self.readys.contains_key(sender_id) {
|
2018-05-08 07:20:32 -07:00
|
|
|
|
info!(
|
|
|
|
|
"Node {:?} received multiple Readys from {:?}.",
|
2018-05-29 05:17:30 -07:00
|
|
|
|
self.netinfo.our_uid(),
|
|
|
|
|
sender_id
|
2018-05-08 07:20:32 -07:00
|
|
|
|
);
|
2018-05-14 05:35:06 -07:00
|
|
|
|
return Ok(());
|
2018-05-08 07:20:32 -07:00
|
|
|
|
}
|
2018-05-02 22:47:07 -07:00
|
|
|
|
|
2018-05-14 00:35:34 -07:00
|
|
|
|
self.readys.insert(sender_id.clone(), hash.to_vec());
|
2018-05-02 22:47:07 -07:00
|
|
|
|
|
|
|
|
|
// Upon receiving f + 1 matching Ready(h) messages, if Ready
|
|
|
|
|
// has not yet been sent, multicast Ready(h).
|
2018-05-29 05:17:30 -07:00
|
|
|
|
if self.count_readys(hash) == self.netinfo.num_faulty() + 1 && !self.ready_sent {
|
2018-05-08 07:20:32 -07:00
|
|
|
|
// Enqueue a broadcast of a Ready message.
|
2018-06-26 05:50:06 -07:00
|
|
|
|
self.send_ready(hash)?;
|
2018-05-14 05:35:06 -07:00
|
|
|
|
}
|
2018-06-26 05:50:06 -07:00
|
|
|
|
self.compute_output(hash)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Sends an `Echo` message and handles it. Does nothing if we are only an observer.
|
|
|
|
|
fn send_echo(&mut self, p: Proof<Vec<u8>>) -> BroadcastResult<()> {
|
|
|
|
|
self.echo_sent = true;
|
|
|
|
|
if !self.netinfo.is_peer() {
|
|
|
|
|
return Ok(());
|
|
|
|
|
}
|
|
|
|
|
let echo_msg = Target::All.message(BroadcastMessage::Echo(p.clone()));
|
|
|
|
|
self.messages.push_back(echo_msg);
|
|
|
|
|
let our_uid = &self.netinfo.our_uid().clone();
|
|
|
|
|
self.handle_echo(our_uid, p)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Sends a `Ready` message and handles it. Does nothing if we are only an observer.
|
|
|
|
|
fn send_ready(&mut self, hash: &[u8]) -> BroadcastResult<()> {
|
|
|
|
|
self.ready_sent = true;
|
|
|
|
|
if !self.netinfo.is_peer() {
|
|
|
|
|
return Ok(());
|
|
|
|
|
}
|
|
|
|
|
let ready_msg = Target::All.message(BroadcastMessage::Ready(hash.to_vec()));
|
|
|
|
|
self.messages.push_back(ready_msg);
|
|
|
|
|
let our_uid = &self.netinfo.our_uid().clone();
|
|
|
|
|
self.handle_ready(our_uid, hash)
|
2018-05-08 07:20:32 -07:00
|
|
|
|
}
|
|
|
|
|
|
2018-05-14 05:35:06 -07:00
|
|
|
|
/// Checks whether the condition for output are met for this hash, and if so, sets the output
|
2018-05-08 07:20:32 -07:00
|
|
|
|
/// value.
|
2018-05-20 04:51:33 -07:00
|
|
|
|
fn compute_output(&mut self, hash: &[u8]) -> BroadcastResult<()> {
|
2018-05-21 02:01:49 -07:00
|
|
|
|
if self.decided
|
2018-05-29 05:17:30 -07:00
|
|
|
|
|| self.count_readys(hash) <= 2 * self.netinfo.num_faulty()
|
|
|
|
|
|| self.count_echos(hash) <= self.netinfo.num_faulty()
|
2018-05-08 07:20:32 -07:00
|
|
|
|
{
|
2018-05-14 05:35:06 -07:00
|
|
|
|
return Ok(());
|
2018-04-25 06:07:16 -07:00
|
|
|
|
}
|
2018-05-02 22:47:07 -07:00
|
|
|
|
|
2018-05-08 07:20:32 -07:00
|
|
|
|
// Upon receiving 2f + 1 matching Ready(h) messages, wait for N − 2f Echo messages.
|
2018-05-21 02:01:49 -07:00
|
|
|
|
let mut leaf_values: Vec<Option<Box<[u8]>>> = self
|
2018-05-29 05:17:30 -07:00
|
|
|
|
.netinfo
|
|
|
|
|
.all_uids()
|
2018-05-08 07:20:32 -07:00
|
|
|
|
.iter()
|
|
|
|
|
.map(|id| {
|
2018-05-14 00:35:34 -07:00
|
|
|
|
self.echos.get(id).and_then(|p| {
|
2018-05-08 07:20:32 -07:00
|
|
|
|
if p.root_hash.as_slice() == hash {
|
|
|
|
|
Some(p.value.clone().into_boxed_slice())
|
|
|
|
|
} else {
|
|
|
|
|
None
|
|
|
|
|
}
|
|
|
|
|
})
|
|
|
|
|
})
|
|
|
|
|
.collect();
|
2018-05-30 06:33:33 -07:00
|
|
|
|
let value = decode_from_shards(&mut leaf_values, &self.coding, self.data_shard_num, hash);
|
|
|
|
|
self.decided = value.is_some();
|
|
|
|
|
self.output = value;
|
2018-05-14 05:35:06 -07:00
|
|
|
|
Ok(())
|
2018-05-08 07:20:32 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Returns `i` if `node_id` is the `i`-th ID among all participating nodes.
|
2018-06-18 07:14:17 -07:00
|
|
|
|
fn index_of_node(&self, node_id: &NodeUid) -> Option<usize> {
|
2018-06-14 02:05:05 -07:00
|
|
|
|
self.netinfo.all_uids().iter().position(|id| id == node_id)
|
2018-05-08 07:20:32 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Returns `true` if the proof is valid and has the same index as the node ID. Otherwise
|
|
|
|
|
/// logs an info message.
|
2018-06-18 07:14:17 -07:00
|
|
|
|
fn validate_proof(&self, p: &Proof<Vec<u8>>, id: &NodeUid) -> bool {
|
2018-05-08 07:20:32 -07:00
|
|
|
|
if !p.validate(&p.root_hash) {
|
|
|
|
|
info!(
|
|
|
|
|
"Node {:?} received invalid proof: {:?}",
|
2018-05-29 05:17:30 -07:00
|
|
|
|
self.netinfo.our_uid(),
|
2018-05-08 07:20:32 -07:00
|
|
|
|
HexProof(&p)
|
|
|
|
|
);
|
|
|
|
|
false
|
|
|
|
|
} else if self.index_of_node(id) != Some(p.value[0] as usize)
|
2018-05-29 05:17:30 -07:00
|
|
|
|
|| p.index(self.netinfo.num_nodes()) != p.value[0] as usize
|
2018-05-08 07:20:32 -07:00
|
|
|
|
{
|
|
|
|
|
info!(
|
|
|
|
|
"Node {:?} received proof for wrong position: {:?}.",
|
2018-05-29 05:17:30 -07:00
|
|
|
|
self.netinfo.our_uid(),
|
2018-05-08 07:20:32 -07:00
|
|
|
|
HexProof(&p)
|
|
|
|
|
);
|
|
|
|
|
false
|
|
|
|
|
} else {
|
|
|
|
|
true
|
|
|
|
|
}
|
2018-04-24 09:31:21 -07:00
|
|
|
|
}
|
2018-05-14 00:35:34 -07:00
|
|
|
|
|
|
|
|
|
/// Returns the number of nodes that have sent us an `Echo` message with this hash.
|
|
|
|
|
fn count_echos(&self, hash: &[u8]) -> usize {
|
|
|
|
|
self.echos
|
|
|
|
|
.values()
|
|
|
|
|
.filter(|p| p.root_hash.as_slice() == hash)
|
|
|
|
|
.count()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Returns the number of nodes that have sent us a `Ready` message with this hash.
|
|
|
|
|
fn count_readys(&self, hash: &[u8]) -> usize {
|
|
|
|
|
self.readys
|
|
|
|
|
.values()
|
|
|
|
|
.filter(|h| h.as_slice() == hash)
|
|
|
|
|
.count()
|
|
|
|
|
}
|
2018-04-24 09:31:21 -07:00
|
|
|
|
}
|
|
|
|
|
|
2018-05-14 07:16:57 -07:00
|
|
|
|
/// A wrapper for `ReedSolomon` that doesn't panic if there are no parity shards.
|
|
|
|
|
enum Coding {
|
|
|
|
|
/// A `ReedSolomon` instance with at least one parity shard.
|
|
|
|
|
ReedSolomon(Box<ReedSolomon>),
|
|
|
|
|
/// A no-op replacement that doesn't encode or decode anything.
|
|
|
|
|
Trivial(usize),
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl Coding {
|
|
|
|
|
/// Creates a new `Coding` instance with the given number of shards.
|
2018-05-20 04:51:33 -07:00
|
|
|
|
fn new(data_shard_num: usize, parity_shard_num: usize) -> BroadcastResult<Self> {
|
2018-05-14 07:16:57 -07:00
|
|
|
|
Ok(if parity_shard_num > 0 {
|
|
|
|
|
let rs = ReedSolomon::new(data_shard_num, parity_shard_num)?;
|
|
|
|
|
Coding::ReedSolomon(Box::new(rs))
|
|
|
|
|
} else {
|
|
|
|
|
Coding::Trivial(data_shard_num)
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Returns the number of data shards.
|
|
|
|
|
fn data_shard_count(&self) -> usize {
|
|
|
|
|
match *self {
|
|
|
|
|
Coding::ReedSolomon(ref rs) => rs.data_shard_count(),
|
|
|
|
|
Coding::Trivial(dsc) => dsc,
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Returns the number of parity shards.
|
|
|
|
|
fn parity_shard_count(&self) -> usize {
|
|
|
|
|
match *self {
|
|
|
|
|
Coding::ReedSolomon(ref rs) => rs.parity_shard_count(),
|
|
|
|
|
Coding::Trivial(_) => 0,
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Constructs (and overwrites) the parity shards.
|
2018-05-20 04:51:33 -07:00
|
|
|
|
fn encode(&self, slices: &mut [&mut [u8]]) -> BroadcastResult<()> {
|
2018-05-14 07:16:57 -07:00
|
|
|
|
match *self {
|
|
|
|
|
Coding::ReedSolomon(ref rs) => rs.encode(slices)?,
|
|
|
|
|
Coding::Trivial(_) => (),
|
|
|
|
|
}
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// If enough shards are present, reconstructs the missing ones.
|
2018-05-20 04:51:33 -07:00
|
|
|
|
fn reconstruct_shards(&self, shards: &mut [Option<Box<[u8]>>]) -> BroadcastResult<()> {
|
2018-05-14 07:16:57 -07:00
|
|
|
|
match *self {
|
|
|
|
|
Coding::ReedSolomon(ref rs) => rs.reconstruct_shards(shards)?,
|
|
|
|
|
Coding::Trivial(_) => {
|
|
|
|
|
if shards.iter().any(Option::is_none) {
|
|
|
|
|
return Err(rse::Error::TooFewShardsPresent.into());
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-05-30 07:24:52 -07:00
|
|
|
|
fn decode_from_shards(
|
2018-05-04 00:58:21 -07:00
|
|
|
|
leaf_values: &mut [Option<Box<[u8]>>],
|
2018-05-14 07:16:57 -07:00
|
|
|
|
coding: &Coding,
|
2018-04-30 08:55:51 -07:00
|
|
|
|
data_shard_num: usize,
|
|
|
|
|
root_hash: &[u8],
|
2018-05-30 07:24:52 -07:00
|
|
|
|
) -> Option<Vec<u8>> {
|
2018-05-04 00:58:21 -07:00
|
|
|
|
// Try to interpolate the Merkle tree using the Reed-Solomon erasure coding scheme.
|
2018-05-31 00:12:15 -07:00
|
|
|
|
if let Err(err) = coding.reconstruct_shards(leaf_values) {
|
|
|
|
|
debug!("Shard reconstruction failed: {:?}", err); // Faulty proposer
|
|
|
|
|
return None;
|
|
|
|
|
}
|
2018-03-29 09:23:02 -07:00
|
|
|
|
|
|
|
|
|
// Recompute the Merkle tree root.
|
2018-05-04 00:58:21 -07:00
|
|
|
|
|
2018-04-14 03:27:17 -07:00
|
|
|
|
// Collect shards for tree construction.
|
2018-05-10 08:50:07 -07:00
|
|
|
|
let shards: Vec<Vec<u8>> = leaf_values
|
2018-05-04 00:58:21 -07:00
|
|
|
|
.iter()
|
|
|
|
|
.filter_map(|l| l.as_ref().map(|v| v.to_vec()))
|
|
|
|
|
.collect();
|
2018-05-08 07:20:32 -07:00
|
|
|
|
|
|
|
|
|
debug!("Reconstructed shards: {:?}", HexList(&shards));
|
|
|
|
|
|
2018-03-29 09:23:02 -07:00
|
|
|
|
// Construct the Merkle tree.
|
2018-05-14 09:30:07 -07:00
|
|
|
|
let mtree = MerkleTree::from_vec(&digest::SHA256, shards);
|
2018-03-29 09:23:02 -07:00
|
|
|
|
// If the root hash of the reconstructed tree does not match the one
|
|
|
|
|
// received with proofs then abort.
|
2018-04-29 06:27:40 -07:00
|
|
|
|
if &mtree.root_hash()[..] != root_hash {
|
2018-05-30 06:33:33 -07:00
|
|
|
|
None // The proposer is faulty.
|
2018-04-30 08:55:51 -07:00
|
|
|
|
} else {
|
2018-03-29 09:23:02 -07:00
|
|
|
|
// Reconstruct the value from the data shards.
|
2018-05-30 07:24:52 -07:00
|
|
|
|
glue_shards(mtree, data_shard_num)
|
2018-03-29 09:23:02 -07:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Concatenates the first `n` leaf values of a Merkle tree `m` in one value of
|
|
|
|
|
/// type `T`. This is useful for reconstructing the data value held in the tree
|
|
|
|
|
/// and forgetting the leaves that contain parity information.
|
2018-05-30 07:24:52 -07:00
|
|
|
|
fn glue_shards(m: MerkleTree<Vec<u8>>, n: usize) -> Option<Vec<u8>> {
|
|
|
|
|
// Create an iterator over the shard payload, drop the index bytes.
|
|
|
|
|
let mut bytes = m.into_iter().take(n).flat_map(|s| s.into_iter().skip(1));
|
|
|
|
|
let payload_len = match (bytes.next(), bytes.next(), bytes.next(), bytes.next()) {
|
|
|
|
|
(Some(b0), Some(b1), Some(b2), Some(b3)) => BigEndian::read_u32(&[b0, b1, b2, b3]) as usize,
|
|
|
|
|
_ => return None, // The proposing node is faulty: no payload size.
|
|
|
|
|
};
|
|
|
|
|
let payload: Vec<u8> = bytes.take(payload_len).collect();
|
|
|
|
|
debug!("Glued data shards {:?}", HexBytes(&payload));
|
|
|
|
|
Some(payload)
|
2018-03-23 15:54:40 -07:00
|
|
|
|
}
|