Port bash checkpoint scripts to zebra-checkpoints single rust binary (#740)

* make zebra-checkpoints
* fix LOOKAHEAD_LIMIT scope
* add a default cli path
* change doc usage text
* add tracing
* move MAX_CHECKPOINT_HEIGHT_GAP to zebra-consensus
* do byte_reverse_hex in a map
This commit is contained in:
Alfredo Garcia 2020-07-25 04:53:00 -03:00 committed by GitHub
parent b59cfc49b7
commit 5b3c6e4c6c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 129 additions and 124 deletions

7
Cargo.lock generated
View File

@ -2729,8 +2729,15 @@ dependencies = [
name = "zebra-utils"
version = "0.1.0"
dependencies = [
"abscissa_core",
"color-eyre",
"hex",
"serde_json",
"structopt",
"tracing-error",
"tracing-subscriber",
"zebra-chain",
"zebra-consensus",
]
[[package]]

View File

@ -77,6 +77,10 @@ type QueuedBlockList = Vec<QueuedBlock>;
/// usage by committing blocks to the disk state. (Or dropping invalid blocks.)
pub const MAX_QUEUED_BLOCKS_PER_HEIGHT: usize = 4;
/// We limit the maximum number of blocks in each checkpoint. Each block uses a
/// constant amount of memory for the supporting data structures and futures.
pub const MAX_CHECKPOINT_HEIGHT_GAP: usize = 2_000;
/// A checkpointing block verifier.
///
/// Verifies blocks using a supplied list of checkpoints. There must be at

View File

@ -6,5 +6,13 @@ version = "3.0.0-alpha.0"
edition = "2018"
[dependencies]
abscissa_core = "0.5"
structopt = "0.3.15"
color-eyre = "0.5.0"
hex = "0.4"
serde_json = "1.0"
tracing-error = { version = "0.1.2", features = ["traced-error"] }
tracing-subscriber = { version = "0.2.8", features = ["tracing-log"] }
zebra-chain = { path = "../zebra-chain" }
zebra-consensus = { path = "../zebra-consensus" }

View File

@ -1,65 +0,0 @@
#!/bin/bash
set -euo pipefail
# Prints Zebra checkpoints, based on a list of block heights, sizes, and hashes.
#
# Reads lines containing a block height, block byte size, and block header hash
# from stdin. Writes each checkpoint to stdout, as a line with space-separated
# fields.
#
# The block header hash is read in Bitcoin order, but written out in Zebra's
# internal byte order.
#
# Usage: get-height-size-hash.sh | calculate-checkpoints.sh
# get-height-size-hash.sh -testnet | calculate-checkpoints.sh
#
# calculate-checkpoints.sh ignores any command-line arguments.
#
# TODO: rewrite as a stand-alone Rust command-line tool.
# zebra-consensus accepts an ordered list of checkpoints, starting with the
# genesis block. Checkpoint heights can be chosen arbitrarily.
# We limit the memory usage for each checkpoint, based on the cumulative size of
# the serialized blocks in the chain. Deserialized blocks are larger, because
# they contain pointers and non-compact integers. But they should be within a
# constant factor of the serialized size.
MAX_CHECKPOINT_BYTE_COUNT=$((256*1024*1024))
# We limit the maximum number of blocks in each checkpoint. Each block uses a
# constant amount of memory for the supporting data structures and futures.
#
# TODO: In the Rust implementation, set this gap to half the sync service's
# LOOKAHEAD_LIMIT.
MAX_CHECKPOINT_HEIGHT_GAP=2000
# Running totals, counted from the block after the most recently printed
# checkpoint. Both are reset to zero whenever a checkpoint is printed.
cumulative_bytes=0
height_gap=0
while read -r height size hash; do
cumulative_bytes=$((cumulative_bytes + size))
height_gap=$((height_gap + 1))
# Checkpoints can be slightly larger than the maximum byte count. That's ok,
# because the memory usage is only approximate. (This is a bash-specific
# optimisation, to avoid keeping a copy of the previous height and hash.
# Since exact sizes don't matter, we can use the same check in the Rust
# implementation. Or choose a simpler alternative.)
#
# A block becomes a checkpoint when it is the genesis block (height 0), or
# when either the byte limit or the height gap limit has been reached.
if [ "$height" -eq 0 ] || \
[ "$cumulative_bytes" -ge "$MAX_CHECKPOINT_BYTE_COUNT" ] || \
[ "$height_gap" -ge "$MAX_CHECKPOINT_HEIGHT_GAP" ]; then
# Reverse the byte order of hash.
#
# We reverse the hash after selecting the checkpoints, because launching
# a zebrad subprocess is expensive. (This is a bash-specific
# optimisation, the Rust implementation should reverse hashes as it loads
# them.)
hash=$(zebrad revhex "$hash")
echo "$height $hash"
cumulative_bytes=0
height_gap=0
fi
done

View File

@ -1,43 +0,0 @@
#!/bin/bash
set -euo pipefail
# Print the block height, size, and hash for each block.
#
# For each block in the best chain, gets the block height, block byte size, and
# block header hash using zcash RPC via zcash-cli. Writes each block's info to
# stdout, as a line with space-separated fields.
#
# The block header hash is written out in Bitcoin order, which is different from
# Zebra's internal byte order, as an optimisation. (calculate-checkpoints.sh
# converts hashes to Zebra's internal order after choosing checkpoints.)
#
# Usage: get-height-size-hash.sh | calculate-checkpoints.sh
# get-height-size-hash.sh -testnet | calculate-checkpoints.sh
#
# get-height-size-hash.sh passes its arguments through to zcash-cli.
#
# Requires zcash-cli, jq, and zebrad in your path. zcash-cli must be able to
# access a working, synced zcashd instance.
#
# TODO: rewrite as a stand-alone Rust command-line tool.
block_count=$(zcash-cli "$@" getblockcount)
# Checkpoints must be on the main chain, so we skip blocks that are within the
# zcashd reorg limit.
BLOCK_REORG_LIMIT=100
block_count=$((block_count - BLOCK_REORG_LIMIT))
# Query every height from 0 up to, but not including, the reduced block count,
# in ascending order. One zcash-cli process is launched per block.
i=0
while [ "$i" -lt "$block_count" ]; do
# Unfortunately, there is no simple RPC for height, size, and hash.
# So we use the expensive block RPC, and extract fields using jq.
#
# We don't byte-reverse the hash here, because launching a zebrad subprocess
# is expensive. (This is a bash-specific optimisation, the Rust
# implementation should reverse hashes as it loads them.)
zcash-cli "$@" getblock "$i" | \
jq -r '"\(.height) \(.size) \(.hash)"'
i=$((i + 1))
done

View File

@ -2,9 +2,9 @@ use structopt::StructOpt;
#[derive(Debug, StructOpt)]
pub struct Args {
/// Use the test network
#[structopt(short, long)]
pub testnet: bool,
/// Path to zcash-cli command
#[structopt(default_value = "zcash-cli", short, long)]
pub cli: String,
/// Passthrough args for `zcash-cli`
#[structopt(last = true)]

View File

@ -1,33 +1,127 @@
//! Prints Zebra checkpoints as "height hash" output lines.
//!
//! Gets all the blocks up to the network's current tip and prints the ones
//! that are checkpoints according to the rules.
//!
//! For usage please refer to the program help: `zebra-checkpoints --help`
//!
//! zebra-consensus accepts an ordered list of checkpoints, starting with the
//! genesis block. Checkpoint heights can be chosen arbitrarily.
#![allow(clippy::try_err)]
use color_eyre::eyre::{eyre, Result};
use color_eyre::eyre::Result;
use serde_json::Value;
use std::process::Stdio;
use structopt::StructOpt;
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
use zebra_chain::block::BlockHeaderHash;
use zebra_chain::types::BlockHeight;
mod args;
/// We limit the memory usage for each checkpoint, based on the cumulative size of
/// the serialized blocks in the chain. Deserialized blocks are larger, because
/// they contain pointers and non-compact integers. But they should be within a
/// constant factor of the serialized size.
const MAX_CHECKPOINT_BYTE_COUNT: u64 = 256 * 1024 * 1024;
/// Checkpoints must be on the main chain, so we skip blocks that are within the
/// zcashd reorg limit.
const BLOCK_REORG_LIMIT: BlockHeight = BlockHeight(100);
// Forwards the user-supplied passthrough arguments (`args.zcli_args`) to `cmd`.
//
// The command is taken by value and handed back, so callers can rebind it.
// When there are no passthrough arguments the command is returned untouched.
fn passthrough(mut cmd: std::process::Command, args: &args::Args) -> std::process::Command {
    let extra_args = &args.zcli_args;
    if extra_args.is_empty() {
        return cmd;
    }
    cmd.args(extra_args);
    cmd
}
// Entry point: asks the configured CLI for the current block count, then walks
// the chain from height 0 and prints a "height hash" line for every block that
// is selected as a checkpoint.
//
// NOTE(review): this listing appears to interleave lines from the previous and
// the new version of `main` (no +/- diff markers are visible), and its braces
// do not balance as shown — confirm which lines belong to the final version
// before relying on it.
fn main() -> Result<()> {
// todo add tracing setup
init_tracing();
color_eyre::install()?;
// create process
let args = args::Args::from_args();
let mut cmd = std::process::Command::new(&args.cli);
cmd = passthrough(cmd, &args);
let mut cmd = std::process::Command::new("zcash-cli");
// set up counters
let mut cumulative_bytes: u64 = 0;
let mut height_gap: BlockHeight = BlockHeight(0);
if args.testnet {
cmd.arg("-testnet");
}
// get the current block count
cmd.arg("getblockcount");
// NOTE(review): `spawn()` followed by `output()` on the same `Command`
// launches the CLI twice; only the `output()` result is read below.
let mut subprocess = cmd.stdout(Stdio::piped()).spawn().unwrap();
let output = cmd.output().unwrap();
subprocess.kill()?;
let mut requested_height: BlockHeight = String::from_utf8_lossy(&output.stdout)
.trim()
.parse()
.unwrap();
// Stop BLOCK_REORG_LIMIT blocks short of the reported count; `checked_sub`
// makes the program panic with the message below if fewer blocks exist.
requested_height = BlockHeight(
requested_height
.0
.checked_sub(BLOCK_REORG_LIMIT.0)
.expect("zcashd has some mature blocks: wait for zcashd to sync more blocks"),
);
cmd.args(args.zcli_args.into_iter());
// loop through all blocks
for x in 0..requested_height.0 {
// unfortunately we need to create a process for each block
let mut cmd = std::process::Command::new(&args.cli);
cmd = passthrough(cmd, &args);
let mut child = cmd.spawn()?;
// get block data
cmd.args(&["getblock", &x.to_string()]);
// NOTE(review): as above, `spawn()` plus `output()` runs `getblock` twice
// per height.
let mut subprocess = cmd.stdout(Stdio::piped()).spawn().unwrap();
let output = cmd.output().unwrap();
let block_raw = String::from_utf8_lossy(&output.stdout);
// handle communicating with this child process via its stdin and stdout handles
// convert raw block to json
let v: Value = serde_json::from_str(block_raw.trim())?;
let exit_status = child.wait()?;
// get the values we are interested in
// The hash string is byte-reversed before it is parsed into a BlockHeaderHash.
let hash: BlockHeaderHash = v["hash"]
.as_str()
.map(zebra_chain::utils::byte_reverse_hex)
.unwrap()
.parse()
.unwrap();
// NOTE(review): `as u32` truncates silently, so the range assertion below
// only sees the already-truncated value.
let height = BlockHeight(v["height"].as_u64().unwrap() as u32);
assert!(height <= BlockHeight::MAX);
// The block returned must be the one requested for this iteration.
assert_eq!(x, height.0);
let size = v["size"].as_u64().unwrap();
assert!(size <= zebra_chain::block::MAX_BLOCK_BYTES);
if !exit_status.success() {
Err(eyre!("throw a more informative error here, might wanna shove stdin / stdout in here as custom sections"))?;
// kill spawned
subprocess.wait()?;
// compute
cumulative_bytes += size;
height_gap = BlockHeight(height_gap.0 + 1);
// check if checkpoint
// A block is a checkpoint when it is the genesis block, or when either the
// cumulative byte limit or the height gap limit has been reached.
if height == BlockHeight(0)
|| cumulative_bytes >= MAX_CHECKPOINT_BYTE_COUNT
|| height_gap.0 >= zebra_consensus::checkpoint::MAX_CHECKPOINT_HEIGHT_GAP as u32
{
// print to output
println!("{} {}", height.0, &hex::encode(hash.0),);
// reset counters
cumulative_bytes = 0;
height_gap = BlockHeight(0);
}
}
Ok(())
}
// Builds the tracing subscriber used by this binary — the default `Registry`
// with a `tracing_error::ErrorLayer` stacked on top — and initialises it.
fn init_tracing() {
    let error_layer = tracing_error::ErrorLayer::default();
    let subscriber = tracing_subscriber::Registry::default().with(error_layer);
    subscriber.init();
}

View File

@ -20,7 +20,7 @@ const FANOUT: usize = checkpoint::MAX_QUEUED_BLOCKS_PER_HEIGHT;
/// Controls how far ahead of the chain tip the syncer tries to download before
/// waiting for queued verifications to complete. Set to twice the maximum
/// checkpoint distance.
const LOOKAHEAD_LIMIT: usize = 2 * 2_000;
pub const LOOKAHEAD_LIMIT: usize = checkpoint::MAX_CHECKPOINT_HEIGHT_GAP * 2;
#[derive(Debug)]
pub struct Syncer<ZN, ZS, ZV>