Parse block heights in coinbase transactions.

BIP34, which is included in Zcash, encodes the block height into each
block by adding it into the unused BitcoinScript field of the block's
coinbase transaction.  However, this is done just by requiring that the
script pushes the block height onto the stack when it executes, and
there are multiple different ways to push data onto the stack in
BitcoinScript.  Also, the genesis block does not include the block
height, by accident.

Because we want to *parse* transactions into an algebraic data type that
encodes their structural properties, rather than allow possibly-invalid
data to float through the internals of our node, we want to extract the
block height upfront and store it separately from the rest of the
coinbase data, which is inert.  So the serialization code now contains
just enough logic to parse BitcoinScript-encoded block heights, and
special-case the encoding of the genesis block.

Elsewhere in the source code, the `LockTime` struct requires block
heights to be less than 500,000,000 (values at or above that are
interpreted as a unix timestamp, not a height).  To unify invariants, we
ensure that the parsing logic works for every block height below
500,000,000, even though heights that large are unlikely to ever be used
for Zcash.
This commit is contained in:
Henry de Valence 2020-02-07 15:56:06 -08:00 committed by Deirdre Connolly
parent 5059c23e99
commit 44b299d15c
4 changed files with 148 additions and 9 deletions

View File

@ -15,6 +15,13 @@ use super::*;
const OVERWINTER_VERSION_GROUP_ID: u32 = 0x03C4_8270;
const SAPLING_VERSION_GROUP_ID: u32 = 0x892F_2085;
// The complete, fixed coinbase data of the genesis block (77 bytes).
//
// The genesis block does not carry a BIP34 block height, so the coinbase
// parser compares incoming data against this constant to recognise it and
// assign it height 0. The leading bytes `[4, 255, 255, 7, 31]` look like a
// 4-byte push but are not a height; the bytes from offset 8 onward are ASCII
// text beginning with "Zcash".
const GENESIS_COINBASE_DATA: [u8; 77] = [
4, 255, 255, 7, 31, 1, 4, 69, 90, 99, 97, 115, 104, 48, 98, 57, 99, 52, 101, 101, 102, 56, 98,
55, 99, 99, 52, 49, 55, 101, 101, 53, 48, 48, 49, 101, 51, 53, 48, 48, 57, 56, 52, 98, 54, 102,
101, 97, 51, 53, 54, 56, 51, 97, 55, 99, 97, 99, 49, 52, 49, 97, 48, 52, 51, 99, 52, 50, 48,
54, 52, 56, 51, 53, 100, 51, 52,
];
impl ZcashSerialize for OutPoint {
fn zcash_serialize<W: io::Write>(&self, mut writer: W) -> Result<(), io::Error> {
writer.write_all(&self.hash.0[..])?;
@ -32,6 +39,103 @@ impl ZcashDeserialize for OutPoint {
}
}
// Coinbase inputs include block heights (BIP34). These are not encoded
// directly, but as a Bitcoin script that pushes the block height to the stack
// when executed. The script data is otherwise unused. Because we want to
// *parse* transactions into an internal representation where illegal states are
// unrepresentable, we need just enough parsing of Bitcoin scripts to parse the
// coinbase height and split off the rest of the (inert) coinbase data.
fn parse_coinbase_height(mut data: Vec<u8>) -> Result<(BlockHeight, Vec<u8>), SerializationError> {
match (data.get(0), data.len()) {
// Blocks 1 through 16 inclusive encode block height with OP_N opcodes.
(Some(op_n @ 0x51..=0x60), len) if len >= 1 => {
Ok((BlockHeight((op_n - 0x50) as u32), data.split_off(1)))
}
// Blocks 17 through 256 exclusive encode block height with the `0x01` opcode.
(Some(0x01), len) if len >= 2 => Ok((BlockHeight(data[1] as u32), data.split_off(2))),
// Blocks 256 through 65536 exclusive encode block height with the `0x02` opcode.
(Some(0x02), len) if len >= 3 => Ok((
BlockHeight(data[1] as u32 + ((data[2] as u32) << 8)),
data.split_off(3),
)),
// Blocks 65536 through 2**24 exclusive encode block height with the `0x03` opcode.
(Some(0x03), len) if len >= 4 => Ok((
BlockHeight(data[1] as u32 + ((data[2] as u32) << 8) + ((data[3] as u32) << 16)),
data.split_off(4),
)),
// The genesis block does not encode the block height by mistake; special case it.
// The first five bytes are [4, 255, 255, 7, 31], the little-endian encoding of
// 520_617_983. This is lucky because it means we can special-case the genesis block
// while remaining below the maximum `BlockHeight` of 500_000_000 forced by `LockTime`.
// While it's unlikely this code will ever process a block height that high, this means
// we don't need to maintain a cascade of different invariants for allowable `BlockHeight`s.
(Some(0x04), _) if &data[..] == &GENESIS_COINBASE_DATA[..] => Ok((BlockHeight(0), data)),
// As noted above, this is included for completeness.
(Some(0x04), len) if len >= 5 => {
let h = data[1] as u32
+ ((data[2] as u32) << 8)
+ ((data[3] as u32) << 16)
+ ((data[4] as u32) << 24);
if h < 500_000_000 {
Ok((BlockHeight(h), data.split_off(5)))
} else {
Err(SerializationError::Parse("Invalid block height"))
}
}
_ => Err(SerializationError::Parse(
"Could not parse BIP34 height in coinbase data",
)),
}
}
fn coinbase_height_len(height: BlockHeight) -> usize {
// We can't write this as a match statement on stable until exclusive range
// guards are stabilized.
if let 0 = height.0 {
0
} else if let _h @ 1..=16 = height.0 {
1
} else if let _h @ 17..=255 = height.0 {
2
} else if let _h @ 256..=65535 = height.0 {
3
} else if let _h @ 65536..=16777215 = height.0 {
4
} else if let _h @ 16777216..=499_999_999 = height.0 {
5
} else {
panic!("Invalid coinbase height");
}
}
fn write_coinbase_height<W: io::Write>(height: BlockHeight, mut w: W) -> Result<(), io::Error> {
// We can't write this as a match statement on stable until exclusive range
// guards are stabilized.
if let 0 = height.0 {
// Genesis block does not include height.
} else if let h @ 1..=16 = height.0 {
w.write_u8(0x50 + (h as u8))?;
} else if let h @ 17..=255 = height.0 {
w.write_u8(0x01)?;
w.write_u8(h as u8)?;
} else if let h @ 256..=65535 = height.0 {
w.write_u8(0x02)?;
w.write_u16::<LittleEndian>(h as u16)?;
} else if let h @ 65536..=16777215 = height.0 {
w.write_u8(0x03)?;
w.write_u8(h as u8)?;
w.write_u8((h >> 8) as u8)?;
w.write_u8((h >> 16) as u8)?;
} else if let h @ 16777216..=499_999_999 = height.0 {
w.write_u8(0x04)?;
w.write_u32::<LittleEndian>(h)?;
} else {
panic!("Invalid coinbase height");
}
Ok(())
}
impl ZcashSerialize for TransparentInput {
fn zcash_serialize<W: io::Write>(&self, mut writer: W) -> Result<(), io::Error> {
match self {
@ -44,11 +148,18 @@ impl ZcashSerialize for TransparentInput {
script.zcash_serialize(&mut writer)?;
writer.write_u32::<LittleEndian>(*sequence)?;
}
TransparentInput::Coinbase { data, sequence } => {
TransparentInput::Coinbase {
height,
data,
sequence,
} => {
writer.write_all(&[0; 32][..])?;
writer.write_u32::<LittleEndian>(0xffff_ffff)?;
assert!(data.len() <= 100);
writer.write_compactsize(data.len() as u64)?;
let height_len = coinbase_height_len(*height);
let total_len = height_len + data.len();
assert!(total_len <= 100);
writer.write_compactsize(total_len as u64)?;
write_coinbase_height(*height, &mut writer)?;
writer.write_all(&data[..])?;
writer.write_u32::<LittleEndian>(*sequence)?;
}
@ -72,8 +183,13 @@ impl ZcashDeserialize for TransparentInput {
}
let mut data = Vec::with_capacity(len as usize);
(&mut reader).take(len).read_to_end(&mut data)?;
let (height, data) = parse_coinbase_height(data)?;
let sequence = reader.read_u32::<LittleEndian>()?;
Ok(TransparentInput::Coinbase { data, sequence })
Ok(TransparentInput::Coinbase {
height,
data,
sequence,
})
} else {
Ok(TransparentInput::PrevOut {
outpoint: OutPoint {

View File

@ -131,8 +131,14 @@ impl Arbitrary for TransparentInput {
}
})
.boxed(),
(vec(any::<u8>(), 0..100), any::<u32>())
.prop_map(|(data, sequence)| { TransparentInput::Coinbase { data, sequence } })
(any::<BlockHeight>(), vec(any::<u8>(), 0..95), any::<u32>())
.prop_map(|(height, data, sequence)| {
TransparentInput::Coinbase {
height,
data,
sequence,
}
})
.boxed(),
]
.boxed()

View File

@ -3,7 +3,7 @@
#[cfg(test)]
use proptest_derive::Arbitrary;
use crate::types::Script;
use crate::types::{BlockHeight, Script};
use super::TransactionHash;
@ -35,7 +35,10 @@ pub enum TransparentInput {
},
/// New coins created by the block reward.
Coinbase {
/// 100 bytes of arbitrary data.
/// The height of this block.
height: BlockHeight,
/// Approximately 100 bytes of data (95 to be safe).
/// XXX refine this type.
data: Vec<u8>,
/// The sequence number for the output.
sequence: u32,

View File

@ -39,10 +39,24 @@ impl fmt::Debug for Sha256dChecksum {
}
/// A u32 which represents a block height value.
///
/// # Invariants
///
/// Users should not construct block heights greater than or equal to
/// `500_000_000`. `LockTime` interprets values at or above that threshold as
/// a unix timestamp rather than a height, and the coinbase height
/// serialization code panics on such heights.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
#[cfg_attr(test, derive(Arbitrary))]
pub struct BlockHeight(pub u32);
/// Test-only generator that only ever produces heights satisfying the
/// `BlockHeight` invariant, i.e. values strictly below `500_000_000`.
#[cfg(test)]
impl Arbitrary for BlockHeight {
    type Parameters = ();
    type Strategy = BoxedStrategy<Self>;

    fn arbitrary_with(_args: Self::Parameters) -> Self::Strategy {
        // The half-open range excludes 500_000_000 itself.
        (0u32..500_000_000_u32).prop_map(BlockHeight).boxed()
    }
}
/// A Bitcoin-style `locktime`, representing either a block height or an epoch
/// time.
///