diff --git a/core/src/sigverify.rs b/core/src/sigverify.rs index ff01b30c4..d87544b92 100644 --- a/core/src/sigverify.rs +++ b/core/src/sigverify.rs @@ -6,17 +6,14 @@ use crate::packet::{Packet, SharedPackets}; use crate::result::Result; -use byteorder::{LittleEndian, ReadBytesExt}; use solana_metrics::counter::Counter; use solana_sdk::pubkey::Pubkey; +use solana_sdk::short_vec::decode_len; use solana_sdk::signature::Signature; #[cfg(test)] use solana_sdk::transaction::Transaction; -use std::io::Cursor; use std::mem::size_of; -pub const TX_OFFSET: usize = 0; - type TxOffsets = (Vec, Vec, Vec, Vec, Vec>); #[cfg(feature = "cuda")] @@ -124,17 +121,20 @@ pub fn ed25519_verify(batches: &[SharedPackets]) -> Vec> { } pub fn get_packet_offsets(packet: &Packet, current_offset: u32) -> (u32, u32, u32, u32) { - // Read in u64 as the size of signatures array - let mut rdr = Cursor::new(&packet.data[TX_OFFSET..size_of::()]); - let sig_len = rdr.read_u64::().unwrap() as u32; + let (sig_len, sig_size) = decode_len(&packet.data); + let msg_start_offset = current_offset as usize + sig_size + sig_len * size_of::(); - let msg_start_offset = - current_offset + size_of::() as u32 + sig_len * size_of::() as u32; - let pubkey_offset = msg_start_offset + size_of::() as u32; + let (_pubkey_len, pubkey_size) = decode_len(&packet.data[msg_start_offset..]); + let pubkey_offset = msg_start_offset + pubkey_size; - let sig_start = TX_OFFSET as u32 + size_of::() as u32; + let sig_start = current_offset as usize + sig_size; - (sig_len, sig_start, msg_start_offset, pubkey_offset) + ( + sig_len as u32, + sig_start as u32, + msg_start_offset as u32, + pubkey_offset as u32, + ) } pub fn generate_offsets(batches: &[SharedPackets]) -> Result { @@ -149,14 +149,14 @@ pub fn generate_offsets(batches: &[SharedPackets]) -> Result { p.read().unwrap().packets.iter().for_each(|packet| { let current_offset = current_packet as u32 * size_of::() as u32; - let (sig_len, _sig_start, msg_start_offset, pubkey_offset) = + let (sig_len, sig_start, msg_start_offset, pubkey_offset) = get_packet_offsets(packet, current_offset); let mut pubkey_offset = pubkey_offset; sig_lens.push(sig_len); trace!("pubkey_offset: {}", pubkey_offset); - let mut sig_offset = current_offset + size_of::() as u32; + let mut sig_offset = sig_start; for _ in 0..sig_len { signature_offsets.push(sig_offset); sig_offset += size_of::() as u32; @@ -329,7 +329,7 @@ mod tests { use bincode::{deserialize, serialize}; use solana_sdk::transaction::Transaction; - const SIG_OFFSET: usize = std::mem::size_of::(); + const SIG_OFFSET: usize = 1; pub fn memfind(a: &[A], b: &[A]) -> Option { assert!(a.len() >= b.len()); @@ -347,7 +347,7 @@ mod tests { let tx = test_tx(); let tx_bytes = serialize(&tx).unwrap(); let packet = serialize(&tx).unwrap(); - assert_matches!(memfind(&packet, &tx_bytes), Some(sigverify::TX_OFFSET)); + assert_matches!(memfind(&packet, &tx_bytes), Some(0)); assert_matches!(memfind(&packet, &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), None); } @@ -416,11 +416,11 @@ mod tests { #[test] fn test_get_packet_offsets() { - assert_eq!(get_packet_offsets_from_tx(test_tx(), 0), (1, 8, 64, 8)); - assert_eq!(get_packet_offsets_from_tx(test_tx(), 100), (1, 8, 64, 8)); + assert_eq!(get_packet_offsets_from_tx(test_tx(), 0), (1, 1, 64, 1)); + assert_eq!(get_packet_offsets_from_tx(test_tx(), 100), (1, 1, 64, 1)); assert_eq!( get_packet_offsets_from_tx(test_multisig_tx(), 0), - (2, 8, 128, 8) + (2, 1, 128, 1) ); } diff --git a/sdk/benches/short_vec.rs b/sdk/benches/short_vec.rs new file mode 100644 index 000000000..69b823beb --- /dev/null +++ b/sdk/benches/short_vec.rs @@ -0,0 +1,36 @@ +#![feature(test)] + +extern crate test; +use bincode::deserialize; +use solana_sdk::short_vec::ShortVec; +use test::Bencher; + +// Return a ShortVec with 127 bytes +fn create_encoded_short_vec() -> Vec { + let mut bytes = vec![127]; + bytes.extend_from_slice(&vec![0u8; 127]); + bytes +} + +// Return a Vec with 127 bytes +fn create_encoded_vec() -> Vec { + let mut bytes = vec![127, 0, 0, 0, 0, 0, 0, 0]; + bytes.extend_from_slice(&vec![0u8; 127]); + bytes +} + +#[bench] +fn bench_short_vec(b: &mut Bencher) { + b.iter(|| { + let bytes = test::black_box(create_encoded_short_vec()); + deserialize::>(&bytes).unwrap(); + }); +} + +#[bench] +fn bench_vec(b: &mut Bencher) { + b.iter(|| { + let bytes = test::black_box(create_encoded_vec()); + deserialize::>(&bytes).unwrap(); + }); +} diff --git a/sdk/src/instruction.rs b/sdk/src/instruction.rs index 08066615d..2867c6759 100644 --- a/sdk/src/instruction.rs +++ b/sdk/src/instruction.rs @@ -1,6 +1,7 @@ //! Defines a composable Instruction type and a memory-efficient CompiledInstruction. use crate::pubkey::Pubkey; +use crate::short_vec; use crate::system_instruction::SystemError; use bincode::serialize; use serde::Serialize; @@ -92,7 +93,7 @@ impl Instruction { } /// Account metadata used to define Instructions -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Serialize, Deserialize)] pub struct AccountMeta { /// An account's public key pub pubkey: Pubkey, @@ -112,8 +113,10 @@ pub struct CompiledInstruction { /// Index into the transaction program ids array indicating the program account that executes this instruction pub program_ids_index: u8, /// Ordered indices into the transaction keys array indicating which accounts to pass to the program + #[serde(with = "short_vec")] pub accounts: Vec, /// The program input data + #[serde(with = "short_vec")] pub data: Vec, } diff --git a/sdk/src/lib.rs b/sdk/src/lib.rs index 8606d8cf6..7f0214456 100644 --- a/sdk/src/lib.rs +++ b/sdk/src/lib.rs @@ -10,6 +10,7 @@ pub mod native_program; pub mod packet; pub mod pubkey; pub mod rpc_port; +pub mod short_vec; pub mod signature; pub mod system_instruction; pub mod system_program; diff --git a/sdk/src/message.rs b/sdk/src/message.rs index 75a762d2d..27d581f26 100644 --- a/sdk/src/message.rs +++ b/sdk/src/message.rs @@ -3,6 +3,7 @@ use crate::hash::Hash; use crate::instruction::{CompiledInstruction, Instruction}; use crate::pubkey::Pubkey; +use crate::short_vec; use itertools::Itertools; fn position(keys: &[Pubkey], key: &Pubkey) -> u8 { @@ -67,13 +68,17 @@ fn get_program_ids(instructions: &[Instruction]) -> Vec { .collect() } -#[derive(Debug, PartialEq, Eq, Clone)] +#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)] pub struct Message { + #[serde(skip)] pub num_signatures: u8, + #[serde(with = "short_vec")] pub account_keys: Vec, pub recent_blockhash: Hash, pub fee: u64, + #[serde(with = "short_vec")] pub program_ids: Vec, + #[serde(with = "short_vec")] pub instructions: Vec, } diff --git a/sdk/src/packet.rs b/sdk/src/packet.rs index 6a204dbd6..a28761155 100644 --- a/sdk/src/packet.rs +++ b/sdk/src/packet.rs @@ -1,3 +1,2 @@ /// Maximum over-the-wire size of a Transaction -// TODO: Set this back to 512 after shortvec optimization is reinstated. -pub const PACKET_DATA_SIZE: usize = 522; +pub const PACKET_DATA_SIZE: usize = 512; diff --git a/sdk/src/short_vec.rs b/sdk/src/short_vec.rs new file mode 100644 index 000000000..ad58bb7e9 --- /dev/null +++ b/sdk/src/short_vec.rs @@ -0,0 +1,224 @@ +use serde::de::{self, Deserializer, SeqAccess, Visitor}; +use serde::ser::{SerializeTuple, Serializer}; +use serde::{Deserialize, Serialize}; +use std::fmt; +use std::marker::PhantomData; +use std::mem::size_of; + +/// Same as usize, but serialized with 1 to 9 bytes. If the value is above +/// 0x7f, the top bit is set and the remaining value is stored in the next +/// bytes. Each byte follows the same pattern until the 9th byte. The 9th +/// byte, if needed, uses all 8 bits to store the last byte of the original +/// value. +pub struct ShortUsize(pub usize); + +impl Serialize for ShortUsize { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + let mut seq = serializer.serialize_tuple(0)?; + + let mut rem_len = self.0; + loop { + let mut elem = (rem_len & 0x7f) as u8; + rem_len >>= 7; + if rem_len == 0 { + seq.serialize_element(&elem)?; + break; + } else { + elem |= 0x80; + seq.serialize_element(&elem)?; + } + } + seq.end() + } +} + +struct ShortLenVisitor; + +impl<'de> Visitor<'de> for ShortLenVisitor { + type Value = ShortUsize; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("a multi-byte length") + } + + fn visit_seq(self, mut seq: A) -> Result + where + A: SeqAccess<'de>, + { + let mut len: usize = 0; + let mut size: usize = 0; + loop { + let elem: u8 = seq + .next_element()? + .ok_or_else(|| de::Error::invalid_length(size, &self))?; + + len |= (elem as usize & 0x7f) << (size * 7); + size += 1; + + if elem as usize & 0x80 == 0 { + break; + } + + if size > size_of::() + 1 { + return Err(de::Error::invalid_length(size, &self)); + } + } + + Ok(ShortUsize(len)) + } +} + +impl<'de> Deserialize<'de> for ShortUsize { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + deserializer.deserialize_tuple(9, ShortLenVisitor) + } +} + +/// If you don't want to use the ShortVec newtype, you can do ShortVec +/// serialization on an ordinary vector with the following field annotation: +/// +/// #[serde(with = "short_vec")] +/// +pub fn serialize( + elements: &[T], + serializer: S, +) -> Result { + let mut seq = serializer.serialize_tuple(0)?; + + let short_len = ShortUsize(elements.len()); + seq.serialize_element(&short_len)?; + + for element in elements { + seq.serialize_element(element)?; + } + seq.end() +} + +struct ShortVecVisitor { + _t: PhantomData, +} + +impl<'de, T> Visitor<'de> for ShortVecVisitor +where + T: Deserialize<'de>, +{ + type Value = Vec; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("a Vec with a multi-byte length") + } + + fn visit_seq(self, mut seq: A) -> Result, A::Error> + where + A: SeqAccess<'de>, + { + let short_len: ShortUsize = seq + .next_element()? + .ok_or_else(|| de::Error::invalid_length(0, &self))?; + let len = short_len.0; + + let mut result = Vec::with_capacity(len); + for i in 0..len { + let elem = seq + .next_element()? + .ok_or_else(|| de::Error::invalid_length(i, &self))?; + result.push(elem); + } + Ok(result) + } +} + +/// If you don't want to use the ShortVec newtype, you can do ShortVec +/// deserialization on an ordinary vector with the following field annotation: +/// +/// #[serde(with = "short_vec")] +/// +pub fn deserialize<'de, D, T>(deserializer: D) -> Result, D::Error> +where + D: Deserializer<'de>, + T: Deserialize<'de>, +{ + let visitor = ShortVecVisitor { _t: PhantomData }; + deserializer.deserialize_tuple(std::usize::MAX, visitor) +} + +pub struct ShortVec(pub Vec); + +impl Serialize for ShortVec { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + serialize(&self.0, serializer) + } +} + +impl<'de, T: Deserialize<'de>> Deserialize<'de> for ShortVec { + fn deserialize(deserializer: D) -> Result, D::Error> + where + D: Deserializer<'de>, + { + deserialize(deserializer).map(ShortVec) + } +} + +/// Return the serialized length. +pub fn encode_len(len: usize) -> Vec { + bincode::serialize(&ShortUsize(len)).unwrap() +} + +/// Return the decoded value and how many bytes it consumed. +pub fn decode_len(bytes: &[u8]) -> (usize, usize) { + let short_len: ShortUsize = bincode::deserialize(bytes).unwrap(); + let num_bytes = bincode::serialized_size(&short_len).unwrap() as usize; + (short_len.0, num_bytes) +} + +#[cfg(test)] +mod tests { + use super::*; + use bincode::{deserialize, serialize}; + + fn assert_len_encoding(len: usize, bytes: &[u8]) { + assert_eq!(encode_len(len), bytes, "unexpected usize encoding"); + assert_eq!( + decode_len(bytes), + (len, bytes.len()), + "unexpected usize decoding" + ); + } + + #[test] + fn test_short_vec_encode_len() { + assert_len_encoding(0x0, &[0x0]); + assert_len_encoding(0x7f, &[0x7f]); + assert_len_encoding(0x80, &[0x80, 0x01]); + assert_len_encoding(0xff, &[0xff, 0x01]); + assert_len_encoding(0x100, &[0x80, 0x02]); + assert_len_encoding(0x7fff, &[0xff, 0xff, 0x01]); + assert_len_encoding(0x200000, &[0x80, 0x80, 0x80, 0x01]); + assert_len_encoding(0x7ffffffff, &[0xff, 0xff, 0xff, 0xff, 0x7f]); + } + + #[test] + #[should_panic] + fn test_short_vec_decode_zero_len() { + decode_len(&[]); + } + + #[test] + fn test_short_vec_u8() { + let vec = ShortVec(vec![4u8; 32]); + let bytes = serialize(&vec).unwrap(); + assert_eq!(bytes.len(), vec.0.len() + 1); + + let vec1: ShortVec = deserialize(&bytes).unwrap(); + assert_eq!(vec.0, vec1.0); + } +} diff --git a/sdk/src/transaction.rs b/sdk/src/transaction.rs index 5dbb0b46f..e71e60e1e 100644 --- a/sdk/src/transaction.rs +++ b/sdk/src/transaction.rs @@ -4,9 +4,9 @@ use crate::hash::Hash; use crate::instruction::{CompiledInstruction, Instruction, InstructionError}; use crate::message::Message; use crate::pubkey::Pubkey; +use crate::short_vec; use crate::signature::{KeypairUtil, Signature}; use bincode::serialize; -use serde::Serialize; /// Reasons a transaction might be rejected. #[derive(Debug, PartialEq, Eq, Clone)] @@ -51,17 +51,26 @@ pub enum TransactionError { pub struct Transaction { /// A set of digital signatures of `account_keys`, `program_ids`, `recent_blockhash`, `fee` and `instructions`, signed by the first /// signatures.len() keys of account_keys + #[serde(with = "short_vec")] pub signatures: Vec, /// All the account keys used by this transaction + + #[serde(with = "short_vec")] pub account_keys: Vec, + /// The id of a recent ledger entry. pub recent_blockhash: Hash, + /// The number of lamports paid for processing and storing of this transaction. pub fee: u64, + /// All the program id keys used to execute this transaction's instructions + #[serde(with = "short_vec")] pub program_ids: Vec, + /// Programs that will be executed in sequence and committed in one atomic transaction if all /// succeed. + #[serde(with = "short_vec")] pub instructions: Vec, } @@ -167,16 +176,15 @@ impl Transaction { } /// Get the transaction data to sign. pub fn message(&self) -> Vec { - let mut data = serialize(&self.account_keys).expect("serialize account_keys"); - let blockhash = serialize(&self.recent_blockhash).expect("serialize recent_blockhash"); - data.extend_from_slice(&blockhash); - let fee_data = serialize(&self.fee).expect("serialize fee"); - data.extend_from_slice(&fee_data); - let program_ids = serialize(&self.program_ids).expect("serialize program_ids"); - data.extend_from_slice(&program_ids); - let instructions = serialize(&self.instructions).expect("serialize instructions"); - data.extend_from_slice(&instructions); - data + let message = Message { + num_signatures: self.signatures.len() as u8, + account_keys: self.account_keys.clone(), + recent_blockhash: self.recent_blockhash, + fee: self.fee, + program_ids: self.program_ids.clone(), + instructions: self.instructions.clone(), + }; + serialize(&message).unwrap() } /// Sign this transaction. @@ -224,6 +232,7 @@ mod tests { use crate::signature::Keypair; use crate::system_instruction::SystemInstruction; use bincode::{deserialize, serialize, serialized_size}; + use std::mem::size_of; #[test] fn test_refs() { @@ -356,18 +365,10 @@ mod tests { let message = Message::new(vec![ix]); assert_eq!( serialized_size(&message.instructions[0]).unwrap() as usize, - expected_instruction_size + size_of::() + 6, // TODO: Don't use serialize_bytes(). + expected_instruction_size, "unexpected Instruction::serialized_size" ); - // These two ways of calculating serialized size should return the same value, but - // currently don't. - assert_eq!( - message.instructions[0].serialized_size().unwrap() as usize + size_of::() + 6, - serialized_size(&message.instructions[0]).unwrap() as usize, - "serialized_size mismatch" - ); - let tx = Transaction::new(&[&alice_keypair], message, Hash::default()); let expected_transaction_size = 1 @@ -384,14 +385,9 @@ mod tests { assert_eq!( serialized_size(&tx).unwrap() as usize, - expected_transaction_size + size_of::(), // TODO: Don't use serialize_bytes() + expected_transaction_size, "unexpected serialized transaction size" ); - assert_eq!( - tx.serialized_size().unwrap() as usize, - serialized_size(&tx).unwrap() as usize, - "unexpected Transaction::serialized_size" - ); } /// Detect binary changes in the serialized transaction data, which could have a downstream @@ -401,18 +397,16 @@ mod tests { assert_eq!( serialize(&create_sample_transaction()).unwrap(), vec![ - 1, 0, 0, 0, 0, 0, 0, 0, 60, 2, 97, 229, 100, 48, 42, 208, 222, 192, 129, 29, 142, - 187, 4, 174, 210, 77, 78, 162, 101, 146, 144, 241, 159, 44, 89, 89, 10, 103, 229, - 94, 92, 240, 124, 0, 83, 22, 216, 2, 112, 193, 158, 93, 210, 144, 222, 144, 13, - 138, 209, 246, 89, 156, 195, 234, 186, 215, 92, 250, 125, 210, 24, 10, 2, 0, 0, 0, - 0, 0, 0, 0, 36, 100, 158, 252, 33, 161, 97, 185, 62, 89, 99, 195, 250, 249, 187, - 189, 171, 118, 241, 90, 248, 14, 68, 219, 231, 62, 157, 5, 142, 27, 210, 117, 1, 1, - 1, 4, 5, 6, 7, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 8, 7, 6, 5, 4, 1, - 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 99, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 4, 5, - 6, 7, 8, 9, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 8, 7, 6, 5, 4, 2, 2, 2, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 0, 0, 0, 0, 0, 0, 0, 1, 2, - 3 + 1, 107, 231, 179, 42, 11, 220, 153, 173, 229, 29, 51, 218, 98, 26, 46, 164, 248, + 228, 118, 244, 191, 192, 198, 228, 190, 119, 21, 52, 66, 25, 124, 247, 192, 73, 48, + 231, 2, 70, 34, 82, 133, 137, 148, 66, 73, 231, 72, 195, 100, 133, 214, 2, 168, + 108, 252, 200, 83, 99, 105, 51, 216, 145, 30, 14, 2, 36, 100, 158, 252, 33, 161, + 97, 185, 62, 89, 99, 195, 250, 249, 187, 189, 171, 118, 241, 90, 248, 14, 68, 219, + 231, 62, 157, 5, 142, 27, 210, 117, 1, 1, 1, 4, 5, 6, 7, 8, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 8, 7, 6, 5, 4, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 99, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 2, 2, 4, 5, 6, 7, 8, 9, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 8, 7, + 6, 5, 4, 2, 2, 2, 1, 0, 2, 0, 1, 3, 1, 2, 3 ] ); }