Add CRC Reader and Writer I/O wrappers (#3322)
* add CRC Reader and Writer I/O wrappers * typo fix and variable rename
This commit is contained in:
parent
a15927f8d0
commit
ca69b7b75b
|
@ -229,6 +229,11 @@ name = "bs58"
|
|||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "build_const"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "bv"
|
||||
version = "0.11.0"
|
||||
|
@ -370,6 +375,14 @@ dependencies = [
|
|||
"libc 0.2.50 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crc"
|
||||
version = "1.8.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"build_const 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crc32fast"
|
||||
version = "1.1.2"
|
||||
|
@ -1973,6 +1986,7 @@ dependencies = [
|
|||
"bs58 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"byteorder 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"chrono 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"crc 1.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"hashbrown 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"hex-literal 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"indexmap 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
|
@ -3021,6 +3035,7 @@ dependencies = [
|
|||
"checksum block-padding 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "4fc4358306e344bf9775d0197fd00d2603e5afb0771bb353538630f022068ea3"
|
||||
"checksum bloom 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "d00ac8e5056d6d65376a3c1aa5c7c34850d6949ace17f0266953a254eb3d6fe8"
|
||||
"checksum bs58 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "0de79cfb98e7aa9988188784d8664b4b5dad6eaaa0863b91d9a4ed871d4f7a42"
|
||||
"checksum build_const 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "39092a32794787acd8525ee150305ff051b0aa6cc2abaf193924f5ab05425f39"
|
||||
"checksum bv 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6cd4ae9e585e783756cd14b0ea21863acdfbb6383664ac2f7c9ef8d180a14727"
|
||||
"checksum byte-tools 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "560c32574a12a89ecd91f5e742165893f86e3ab98d21f8ea548658eb9eef5f40"
|
||||
"checksum byte-tools 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "980479e6fde23246dfb54d47580d66b4e99202e7579c5eaa9fe10ecb5ebd2182"
|
||||
|
@ -3039,6 +3054,7 @@ dependencies = [
|
|||
"checksum constant_time_eq 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8ff012e225ce166d4422e0e78419d901719760f62ae2b7969ca6b564d1b54a9e"
|
||||
"checksum core-foundation 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "286e0b41c3a20da26536c6000a280585d519fd07b3956b43aed8a79e9edce980"
|
||||
"checksum core-foundation-sys 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "716c271e8613ace48344f723b60b900a93150271e5be206212d052bbc0883efa"
|
||||
"checksum crc 1.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d663548de7f5cca343f1e0a48d14dcfb0e9eb4e079ec58883b7251539fa10aeb"
|
||||
"checksum crc32fast 1.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e91d5240c6975ef33aeb5f148f35275c25eda8e8a5f95abe421978b05b8bf192"
|
||||
"checksum crossbeam-channel 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "137bc235f622ffaa0428e3854e24acb53291fc0b3ff6fb2cb75a8be6fb02f06b"
|
||||
"checksum crossbeam-deque 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f739f8c5363aca78cfb059edf753d8f0d36908c348f3d8d1503f03d8b75d9cf3"
|
||||
|
|
|
@ -17,13 +17,14 @@ codecov = { repository = "solana-labs/solana", branch = "master", service = "git
|
|||
chacha = []
|
||||
cuda = []
|
||||
erasure = []
|
||||
kvstore = ["memmap"]
|
||||
kvstore = ["crc", "memmap"]
|
||||
|
||||
[dependencies]
|
||||
bincode = "1.1.2"
|
||||
bs58 = "0.2.0"
|
||||
byteorder = "1.3.1"
|
||||
chrono = { version = "0.4.0", features = ["serde"] }
|
||||
crc = { version = "1.8.1", optional = true }
|
||||
hashbrown = "0.1.8"
|
||||
indexmap = "1.0"
|
||||
itertools = "0.8.0"
|
||||
|
|
|
@ -1,7 +1,9 @@
|
|||
use byteorder::{BigEndian, ByteOrder};
|
||||
use crc::crc32;
|
||||
use memmap::Mmap;
|
||||
|
||||
use std::cmp;
|
||||
use std::fs::File;
|
||||
use std::io::{self, BufWriter, Seek, SeekFrom, Write};
|
||||
use std::io::{self, BufWriter, Read, Seek, SeekFrom, Write};
|
||||
use std::ops::Deref;
|
||||
use std::sync::{Arc, RwLock};
|
||||
|
||||
|
@ -25,12 +27,125 @@ pub struct SharedWriter {
|
|||
pos: u64,
|
||||
}
|
||||
|
||||
/// `Write` adapter that groups outgoing bytes into fixed-size blocks, each
/// prefixed with an 8-byte header (4-byte CRC32 followed by a 4-byte payload
/// length, both big-endian).
#[derive(Debug)]
pub struct CRCWriter<W>
where
    W: Write,
{
    /// Destination that receives the encoded blocks.
    writer: W,
    /// Staging area for one whole block: header in the first 8 bytes, payload
    /// after it.
    buffer: Vec<u8>,
    /// Number of payload bytes currently staged in `buffer`.
    position: usize,
    /// Maximum number of payload bytes per block (block size minus the header).
    capacity: usize,
}
|
||||
|
||||
/// `Read` adapter that consumes fixed-size, CRC-protected blocks from an inner
/// reader and hands out only the validated payload bytes.
#[derive(Debug)]
pub struct CRCReader<R>
where
    R: Read,
{
    /// Source of the encoded blocks.
    reader: R,
    /// Payload of the most recently loaded block.
    buffer: Vec<u8>,
    /// Offset into `buffer` of the next byte to hand to the caller.
    position: usize,
    /// Size in bytes of one encoded block, header included.
    chunk_size: usize,
}
|
||||
|
||||
impl SharedWriter {
|
||||
pub fn new(buf: Arc<RwLock<Vec<u8>>>) -> SharedWriter {
|
||||
SharedWriter { buf, pos: 0 }
|
||||
}
|
||||
}
|
||||
|
||||
impl<W: Write> CRCWriter<W> {
|
||||
#[allow(dead_code)]
|
||||
pub fn new(inner: W, chunk_size: usize) -> CRCWriter<W> {
|
||||
if chunk_size <= 8 {
|
||||
panic!("chunk_size must be > 8");
|
||||
}
|
||||
|
||||
CRCWriter {
|
||||
writer: inner,
|
||||
buffer: vec![0; chunk_size],
|
||||
position: 0,
|
||||
capacity: chunk_size - 8,
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn into_inner(mut self) -> io::Result<W> {
|
||||
self.flush()?;
|
||||
Ok(self.writer)
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn get_ref(&self) -> &W {
|
||||
&self.writer
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn get_mut(&mut self) -> &mut W {
|
||||
&mut self.writer
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: Read> CRCReader<R> {
|
||||
#[allow(dead_code)]
|
||||
pub fn new(inner: R, chunk_size: usize) -> CRCReader<R> {
|
||||
if chunk_size <= 8 {
|
||||
panic!("chunk_size must be > 8");
|
||||
}
|
||||
|
||||
CRCReader {
|
||||
reader: inner,
|
||||
buffer: vec![0; chunk_size - 8],
|
||||
position: chunk_size,
|
||||
chunk_size,
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn into_inner(self) -> R {
|
||||
self.reader
|
||||
}
|
||||
|
||||
fn load_block(&mut self) -> io::Result<()> {
|
||||
self.buffer.clear();
|
||||
self.position = 0;
|
||||
|
||||
let mut block_buffer = vec![0; self.chunk_size];
|
||||
let mut block_position = 0;
|
||||
|
||||
while block_position < self.chunk_size {
|
||||
let bytes_read = self.reader.read(&mut block_buffer[block_position..])?;
|
||||
if bytes_read == 0 {
|
||||
break;
|
||||
}
|
||||
block_position += bytes_read
|
||||
}
|
||||
|
||||
if block_position < self.chunk_size {
|
||||
return Err(io::ErrorKind::UnexpectedEof.into());
|
||||
}
|
||||
|
||||
assert_eq!(block_position, self.chunk_size);
|
||||
|
||||
let stored_digest = BigEndian::read_u32(&block_buffer[0..4]);
|
||||
let payload_len = BigEndian::read_u32(&block_buffer[4..8]) as usize;
|
||||
if payload_len + 8 > block_buffer.len() {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::InvalidData,
|
||||
"CRCReader: invalid block size",
|
||||
));
|
||||
}
|
||||
let payload = &block_buffer[8..8 + payload_len];
|
||||
let computed_digest = crc32::checksum_ieee(&block_buffer[4..8 + payload_len]);
|
||||
|
||||
if computed_digest != stored_digest {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::InvalidData,
|
||||
"CRCReader: CRC validation failed",
|
||||
));
|
||||
}
|
||||
|
||||
self.buffer.extend_from_slice(payload);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for MemMap {
|
||||
type Target = [u8];
|
||||
|
||||
|
@ -48,6 +163,69 @@ impl Deref for MemMap {
|
|||
}
|
||||
}
|
||||
|
||||
impl<W> Write for CRCWriter<W>
|
||||
where
|
||||
W: Write,
|
||||
{
|
||||
fn write(&mut self, buffer: &[u8]) -> io::Result<usize> {
|
||||
let mut written = 0;
|
||||
|
||||
while written < buffer.len() {
|
||||
let batch_len = (&mut self.buffer[8 + self.position..]).write(&buffer[written..])?;
|
||||
|
||||
self.position += batch_len;
|
||||
written += batch_len;
|
||||
|
||||
if self.position >= self.capacity {
|
||||
self.flush()?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(written)
|
||||
}
|
||||
|
||||
fn flush(&mut self) -> io::Result<()> {
|
||||
BigEndian::write_u32(&mut self.buffer[4..8], self.position as u32);
|
||||
let total_len = self.position + 8;
|
||||
|
||||
// crc over length + payload
|
||||
let digest = crc32::checksum_ieee(&self.buffer[4..total_len]);
|
||||
|
||||
BigEndian::write_u32(&mut self.buffer[0..4], digest);
|
||||
self.writer.write_all(&self.buffer)?;
|
||||
|
||||
self.position = 0;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl<R> Read for CRCReader<R>
|
||||
where
|
||||
R: Read,
|
||||
{
|
||||
fn read(&mut self, buffer: &mut [u8]) -> io::Result<usize> {
|
||||
let mut write_position = 0;
|
||||
|
||||
while write_position < buffer.len() {
|
||||
if self.position >= self.buffer.len() {
|
||||
self.load_block()?;
|
||||
}
|
||||
|
||||
let bytes_available = self.buffer.len() - self.position;
|
||||
let space_remaining = buffer.len() - write_position;
|
||||
let copy_len = cmp::min(bytes_available, space_remaining);
|
||||
|
||||
(&mut buffer[write_position..write_position + copy_len])
|
||||
.copy_from_slice(&self.buffer[self.position..self.position + copy_len]);
|
||||
|
||||
write_position += copy_len;
|
||||
self.position += copy_len;
|
||||
}
|
||||
|
||||
Ok(write_position)
|
||||
}
|
||||
}
|
||||
|
||||
impl Write for SharedWriter {
|
||||
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
|
||||
use std::cmp;
|
||||
|
@ -129,3 +307,117 @@ impl Seek for Writer {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;

    /// Deterministic test data: byte `i` has the value `i % 255`.
    fn pattern(len: usize) -> Vec<u8> {
        (0..len).map(|i| (i % 255) as u8).collect()
    }

    /// Pushes `data` through a `CRCWriter` backed by a `Vec` and returns the
    /// encoded stream.
    fn encode(data: &[u8], block_size: usize) -> Vec<u8> {
        let mut writer = CRCWriter::new(Vec::new(), block_size);
        writer.write_all(data).unwrap();
        writer.into_inner().unwrap()
    }

    // The encoded stream is a whole number of blocks and the first payload
    // starts right after the 8-byte header.
    #[test]
    fn test_crc_write() {
        for &block_size in &[256, 512, 1024, 2048] {
            for &n_bytes in &[8, 128, 1024, 1024 * 8] {
                let encoded = encode(&pattern(n_bytes), block_size);

                let space_per_block = block_size - 8;
                let mut blocks_expected = n_bytes / space_per_block;
                if n_bytes % space_per_block != 0 {
                    blocks_expected += 1;
                }

                assert_eq!(encoded.len(), blocks_expected * block_size);
                assert_eq!(&encoded[8..16], &[0, 1, 2, 3, 4, 5, 6, 7]);
            }
        }
    }

    // Data written through the writer comes back unchanged from the reader.
    #[test]
    fn test_crc_io() {
        const BLK_SIZE: usize = 1024;
        let bytes = pattern(512 * 1024);

        let buffer = encode(&bytes, BLK_SIZE);
        assert_eq!(&buffer[8..16], &[0, 1, 2, 3, 4, 5, 6, 7]);

        let mut reader = CRCReader::new(&buffer[..], BLK_SIZE);

        let mut retrieved = Vec::with_capacity(512 * 1024);
        let mut scratch = [0; 1024];
        loop {
            match reader.read(&mut scratch) {
                // Either outcome marks the end of the stream for this test.
                Ok(0) | Err(_) => break,
                Ok(amt) => retrieved.extend_from_slice(&scratch[..amt]),
            }
        }

        assert_eq!(&retrieved[..8], &[0, 1, 2, 3, 4, 5, 6, 7]);

        assert_eq!(bytes.len(), retrieved.len());
        assert_eq!(bytes, retrieved);
    }

    // Altering a payload byte makes the reader report invalid data.
    #[test]
    fn test_crc_validation() {
        const BLK_SIZE: usize = 1024;
        let n_bytes = 512 * 1024;

        let mut buffer = encode(&pattern(n_bytes), BLK_SIZE);
        buffer[BLK_SIZE / 2] += 1;

        let mut reader = CRCReader::new(&buffer[..], BLK_SIZE);

        let mut retrieved = vec![];
        let res = reader.read_to_end(&mut retrieved);
        assert_eq!(res.unwrap_err().kind(), io::ErrorKind::InvalidData);
    }

    // Reading with a block size other than the one used for writing fails
    // with invalid data.
    #[test]
    fn test_crc_size_mismatch() {
        const BLK_SIZE: usize = 1024;
        let n_bytes = 512 * 1024;

        let mut buffer = encode(&pattern(n_bytes), BLK_SIZE);
        buffer.drain((n_bytes - 512)..n_bytes);

        for &size_diff in &[100, 1, 25, BLK_SIZE - 9] {
            let mut reader = CRCReader::new(&buffer[..], BLK_SIZE - size_diff);

            let mut retrieved = vec![];
            let res = reader.read_to_end(&mut retrieved);
            assert_eq!(res.unwrap_err().kind(), io::ErrorKind::InvalidData);
        }
    }

    // A block size that leaves no room for payload is rejected.
    #[should_panic]
    #[test]
    fn test_crc_writer_invalid_chunk_size() {
        let _ = CRCWriter::new(Vec::new(), 8);
    }

    // Same constraint on the reader side.
    #[should_panic]
    #[test]
    fn test_crc_reader_invalid_chunk_size() {
        let _ = CRCReader::new(io::empty(), 8);
    }
}
|
||||
|
|
Loading…
Reference in New Issue