Add CRC Reader and Writer I/O wrappers (#3322)

* add CRC Reader and Writer I/O wrappers

* typo fix and variable rename
This commit is contained in:
Mark 2019-03-15 13:17:49 -05:00 committed by GitHub
parent a15927f8d0
commit ca69b7b75b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 312 additions and 3 deletions

16
Cargo.lock generated
View File

@ -229,6 +229,11 @@ name = "bs58"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "build_const"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "bv"
version = "0.11.0"
@ -370,6 +375,14 @@ dependencies = [
"libc 0.2.50 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "crc"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"build_const 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "crc32fast"
version = "1.1.2"
@ -1973,6 +1986,7 @@ dependencies = [
"bs58 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
"byteorder 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"chrono 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
"crc 1.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
"hashbrown 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
"hex-literal 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
"indexmap 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
@ -3021,6 +3035,7 @@ dependencies = [
"checksum block-padding 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "4fc4358306e344bf9775d0197fd00d2603e5afb0771bb353538630f022068ea3"
"checksum bloom 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "d00ac8e5056d6d65376a3c1aa5c7c34850d6949ace17f0266953a254eb3d6fe8"
"checksum bs58 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "0de79cfb98e7aa9988188784d8664b4b5dad6eaaa0863b91d9a4ed871d4f7a42"
"checksum build_const 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "39092a32794787acd8525ee150305ff051b0aa6cc2abaf193924f5ab05425f39"
"checksum bv 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6cd4ae9e585e783756cd14b0ea21863acdfbb6383664ac2f7c9ef8d180a14727"
"checksum byte-tools 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "560c32574a12a89ecd91f5e742165893f86e3ab98d21f8ea548658eb9eef5f40"
"checksum byte-tools 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "980479e6fde23246dfb54d47580d66b4e99202e7579c5eaa9fe10ecb5ebd2182"
@ -3039,6 +3054,7 @@ dependencies = [
"checksum constant_time_eq 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8ff012e225ce166d4422e0e78419d901719760f62ae2b7969ca6b564d1b54a9e"
"checksum core-foundation 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "286e0b41c3a20da26536c6000a280585d519fd07b3956b43aed8a79e9edce980"
"checksum core-foundation-sys 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "716c271e8613ace48344f723b60b900a93150271e5be206212d052bbc0883efa"
"checksum crc 1.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d663548de7f5cca343f1e0a48d14dcfb0e9eb4e079ec58883b7251539fa10aeb"
"checksum crc32fast 1.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e91d5240c6975ef33aeb5f148f35275c25eda8e8a5f95abe421978b05b8bf192"
"checksum crossbeam-channel 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "137bc235f622ffaa0428e3854e24acb53291fc0b3ff6fb2cb75a8be6fb02f06b"
"checksum crossbeam-deque 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f739f8c5363aca78cfb059edf753d8f0d36908c348f3d8d1503f03d8b75d9cf3"

View File

@ -17,13 +17,14 @@ codecov = { repository = "solana-labs/solana", branch = "master", service = "git
chacha = []
cuda = []
erasure = []
kvstore = ["memmap"]
kvstore = ["crc", "memmap"]
[dependencies]
bincode = "1.1.2"
bs58 = "0.2.0"
byteorder = "1.3.1"
chrono = { version = "0.4.0", features = ["serde"] }
crc = { version = "1.8.1", optional = true }
hashbrown = "0.1.8"
indexmap = "1.0"
itertools = "0.8.0"

View File

@ -1,7 +1,9 @@
use byteorder::{BigEndian, ByteOrder};
use crc::crc32;
use memmap::Mmap;
use std::cmp;
use std::fs::File;
use std::io::{self, BufWriter, Seek, SeekFrom, Write};
use std::io::{self, BufWriter, Read, Seek, SeekFrom, Write};
use std::ops::Deref;
use std::sync::{Arc, RwLock};
@ -25,12 +27,125 @@ pub struct SharedWriter {
pos: u64,
}
/// Write adapter that frames outgoing bytes into fixed-size blocks, each carrying a
/// CRC32 digest and a payload length in an 8-byte header.
///
/// Payload bytes are staged in `buffer` and handed to the wrapped writer one whole
/// block at a time.
#[derive(Debug)]
pub struct CRCWriter<W>
where
    W: Write,
{
    /// Destination that receives the encoded blocks.
    writer: W,
    /// Staging area for exactly one block: 8 header bytes followed by the payload.
    buffer: Vec<u8>,
    /// Number of payload bytes currently staged in `buffer`.
    position: usize,
    /// Maximum number of payload bytes per block (block size minus the 8-byte header).
    capacity: usize,
}
/// Read adapter that decodes a stream of fixed-size blocks, each carrying a CRC32
/// digest and a payload length in an 8-byte header.
#[derive(Debug)]
pub struct CRCReader<R>
where
    R: Read,
{
    /// Source of encoded blocks.
    reader: R,
    /// Payload of the most recently decoded block.
    buffer: Vec<u8>,
    /// Index into `buffer` of the next payload byte to hand out.
    position: usize,
    /// Size in bytes of one encoded block, header included.
    chunk_size: usize,
}
impl SharedWriter {
pub fn new(buf: Arc<RwLock<Vec<u8>>>) -> SharedWriter {
SharedWriter { buf, pos: 0 }
}
}
impl<W: Write> CRCWriter<W> {
    /// Wraps `inner` so that data is emitted in blocks of `chunk_size` bytes.
    ///
    /// Every block reserves 8 bytes for its header, leaving `chunk_size - 8` bytes
    /// of payload per block.
    ///
    /// # Panics
    ///
    /// Panics if `chunk_size` is 8 or less, since no payload would fit.
    #[allow(dead_code)]
    pub fn new(inner: W, chunk_size: usize) -> CRCWriter<W> {
        assert!(chunk_size > 8, "chunk_size must be > 8");

        let payload_capacity = chunk_size - 8;
        CRCWriter {
            writer: inner,
            buffer: vec![0; chunk_size],
            position: 0,
            capacity: payload_capacity,
        }
    }

    /// Flushes this writer and then returns the wrapped writer.
    ///
    /// # Errors
    ///
    /// Returns the error reported by `flush`; the wrapped writer is dropped in that case.
    #[allow(dead_code)]
    pub fn into_inner(mut self) -> io::Result<W> {
        self.flush().map(move |()| self.writer)
    }

    /// Borrows the wrapped writer.
    #[allow(dead_code)]
    pub fn get_ref(&self) -> &W {
        let inner = &self.writer;
        inner
    }

    /// Mutably borrows the wrapped writer.
    ///
    /// Bytes written directly through this reference bypass the block framing.
    #[allow(dead_code)]
    pub fn get_mut(&mut self) -> &mut W {
        let inner = &mut self.writer;
        inner
    }
}
impl<R: Read> CRCReader<R> {
    /// Wraps `inner`, which must yield blocks of exactly `chunk_size` bytes.
    ///
    /// # Panics
    ///
    /// Panics if `chunk_size` is 8 or less, since the 8-byte header alone would
    /// fill the block.
    #[allow(dead_code)]
    pub fn new(inner: R, chunk_size: usize) -> CRCReader<R> {
        if chunk_size <= 8 {
            panic!("chunk_size must be > 8");
        }

        CRCReader {
            reader: inner,
            buffer: vec![0; chunk_size - 8],
            // Starts past the end of `buffer`, so nothing counts as available
            // until a block has been loaded.
            position: chunk_size,
            chunk_size,
        }
    }

    /// Consumes the adapter and returns the wrapped reader.
    ///
    /// Payload bytes that were decoded but not yet handed out are discarded.
    #[allow(dead_code)]
    pub fn into_inner(self) -> R {
        self.reader
    }

    /// Reads and validates the next block, replacing `buffer` with its payload.
    ///
    /// Block layout: bytes `0..4` hold the big-endian CRC32 (IEEE) of everything
    /// from byte 4 to the end of the payload, bytes `4..8` hold the big-endian
    /// payload length, and the payload starts at byte 8. Bytes after the payload
    /// are padding and are not checked.
    ///
    /// # Errors
    ///
    /// * `UnexpectedEof` if the source ends before a whole block was read.
    /// * `InvalidData` if the length field exceeds the block's payload space or
    ///   the digest does not match.
    /// * Any other error reported by the wrapped reader.
    ///
    /// On error `buffer` is left empty and `position` is 0.
    fn load_block(&mut self) -> io::Result<()> {
        self.buffer.clear();
        self.position = 0;

        // `read_exact` retries on `Interrupted` and reports a short block as
        // `UnexpectedEof`.
        let mut block_buffer = vec![0; self.chunk_size];
        self.reader.read_exact(&mut block_buffer)?;

        let stored_digest = BigEndian::read_u32(&block_buffer[0..4]);
        let payload_len = BigEndian::read_u32(&block_buffer[4..8]) as usize;

        // Compare against the remaining space instead of computing
        // `payload_len + 8`, which could overflow `usize` on 32-bit targets when
        // the length field is corrupt. `new` guarantees `chunk_size > 8`.
        let max_payload = block_buffer.len() - 8;
        if payload_len > max_payload {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                "CRCReader: invalid block size",
            ));
        }

        let payload_end = 8 + payload_len;
        // The digest covers the length field and the payload.
        let computed_digest = crc32::checksum_ieee(&block_buffer[4..payload_end]);
        if computed_digest != stored_digest {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                "CRCReader: CRC validation failed",
            ));
        }

        self.buffer.extend_from_slice(&block_buffer[8..payload_end]);
        Ok(())
    }
}
impl Deref for MemMap {
type Target = [u8];
@ -48,6 +163,69 @@ impl Deref for MemMap {
}
}
impl<W> Write for CRCWriter<W>
where
    W: Write,
{
    /// Stages all of `buffer`, emitting a block to the wrapped writer each time
    /// the staging area fills up.
    ///
    /// On success the whole input has been accepted and `buffer.len()` is
    /// returned. If emitting a block fails the error is returned as-is; bytes
    /// from this call that were already emitted in earlier blocks are not
    /// reported back to the caller.
    fn write(&mut self, buffer: &[u8]) -> io::Result<usize> {
        let mut written = 0;

        while written < buffer.len() {
            // Writing into a `&mut [u8]` copies as many bytes as fit in the
            // remaining payload space.
            let batch_len = (&mut self.buffer[8 + self.position..]).write(&buffer[written..])?;

            self.position += batch_len;
            written += batch_len;

            if self.position >= self.capacity {
                // Only the block goes out here; the wrapped writer is not
                // flushed, so a buffering inner writer keeps batching.
                self.write_block()?;
            }
        }

        Ok(written)
    }

    /// Emits any staged payload as a final (possibly partial) block and flushes
    /// the wrapped writer.
    ///
    /// Nothing is emitted when no payload is staged, so flushing on a block
    /// boundary, or flushing repeatedly, does not append empty blocks.
    fn flush(&mut self) -> io::Result<()> {
        if self.position > 0 {
            self.write_block()?;
        }

        self.writer.flush()
    }
}

impl<W: Write> CRCWriter<W> {
    /// Fills in the header for the staged payload and writes the whole block.
    ///
    /// Layout: bytes `0..4` hold the big-endian CRC32 (IEEE) of the length field
    /// plus payload, bytes `4..8` hold the big-endian payload length. The full
    /// staging buffer is always written, so every block has the same size; bytes
    /// after the payload are padding left over from earlier use of the buffer.
    fn write_block(&mut self) -> io::Result<()> {
        // NOTE(review): the cast truncates if a block ever holds more than
        // u32::MAX payload bytes; `new` does not cap `chunk_size`.
        BigEndian::write_u32(&mut self.buffer[4..8], self.position as u32);

        let total_len = self.position + 8;

        // crc over length + payload
        let digest = crc32::checksum_ieee(&self.buffer[4..total_len]);
        BigEndian::write_u32(&mut self.buffer[0..4], digest);

        self.writer.write_all(&self.buffer)?;

        // Reset only after the block went out, so a failed write can be retried.
        self.position = 0;
        Ok(())
    }
}
impl<R> Read for CRCReader<R>
where
R: Read,
{
fn read(&mut self, buffer: &mut [u8]) -> io::Result<usize> {
let mut write_position = 0;
while write_position < buffer.len() {
if self.position >= self.buffer.len() {
self.load_block()?;
}
let bytes_available = self.buffer.len() - self.position;
let space_remaining = buffer.len() - write_position;
let copy_len = cmp::min(bytes_available, space_remaining);
(&mut buffer[write_position..write_position + copy_len])
.copy_from_slice(&self.buffer[self.position..self.position + copy_len]);
write_position += copy_len;
self.position += copy_len;
}
Ok(write_position)
}
}
impl Write for SharedWriter {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
use std::cmp;
@ -129,3 +307,117 @@ impl Seek for Writer {
}
}
}
#[cfg(test)]
mod test {
    use super::*;

    /// Returns `len` bytes following the repeating pattern 0, 1, ..., 254.
    fn pattern(len: usize) -> Vec<u8> {
        (0..len).map(|i| (i % 255) as u8).collect()
    }

    /// Runs `payload` through a `CRCWriter` over a `Vec` and returns the encoded bytes.
    fn encode(payload: &[u8], block_size: usize) -> Vec<u8> {
        let mut writer = CRCWriter::new(Vec::new(), block_size);
        writer.write_all(payload).unwrap();
        writer.into_inner().unwrap()
    }

    /// The encoded length is a whole number of blocks and the payload starts
    /// right after the 8-byte header.
    #[test]
    fn test_crc_write() {
        let block_sizes: [usize; 4] = [256, 512, 1024, 2048];
        let byte_counts: [usize; 4] = [8, 128, 1024, 1024 * 8];

        for &block_size in block_sizes.iter() {
            for &n_bytes in byte_counts.iter() {
                let encoded = encode(&pattern(n_bytes), block_size);

                let space_per_block = block_size - 8;
                let trailing_block = if n_bytes % space_per_block != 0 { 1 } else { 0 };
                let blocks_expected = n_bytes / space_per_block + trailing_block;

                assert_eq!(encoded.len(), blocks_expected * block_size);
                assert_eq!(&encoded[8..16], &[0, 1, 2, 3, 4, 5, 6, 7]);
            }
        }
    }

    /// Data written through `CRCWriter` comes back unchanged through `CRCReader`.
    #[test]
    fn test_crc_io() {
        const BLK_SIZE: usize = 1024;
        const PAYLOAD_LEN: usize = 512 * 1024;

        let bytes = pattern(PAYLOAD_LEN);
        let encoded = encode(&bytes, BLK_SIZE);
        assert_eq!(&encoded[8..16], &[0, 1, 2, 3, 4, 5, 6, 7]);

        let mut reader = CRCReader::new(&encoded[..], BLK_SIZE);
        let mut retrieved = Vec::with_capacity(PAYLOAD_LEN);
        let mut scratch = [0u8; 1024];

        // Keep reading until the reader reports an error or an empty read.
        loop {
            match reader.read(&mut scratch) {
                Ok(0) | Err(_) => break,
                Ok(amt) => retrieved.extend_from_slice(&scratch[..amt]),
            }
        }

        assert_eq!(&retrieved[..8], &[0, 1, 2, 3, 4, 5, 6, 7]);
        assert_eq!(bytes.len(), retrieved.len());
        assert_eq!(bytes, retrieved);
    }

    /// A single altered payload byte makes the read fail with `InvalidData`.
    #[test]
    fn test_crc_validation() {
        const BLK_SIZE: usize = 1024;

        let mut encoded = encode(&pattern(512 * 1024), BLK_SIZE);
        // Corrupt one byte in the middle of the first block.
        encoded[BLK_SIZE / 2] += 1;

        let mut reader = CRCReader::new(&encoded[..], BLK_SIZE);
        let mut retrieved = Vec::new();
        let outcome = reader.read_to_end(&mut retrieved);

        assert_eq!(outcome.unwrap_err().kind(), io::ErrorKind::InvalidData);
    }

    /// Reading with a block size other than the one used for writing fails
    /// with `InvalidData`.
    #[test]
    fn test_crc_size_mismatch() {
        const BLK_SIZE: usize = 1024;
        let n_bytes: usize = 512 * 1024;

        let mut encoded = encode(&pattern(n_bytes), BLK_SIZE);
        encoded.drain((n_bytes - 512)..n_bytes);

        let size_diffs = [100, 1, 25, BLK_SIZE - 9];
        for &size_diff in size_diffs.iter() {
            let mut reader = CRCReader::new(&encoded[..], BLK_SIZE - size_diff);
            let mut retrieved = Vec::new();
            let outcome = reader.read_to_end(&mut retrieved);

            assert_eq!(outcome.unwrap_err().kind(), io::ErrorKind::InvalidData);
        }
    }

    /// A block size that leaves no room for payload is rejected by the writer.
    #[should_panic]
    #[test]
    fn test_crc_writer_invalid_chunk_size() {
        let _ = CRCWriter::new(Vec::new(), 8);
    }

    /// A block size that leaves no room for payload is rejected by the reader.
    #[should_panic]
    #[test]
    fn test_crc_reader_invalid_chunk_size() {
        let _ = CRCReader::new(io::empty(), 8);
    }
}