#[macro_use] extern crate log; use solana_sdk::hash::{Hash, Hasher}; use std::fs::File; use std::io::{self, BufReader, ErrorKind, Read, Seek, SeekFrom}; use std::mem::size_of; use std::path::Path; pub fn sample_file(in_path: &Path, sample_offsets: &[u64]) -> io::Result { let in_file = File::open(in_path)?; let metadata = in_file.metadata()?; let mut buffer_file = BufReader::new(in_file); let mut hasher = Hasher::default(); let sample_size = size_of::(); let sample_size64 = sample_size as u64; let mut buf = vec![0; sample_size]; let file_len = metadata.len(); if file_len < sample_size64 { return Err(io::Error::new(ErrorKind::Other, "file too short!")); } for offset in sample_offsets { if *offset > (file_len - sample_size64) / sample_size64 { return Err(io::Error::new(ErrorKind::Other, "offset too large")); } buffer_file.seek(SeekFrom::Start(*offset * sample_size64))?; trace!("sampling @ {} ", *offset); match buffer_file.read(&mut buf) { Ok(size) => { assert_eq!(size, buf.len()); hasher.hash(&buf); } Err(e) => { warn!("Error sampling file"); return Err(e); } } } Ok(hasher.result()) } #[cfg(test)] mod tests { use super::*; use rand::{thread_rng, Rng}; use std::fs::{create_dir_all, remove_file}; use std::io::Write; use std::path::PathBuf; extern crate hex; fn tmp_file_path(name: &str) -> PathBuf { use std::env; let out_dir = env::var("FARF_DIR").unwrap_or_else(|_| "farf".to_string()); let mut rand_bits = [0u8; 32]; thread_rng().fill(&mut rand_bits[..]); let mut path = PathBuf::new(); path.push(out_dir); path.push("tmp"); create_dir_all(&path).unwrap(); path.push(format!("{}-{:?}", name, hex::encode(rand_bits))); println!("path: {:?}", path); path } #[test] fn test_sample_file() { solana_logger::setup(); let in_path = tmp_file_path("test_sample_file_input.txt"); let num_strings = 4096; let string = "12foobar"; { let mut in_file = File::create(&in_path).unwrap(); for _ in 0..num_strings { in_file.write(string.as_bytes()).unwrap(); } } let num_samples = (string.len() * num_strings / size_of::()) as u64; let samples: Vec<_> = (0..num_samples).collect(); let res = sample_file(&in_path, samples.as_slice()); let ref_hash: Hash = Hash::new(&[ 173, 251, 182, 165, 10, 54, 33, 150, 133, 226, 106, 150, 99, 192, 179, 1, 230, 144, 151, 126, 18, 191, 54, 67, 249, 140, 230, 160, 56, 30, 170, 52, ]); let res = res.unwrap(); assert_eq!(res, ref_hash); // Sample just past the end assert!(sample_file(&in_path, &[num_samples]).is_err()); remove_file(&in_path).unwrap(); } #[test] fn test_sample_file_invalid_offset() { let in_path = tmp_file_path("test_sample_file_invalid_offset_input.txt"); { let mut in_file = File::create(&in_path).unwrap(); for _ in 0..4096 { in_file.write("123456foobar".as_bytes()).unwrap(); } } let samples = [0, 200000]; let res = sample_file(&in_path, &samples); assert!(res.is_err()); remove_file(in_path).unwrap(); } #[test] fn test_sample_file_missing_file() { let in_path = tmp_file_path("test_sample_file_that_doesnt_exist.txt"); let samples = [0, 5]; let res = sample_file(&in_path, &samples); assert!(res.is_err()); } }