121 lines
3.7 KiB
Rust
121 lines
3.7 KiB
Rust
#[macro_use]
|
|
extern crate log;
|
|
|
|
use solana_sdk::hash::{Hash, Hasher};
|
|
use std::fs::File;
|
|
use std::io::{self, BufReader, ErrorKind, Read, Seek, SeekFrom};
|
|
use std::mem::size_of;
|
|
use std::path::Path;
|
|
|
|
pub fn sample_file(in_path: &Path, sample_offsets: &[u64]) -> io::Result<Hash> {
|
|
let in_file = File::open(in_path)?;
|
|
let metadata = in_file.metadata()?;
|
|
let mut buffer_file = BufReader::new(in_file);
|
|
|
|
let mut hasher = Hasher::default();
|
|
let sample_size = size_of::<Hash>();
|
|
let sample_size64 = sample_size as u64;
|
|
let mut buf = vec![0; sample_size];
|
|
|
|
let file_len = metadata.len();
|
|
if file_len < sample_size64 {
|
|
return Err(io::Error::new(ErrorKind::Other, "file too short!"));
|
|
}
|
|
for offset in sample_offsets {
|
|
if *offset > (file_len - sample_size64) / sample_size64 {
|
|
return Err(io::Error::new(ErrorKind::Other, "offset too large"));
|
|
}
|
|
buffer_file.seek(SeekFrom::Start(*offset * sample_size64))?;
|
|
trace!("sampling @ {} ", *offset);
|
|
match buffer_file.read(&mut buf) {
|
|
Ok(size) => {
|
|
assert_eq!(size, buf.len());
|
|
hasher.hash(&buf);
|
|
}
|
|
Err(e) => {
|
|
warn!("Error sampling file");
|
|
return Err(e);
|
|
}
|
|
}
|
|
}
|
|
|
|
Ok(hasher.result())
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use rand::{thread_rng, Rng};
    use std::fs::{create_dir_all, remove_file};
    use std::io::Write;
    use std::path::PathBuf;

    extern crate hex;

    /// Returns a randomized file path under `<FARF_DIR>/tmp`, using `farf`
    /// when the `FARF_DIR` environment variable is not set.
    ///
    /// The directory is created if needed; the file itself is not created.
    fn tmp_file_path(name: &str) -> PathBuf {
        use std::env;
        let out_dir = env::var("FARF_DIR").unwrap_or_else(|_| "farf".to_string());
        let mut rand_bits = [0u8; 32];
        thread_rng().fill(&mut rand_bits[..]);

        let mut path = PathBuf::new();
        path.push(out_dir);
        path.push("tmp");
        create_dir_all(&path).unwrap();

        // NOTE(review): `{:?}` on the hex-encoded suffix presumably wraps it
        // in double quotes inside the file name; confirm this is intended.
        path.push(format!("{}-{:?}", name, hex::encode(rand_bits)));
        println!("path: {:?}", path);
        path
    }

    /// Samples every full sample of a known file and checks the resulting
    /// hash against a reference value, then checks that the index just past
    /// the last sample is rejected.
    #[test]
    fn test_sample_file() {
        solana_logger::setup();
        let in_path = tmp_file_path("test_sample_file_input.txt");
        let num_strings = 4096;
        let string = "12foobar";
        {
            let mut in_file = File::create(&in_path).unwrap();
            for _ in 0..num_strings {
                // `write_all` guarantees the whole string lands in the file;
                // plain `write` may write only part of it.
                in_file.write_all(string.as_bytes()).unwrap();
            }
        }
        let num_samples = (string.len() * num_strings / size_of::<Hash>()) as u64;
        let samples: Vec<_> = (0..num_samples).collect();
        let res = sample_file(&in_path, samples.as_slice());
        let ref_hash: Hash = Hash::new(&[
            173, 251, 182, 165, 10, 54, 33, 150, 133, 226, 106, 150, 99, 192, 179, 1, 230, 144,
            151, 126, 18, 191, 54, 67, 249, 140, 230, 160, 56, 30, 170, 52,
        ]);
        let res = res.unwrap();
        assert_eq!(res, ref_hash);

        // Sample just past the end
        assert!(sample_file(&in_path, &[num_samples]).is_err());
        remove_file(&in_path).unwrap();
    }

    /// An offset far beyond the end of the file must produce an error.
    #[test]
    fn test_sample_file_invalid_offset() {
        let in_path = tmp_file_path("test_sample_file_invalid_offset_input.txt");
        {
            let mut in_file = File::create(&in_path).unwrap();
            for _ in 0..4096 {
                in_file.write_all("123456foobar".as_bytes()).unwrap();
            }
        }
        let samples = [0, 200000];
        let res = sample_file(&in_path, &samples);
        assert!(res.is_err());
        remove_file(in_path).unwrap();
    }

    /// Sampling a path where no file was created must produce an error.
    #[test]
    fn test_sample_file_missing_file() {
        let in_path = tmp_file_path("test_sample_file_that_doesnt_exist.txt");
        let samples = [0, 5];
        let res = sample_file(&in_path, &samples);
        assert!(res.is_err());
    }
}
|