2020-03-25 02:46:41 -07:00
use bzip2 ::bufread ::BzDecoder ;
use log ::* ;
use regex ::Regex ;
use solana_sdk ::genesis_config ::GenesisConfig ;
use std ::{
fs ::{ self , File } ,
io ::{ BufReader , Read } ,
path ::{
Component ::{ CurDir , Normal } ,
Path ,
} ,
time ::Instant ,
} ;
use tar ::{
Archive ,
EntryType ::{ Directory , GNUSparse , Regular } ,
} ;
use thiserror ::Error ;
#[ derive(Error, Debug) ]
pub enum UnpackError {
2020-04-29 18:53:34 -07:00
#[ error( " IO error: {0} " ) ]
2020-03-25 02:46:41 -07:00
IO ( #[ from ] std ::io ::Error ) ,
2020-04-29 18:53:34 -07:00
#[ error( " Archive error: {0} " ) ]
2020-03-25 02:46:41 -07:00
Archive ( String ) ,
}
pub type Result < T > = std ::result ::Result < T , UnpackError > ;
const MAX_SNAPSHOT_ARCHIVE_UNPACKED_SIZE : u64 = 500 * 1024 * 1024 * 1024 ; // 500 GiB
const MAX_SNAPSHOT_ARCHIVE_UNPACKED_COUNT : u64 = 500_000 ;
2020-04-29 18:53:34 -07:00
pub const MAX_GENESIS_ARCHIVE_UNPACKED_SIZE : u64 = 10 * 1024 * 1024 ; // 10 MiB
2020-03-25 02:46:41 -07:00
const MAX_GENESIS_ARCHIVE_UNPACKED_COUNT : u64 = 100 ;
/// Add `entry_size` to the running `total_size`, erroring out once the running
/// total exceeds `limit_size`. Uses a saturating add so a hostile header size
/// cannot overflow the accumulator.
fn checked_total_size_sum(total_size: u64, entry_size: u64, limit_size: u64) -> Result<u64> {
    let new_total = total_size.saturating_add(entry_size);
    if new_total <= limit_size {
        Ok(new_total)
    } else {
        Err(UnpackError::Archive(format!(
            "too large archive: {} than limit: {}",
            new_total, limit_size,
        )))
    }
}
/// Increment the running entry count, erroring out once it exceeds
/// `limit_count`.
///
/// Uses `saturating_add` for consistency with `checked_total_size_sum` and to
/// rule out a debug-build overflow panic on pathological input (the plain
/// `+ 1` would panic at `u64::MAX` under debug assertions).
fn checked_total_count_increment(total_count: u64, limit_count: u64) -> Result<u64> {
    let total_count = total_count.saturating_add(1);
    if total_count > limit_count {
        return Err(UnpackError::Archive(format!(
            "too many files in snapshot: {:?}",
            total_count
        )));
    }
    Ok(total_count)
}
/// Map `tar`'s boolean unpack outcome into this module's `Result`, naming the
/// offending `path` in the error message.
fn check_unpack_result(unpack_result: bool, path: String) -> Result<()> {
    if unpack_result {
        Ok(())
    } else {
        Err(UnpackError::Archive(format!("failed to unpack: {:?}", path)))
    }
}
fn unpack_archive < A : Read , P : AsRef < Path > , C > (
archive : & mut Archive < A > ,
unpack_dir : P ,
limit_size : u64 ,
limit_count : u64 ,
entry_checker : C ,
) -> Result < ( ) >
where
C : Fn ( & [ & str ] , tar ::EntryType ) -> bool ,
{
let mut total_size : u64 = 0 ;
let mut total_count : u64 = 0 ;
2020-05-27 10:41:05 -07:00
let mut total_entries = 0 ;
let mut last_log_update = Instant ::now ( ) ;
2020-03-25 02:46:41 -07:00
for entry in archive . entries ( ) ? {
let mut entry = entry ? ;
let path = entry . path ( ) ? ;
let path_str = path . display ( ) . to_string ( ) ;
// Although the `tar` crate safely skips at the actual unpacking, fail
// first by ourselves when there are odd paths like including `..` or /
// for our clearer pattern matching reasoning:
// https://docs.rs/tar/0.4.26/src/tar/entry.rs.html#371
let parts = path . components ( ) . map ( | p | match p {
CurDir = > Some ( " . " ) ,
Normal ( c ) = > c . to_str ( ) ,
_ = > None , // Prefix (for Windows) and RootDir are forbidden
} ) ;
if parts . clone ( ) . any ( | p | p . is_none ( ) ) {
return Err ( UnpackError ::Archive ( format! (
" invalid path found: {:?} " ,
path_str
) ) ) ;
}
let parts : Vec < _ > = parts . map ( | p | p . unwrap ( ) ) . collect ( ) ;
if ! entry_checker ( parts . as_slice ( ) , entry . header ( ) . entry_type ( ) ) {
return Err ( UnpackError ::Archive ( format! (
" extra entry found: {:?} " ,
path_str
) ) ) ;
}
total_size = checked_total_size_sum ( total_size , entry . header ( ) . size ( ) ? , limit_size ) ? ;
total_count = checked_total_count_increment ( total_count , limit_count ) ? ;
// unpack_in does its own sanitization
// ref: https://docs.rs/tar/*/tar/struct.Entry.html#method.unpack_in
2020-05-27 10:41:05 -07:00
check_unpack_result ( entry . unpack_in ( & unpack_dir ) ? , path_str ) ? ;
total_entries + = 1 ;
let now = Instant ::now ( ) ;
if now . duration_since ( last_log_update ) . as_secs ( ) > = 10 {
info! ( " unpacked {} entries so far... " , total_entries ) ;
last_log_update = now ;
}
2020-03-25 02:46:41 -07:00
}
2020-05-27 10:41:05 -07:00
info! ( " unpacked {} entries total " , total_entries ) ;
2020-03-25 02:46:41 -07:00
Ok ( ( ) )
}
/// Unpack a snapshot archive into `unpack_dir`, applying the snapshot-specific
/// size/count limits and entry whitelist.
pub fn unpack_snapshot<A: Read, P: AsRef<Path>>(
    archive: &mut Archive<A>,
    unpack_dir: P,
) -> Result<()> {
    unpack_archive(
        archive,
        unpack_dir,
        MAX_SNAPSHOT_ARCHIVE_UNPACKED_SIZE,
        MAX_SNAPSHOT_ARCHIVE_UNPACKED_COUNT,
        is_valid_snapshot_archive_entry,
    )
}
fn is_valid_snapshot_archive_entry ( parts : & [ & str ] , kind : tar ::EntryType ) -> bool {
let like_storage = Regex ::new ( r "^\d+\.\d+$" ) . unwrap ( ) ;
let like_slot = Regex ::new ( r "^\d+$" ) . unwrap ( ) ;
trace! ( " validating: {:?} {:?} " , parts , kind ) ;
match ( parts , kind ) {
( [ " version " ] , Regular ) = > true ,
( [ " accounts " ] , Directory ) = > true ,
( [ " accounts " , file ] , GNUSparse ) if like_storage . is_match ( file ) = > true ,
( [ " accounts " , file ] , Regular ) if like_storage . is_match ( file ) = > true ,
( [ " snapshots " ] , Directory ) = > true ,
( [ " snapshots " , " status_cache " ] , Regular ) = > true ,
( [ " snapshots " , dir , file ] , Regular )
if like_slot . is_match ( dir ) & & like_slot . is_match ( file ) = >
{
true
}
( [ " snapshots " , dir ] , Directory ) if like_slot . is_match ( dir ) = > true ,
_ = > false ,
}
}
2020-04-29 18:53:34 -07:00
pub fn open_genesis_config (
ledger_path : & Path ,
max_genesis_archive_unpacked_size : u64 ,
) -> GenesisConfig {
2020-03-25 02:46:41 -07:00
GenesisConfig ::load ( & ledger_path ) . unwrap_or_else ( | load_err | {
let genesis_package = ledger_path . join ( " genesis.tar.bz2 " ) ;
2020-04-29 18:53:34 -07:00
unpack_genesis_archive (
& genesis_package ,
ledger_path ,
max_genesis_archive_unpacked_size ,
)
. unwrap_or_else ( | unpack_err | {
2020-03-25 02:46:41 -07:00
warn! (
" Failed to open ledger genesis_config at {:?}: {}, {} " ,
ledger_path , load_err , unpack_err ,
) ;
std ::process ::exit ( 1 ) ;
} ) ;
// loading must succeed at this moment
GenesisConfig ::load ( & ledger_path ) . unwrap ( )
} )
}
pub fn unpack_genesis_archive (
archive_filename : & Path ,
destination_dir : & Path ,
2020-04-29 18:53:34 -07:00
max_genesis_archive_unpacked_size : u64 ,
) -> std ::result ::Result < ( ) , UnpackError > {
2020-03-25 02:46:41 -07:00
info! ( " Extracting {:?}... " , archive_filename ) ;
let extract_start = Instant ::now ( ) ;
2020-04-29 18:53:34 -07:00
fs ::create_dir_all ( destination_dir ) ? ;
let tar_bz2 = File ::open ( & archive_filename ) ? ;
2020-03-25 02:46:41 -07:00
let tar = BzDecoder ::new ( BufReader ::new ( tar_bz2 ) ) ;
let mut archive = Archive ::new ( tar ) ;
2020-04-29 18:53:34 -07:00
unpack_genesis (
& mut archive ,
destination_dir ,
max_genesis_archive_unpacked_size ,
) ? ;
2020-03-25 02:46:41 -07:00
info! (
" Extracted {:?} in {:?} " ,
archive_filename ,
Instant ::now ( ) . duration_since ( extract_start )
) ;
Ok ( ( ) )
}
2020-04-29 18:53:34 -07:00
fn unpack_genesis < A : Read , P : AsRef < Path > > (
archive : & mut Archive < A > ,
unpack_dir : P ,
max_genesis_archive_unpacked_size : u64 ,
) -> Result < ( ) > {
2020-03-25 02:46:41 -07:00
unpack_archive (
archive ,
unpack_dir ,
2020-04-29 18:53:34 -07:00
max_genesis_archive_unpacked_size ,
2020-03-25 02:46:41 -07:00
MAX_GENESIS_ARCHIVE_UNPACKED_COUNT ,
is_valid_genesis_archive_entry ,
)
}
fn is_valid_genesis_archive_entry ( parts : & [ & str ] , kind : tar ::EntryType ) -> bool {
trace! ( " validating: {:?} {:?} " , parts , kind ) ;
match ( parts , kind ) {
( [ " genesis.bin " ] , Regular ) = > true ,
( [ " rocksdb " ] , Directory ) = > true ,
( [ " rocksdb " , .. ] , GNUSparse ) = > true ,
( [ " rocksdb " , .. ] , Regular ) = > true ,
_ = > false ,
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use matches::assert_matches;
    use tar::{Builder, Header};

    #[test]
    fn test_archive_is_valid_entry() {
        // Accepted snapshot layouts.
        assert!(is_valid_snapshot_archive_entry(
            &["accounts", "0.0"],
            tar::EntryType::Regular
        ));
        assert!(is_valid_snapshot_archive_entry(
            &["snapshots"],
            tar::EntryType::Directory
        ));
        assert!(is_valid_snapshot_archive_entry(
            &["snapshots", "3"],
            tar::EntryType::Directory
        ));
        assert!(is_valid_snapshot_archive_entry(
            &["snapshots", "3", "3"],
            tar::EntryType::Regular
        ));
        assert!(is_valid_snapshot_archive_entry(
            &["version"],
            tar::EntryType::Regular
        ));
        assert!(is_valid_snapshot_archive_entry(
            &["accounts"],
            tar::EntryType::Directory
        ));

        // Rejected: malformed names, wrong entry types, unknown entries.
        assert!(!is_valid_snapshot_archive_entry(
            &["accounts", "0x0"],
            tar::EntryType::Regular
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["snapshots"],
            tar::EntryType::Regular
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["snapshots", "x0"],
            tar::EntryType::Directory
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["snapshots", "0x"],
            tar::EntryType::Directory
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["snapshots", "0", "aa"],
            tar::EntryType::Regular
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["aaaa"],
            tar::EntryType::Regular
        ));
    }

    #[test]
    fn test_archive_is_valid_archive_entry() {
        assert!(is_valid_genesis_archive_entry(
            &["genesis.bin"],
            tar::EntryType::Regular
        ));
        assert!(is_valid_genesis_archive_entry(
            &["rocksdb"],
            tar::EntryType::Directory
        ));
        assert!(is_valid_genesis_archive_entry(
            &["rocksdb", "foo"],
            tar::EntryType::Regular
        ));
        assert!(is_valid_genesis_archive_entry(
            &["rocksdb", "foo", "bar"],
            tar::EntryType::Regular
        ));
        assert!(!is_valid_genesis_archive_entry(
            &["aaaa"],
            tar::EntryType::Regular
        ));
    }

    // Finish the in-memory tar `archive` and run `checker` against it,
    // unpacking into a fresh temporary directory.
    fn with_finalize_and_unpack<C>(archive: tar::Builder<Vec<u8>>, checker: C) -> Result<()>
    where
        C: Fn(&mut Archive<BufReader<&[u8]>>, &Path) -> Result<()>,
    {
        let data = archive.into_inner().unwrap();
        let reader = BufReader::new(&data[..]);
        let mut archive: Archive<std::io::BufReader<&[u8]>> = Archive::new(reader);
        let temp_dir = tempfile::TempDir::new().unwrap();

        checker(&mut archive, &temp_dir.into_path())
    }

    fn finalize_and_unpack_snapshot(archive: tar::Builder<Vec<u8>>) -> Result<()> {
        with_finalize_and_unpack(archive, |a, b| unpack_snapshot(a, b))
    }

    fn finalize_and_unpack_genesis(archive: tar::Builder<Vec<u8>>) -> Result<()> {
        with_finalize_and_unpack(archive, |a, b| {
            unpack_genesis(a, b, MAX_GENESIS_ARCHIVE_UNPACKED_SIZE)
        })
    }

    #[test]
    fn test_archive_unpack_snapshot_ok() {
        let mut header = Header::new_gnu();
        header.set_path("version").unwrap();
        header.set_size(4);
        header.set_cksum();

        let data: &[u8] = &[1, 2, 3, 4];

        let mut archive = Builder::new(Vec::new());
        archive.append(&header, data).unwrap();
        let result = finalize_and_unpack_snapshot(archive);
        assert_matches!(result, Ok(()));
    }

    #[test]
    fn test_archive_unpack_genesis_ok() {
        let mut header = Header::new_gnu();
        header.set_path("genesis.bin").unwrap();
        header.set_size(4);
        header.set_cksum();

        let data: &[u8] = &[1, 2, 3, 4];

        let mut archive = Builder::new(Vec::new());
        archive.append(&header, data).unwrap();
        let result = finalize_and_unpack_genesis(archive);
        assert_matches!(result, Ok(()));
    }

    #[test]
    fn test_archive_unpack_snapshot_invalid_path() {
        let mut header = Header::new_gnu();
        // bypass the sanitization of the .set_path()
        for (p, c) in header
            .as_old_mut()
            .name
            .iter_mut()
            .zip(b"foo/../../../dangerous".iter().chain(Some(&0)))
        {
            *p = *c;
        }
        header.set_size(4);
        header.set_cksum();

        let data: &[u8] = &[1, 2, 3, 4];

        let mut archive = Builder::new(Vec::new());
        archive.append(&header, data).unwrap();
        let result = finalize_and_unpack_snapshot(archive);
        assert_matches!(result, Err(UnpackError::Archive(ref message)) if message == "invalid path found: \"foo/../../../dangerous\"");
    }

    // Craft a header whose raw name bytes are `path` (bypassing set_path
    // sanitization), then unpack it with the raw `unpack_in` to observe the
    // `tar` crate's own sanitization behavior.
    fn with_archive_unpack_snapshot_invalid_path(path: &str) -> Result<()> {
        let mut header = Header::new_gnu();
        // bypass the sanitization of the .set_path()
        for (p, c) in header
            .as_old_mut()
            .name
            .iter_mut()
            .zip(path.as_bytes().iter().chain(Some(&0)))
        {
            *p = *c;
        }
        header.set_size(4);
        header.set_cksum();

        let data: &[u8] = &[1, 2, 3, 4];

        let mut archive = Builder::new(Vec::new());
        archive.append(&header, data).unwrap();
        with_finalize_and_unpack(archive, |unpacking_archive, path| {
            for entry in unpacking_archive.entries()? {
                if !entry?.unpack_in(path)? {
                    return Err(UnpackError::Archive("failed!".to_string()));
                } else if !path.join(path).exists() {
                    return Err(UnpackError::Archive("not existing!".to_string()));
                }
            }
            Ok(())
        })
    }

    #[test]
    fn test_archive_unpack_itself() {
        assert_matches!(
            with_archive_unpack_snapshot_invalid_path("ryoqun/work"),
            Ok(())
        );
        // Absolute paths are neutralized as relative
        assert_matches!(
            with_archive_unpack_snapshot_invalid_path("/etc/passwd"),
            Ok(())
        );
        assert_matches!(with_archive_unpack_snapshot_invalid_path("../../../dangerous"), Err(UnpackError::Archive(ref message)) if message == "failed!");
    }

    #[test]
    fn test_archive_unpack_snapshot_invalid_entry() {
        let mut header = Header::new_gnu();
        header.set_path("foo").unwrap();
        header.set_size(4);
        header.set_cksum();

        let data: &[u8] = &[1, 2, 3, 4];

        let mut archive = Builder::new(Vec::new());
        archive.append(&header, data).unwrap();
        let result = finalize_and_unpack_snapshot(archive);
        assert_matches!(result, Err(UnpackError::Archive(ref message)) if message == "extra entry found: \"foo\"");
    }

    #[test]
    fn test_archive_unpack_snapshot_too_large() {
        let mut header = Header::new_gnu();
        header.set_path("version").unwrap();
        // 1 PiB — far over the 500 GiB snapshot limit.
        header.set_size(1024 * 1024 * 1024 * 1024 * 1024);
        header.set_cksum();

        let data: &[u8] = &[1, 2, 3, 4];

        let mut archive = Builder::new(Vec::new());
        archive.append(&header, data).unwrap();
        let result = finalize_and_unpack_snapshot(archive);
        assert_matches!(result, Err(UnpackError::Archive(ref message)) if message == &format!("too large archive: 1125899906842624 than limit: {}", MAX_SNAPSHOT_ARCHIVE_UNPACKED_SIZE));
    }

    #[test]
    fn test_archive_unpack_snapshot_bad_unpack() {
        let result = check_unpack_result(false, "abc".to_string());
        assert_matches!(result, Err(UnpackError::Archive(ref message)) if message == "failed to unpack: \"abc\"");
    }

    #[test]
    fn test_archive_checked_total_size_sum() {
        let result = checked_total_size_sum(500, 500, MAX_SNAPSHOT_ARCHIVE_UNPACKED_SIZE);
        assert_matches!(result, Ok(1000));

        let result =
            checked_total_size_sum(u64::max_value() - 2, 2, MAX_SNAPSHOT_ARCHIVE_UNPACKED_SIZE);
        assert_matches!(result, Err(UnpackError::Archive(ref message)) if message == &format!("too large archive: 18446744073709551615 than limit: {}", MAX_SNAPSHOT_ARCHIVE_UNPACKED_SIZE));
    }

    #[test]
    fn test_archive_checked_total_size_count() {
        let result = checked_total_count_increment(101, MAX_SNAPSHOT_ARCHIVE_UNPACKED_COUNT);
        assert_matches!(result, Ok(102));

        let result =
            checked_total_count_increment(999_999_999_999, MAX_SNAPSHOT_ARCHIVE_UNPACKED_COUNT);
        assert_matches!(result, Err(UnpackError::Archive(ref message)) if message == "too many files in snapshot: 1000000000000");
    }
}