zebra/zebra-state/src/config.rs

265 lines
9.4 KiB
Rust
Raw Normal View History

//! Cached state configuration for Zebra.
use std::{
fs::{canonicalize, remove_dir_all, DirEntry, ReadDir},
path::{Path, PathBuf},
};
use serde::{Deserialize, Serialize};
use tokio::task::{spawn_blocking, JoinHandle};
use tracing::Span;
use zebra_chain::parameters::Network;
/// Configuration for the state service.
#[derive(Clone, Debug, Deserialize, Serialize)]
#[serde(deny_unknown_fields, default)]
pub struct Config {
/// The root directory for storing cached data.
///
/// Cached data includes any state that can be replicated from the network
/// (e.g., the chain state, the blocks, the UTXO set, etc.). It does *not*
/// include private data that cannot be replicated from the network, such as
/// wallet data. That data is not handled by `zebra-state`.
///
/// Each state format version and network has a separate state.
/// These states are stored in `state/vN/mainnet` and `state/vN/testnet` subdirectories,
/// underneath the `cache_dir` path, where `N` is the state format version.
///
/// When Zebra's state format changes, it creates a new state subdirectory for that version,
/// and re-syncs from genesis.
///
/// Old state versions are [not automatically deleted](https://github.com/ZcashFoundation/zebra/issues/1213).
/// It is ok to manually delete old state versions.
///
/// It is also ok to delete the entire cached state directory.
/// If you do, Zebra will re-sync from genesis next time it is launched.
///
/// The default directory is platform dependent, based on
/// [`dirs::cache_dir()`](https://docs.rs/dirs/3.0.1/dirs/fn.cache_dir.html):
///
/// |Platform | Value | Example |
/// | ------- | ----------------------------------------------- | ------------------------------------ |
/// | Linux | `$XDG_CACHE_HOME/zebra` or `$HOME/.cache/zebra` | `/home/alice/.cache/zebra` |
/// | macOS | `$HOME/Library/Caches/zebra` | `/Users/Alice/Library/Caches/zebra` |
/// | Windows | `{FOLDERID_LocalAppData}\zebra` | `C:\Users\Alice\AppData\Local\zebra` |
/// | Other | `std::env::current_dir()/cache/zebra` | `/cache/zebra` |
pub cache_dir: PathBuf,
/// Whether to use an ephemeral database.
///
/// Ephemeral databases are stored in a temporary directory created using [`tempfile::tempdir()`].
/// They are deleted when Zebra exits successfully.
/// (If Zebra panics or crashes, the ephemeral database won't be deleted.)
///
/// Set to `false` by default. If this is set to `true`, [`cache_dir`] is ignored.
///
/// Ephemeral directories are created in the [`std::env::temp_dir()`].
/// Zebra names each directory after the state version and network, for example: `zebra-state-v21-mainnet-XnyGnE`.
///
/// [`cache_dir`]: struct.Config.html#structfield.cache_dir
pub ephemeral: bool,
/// Commit blocks to the finalized state up to this height, then exit Zebra.
///
2020-11-17 21:13:01 -08:00
/// Set to `None` by default: Zebra continues syncing indefinitely.
pub debug_stop_at_height: Option<u32>,
/// Whether to delete the old database directories when present.
///
/// Set to `true` by default. If this is set to `false`,
/// no check for old database versions will be made and nothing will be
/// deleted.
pub delete_old_database: bool,
#[cfg(feature = "elasticsearch")]
/// The elasticsearch database url.
pub elasticsearch_url: String,
#[cfg(feature = "elasticsearch")]
/// The elasticsearch database username.
pub elasticsearch_username: String,
#[cfg(feature = "elasticsearch")]
/// The elasticsearch database password.
pub elasticsearch_password: String,
}
fn gen_temp_path(prefix: &str) -> PathBuf {
tempfile::Builder::new()
.prefix(prefix)
.tempdir()
.expect("temporary directory is created successfully")
.into_path()
}
impl Config {
/// Returns the path for the finalized state database
add(test): Integration test to send transactions using lightwalletd (#4068) * Export the `zebra_state::Config::db_path` method Make it easier for tests to discover the sub-directory used to store the chain state data. * Generate code for interfacing with lightwalletd Use the `tonic-build` crate to generate Rust code for communicating with lightwalletd using gRPC. The `*.proto` files were obtained from the Zcash lightwalletd repository. * Use `block::Height` instead of `Height` Import the `block` instead to make it slightly clearer. * Add helper function to remove a file if it exists Try to remove it and ignore an error if it says that the file doesn't exist. This will be used later to remove the lock file from a copied chain state directory. * Add helper function to copy chain state dirs Copy an existing chain state directory into a new temporary directory. * Add a `BoxStateService` type alias Make it easier to write and read a boxed version of a state service. * Add a helper function to start the state service Make it easier to specify the state service to use an existing state cache directory. * Import `eyre!` macro at the module level Allow it to be used in different places without having to repeat the imports. * Add `load_tip_height_from_state_directory` helper A function to discover the current chain tip height stored in a state cache. * Add helper function to prepare partial sync. state Loads a partially synchronized cached state directory into a temporary directory that can be used by a zebrad instance, and also returns the chain tip block height of that state. * Add `perform_full_sync_starting_from` helper Runs a zebrad with an existing partially synchronized state, and finishes synchronizing it to the network chain tip. * Add function to load transactions from a block Use a provided state service to load all transactions from a block at a specified height. The state service is a generic type parameter, because `zebra_state::service::ReadStateService` is not exported publicly. Using a generic type parameter also allows the service to be wrapped in layers if needed in the future. * Add `load_transactions_from_block_after` helper A function to load transactions from a block stored in a cached state directory. The cached state must be synchronized to a chain tip higher than the requested height. * Add helper function to load some test transactions Given a partially synchronized chain state, it will extend that chain by performing a full synchronization, and obtain some transactions from one of the newly added blocks. * Update `spawn_zebrad_for_rpc_without_initial_peers` Wait until the mempool is activated. * Add method to start lightwalletd with RPC server Returns the lightwalletd instance and the port that it's listening for RPC connections. The instance can reuse an existing cached lightwalletd state if the `LIGHTWALLETD_DATA_DIR` environment variable is set. * Add a `LightwalletdRpcClient` type alias To make it easier to identify the type generated from the Protobuf files. * Add helper function to connect to lightwalletd Prepare an RPC client to send requests to a lightwalletd instance. * Add a `prepare_send_transaction_request` helper Creates a request message for lightwalletd to send a transaction. * Add test to send transactions using lightwalletd Obtain some valid transactions from future blocks and try to send them to a lightwalletd instance connected to a zebrad instance that hasn't seen those transactions yet. The transactions should be successfully queued in Zebra's mempool. * Make `zebra_directory` parameter generic Allow using a `TempDir` or a `PathBuf`. * Move lightwalletd protobuf files Place them closer to the module directory, so that it's clearer that they specify the RPC protocol for lightwalletd, and not Zebra itself. * Don't use coinbase transactions in the test Coinbase transactions are rejected by the mempool. * Don't remove state lock file It is removed automatically by Zebra when it shuts down, so if it exists it should be reported as a bug. * Force mempool to be enabled in Zebrad instance Speed up the initialization of the Zebrad instance used for lightwalletd to connect to. * Refactor to create `LIGHTWALLETD_DATA_DIR_VAR` Document how the environment variable can be used to speed up the test. * Check for process errors in spawned Zebra instance Enable checking for known process failure messages. * Add `FINISH_PARTIAL_SYNC_TIMEOUT` constant Document why it exists and how the choice of the value affects the test. * Add `LIGHTWALLETD_TEST_TIMEOUT` constant And use it for the Zebrad and the Lightwalletd instances used in the send transaction integration test. * Check `lightwalletd` process for errors Enable checking the lightwalletd process for known failure messages. * Update `tonic` and `prost` dependencies Use the latest version and fix CI failures because `rustfmt` isn't installed in the build environment. * Create `send_transaction_test` module Move the send transaction using lightwalletd test and its helper functions into a new module. * Move `LIGHTWALLETD_TEST_TIMEOUT` constant Place it in the parent `lightwalletd` module. * Move gRPC helper functions and types to `rpc` mod. Make them more accessible so that they can be used by other tests. * Create a `cached_state` module Move the test utility functions related to using a cached Zebra state into the module. * Move `perform_full_sync_starting_from` to `sync` Keep to closer to the synchronization utility functions. * Move Zebra cached state path variable constant Place it in the `cached_state` module. * Skip test if `ZEBRA_TEST_LIGHTWALLETD` is not set Make it part of the set of tests ignored as a whole if no lightwalletd tests should be executed. * Move `spawn_zebrad_for_rpc_without_initial_peers` Place it in the `launch` sub-module. * Rename `rpc` module into `wallet_grpc` Avoid any potential misunderstandings when the name is seen out of context. * Allow duplicate `heck` dependency At least until `structopt` is updated or `zebra-utils` is updated to use `clap` 3. * Fix a deny.toml typo * fix(build): CMake is required by `prost` crate Co-authored-by: teor <teor@riseup.net> Co-authored-by: Gustavo Valverde <gustavo@iterativo.do>
2022-04-27 16:06:11 -07:00
pub fn db_path(&self, network: Network) -> PathBuf {
let net_dir = match network {
Network::Mainnet => "mainnet",
Network::Testnet => "testnet",
};
if self.ephemeral {
gen_temp_path(&format!(
"zebra-state-v{}-{}-",
crate::constants::DATABASE_FORMAT_VERSION,
net_dir
))
} else {
self.cache_dir
.join("state")
.join(format!("v{}", crate::constants::DATABASE_FORMAT_VERSION))
.join(net_dir)
}
}
/// Construct a config for an ephemeral database
pub fn ephemeral() -> Config {
Config {
ephemeral: true,
..Config::default()
}
}
}
impl Default for Config {
fn default() -> Self {
let cache_dir = dirs::cache_dir()
.unwrap_or_else(|| std::env::current_dir().unwrap().join("cache"))
.join("zebra");
Self {
cache_dir,
ephemeral: false,
debug_stop_at_height: None,
delete_old_database: true,
#[cfg(feature = "elasticsearch")]
elasticsearch_url: "https://localhost:9200".to_string(),
#[cfg(feature = "elasticsearch")]
elasticsearch_username: "elastic".to_string(),
#[cfg(feature = "elasticsearch")]
elasticsearch_password: "".to_string(),
}
}
}
// Cleaning up old database versions
/// Spawns a task that checks if there are old database folders,
/// and deletes them from the filesystem.
///
/// Iterate over the files and directories in the databases folder and delete if:
/// - The state directory exists.
/// - The entry is a directory.
/// - The directory name has a prefix `v`.
/// - The directory name without the prefix can be parsed as an unsigned number.
/// - The parsed number is lower than the hardcoded `DATABASE_FORMAT_VERSION`.
pub fn check_and_delete_old_databases(config: Config) -> JoinHandle<()> {
let current_span = Span::current();
spawn_blocking(move || {
current_span.in_scope(|| {
delete_old_databases(config);
info!("finished old database version cleanup task");
})
})
}
/// Check if there are old database folders and delete them from the filesystem.
///
/// See [`check_and_delete_old_databases`] for details.
fn delete_old_databases(config: Config) {
if config.ephemeral || !config.delete_old_database {
return;
}
info!("checking for old database versions");
let state_dir = config.cache_dir.join("state");
if let Some(state_dir) = read_dir(&state_dir) {
for entry in state_dir.flatten() {
let deleted_state = check_and_delete_database(&config, &entry);
if let Some(deleted_state) = deleted_state {
info!(?deleted_state, "deleted outdated state directory");
}
}
}
}
/// Return a `ReadDir` for `dir`, after checking that `dir` exists and can be read.
///
/// Returns `None` if any operation fails.
fn read_dir(dir: &Path) -> Option<ReadDir> {
if dir.exists() {
if let Ok(read_dir) = dir.read_dir() {
return Some(read_dir);
}
}
None
}
/// Check if `entry` is an old database directory, and delete it from the filesystem.
/// See [`check_and_delete_old_databases`] for details.
///
/// If the directory was deleted, returns its path.
fn check_and_delete_database(config: &Config, entry: &DirEntry) -> Option<PathBuf> {
let dir_name = parse_dir_name(entry)?;
let version_number = parse_version_number(&dir_name)?;
if version_number >= crate::constants::DATABASE_FORMAT_VERSION {
return None;
}
let outdated_path = entry.path();
// # Correctness
//
// Check that the path we're about to delete is inside the cache directory.
// If the user has symlinked the outdated state directory to a non-cache directory,
// we don't want to delete it, because it might contain other files.
//
// We don't attempt to guard against malicious symlinks created by attackers
// (TOCTOU attacks). Zebra should not be run with elevated privileges.
let cache_path = canonicalize(&config.cache_dir).ok()?;
let outdated_path = canonicalize(outdated_path).ok()?;
if !outdated_path.starts_with(&cache_path) {
info!(
skipped_path = ?outdated_path,
?cache_path,
"skipped cleanup of outdated state directory: state is outside cache directory",
);
return None;
}
remove_dir_all(&outdated_path).ok().map(|()| outdated_path)
}
/// Check if `entry` is a directory with a valid UTF-8 name.
/// (State directory names are guaranteed to be UTF-8.)
///
/// Returns `None` if any operation fails.
fn parse_dir_name(entry: &DirEntry) -> Option<String> {
if let Ok(file_type) = entry.file_type() {
if file_type.is_dir() {
if let Ok(dir_name) = entry.file_name().into_string() {
return Some(dir_name);
}
}
}
None
}
/// Parse the state version number from `dir_name`.
///
/// Returns `None` if parsing fails, or the directory name is not in the expected format.
fn parse_version_number(dir_name: &str) -> Option<u32> {
dir_name
.strip_prefix('v')
.and_then(|version| version.parse().ok())
}