2022-06-20 17:59:51 -07:00
|
|
|
//! Cached state configuration for Zebra.
|
|
|
|
|
|
|
|
use std::{
|
|
|
|
fs::{canonicalize, remove_dir_all, DirEntry, ReadDir},
|
|
|
|
path::{Path, PathBuf},
|
|
|
|
};
|
2022-02-20 16:20:29 -08:00
|
|
|
|
|
|
|
use serde::{Deserialize, Serialize};
|
2022-06-20 17:59:51 -07:00
|
|
|
use tokio::task::{spawn_blocking, JoinHandle};
|
|
|
|
use tracing::Span;
|
2022-02-20 16:20:29 -08:00
|
|
|
|
2020-09-09 16:42:52 -07:00
|
|
|
use zebra_chain::parameters::Network;
|
|
|
|
|
|
|
|
/// Configuration for the state service.
|
|
|
|
#[derive(Clone, Debug, Deserialize, Serialize)]
|
|
|
|
#[serde(deny_unknown_fields, default)]
|
|
|
|
pub struct Config {
|
|
|
|
/// The root directory for storing cached data.
|
|
|
|
///
|
|
|
|
/// Cached data includes any state that can be replicated from the network
|
|
|
|
/// (e.g., the chain state, the blocks, the UTXO set, etc.). It does *not*
|
|
|
|
/// include private data that cannot be replicated from the network, such as
|
|
|
|
/// wallet data. That data is not handled by `zebra-state`.
|
|
|
|
///
|
2022-04-29 15:02:09 -07:00
|
|
|
/// Each state format version and network has a separate state.
|
|
|
|
/// These states are stored in `state/vN/mainnet` and `state/vN/testnet` subdirectories,
|
|
|
|
/// underneath the `cache_dir` path, where `N` is the state format version.
|
|
|
|
///
|
|
|
|
/// When Zebra's state format changes, it creates a new state subdirectory for that version,
|
|
|
|
/// and re-syncs from genesis.
|
|
|
|
///
|
|
|
|
/// Old state versions are [not automatically deleted](https://github.com/ZcashFoundation/zebra/issues/1213).
|
|
|
|
/// It is ok to manually delete old state versions.
|
|
|
|
///
|
|
|
|
/// It is also ok to delete the entire cached state directory.
|
|
|
|
/// If you do, Zebra will re-sync from genesis next time it is launched.
|
2020-09-09 16:42:52 -07:00
|
|
|
///
|
|
|
|
/// The default directory is platform dependent, based on
|
|
|
|
/// [`dirs::cache_dir()`](https://docs.rs/dirs/3.0.1/dirs/fn.cache_dir.html):
|
|
|
|
///
|
2022-04-29 15:02:09 -07:00
|
|
|
/// |Platform | Value | Example |
|
|
|
|
/// | ------- | ----------------------------------------------- | ------------------------------------ |
|
|
|
|
/// | Linux | `$XDG_CACHE_HOME/zebra` or `$HOME/.cache/zebra` | `/home/alice/.cache/zebra` |
|
|
|
|
/// | macOS | `$HOME/Library/Caches/zebra` | `/Users/Alice/Library/Caches/zebra` |
|
|
|
|
/// | Windows | `{FOLDERID_LocalAppData}\zebra` | `C:\Users\Alice\AppData\Local\zebra` |
|
|
|
|
/// | Other | `std::env::current_dir()/cache/zebra` | `/cache/zebra` |
|
2023-04-13 01:42:17 -07:00
|
|
|
///
|
|
|
|
/// # Security
|
|
|
|
///
|
|
|
|
/// If you are running Zebra with elevated permissions ("root"), create the
|
|
|
|
/// directory for this file before running Zebra, and make sure the Zebra user
|
|
|
|
/// account has exclusive access to that directory, and other users can't modify
|
|
|
|
/// its parent directories.
|
2020-09-09 16:42:52 -07:00
|
|
|
pub cache_dir: PathBuf,
|
|
|
|
|
|
|
|
/// Whether to use an ephemeral database.
|
|
|
|
///
|
2022-04-29 15:02:09 -07:00
|
|
|
/// Ephemeral databases are stored in a temporary directory created using [`tempfile::tempdir()`].
|
2020-11-19 19:01:19 -08:00
|
|
|
/// They are deleted when Zebra exits successfully.
|
|
|
|
/// (If Zebra panics or crashes, the ephemeral database won't be deleted.)
|
2020-09-09 16:42:52 -07:00
|
|
|
///
|
|
|
|
/// Set to `false` by default. If this is set to `true`, [`cache_dir`] is ignored.
|
|
|
|
///
|
2022-04-29 15:02:09 -07:00
|
|
|
/// Ephemeral directories are created in the [`std::env::temp_dir()`].
|
|
|
|
/// Zebra names each directory after the state version and network, for example: `zebra-state-v21-mainnet-XnyGnE`.
|
|
|
|
///
|
2020-09-09 16:42:52 -07:00
|
|
|
/// [`cache_dir`]: struct.Config.html#structfield.cache_dir
|
|
|
|
pub ephemeral: bool,
|
2020-10-27 02:25:29 -07:00
|
|
|
|
|
|
|
/// Commit blocks to the finalized state up to this height, then exit Zebra.
|
|
|
|
///
|
2020-11-17 21:13:01 -08:00
|
|
|
/// Set to `None` by default: Zebra continues syncing indefinitely.
|
2020-10-27 02:25:29 -07:00
|
|
|
pub debug_stop_at_height: Option<u32>,
|
2022-06-20 17:59:51 -07:00
|
|
|
|
|
|
|
/// Whether to delete the old database directories when present.
|
|
|
|
///
|
|
|
|
/// Set to `true` by default. If this is set to `false`,
|
|
|
|
/// no check for old database versions will be made and nothing will be
|
|
|
|
/// deleted.
|
|
|
|
pub delete_old_database: bool,
|
2023-03-13 14:13:30 -07:00
|
|
|
|
|
|
|
#[cfg(feature = "elasticsearch")]
|
|
|
|
/// The elasticsearch database url.
|
|
|
|
pub elasticsearch_url: String,
|
|
|
|
|
|
|
|
#[cfg(feature = "elasticsearch")]
|
|
|
|
/// The elasticsearch database username.
|
|
|
|
pub elasticsearch_username: String,
|
|
|
|
|
|
|
|
#[cfg(feature = "elasticsearch")]
|
|
|
|
/// The elasticsearch database password.
|
|
|
|
pub elasticsearch_password: String,
|
2020-09-09 16:42:52 -07:00
|
|
|
}
|
|
|
|
|
2020-11-19 19:01:19 -08:00
|
|
|
fn gen_temp_path(prefix: &str) -> PathBuf {
|
2022-01-14 07:11:23 -08:00
|
|
|
tempfile::Builder::new()
|
|
|
|
.prefix(prefix)
|
|
|
|
.tempdir()
|
2020-11-19 19:01:19 -08:00
|
|
|
.expect("temporary directory is created successfully")
|
|
|
|
.into_path()
|
2020-11-18 18:05:06 -08:00
|
|
|
}
|
|
|
|
|
2020-09-09 16:42:52 -07:00
|
|
|
impl Config {
|
2022-02-22 04:59:44 -08:00
|
|
|
/// Returns the path for the finalized state database
|
2022-04-27 16:06:11 -07:00
|
|
|
pub fn db_path(&self, network: Network) -> PathBuf {
|
2020-09-09 16:42:52 -07:00
|
|
|
let net_dir = match network {
|
|
|
|
Network::Mainnet => "mainnet",
|
|
|
|
Network::Testnet => "testnet",
|
|
|
|
};
|
|
|
|
|
2022-02-22 04:59:44 -08:00
|
|
|
if self.ephemeral {
|
2020-11-19 19:01:19 -08:00
|
|
|
gen_temp_path(&format!(
|
2022-04-29 15:02:09 -07:00
|
|
|
"zebra-state-v{}-{}-",
|
2020-11-19 19:01:19 -08:00
|
|
|
crate::constants::DATABASE_FORMAT_VERSION,
|
|
|
|
net_dir
|
|
|
|
))
|
2020-09-09 16:42:52 -07:00
|
|
|
} else {
|
2020-11-18 18:05:06 -08:00
|
|
|
self.cache_dir
|
2020-10-27 14:24:00 -07:00
|
|
|
.join("state")
|
2020-11-18 18:05:06 -08:00
|
|
|
.join(format!("v{}", crate::constants::DATABASE_FORMAT_VERSION))
|
|
|
|
.join(net_dir)
|
2022-02-22 04:59:44 -08:00
|
|
|
}
|
2020-09-09 16:42:52 -07:00
|
|
|
}
|
|
|
|
|
2021-01-18 12:40:15 -08:00
|
|
|
/// Construct a config for an ephemeral database
|
|
|
|
pub fn ephemeral() -> Config {
|
|
|
|
Config {
|
|
|
|
ephemeral: true,
|
|
|
|
..Config::default()
|
|
|
|
}
|
2020-09-09 16:42:52 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Default for Config {
|
|
|
|
fn default() -> Self {
|
|
|
|
let cache_dir = dirs::cache_dir()
|
|
|
|
.unwrap_or_else(|| std::env::current_dir().unwrap().join("cache"))
|
|
|
|
.join("zebra");
|
|
|
|
|
|
|
|
Self {
|
|
|
|
cache_dir,
|
|
|
|
ephemeral: false,
|
2020-10-27 02:25:29 -07:00
|
|
|
debug_stop_at_height: None,
|
2022-06-20 17:59:51 -07:00
|
|
|
delete_old_database: true,
|
2023-03-13 14:13:30 -07:00
|
|
|
#[cfg(feature = "elasticsearch")]
|
|
|
|
elasticsearch_url: "https://localhost:9200".to_string(),
|
|
|
|
#[cfg(feature = "elasticsearch")]
|
|
|
|
elasticsearch_username: "elastic".to_string(),
|
|
|
|
#[cfg(feature = "elasticsearch")]
|
|
|
|
elasticsearch_password: "".to_string(),
|
2022-06-20 17:59:51 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Cleaning up old database versions
|
|
|
|
|
|
|
|
/// Spawns a task that checks if there are old database folders,
|
|
|
|
/// and deletes them from the filesystem.
|
|
|
|
///
|
|
|
|
/// Iterate over the files and directories in the databases folder and delete if:
|
|
|
|
/// - The state directory exists.
|
|
|
|
/// - The entry is a directory.
|
|
|
|
/// - The directory name has a prefix `v`.
|
|
|
|
/// - The directory name without the prefix can be parsed as an unsigned number.
|
|
|
|
/// - The parsed number is lower than the hardcoded `DATABASE_FORMAT_VERSION`.
|
|
|
|
pub fn check_and_delete_old_databases(config: Config) -> JoinHandle<()> {
|
|
|
|
let current_span = Span::current();
|
|
|
|
|
|
|
|
spawn_blocking(move || {
|
|
|
|
current_span.in_scope(|| {
|
|
|
|
delete_old_databases(config);
|
|
|
|
info!("finished old database version cleanup task");
|
|
|
|
})
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Check if there are old database folders and delete them from the filesystem.
|
|
|
|
///
|
|
|
|
/// See [`check_and_delete_old_databases`] for details.
|
|
|
|
fn delete_old_databases(config: Config) {
|
|
|
|
if config.ephemeral || !config.delete_old_database {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
info!("checking for old database versions");
|
|
|
|
|
|
|
|
let state_dir = config.cache_dir.join("state");
|
|
|
|
if let Some(state_dir) = read_dir(&state_dir) {
|
|
|
|
for entry in state_dir.flatten() {
|
|
|
|
let deleted_state = check_and_delete_database(&config, &entry);
|
|
|
|
|
|
|
|
if let Some(deleted_state) = deleted_state {
|
|
|
|
info!(?deleted_state, "deleted outdated state directory");
|
|
|
|
}
|
2020-09-09 16:42:52 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2022-06-20 17:59:51 -07:00
|
|
|
|
|
|
|
/// Return a `ReadDir` for `dir`, after checking that `dir` exists and can be read.
|
|
|
|
///
|
|
|
|
/// Returns `None` if any operation fails.
|
|
|
|
fn read_dir(dir: &Path) -> Option<ReadDir> {
|
|
|
|
if dir.exists() {
|
|
|
|
if let Ok(read_dir) = dir.read_dir() {
|
|
|
|
return Some(read_dir);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
None
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Check if `entry` is an old database directory, and delete it from the filesystem.
|
|
|
|
/// See [`check_and_delete_old_databases`] for details.
|
|
|
|
///
|
|
|
|
/// If the directory was deleted, returns its path.
|
|
|
|
fn check_and_delete_database(config: &Config, entry: &DirEntry) -> Option<PathBuf> {
|
|
|
|
let dir_name = parse_dir_name(entry)?;
|
|
|
|
let version_number = parse_version_number(&dir_name)?;
|
|
|
|
|
|
|
|
if version_number >= crate::constants::DATABASE_FORMAT_VERSION {
|
|
|
|
return None;
|
|
|
|
}
|
|
|
|
|
|
|
|
let outdated_path = entry.path();
|
|
|
|
|
|
|
|
// # Correctness
|
|
|
|
//
|
|
|
|
// Check that the path we're about to delete is inside the cache directory.
|
|
|
|
// If the user has symlinked the outdated state directory to a non-cache directory,
|
|
|
|
// we don't want to delete it, because it might contain other files.
|
|
|
|
//
|
|
|
|
// We don't attempt to guard against malicious symlinks created by attackers
|
|
|
|
// (TOCTOU attacks). Zebra should not be run with elevated privileges.
|
|
|
|
let cache_path = canonicalize(&config.cache_dir).ok()?;
|
|
|
|
let outdated_path = canonicalize(outdated_path).ok()?;
|
|
|
|
|
|
|
|
if !outdated_path.starts_with(&cache_path) {
|
|
|
|
info!(
|
|
|
|
skipped_path = ?outdated_path,
|
|
|
|
?cache_path,
|
|
|
|
"skipped cleanup of outdated state directory: state is outside cache directory",
|
|
|
|
);
|
|
|
|
|
|
|
|
return None;
|
|
|
|
}
|
|
|
|
|
|
|
|
remove_dir_all(&outdated_path).ok().map(|()| outdated_path)
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Check if `entry` is a directory with a valid UTF-8 name.
|
|
|
|
/// (State directory names are guaranteed to be UTF-8.)
|
|
|
|
///
|
|
|
|
/// Returns `None` if any operation fails.
|
|
|
|
fn parse_dir_name(entry: &DirEntry) -> Option<String> {
|
|
|
|
if let Ok(file_type) = entry.file_type() {
|
|
|
|
if file_type.is_dir() {
|
|
|
|
if let Ok(dir_name) = entry.file_name().into_string() {
|
|
|
|
return Some(dir_name);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
None
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Parse the state version number from `dir_name`.
|
|
|
|
///
|
|
|
|
/// Returns `None` if parsing fails, or the directory name is not in the expected format.
|
|
|
|
fn parse_version_number(dir_name: &str) -> Option<u32> {
|
|
|
|
dir_name
|
|
|
|
.strip_prefix('v')
|
|
|
|
.and_then(|version| version.parse().ok())
|
|
|
|
}
|