feat(rpc): implement the get_address_tx_ids RPC method query (#4119)

* Add a finalized state txids query

* Add an address transaction IDs query, without height filters

* Connect the address transaction ID query to the RPC

* Basic filtering of address transaction IDs by height range

* Add a network and range argument to the getaddresstxids test

* Test all block range combinations for mainnet

* Fix a file descriptor limit error

* Optimise seeking the first transaction for an address

The first transaction's location is part of the address location.

* Filter finalized address transaction IDs by height range

* Filter non-finalized address transaction IDs by the height range

* Fix up snapshot tests for the new height range API
This commit is contained in:
teor 2022-04-22 06:19:26 +10:00 committed by GitHub
parent 5575b7a40e
commit c2430c6f45
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 429 additions and 94 deletions

View File

@ -647,7 +647,6 @@ where
end: u32, end: u32,
) -> BoxFuture<Result<Vec<String>>> { ) -> BoxFuture<Result<Vec<String>>> {
let mut state = self.state.clone(); let mut state = self.state.clone();
let mut response_transactions = vec![];
let start = Height(start); let start = Height(start);
let end = Height(end); let end = Height(end);
@ -661,7 +660,7 @@ where
// height range checks // height range checks
check_height_range(start, end, chain_height?)?; check_height_range(start, end, chain_height?)?;
let valid_addresses: Result<Vec<Address>> = addresses let valid_addresses: Result<HashSet<Address>> = addresses
.iter() .iter()
.map(|address| { .map(|address| {
address.parse().map_err(|_| { address.parse().map_err(|_| {
@ -670,8 +669,10 @@ where
}) })
.collect(); .collect();
let request = let request = zebra_state::ReadRequest::TransactionIdsByAddresses {
zebra_state::ReadRequest::TransactionsByAddresses(valid_addresses?, start, end); addresses: valid_addresses?,
height_range: start..=end,
};
let response = state let response = state
.ready() .ready()
.and_then(|service| service.call(request)) .and_then(|service| service.call(request))
@ -682,13 +683,14 @@ where
data: None, data: None,
})?; })?;
match response { let hashes = match response {
zebra_state::ReadResponse::TransactionIds(hashes) => response_transactions zebra_state::ReadResponse::AddressesTransactionIds(hashes) => {
.append(&mut hashes.iter().map(|h| h.to_string()).collect()), hashes.values().map(|tx_id| tx_id.to_string()).collect()
_ => unreachable!("unmatched response to a TransactionsByAddresses request"),
} }
_ => unreachable!("unmatched response to a TransactionsByAddresses request"),
};
Ok(response_transactions) Ok(hashes)
} }
.boxed() .boxed()
} }

View File

@ -1,6 +1,6 @@
//! Fixed test vectors for RPC methods. //! Fixed test vectors for RPC methods.
use std::sync::Arc; use std::{ops::RangeInclusive, sync::Arc};
use jsonrpc_core::ErrorCode; use jsonrpc_core::ErrorCode;
use tower::buffer::Buffer; use tower::buffer::Buffer;
@ -11,6 +11,7 @@ use zebra_chain::{
parameters::Network::*, parameters::Network::*,
serialization::{ZcashDeserializeInto, ZcashSerialize}, serialization::{ZcashDeserializeInto, ZcashSerialize},
transaction::{UnminedTx, UnminedTxId}, transaction::{UnminedTx, UnminedTxId},
transparent,
}; };
use zebra_network::constants::USER_AGENT; use zebra_network::constants::USER_AGENT;
use zebra_node_services::BoxError; use zebra_node_services::BoxError;
@ -395,47 +396,89 @@ async fn rpc_getaddresstxids_invalid_arguments() {
async fn rpc_getaddresstxids_response() { async fn rpc_getaddresstxids_response() {
zebra_test::init(); zebra_test::init();
let blocks: Vec<Arc<Block>> = zebra_test::vectors::CONTINUOUS_MAINNET_BLOCKS for network in [Mainnet, Testnet] {
let blocks: Vec<Arc<Block>> = match network {
Mainnet => &*zebra_test::vectors::CONTINUOUS_MAINNET_BLOCKS,
Testnet => &*zebra_test::vectors::CONTINUOUS_TESTNET_BLOCKS,
}
.iter() .iter()
.map(|(_height, block_bytes)| block_bytes.zcash_deserialize_into().unwrap()) .map(|(_height, block_bytes)| block_bytes.zcash_deserialize_into().unwrap())
.collect(); .collect();
// get the first transaction of the first block // The first few blocks after genesis send funds to the same founders reward address,
// in one output per coinbase transaction.
//
// Get the coinbase transaction of the first block
// (the genesis block coinbase transaction is ignored by the consensus rules).
let first_block_first_transaction = &blocks[1].transactions[0]; let first_block_first_transaction = &blocks[1].transactions[0];
// get the address, this is always `t3Vz22vK5z2LcKEdg16Yv4FFneEL1zg9ojd`
let address = &first_block_first_transaction.outputs()[1] // Get the address.
.address(Mainnet) let address = first_block_first_transaction.outputs()[1]
.address(network)
.unwrap(); .unwrap();
if network == Mainnet {
// Exhaustively test possible block ranges for mainnet.
//
// TODO: if it takes too long on slower machines, turn this into a proptest with 10-20 cases
for start in 1..=10 {
for end in start..=10 {
rpc_getaddresstxids_response_with(network, start..=end, &blocks, &address)
.await;
}
}
} else {
// Just test the full range for testnet.
rpc_getaddresstxids_response_with(network, 1..=10, &blocks, &address).await;
}
}
}
async fn rpc_getaddresstxids_response_with(
network: Network,
range: RangeInclusive<u32>,
blocks: &[Arc<Block>],
address: &transparent::Address,
) {
let mut mempool: MockService<_, _, _, BoxError> = MockService::build().for_unit_tests(); let mut mempool: MockService<_, _, _, BoxError> = MockService::build().for_unit_tests();
// Create a populated state service // Create a populated state service
let (_state, read_state, latest_chain_tip, _chain_tip_change) = let (_state, read_state, latest_chain_tip, _chain_tip_change) =
zebra_state::populated_state(blocks.clone(), Mainnet).await; zebra_state::populated_state(blocks.to_owned(), network).await;
let (rpc, rpc_tx_queue_task_handle) = RpcImpl::new( let (rpc, rpc_tx_queue_task_handle) = RpcImpl::new(
"RPC test", "RPC test",
Buffer::new(mempool.clone(), 1), Buffer::new(mempool.clone(), 1),
Buffer::new(read_state.clone(), 1), Buffer::new(read_state.clone(), 1),
latest_chain_tip, latest_chain_tip,
Mainnet, network,
); );
// call the method with valid arguments // call the method with valid arguments
let addresses = vec![address.to_string()]; let addresses = vec![address.to_string()];
let start: u32 = 1;
let end: u32 = 1;
let response = rpc let response = rpc
.get_address_tx_ids(addresses, start, end) .get_address_tx_ids(addresses, *range.start(), *range.end())
.await .await
.expect("arguments are valid so no error can happen here"); .expect("arguments are valid so no error can happen here");
// TODO: The length of the response should be 1 // One founders reward output per coinbase transactions, no other transactions.
// Fix in the context of #3147 assert_eq!(response.len(), range.count());
assert_eq!(response.len(), 0);
mempool.expect_no_requests().await; mempool.expect_no_requests().await;
// The queue task should continue without errors or panics // Shut down the queue task, to close the state's file descriptors.
// (If we don't, opening ~100 simultaneous states causes process file descriptor limit errors.)
//
// TODO: abort all the join handles in all the tests, except one?
rpc_tx_queue_task_handle.abort();
// The queue task should not have panicked or exited by itself.
// It can still be running, or it can have exited due to the abort.
let rpc_tx_queue_task_result = rpc_tx_queue_task_handle.now_or_never(); let rpc_tx_queue_task_result = rpc_tx_queue_task_handle.now_or_never();
assert!(matches!(rpc_tx_queue_task_result, None)); assert!(
rpc_tx_queue_task_result.is_none()
|| rpc_tx_queue_task_result
.unwrap()
.unwrap_err()
.is_cancelled()
);
} }

View File

@ -2,6 +2,7 @@
use std::{ use std::{
collections::{HashMap, HashSet}, collections::{HashMap, HashSet},
ops::RangeInclusive,
sync::Arc, sync::Arc,
}; };
@ -438,19 +439,27 @@ pub enum ReadRequest {
/// * [`Response::Transaction(None)`](Response::Transaction) otherwise. /// * [`Response::Transaction(None)`](Response::Transaction) otherwise.
Transaction(transaction::Hash), Transaction(transaction::Hash),
/// Looks up transactions hashes that were made by provided addresses in a blockchain height range.
///
/// Returns
///
/// * A vector of transaction hashes.
/// * An empty vector if no transactions were found for the given arguments.
///
/// Returned txids are in the order they appear in blocks, which ensures that they are topologically sorted
/// (i.e. parent txids will appear before child txids).
TransactionsByAddresses(Vec<transparent::Address>, block::Height, block::Height),
/// Looks up the balance of a set of transparent addresses. /// Looks up the balance of a set of transparent addresses.
/// ///
/// Returns an [`Amount`] with the total balance of the set of addresses. /// Returns an [`Amount`] with the total balance of the set of addresses.
AddressBalance(HashSet<transparent::Address>), AddressBalance(HashSet<transparent::Address>),
/// Looks up transaction hashes that sent or received from addresses,
/// in an inclusive blockchain height range.
///
/// Returns
///
/// * A set of transaction hashes.
/// * An empty vector if no transactions were found for the given arguments.
///
/// Returned txids are in the order they appear in blocks,
/// which ensures that they are topologically sorted
/// (i.e. parent txids will appear before child txids).
TransactionIdsByAddresses {
/// The requested addresses.
addresses: HashSet<transparent::Address>,
/// The blocks to be queried for transactions.
height_range: RangeInclusive<block::Height>,
},
} }

View File

@ -1,11 +1,11 @@
//! State [`tower::Service`] response types. //! State [`tower::Service`] response types.
use std::sync::Arc; use std::{collections::BTreeMap, sync::Arc};
use zebra_chain::{ use zebra_chain::{
amount::{Amount, NonNegative}, amount::{Amount, NonNegative},
block::{self, Block}, block::{self, Block},
transaction::{Hash, Transaction}, transaction::{self, Transaction},
transparent, transparent,
}; };
@ -13,6 +13,7 @@ use zebra_chain::{
// will work with inline links. // will work with inline links.
#[allow(unused_imports)] #[allow(unused_imports)]
use crate::Request; use crate::Request;
use crate::TransactionLocation;
#[derive(Clone, Debug, PartialEq, Eq)] #[derive(Clone, Debug, PartialEq, Eq)]
/// A response to a [`StateService`] [`Request`]. /// A response to a [`StateService`] [`Request`].
@ -55,10 +56,10 @@ pub enum ReadResponse {
/// Response to [`ReadRequest::Transaction`] with the specified transaction. /// Response to [`ReadRequest::Transaction`] with the specified transaction.
Transaction(Option<(Arc<Transaction>, block::Height)>), Transaction(Option<(Arc<Transaction>, block::Height)>),
/// Response to [`ReadRequest::TransactionsByAddresses`] with the obtained transaction ids,
/// in the order they appear in blocks.
TransactionIds(Vec<Hash>),
/// Response to [`ReadRequest::AddressBalance`] with the total balance of the addresses. /// Response to [`ReadRequest::AddressBalance`] with the total balance of the addresses.
AddressBalance(Amount<NonNegative>), AddressBalance(Amount<NonNegative>),
/// Response to [`ReadRequest::TransactionIdsByAddresses`] with the obtained transaction ids,
/// in the order they appear in blocks.
AddressesTransactionIds(BTreeMap<TransactionLocation, transaction::Hash>),
} }

View File

@ -993,24 +993,25 @@ impl Service<ReadRequest> for ReadStateService {
} }
// For the get_address_tx_ids RPC. // For the get_address_tx_ids RPC.
ReadRequest::TransactionsByAddresses(_addresses, _start, _end) => { ReadRequest::TransactionIdsByAddresses {
addresses,
height_range,
} => {
metrics::counter!( metrics::counter!(
"state.requests", "state.requests",
1, 1,
"service" => "read_state", "service" => "read_state",
"type" => "transactions_by_addresses", "type" => "transaction_ids_by_addresses",
); );
let _state = self.clone(); let state = self.clone();
async move { async move {
// TODO: Respond with found transactions let tx_ids = state.best_chain_receiver.with_watch_data(|best_chain| {
// At least the following pull requests should be merged: read::transparent_tx_ids(best_chain, &state.db, addresses, height_range)
// - #4022 });
// - #4038
// Do the corresponding update in the context of #3147 tx_ids.map(ReadResponse::AddressesTransactionIds)
let transaction_ids = vec![];
Ok(ReadResponse::TransactionIds(transaction_ids))
} }
.boxed() .boxed()
} }

View File

@ -5,7 +5,7 @@
//! The [`crate::constants::DATABASE_FORMAT_VERSION`] constant must //! The [`crate::constants::DATABASE_FORMAT_VERSION`] constant must
//! be incremented each time the database format (column, serialization, etc) changes. //! be incremented each time the database format (column, serialization, etc) changes.
use std::fmt::Debug; use std::{cmp::max, fmt::Debug};
use zebra_chain::{ use zebra_chain::{
amount::{self, Amount, NonNegative}, amount::{self, Amount, NonNegative},
@ -396,18 +396,29 @@ impl AddressTransaction {
} }
/// Create an [`AddressTransaction`] which starts iteration for the supplied address. /// Create an [`AddressTransaction`] which starts iteration for the supplied address.
/// Starts at the first UTXO, or at the `query_start` height, whichever is greater.
///
/// Used to look up the first transaction with [`ReadDisk::zs_next_key_value_from`]. /// Used to look up the first transaction with [`ReadDisk::zs_next_key_value_from`].
/// ///
/// The transaction location is before all unspent output locations in the index. /// The transaction location might be invalid, if it is based on the `query_start` height.
/// It is always invalid, due to the genesis consensus rules. But this is not an issue /// But this is not an issue, since [`ReadDisk::zs_next_key_value_from`]
/// since [`ReadDisk::zs_next_key_value_from`] will fetch the next existing (valid) value. /// will fetch the next existing (valid) value.
pub fn address_iterator_start(address_location: AddressLocation) -> AddressTransaction { pub fn address_iterator_start(
address_location: AddressLocation,
query_start: Height,
) -> AddressTransaction {
// Iterating from the lowest possible transaction location gets us the first transaction. // Iterating from the lowest possible transaction location gets us the first transaction.
let zero_transaction_location = TransactionLocation::from_usize(Height(0), 0); //
// The address location is the output location of the first UTXO sent to the address,
// and addresses can not spend funds until they receive their first UTXO.
let first_utxo_location = address_location.transaction_location();
// Iterating from the start height filters out transactions that aren't needed.
let query_start_location = TransactionLocation::from_usize(query_start, 0);
AddressTransaction { AddressTransaction {
address_location, address_location,
transaction_location: zero_transaction_location, transaction_location: max(first_utxo_location, query_start_location),
} }
} }

View File

@ -45,12 +45,15 @@ use zebra_chain::{
}; };
use crate::{ use crate::{
service::finalized_state::{ service::{
finalized_state::{
disk_format::{ disk_format::{
block::TransactionIndex, transparent::OutputLocation, FromDisk, TransactionLocation, block::TransactionIndex, transparent::OutputLocation, FromDisk, TransactionLocation,
}, },
FinalizedState, FinalizedState,
}, },
read::ADDRESS_HEIGHTS_FULL_RANGE,
},
Config, Config,
}; };
@ -495,7 +498,9 @@ fn snapshot_transparent_address_data(state: &FinalizedState, height: u32) {
} }
let mut stored_transaction_locations = Vec::new(); let mut stored_transaction_locations = Vec::new();
for transaction_location in state.address_transaction_locations(stored_address_location) { for transaction_location in
state.address_transaction_locations(stored_address_location, ADDRESS_HEIGHTS_FULL_RANGE)
{
assert_eq!( assert_eq!(
transaction_location.address_location(), transaction_location.address_location(),
stored_address_location stored_address_location

View File

@ -11,10 +11,14 @@
//! The [`crate::constants::DATABASE_FORMAT_VERSION`] constant must //! The [`crate::constants::DATABASE_FORMAT_VERSION`] constant must
//! be incremented each time the database format (column, serialization, etc) changes. //! be incremented each time the database format (column, serialization, etc) changes.
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; use std::{
collections::{BTreeMap, BTreeSet, HashMap, HashSet},
ops::RangeInclusive,
};
use zebra_chain::{ use zebra_chain::{
amount::{self, Amount, NonNegative}, amount::{self, Amount, NonNegative},
block::Height,
transaction, transparent, transaction, transparent,
}; };
@ -146,7 +150,7 @@ impl ZebraDb {
let mut unspent_output = AddressUnspentOutput::address_iterator_start(address_location); let mut unspent_output = AddressUnspentOutput::address_iterator_start(address_location);
loop { loop {
// A valid key representing an entry for this address or the next // Seek to a valid entry for this address, or the first entry for the next address
unspent_output = match self unspent_output = match self
.db .db
.zs_next_key_value_from(&utxo_loc_by_transparent_addr_loc, &unspent_output) .zs_next_key_value_from(&utxo_loc_by_transparent_addr_loc, &unspent_output)
@ -177,19 +181,32 @@ impl ZebraDb {
self.db.zs_get(&hash_by_tx_loc, &tx_location) self.db.zs_get(&hash_by_tx_loc, &tx_location)
} }
/// Returns the [`transaction::Hash`]es that created or spent outputs for a [`transparent::Address`], /// Returns the transaction IDs that sent or received funds to `address`,
/// in chain order, if they are in the finalized state. /// in the finalized chain `query_height_range`.
#[allow(dead_code)] ///
/// If address has no finalized sends or receives,
/// or the `query_height_range` is totally outside the finalized block range,
/// returns an empty list.
pub fn address_tx_ids( pub fn address_tx_ids(
&self, &self,
address: &transparent::Address, address: &transparent::Address,
query_height_range: RangeInclusive<Height>,
) -> BTreeMap<TransactionLocation, transaction::Hash> { ) -> BTreeMap<TransactionLocation, transaction::Hash> {
let address_location = match self.address_location(address) { let address_location = match self.address_location(address) {
Some(address_location) => address_location, Some(address_location) => address_location,
None => return BTreeMap::new(), None => return BTreeMap::new(),
}; };
let transaction_locations = self.address_transaction_locations(address_location); // Skip this address if it was first used after the end height.
//
// The address location is the output location of the first UTXO sent to the address,
// and addresses can not spend funds until they receive their first UTXO.
if address_location.height() > *query_height_range.end() {
return BTreeMap::new();
}
let transaction_locations =
self.address_transaction_locations(address_location, query_height_range);
transaction_locations transaction_locations
.iter() .iter()
@ -205,10 +222,10 @@ impl ZebraDb {
/// Returns the locations of any transactions that sent or received from a [`transparent::Address`], /// Returns the locations of any transactions that sent or received from a [`transparent::Address`],
/// if they are in the finalized state. /// if they are in the finalized state.
#[allow(dead_code)]
pub fn address_transaction_locations( pub fn address_transaction_locations(
&self, &self,
address_location: AddressLocation, address_location: AddressLocation,
query_height_range: RangeInclusive<Height>,
) -> BTreeSet<AddressTransaction> { ) -> BTreeSet<AddressTransaction> {
let tx_loc_by_transparent_addr_loc = let tx_loc_by_transparent_addr_loc =
self.db.cf_handle("tx_loc_by_transparent_addr_loc").unwrap(); self.db.cf_handle("tx_loc_by_transparent_addr_loc").unwrap();
@ -216,16 +233,20 @@ impl ZebraDb {
// Manually fetch the entire addresses' transaction locations // Manually fetch the entire addresses' transaction locations
let mut addr_transactions = BTreeSet::new(); let mut addr_transactions = BTreeSet::new();
// An invalid key representing the minimum possible transaction // A potentially invalid key representing the first UTXO send to the address,
let mut transaction_location = AddressTransaction::address_iterator_start(address_location); // or the query start height.
let mut transaction_location = AddressTransaction::address_iterator_start(
address_location,
*query_height_range.start(),
);
loop { loop {
// A valid key representing an entry for this address or the next // Seek to a valid entry for this address, or the first entry for the next address
transaction_location = match self transaction_location = match self
.db .db
.zs_next_key_value_from(&tx_loc_by_transparent_addr_loc, &transaction_location) .zs_next_key_value_from(&tx_loc_by_transparent_addr_loc, &transaction_location)
{ {
Some((unspent_output, ())) => unspent_output, Some((transaction_location, ())) => transaction_location,
// We're finished with the final address in the column family // We're finished with the final address in the column family
None => break, None => break,
}; };
@ -235,6 +256,11 @@ impl ZebraDb {
break; break;
} }
// We're past the end height, so we're finished with this query
if transaction_location.transaction_location().height > *query_height_range.end() {
break;
}
addr_transactions.insert(transaction_location); addr_transactions.insert(transaction_location);
// A potentially invalid key representing the next possible output // A potentially invalid key representing the next possible output
@ -292,6 +318,38 @@ impl ZebraDb {
.flat_map(|address| self.address_utxos(address)) .flat_map(|address| self.address_utxos(address))
.collect() .collect()
} }
/// Returns the transaction IDs that sent or received funds to `addresses`,
/// in the finalized chain `query_height_range`.
///
/// If none of the addresses has finalized sends or receives,
/// or the `query_height_range` is totally outside the finalized block range,
/// returns an empty list.
///
/// # Correctness
///
/// Callers should combine the non-finalized transactions for `addresses`
/// with the returned transactions.
///
/// The transaction IDs will only be correct if the non-finalized chain matches or overlaps with
/// the finalized state.
///
/// Specifically, a block in the partial chain must be a child block of the finalized tip.
/// (But the child block does not have to be the partial chain root.)
///
/// This condition does not apply if there is only one address.
/// Since address transactions are only appended by blocks, and this query reads them in order,
/// it is impossible to get inconsistent transactions for a single address.
pub fn partial_finalized_transparent_tx_ids(
&self,
addresses: &HashSet<transparent::Address>,
query_height_range: RangeInclusive<Height>,
) -> BTreeMap<TransactionLocation, transaction::Hash> {
addresses
.iter()
.flat_map(|address| self.address_tx_ids(address, query_height_range.clone()))
.collect()
}
} }
impl DiskWriteBatch { impl DiskWriteBatch {

View File

@ -4,7 +4,7 @@
use std::{ use std::{
cmp::Ordering, cmp::Ordering,
collections::{BTreeMap, BTreeSet, HashMap, HashSet}, collections::{BTreeMap, BTreeSet, HashMap, HashSet},
ops::Deref, ops::{Deref, RangeInclusive},
sync::Arc, sync::Arc,
}; };
@ -13,7 +13,7 @@ use tracing::instrument;
use zebra_chain::{ use zebra_chain::{
amount::{Amount, NegativeAllowed, NonNegative}, amount::{Amount, NegativeAllowed, NonNegative},
block, block::{self, Height},
history_tree::HistoryTree, history_tree::HistoryTree,
orchard, orchard,
parameters::Network, parameters::Network,
@ -573,7 +573,8 @@ impl Chain {
(created_utxos, spent_utxos) (created_utxos, spent_utxos)
} }
/// Returns the [`transaction::Hash`]es used by `addresses` to receive or spend funds. /// Returns the [`transaction::Hash`]es used by `addresses` to receive or spend funds,
/// in the non-finalized chain, filtered using the `query_height_range`.
/// ///
/// If none of the addresses receive or spend funds in this partial chain, returns an empty list. /// If none of the addresses receive or spend funds in this partial chain, returns an empty list.
/// ///
@ -594,9 +595,10 @@ impl Chain {
pub fn partial_transparent_tx_ids( pub fn partial_transparent_tx_ids(
&self, &self,
addresses: &HashSet<transparent::Address>, addresses: &HashSet<transparent::Address>,
query_height_range: RangeInclusive<Height>,
) -> BTreeMap<TransactionLocation, transaction::Hash> { ) -> BTreeMap<TransactionLocation, transaction::Hash> {
self.partial_transparent_indexes(addresses) self.partial_transparent_indexes(addresses)
.flat_map(|transfers| transfers.tx_ids(&self.tx_by_hash)) .flat_map(|transfers| transfers.tx_ids(&self.tx_by_hash, query_height_range.clone()))
.collect() .collect()
} }

View File

@ -1,11 +1,15 @@
//! Transparent address indexes for non-finalized chains. //! Transparent address indexes for non-finalized chains.
use std::collections::{BTreeMap, BTreeSet, HashMap}; use std::{
collections::{BTreeMap, BTreeSet, HashMap},
ops::RangeInclusive,
};
use mset::MultiSet; use mset::MultiSet;
use zebra_chain::{ use zebra_chain::{
amount::{Amount, NegativeAllowed}, amount::{Amount, NegativeAllowed},
block::Height,
transaction, transparent, transaction, transparent,
}; };
@ -205,29 +209,33 @@ impl TransparentTransfers {
self.balance self.balance
} }
/// Returns the [`transaction::Hash`]es of the transactions that /// Returns the [`transaction::Hash`]es of the transactions that sent or received
/// sent or received transparent transfers to this address, /// transparent transfers to this address, in this partial chain, filtered by `query_height_range`.
/// in this partial chain, in chain order. ///
/// The transactions are returned in chain order.
/// ///
/// `chain_tx_by_hash` should be the `tx_by_hash` field from the [`Chain`] containing this index. /// `chain_tx_by_hash` should be the `tx_by_hash` field from the [`Chain`] containing this index.
/// ///
/// # Panics /// # Panics
/// ///
/// If `chain_tx_by_hash` is missing some transaction hashes from this index. /// If `chain_tx_by_hash` is missing some transaction hashes from this index.
#[allow(dead_code)]
pub fn tx_ids( pub fn tx_ids(
&self, &self,
chain_tx_by_hash: &HashMap<transaction::Hash, TransactionLocation>, chain_tx_by_hash: &HashMap<transaction::Hash, TransactionLocation>,
query_height_range: RangeInclusive<Height>,
) -> BTreeMap<TransactionLocation, transaction::Hash> { ) -> BTreeMap<TransactionLocation, transaction::Hash> {
self.tx_ids self.tx_ids
.distinct_elements() .distinct_elements()
.map(|tx_hash| { .filter_map(|tx_hash| {
( let tx_loc = *chain_tx_by_hash
*chain_tx_by_hash
.get(tx_hash) .get(tx_hash)
.expect("all hashes are indexed"), .expect("all hashes are indexed");
*tx_hash,
) if query_height_range.contains(&tx_loc.height) {
Some((tx_loc, *tx_hash))
} else {
None
}
}) })
.collect() .collect()
} }

View File

@ -37,6 +37,12 @@ pub use utxo::AddressUtxos;
/// If any more arrive, the client should wait until we're synchronised with our peers. /// If any more arrive, the client should wait until we're synchronised with our peers.
const FINALIZED_ADDRESS_INDEX_RETRIES: usize = 3; const FINALIZED_ADDRESS_INDEX_RETRIES: usize = 3;
/// The full range of address heights.
///
/// The genesis coinbase transactions are ignored by a consensus rule,
/// so they are not included in any address indexes.
pub const ADDRESS_HEIGHTS_FULL_RANGE: RangeInclusive<Height> = Height(1)..=Height::MAX;
/// Returns the [`Block`] with [`block::Hash`](zebra_chain::block::Hash) or /// Returns the [`Block`] with [`block::Hash`](zebra_chain::block::Hash) or
/// [`Height`](zebra_chain::block::Height), /// [`Height`](zebra_chain::block::Height),
/// if it exists in the non-finalized `chain` or finalized `db`. /// if it exists in the non-finalized `chain` or finalized `db`.
@ -421,7 +427,11 @@ where
let chain_tx_ids = chain let chain_tx_ids = chain
.as_ref() .as_ref()
.map(|chain| chain.as_ref().partial_transparent_tx_ids(addresses)) .map(|chain| {
chain
.as_ref()
.partial_transparent_tx_ids(addresses, ADDRESS_HEIGHTS_FULL_RANGE)
})
.unwrap_or_default(); .unwrap_or_default();
// First try the in-memory chain, then the disk database // First try the in-memory chain, then the disk database
@ -438,3 +448,188 @@ where
}) })
.collect() .collect()
} }
/// Returns the transaction IDs that sent or received funds from the supplied [`transparent::Address`]es,
/// within `query_height_range`, in chain order.
///
/// If the addresses do not exist in the non-finalized `chain` or finalized `db`,
/// or the `query_height_range` is totally outside both the `chain` and `db` range,
/// returns an empty list.
pub(crate) fn transparent_tx_ids<C>(
chain: Option<C>,
db: &ZebraDb,
addresses: HashSet<transparent::Address>,
query_height_range: RangeInclusive<Height>,
) -> Result<BTreeMap<TransactionLocation, transaction::Hash>, BoxError>
where
C: AsRef<Chain>,
{
let mut tx_id_error = None;
// Retry the finalized tx ID query if it was interruped by a finalizing block,
// and the non-finalized chain doesn't overlap the changed heights.
for _ in 0..=FINALIZED_ADDRESS_INDEX_RETRIES {
let (finalized_tx_ids, finalized_tip_range) =
finalized_transparent_tx_ids(db, &addresses, query_height_range.clone());
// Apply the non-finalized tx ID changes.
let chain_tx_id_changes = chain_transparent_tx_id_changes(
chain.as_ref(),
&addresses,
finalized_tip_range,
query_height_range.clone(),
);
// If the tx IDs are valid, return them, otherwise, retry or return an error.
match chain_tx_id_changes {
Ok(chain_tx_id_changes) => {
let tx_ids = apply_tx_id_changes(finalized_tx_ids, chain_tx_id_changes);
return Ok(tx_ids);
}
Err(error) => tx_id_error = Some(Err(error)),
}
}
tx_id_error.expect("unexpected missing error: attempts should set error or return")
}
/// Returns the [`transaction::Hash`]es for `addresses` in the finalized chain `query_height_range`,
/// and the finalized tip heights the transaction IDs were queried at.
///
/// If the addresses do not exist in the finalized `db`, returns an empty list.
//
// TODO: turn the return type into a struct?
fn finalized_transparent_tx_ids(
db: &ZebraDb,
addresses: &HashSet<transparent::Address>,
query_height_range: RangeInclusive<Height>,
) -> (
BTreeMap<TransactionLocation, transaction::Hash>,
Option<RangeInclusive<Height>>,
) {
// # Correctness
//
// The StateService can commit additional blocks while we are querying transaction IDs.
// Check if the finalized state changed while we were querying it
let start_finalized_tip = db.finalized_tip_height();
let finalized_tx_ids = db.partial_finalized_transparent_tx_ids(addresses, query_height_range);
let end_finalized_tip = db.finalized_tip_height();
let finalized_tip_range = if let (Some(start_finalized_tip), Some(end_finalized_tip)) =
(start_finalized_tip, end_finalized_tip)
{
Some(start_finalized_tip..=end_finalized_tip)
} else {
// State is empty
None
};
(finalized_tx_ids, finalized_tip_range)
}
/// Returns the extra transaction IDs for `addresses` in the non-finalized chain `query_height_range`,
/// matching or overlapping the transaction IDs for the `finalized_tip_range`,
///
/// If the addresses do not exist in the non-finalized `chain`, returns an empty list.
//
// TODO: turn the return type into a struct?
fn chain_transparent_tx_id_changes<C>(
chain: Option<C>,
addresses: &HashSet<transparent::Address>,
finalized_tip_range: Option<RangeInclusive<Height>>,
query_height_range: RangeInclusive<Height>,
) -> Result<BTreeMap<TransactionLocation, transaction::Hash>, BoxError>
where
C: AsRef<Chain>,
{
let finalized_tip_range = match finalized_tip_range {
Some(finalized_tip_range) => finalized_tip_range,
None => {
assert!(
chain.is_none(),
"unexpected non-finalized chain when finalized state is empty"
);
// Empty chains don't contain any tx IDs.
return Ok(Default::default());
}
};
// # Correctness
//
// The StateService commits blocks to the finalized state before updating the latest chain,
// and it can commit additional blocks after we've cloned this `chain` variable.
//
// But we can compensate for addresses with mismatching blocks,
// by adding the overlapping non-finalized transaction IDs.
//
// If there is only one address, mismatches aren't possible,
// because tx IDs are added to the finalized state in chain order (and never removed),
// and they are queried in chain order.
// Check if the finalized and non-finalized states match or overlap
let required_min_chain_root = finalized_tip_range.start().0 + 1;
let mut required_chain_overlap = required_min_chain_root..=finalized_tip_range.end().0;
if chain.is_none() {
if required_chain_overlap.is_empty() || addresses.len() <= 1 {
// The non-finalized chain is empty, and we don't need it.
return Ok(Default::default());
} else {
// We can't compensate for inconsistent database queries,
// because the non-finalized chain is empty.
return Err("unable to get tx IDs: state was committing a block, and non-finalized chain is empty".into());
}
}
let chain = chain.unwrap();
let chain = chain.as_ref();
let chain_root = chain.non_finalized_root_height().0;
let chain_tip = chain.non_finalized_tip_height().0;
assert!(
chain_root <= required_min_chain_root,
"unexpected chain gap: the best chain is updated after its previous root is finalized"
);
// If we've already committed this entire chain, ignore its UTXO changes.
// This is more likely if the non-finalized state is just getting started.
if chain_tip > *required_chain_overlap.end() {
if required_chain_overlap.is_empty() || addresses.len() <= 1 {
// The non-finalized chain has been committed, and we don't need it.
return Ok(Default::default());
} else {
// We can't compensate for inconsistent database queries,
// because the non-finalized chain is below the inconsistent query range.
return Err("unable to get tx IDs: state was committing a block, and non-finalized chain has been committed".into());
}
}
// Correctness: some finalized tx IDs might have come from different blocks for different addresses,
// but we've just checked they can be corrected by applying the non-finalized UTXO changes.
assert!(
required_chain_overlap.all(|height| chain.blocks.contains_key(&Height(height))) || addresses.len() <= 1,
"tx ID query inconsistency: chain must contain required overlap blocks if there are multiple addresses",
);
Ok(chain.partial_transparent_tx_ids(addresses, query_height_range))
}
/// Returns the combined finalized and non-finalized transaction IDs.
fn apply_tx_id_changes(
finalized_tx_ids: BTreeMap<TransactionLocation, transaction::Hash>,
chain_tx_ids: BTreeMap<TransactionLocation, transaction::Hash>,
) -> BTreeMap<TransactionLocation, transaction::Hash> {
// Correctness: compensate for inconsistent tx IDs finalized blocks across multiple addresses,
// by combining them with overalapping non-finalized block tx IDs.
finalized_tx_ids
.into_iter()
.chain(chain_tx_ids.into_iter())
.collect()
}