diff --git a/accountsdb-plugin-interface/Cargo.toml b/accountsdb-plugin-interface/Cargo.toml index b8525c238..889560a21 100644 --- a/accountsdb-plugin-interface/Cargo.toml +++ b/accountsdb-plugin-interface/Cargo.toml @@ -7,7 +7,7 @@ version = "1.10.0" repository = "https://github.com/solana-labs/solana" license = "Apache-2.0" homepage = "https://solana.com/" -documentation = "https://docs.rs/solana-validator" +documentation = "https://docs.rs/solana-accountsdb-plugin-interface" [dependencies] log = "0.4.11" diff --git a/accountsdb-plugin-interface/src/accountsdb_plugin_interface.rs b/accountsdb-plugin-interface/src/accountsdb_plugin_interface.rs index 3a6caa53a..95e1d221f 100644 --- a/accountsdb-plugin-interface/src/accountsdb_plugin_interface.rs +++ b/accountsdb-plugin-interface/src/accountsdb_plugin_interface.rs @@ -12,16 +12,38 @@ use { impl Eq for ReplicaAccountInfo<'_> {} #[derive(Clone, PartialEq, Debug)] +/// Information about an account being updated pub struct ReplicaAccountInfo<'a> { + /// The Pubkey for the account pub pubkey: &'a [u8], + + /// The lamports for the account pub lamports: u64, + + /// The Pubkey of the owner program account pub owner: &'a [u8], + + /// This account's data contains a loaded program (and is now read-only) pub executable: bool, + + /// The epoch at which this account will next owe rent pub rent_epoch: u64, + + /// The data held in this account. pub data: &'a [u8], + + /// A global monotonically increasing atomic number, which can be used + /// to tell the order of the account update. For example, when an + /// account is updated in the same slot multiple times, the update + /// with higher write_version should supersede the one with lower + /// write_version. pub write_version: u64, } +/// A wrapper to future-proof ReplicaAccountInfo handling. +/// If there were a change to the structure of ReplicaAccountInfo, +/// there would be new enum entry for the newer version, forcing +/// plugin implementations to handle the change. 
pub enum ReplicaAccountInfoVersions<'a> { V0_0_1(&'a ReplicaAccountInfo<'a>), } @@ -38,28 +60,44 @@ pub enum ReplicaTransactionInfoVersions<'a> { V0_0_1(&'a ReplicaTransactionInfo<'a>), } +/// Errors returned by plugin calls #[derive(Error, Debug)] pub enum AccountsDbPluginError { + /// Error opening the configuration file; for example, when the file + /// is not found or when the validator process has no permission to read it. #[error("Error opening config file. Error detail: ({0}).")] ConfigFileOpenError(#[from] io::Error), + /// Error in reading the content of the config file or the content + /// is not in the expected format. #[error("Error reading config file. Error message: ({msg})")] ConfigFileReadError { msg: String }, + /// Error when updating the account. #[error("Error updating account. Error message: ({msg})")] AccountsUpdateError { msg: String }, + /// Error when updating the slot status #[error("Error updating slot status. Error message: ({msg})")] SlotStatusUpdateError { msg: String }, + /// Any custom error defined by the plugin. #[error("Plugin-defined custom error. Error message: ({0})")] Custom(Box), } +/// The current status of a slot #[derive(Debug, Clone)] pub enum SlotStatus { + /// The highest slot of the heaviest fork processed by the node. Ledger state at this slot is + /// not derived from a confirmed or finalized block, but if multiple forks are present, is from + /// the fork the validator believes is most likely to finalize. Processed, + + /// The highest slot having reached max vote lockout. Rooted, + + /// The highest slot that has been voted on by supermajority of the cluster, ie. is confirmed. Confirmed, } @@ -75,6 +113,9 @@ impl SlotStatus { pub type Result = std::result::Result; +/// Defines an AccountsDb plugin, to stream data from the runtime. +/// AccountsDb plugins must describe desired behavior for load and unload, +/// as well as how they will handle streamed data. 
pub trait AccountsDbPlugin: Any + Send + Sync + std::fmt::Debug { fn name(&self) -> &'static str; @@ -93,6 +134,9 @@ pub trait AccountsDbPlugin: Any + Send + Sync + std::fmt::Debug { fn on_unload(&mut self) {} /// Called when an account is updated at a slot. + /// When `is_startup` is true, it indicates the account is loaded from + /// snapshots when the validator starts up. When `is_startup` is false, + /// the account is updated during transaction processing. #[allow(unused_variables)] fn update_account( &mut self, diff --git a/docs/sidebars.js b/docs/sidebars.js index 3eb4c75df..b8e03e7db 100644 --- a/docs/sidebars.js +++ b/docs/sidebars.js @@ -91,6 +91,7 @@ module.exports = { }, "developing/test-validator", "developing/backwards-compatibility", + "developing/plugins/accountsdb_plugin" ], Integrating: ["integrations/exchange"], Validating: [ diff --git a/docs/src/developing/plugins/accountsdb_plugin.md b/docs/src/developing/plugins/accountsdb_plugin.md new file mode 100644 index 000000000..f99c84fa5 --- /dev/null +++ b/docs/src/developing/plugins/accountsdb_plugin.md @@ -0,0 +1,363 @@ +--- +title: AccountsDb Plugins +--- + +Overview +======== + +Validators under heavy RPC loads, such as when serving getProgramAccounts calls, +can fall behind the network. To solve this problem, the validator has been +enhanced to support a plugin mechanism through which the information about +accounts and slots can be transmitted to external data stores such as relational +databases, NoSQL databases or Kafka. RPC services then can be developed to +consume data from these external data stores with the possibility of more +flexible and targeted optimizations such as caching and indexing. This allows +the validator to focus on processing transactions without being slowed down by +busy RPC requests. + +This document describes the interfaces of the plugin and the referential plugin +implementation for the PostgreSQL database. 
+ +[crates.io]: https://crates.io/search?q=solana- +[docs.rs]: https://docs.rs/releases/search?query=solana- + +### Important Crates: + +- [`solana-accountsdb-plugin-interface`] — This crate defines the plugin +interfaces. + +- [`solana-accountsdb-plugin-postgres`] — The crate for the referential +plugin implementation for the PostgreSQL database. + +[`solana-accountsdb-plugin-interface`]: https://docs.rs/solana-accountsdb-plugin-interface +[`solana-accountsdb-plugin-postgres`]: https://docs.rs/solana-accountsdb-plugin-postgres + + +The Plugin Interface +==================== + +The Plugin interface is declared in [`solana-accountsdb-plugin-interface`]. It +is defined by the trait `AccountsDbPlugin`. The plugin should implement the +trait and expose a "C" function `_create_plugin` to return the pointer to this +trait. For example, in the referential implementation, the following code +instantiates the PostgreSQL plugin `AccountsDbPluginPostgres` and returns its +pointer. + +``` +#[no_mangle] +#[allow(improper_ctypes_definitions)] +/// # Safety +/// +/// This function returns the AccountsDbPluginPostgres pointer as trait AccountsDbPlugin. +pub unsafe extern "C" fn _create_plugin() -> *mut dyn AccountsDbPlugin { + let plugin = AccountsDbPluginPostgres::new(); + let plugin: Box<dyn AccountsDbPlugin> = Box::new(plugin); + Box::into_raw(plugin) +} +``` + +A plugin implementation can implement the `on_load` method to initialize itself. +This function is invoked after a plugin is dynamically loaded into the validator +when it starts. The configuration of the plugin is controlled by a configuration +file in JSON format. The JSON file must have a field `libpath` that points +to the full path name of the shared library implementing the plugin, and may +have other configuration information, like connection parameters for the external +database. 
The plugin configuration file is specified by the validator's CLI +parameter `--accountsdb-plugin-config` and the file must be readable by the +validator process. + +Please see the [config file](#configuration-file-format) for the referential +PostgreSQL plugin below for an example. + +The plugin can implement the `on_unload` method to do any cleanup before the +plugin is unloaded when the validator is gracefully shut down. + +The following method is used for notifying on an account update: + +``` + fn update_account( + &mut self, + account: ReplicaAccountInfoVersions, + slot: u64, + is_startup: bool, + ) -> Result<()> +``` + +The `ReplicaAccountInfoVersions` enum contains the metadata and data of the account +streamed. The `slot` points to the slot the account is being updated at. When +`is_startup` is true, it indicates the account is loaded from snapshots when +the validator starts up. When `is_startup` is false, the account is updated +when processing a transaction. + + +The following method is called once all accounts have been notified, when the +validator restores the AccountsDb from snapshots at startup. + +``` +fn notify_end_of_startup(&mut self) -> Result<()> +``` + +When `update_account` is called during processing transactions, the plugin +should process the notification as fast as possible because any delay may +cause the validator to fall behind the network. Persistence to external data +stores is best done asynchronously. + +The following method is used for notifying slot status changes: + +``` + fn update_slot_status( + &mut self, + slot: u64, + parent: Option<u64>, + status: SlotStatus, + ) -> Result<()> +``` + +To ensure data consistency, the plugin implementation can choose to abort +the validator in case of error persisting to external stores. When the +validator restarts the account data will be re-transmitted. + +For more details, please refer to the Rust documentation in +[`solana-accountsdb-plugin-interface`]. 
+ +Example PostgreSQL Plugin +========================= + +The [`solana-accountsdb-plugin-postgres`] crate implements a plugin storing +account data to a PostgreSQL database to illustrate how a plugin can be +developed. + + +## Configuration File Format + + +The plugin is configured using the input configuration file. An example +configuration file looks like the following: + + +``` +{ + "libpath": "/solana/target/release/libsolana_accountsdb_plugin_postgres.so", + "host": "postgres-server", + "user": "solana", + "port": 5433, + "threads": 20, + "batch_size": 20, + "panic_on_db_errors": true, + "accounts_selector" : { + "accounts" : ["*"] + } +} +``` + +The `host`, `user`, and `port` control the PostgreSQL configuration +information. For more advanced connection options, please use the +`connection_str` field. Please see +[Rust postgres configuration](https://docs.rs/postgres/0.19.2/postgres/config/struct.Config.html). + +To improve the throughput to the database, the plugin supports connection pooling +using multiple threads, each maintaining a connection to the PostgreSQL database. +The count of the threads is controlled by the `threads` field. A higher thread +count usually offers better performance. + +To further improve performance when saving large numbers of accounts at +startup, the plugin uses bulk inserts. The batch size is controlled by the +`batch_size` parameter. This can help reduce the round trips to the database. + +The `panic_on_db_errors` can be used to panic the validator in case of database +errors to ensure data consistency. + +## Account Selection + +The `accounts_selector` can be used to filter the accounts that should be persisted. 
+ +For example, one can use the following to persist only the accounts with particular +Base58-encoded Pubkeys, + +``` + "accounts_selector" : { + "accounts" : ["pubkey-1", "pubkey-2", ..., "pubkey-n"], + } +``` + +Or use the following to select accounts with certain program owners: + +``` + "accounts_selector" : { + "owners" : ["pubkey-owner-1", "pubkey-owner-2", ..., "pubkey-owner-m"], + } +``` + +To select all accounts, use the wildcard character (*): + +``` + "accounts_selector" : { + "accounts" : ["*"], + } +``` + + +## Database Setup + +### Install PostgreSQL Server + +Please follow the [PostgreSQL Ubuntu Installation](https://www.postgresql.org/download/linux/ubuntu/) +instructions to install the PostgreSQL database server. For example, to +install postgresql-14, + +``` +sudo sh -c 'echo "deb http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list' +wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add - +sudo apt-get update +sudo apt-get -y install postgresql-14 +``` +### Control the Database Access + +Modify the pg_hba.conf as necessary to grant the plugin access to the database. +For example, in /etc/postgresql/14/main/pg_hba.conf, the following entry allows +nodes with IPs in the CIDR 10.138.0.0/24 to access all databases. The validator +runs in a node with an IP in the specified range. + +``` +host all all 10.138.0.0/24 trust +``` + +It is recommended to run the database server on a separate node from the validator for +better performance. + +### Configure the Database Performance Parameters + +Please refer to the [PostgreSQL Server Configuration](https://www.postgresql.org/docs/14/runtime-config.html) +for configuration details. The referential implementation uses the following +configurations for better database performance in the /etc/postgresql/14/main/postgresql.conf +which are different from the default postgresql-14 installation. 
+ +``` +max_connections = 200 # (change requires restart) +shared_buffers = 1GB # min 128kB +effective_io_concurrency = 1000 # 1-1000; 0 disables prefetching +wal_level = minimal # minimal, replica, or logical +fsync = off # flush data to disk for crash safety +synchronous_commit = off # synchronization level; +full_page_writes = off # recover from partial page writes +max_wal_senders = 0 # max number of walsender processes +``` + +The sample [postgresql.conf](https://github.com/solana-labs/solana/blob/7ac43b16d2c766df61ae0a06d7aaf14ba61996ac/accountsdb-plugin-postgres/scripts/postgresql.conf) +can be used for reference. + +### Create the Database Instance and the Role + +Start the server: + +``` +sudo systemctl start postgresql@14-main +``` + +Create the database. For example, the following creates a database named 'solana': + +``` +sudo -u postgres createdb solana -p 5433 +``` + +Create the database user. For example, the following creates a regular user named 'solana': + +``` +sudo -u postgres createuser -p 5433 solana +``` + +Verify the database is working using psql. For example, assuming the node running +PostgreSQL has the ip 10.138.0.9, the following command will land in a shell where +SQL commands can be entered: + +``` +psql -U solana -p 5433 -h 10.138.0.9 -w -d solana +``` + +### Create the Schema Objects + +Use the [create_schema.sql](https://github.com/solana-labs/solana/blob/7ac43b16d2c766df61ae0a06d7aaf14ba61996ac/accountsdb-plugin-postgres/scripts/create_schema.sql) +to create the objects for storing accounts and slots. + +Download the script from github: + +``` +wget https://raw.githubusercontent.com/solana-labs/solana/7ac43b16d2c766df61ae0a06d7aaf14ba61996ac/accountsdb-plugin-postgres/scripts/create_schema.sql +``` + +Then run the script: + +``` +psql -U solana -p 5433 -h 10.138.0.9 -w -d solana -f create_schema.sql +``` + +After this, start the validator with the plugin by using the `--accountsdb-plugin-config` +argument mentioned above. 
+ +### Destroy the Schema Objects + +To destroy the database objects created by `create_schema.sql`, use +[drop_schema.sql](https://github.com/solana-labs/solana/blob/7ac43b16d2c766df61ae0a06d7aaf14ba61996ac/accountsdb-plugin-postgres/scripts/drop_schema.sql). +For example, + +``` +psql -U solana -p 5433 -h 10.138.0.9 -w -d solana -f drop_schema.sql +``` + +## Capture Historical Account Data + +The historical account data is captured using a database trigger as shown in +`create_schema.sql`, + +``` +CREATE FUNCTION audit_account_update() RETURNS trigger AS $audit_account_update$ + BEGIN + INSERT INTO account_audit (pubkey, owner, lamports, slot, executable, rent_epoch, data, write_version, updated_on) + VALUES (OLD.pubkey, OLD.owner, OLD.lamports, OLD.slot, + OLD.executable, OLD.rent_epoch, OLD.data, OLD.write_version, OLD.updated_on); + RETURN NEW; + END; + +$audit_account_update$ LANGUAGE plpgsql; + +CREATE TRIGGER account_update_trigger AFTER UPDATE OR DELETE ON account + FOR EACH ROW EXECUTE PROCEDURE audit_account_update(); +``` + +The historical data is stored in the account_audit table. + +The trigger can be dropped to disable this feature, for example, + + +``` +DROP TRIGGER account_update_trigger ON account; +``` + +Over time, the account_audit can accumulate a large amount of data. You may choose to +limit that by deleting older historical data. 
+ + +For example, the following SQL statement can be used to keep up to 1000 of the most +recent records for an account: + +``` +delete from account_audit a2 where (pubkey, write_version) in + (select pubkey, write_version from + (select a.pubkey, a.updated_on, a.slot, a.write_version, a.lamports, + rank() OVER ( partition by pubkey order by write_version desc) as rnk + from account_audit a) ranked + where ranked.rnk > 1000) +``` + +## Performance Considerations + +When a validator lacks sufficient compute power, the overhead of saving the +account data can cause it to fall behind the network especially when all +accounts or a large number of accounts are selected. The node hosting the +PostgreSQL database needs to be powerful enough to handle the database loads +as well. It has been found using GCP n2-standard-64 machine type for the +validator and n2-highmem-32 for the PostgreSQL node is adequate for +transmitting all accounts while keeping up with the network. In addition, it is +best to keep the validator and the PostgreSQL in the same local network to +reduce latency. You may need to size the validator and database nodes +differently if serving other loads.