diff --git a/Cargo.lock b/Cargo.lock index 3a64053352..51709d1ca5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2083,6 +2083,7 @@ dependencies = [ "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.82 (registry+https://github.com/rust-lang/crates.io-index)", "serde_derive 1.0.82 (registry+https://github.com/rust-lang/crates.io-index)", + "solana-metrics 0.11.0", "solana-sdk 0.11.0", ] diff --git a/metrics/testnet-monitor.json b/metrics/testnet-monitor.json index e4830c35e7..09ccbc627e 100644 --- a/metrics/testnet-monitor.json +++ b/metrics/testnet-monitor.json @@ -15,8 +15,8 @@ "editable": true, "gnetId": null, "graphTooltip": 0, - "id": 74, - "iteration": 1544477352265, + "id": 79, + "iteration": 1544546712840, "links": [ { "asDropdown": true, @@ -458,7 +458,7 @@ "hide": false, "orderByTime": "ASC", "policy": "default", - "query": "SELECT sum(\"count\") FROM \"$testnet\".\"autogen\".\"counter-cluster_info-vote-count\" WHERE $timeFilter \n", + "query": "SELECT sum(\"count\") FROM \"$testnet\".\"autogen\".\"vote-native\" WHERE $timeFilter \n", "rawQuery": true, "refId": "A", "resultFormat": "table", @@ -530,44 +530,6 @@ "stack": false, "steppedLine": false, "targets": [ - { - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "measurement": "counter-cluster_info-vote-count", - "orderByTime": "ASC", - "policy": "autogen", - "query": "SELECT sum(\"count\") AS \" \" FROM \"$testnet\".\"autogen\".\"counter-cluster_info-vote-count\" WHERE $timeFilter GROUP BY time($__interval) FILL(0)\n\n", - "rawQuery": true, - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "count" - ], - "type": "field" - }, - { - "params": [], - "type": "sum" - } - ] - ], - "tags": [] - }, { "groupBy": [ { @@ -585,7 +547,7 @@ ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT sum(\"count\") AS \" \" FROM \"$testnet\".\"autogen\".\"counter-validator-vote_sent\" WHERE $timeFilter GROUP BY time($__interval) FILL(0)", + "query": "SELECT sum(\"count\") AS \"total\" FROM \"$testnet\".\"autogen\".\"vote-native\" WHERE $timeFilter GROUP BY time($__interval) FILL(0)", "rawQuery": true, "refId": "B", "resultFormat": "time_series", @@ -622,9 +584,9 @@ ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT sum(\"count\") AS \" \" FROM \"$testnet\".\"autogen\".\"counter-vote_stage-leader_sent_vote\" WHERE $timeFilter GROUP BY time($__interval) FILL(0)\n\n\n", + "query": "SELECT sum(\"count\") AS \" \" FROM \"$testnet\".\"autogen\".\"counter-validator-vote_sent\" WHERE $timeFilter GROUP BY time($__interval) FILL(0)", "rawQuery": true, - "refId": "C", + "refId": "A", "resultFormat": "time_series", "select": [ [ @@ -4995,7 +4957,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "UDP Net Stats (validators)", + "title": "UDP Net Stats ($hostid)", "tooltip": { "shared": true, "sort": 0, @@ -5089,7 +5051,7 @@ "measurement": "counter-cluster_info-vote-count", "orderByTime": "ASC", "policy": "autogen", - "query": "SELECT sum(\"count\") AS \"retransmit\" FROM \"$testnet\".\"autogen\".\"retransmit-stage\" WHERE $timeFilter GROUP BY time($__interval) FILL(0)", + "query": "SELECT sum(\"count\") AS \"retransmit\" FROM \"$testnet\".\"autogen\".\"retransmit-stage\" WHERE host_id =~ /$hostid/ AND $timeFilter GROUP BY time($__interval) FILL(0)", "rawQuery": true, "refId": "A", "resultFormat": "time_series", @@ -5127,7 +5089,7 @@ "measurement": "counter-cluster_info-vote-count", "orderByTime": "ASC", "policy": "autogen", - "query": "SELECT sum(\"count\") AS \"replicate\" FROM \"$testnet\".\"autogen\".\"replicate-stage\" WHERE $timeFilter GROUP BY time($__interval) FILL(0)", + "query": "SELECT sum(\"count\") AS \"replicate\" FROM \"$testnet\".\"autogen\".\"replicate-stage\" WHERE host_id =~ /$hostid/ AND $timeFilter GROUP BY time($__interval) FILL(0)", "rawQuery": true, "refId": "B", "resultFormat": "time_series", @@ -5165,7 +5127,7 @@ "measurement": "counter-cluster_info-vote-count", "orderByTime": "ASC", "policy": "autogen", - "query": "SELECT sum(\"count\") AS \"retransmit_q\" FROM \"$testnet\".\"autogen\".\"retransmit-queue\" WHERE $timeFilter GROUP BY time($__interval) FILL(0)", + "query": "SELECT sum(\"count\") AS \"retransmit_q\" FROM \"$testnet\".\"autogen\".\"retransmit-queue\" WHERE host_id =~ /$hostid/ AND $timeFilter GROUP BY time($__interval) FILL(0)", "rawQuery": true, "refId": "C", "resultFormat": "time_series", @@ -5203,7 +5165,7 @@ "measurement": "counter-cluster_info-vote-count", "orderByTime": "ASC", "policy": "autogen", - "query": "SELECT sum(\"count\") AS \"recv_window\" FROM \"$testnet\".\"autogen\".\"recv-window\" WHERE $timeFilter GROUP BY time($__interval) FILL(0)", + "query": "SELECT sum(\"count\") AS \"recv_window\" FROM \"$testnet\".\"autogen\".\"recv-window\" WHERE host_id =~ /$hostid/ AND $timeFilter GROUP BY time($__interval) FILL(0)", "rawQuery": true, "refId": "D", "resultFormat": "time_series", @@ -5227,7 +5189,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Channel Pressure (validator)", + "title": "Channel Pressure ($hostid)", "tooltip": { "shared": true, "sort": 0, @@ -5320,7 +5282,7 @@ "measurement": "counter-cluster_info-vote-count", "orderByTime": "ASC", "policy": "autogen", - "query": "SELECT last(\"consumed\") AS \"validator\" FROM \"$testnet\".\"autogen\".\"window-stage\" WHERE $timeFilter GROUP BY time($__interval) FILL(0)", + "query": "SELECT last(\"consumed\") AS \"validator\" FROM \"$testnet\".\"autogen\".\"window-stage\" WHERE host_id =~ /$hostid/ AND $timeFilter GROUP BY time($__interval) FILL(0)", "rawQuery": true, "refId": "A", "resultFormat": "time_series", @@ -5382,7 +5344,201 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Leader broadcast vs Validator consume", + "title": "Leader broadcast vs Validator consume ($hostid)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "cluster-info.repair": "#ba43a9", + "window-service.receive": "#b7dbab", + "window-stage.consumed": "#5195ce" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Solana Metrics (read-only)", + "fill": 1, + "gridPos": { + "h": 5, + "w": 12, + "x": 0, + "y": 71 + }, + "id": 42, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 1, + "points": true, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "counter-cluster_info-vote-count", + "orderByTime": "ASC", + "policy": "autogen", + "query": "SELECT last(\"last-recv\") AS \"receive\" FROM \"$testnet\".\"autogen\".\"window-service\" WHERE host_id =~ /$hostid/ AND $timeFilter GROUP BY time($__interval)", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "count" + ], + "type": "field" + }, + { + "params": [], + "type": "sum" + } + ] + ], + "tags": [] + }, + { + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "orderByTime": "ASC", + "policy": "default", + "query": "SELECT last(\"consumed\") AS \"consumed\" FROM \"$testnet\".\"autogen\".\"window-stage\" WHERE host_id =~ /$hostid/ AND $timeFilter GROUP BY time($__interval)", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "orderByTime": "ASC", + "policy": "default", + "query": "SELECT last(\"repair-ix\") AS \"repair\" FROM \"$testnet\".\"autogen\".\"cluster-info\" WHERE host_id =~ /$hostid/ AND $timeFilter GROUP BY time($__interval)", + "rawQuery": true, + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Receive/Consume/Repair ($hostid)", "tooltip": { "shared": true, "sort": 0, @@ -5425,7 +5581,7 @@ "h": 1, "w": 24, "x": 0, - "y": 71 + "y": 76 }, "id": 40, "panels": [], @@ -5443,7 +5599,7 @@ "h": 5, "w": 12, "x": 0, - "y": 72 + "y": 77 }, "id": 41, "legend": { @@ -5575,6 +5731,25 @@ "tagsQuery": "", "type": "query", "useTags": false + }, + { + "allValue": null, + "datasource": "Solana Metrics (read-only)", + "hide": 0, + "includeAll": false, + "label": "HostID", + "multi": false, + "name": "hostid", + "options": [], + "query": "SELECT DISTINCT(\"host_id\") FROM \"$testnet\".\"autogen\".\"counter-bank-process_transactions-txs\" ", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false } ] }, @@ -5610,5 +5785,5 @@ "timezone": "", "title": "Testnet Monitor (edge)", "uid": "testnet-edge", - "version": 112 -} + "version": 113 +} \ No newline at end of file diff --git a/programs/native/vote/Cargo.toml b/programs/native/vote/Cargo.toml index 9660da7821..6354b8d150 100644 --- a/programs/native/vote/Cargo.toml +++ b/programs/native/vote/Cargo.toml @@ -12,6 +12,7 @@ env_logger = "0.6.0" log = "0.4.2" serde = "1.0.82" serde_derive = "1.0.82" +solana-metrics = { path = "../../../metrics", version = "0.11.0" } solana-sdk = { path = "../../../sdk", version = "0.11.0" } [lib] diff --git a/programs/native/vote/src/lib.rs b/programs/native/vote/src/lib.rs index efaaddf337..a6f41a4fc8 100644 --- a/programs/native/vote/src/lib.rs +++ b/programs/native/vote/src/lib.rs @@ -5,10 +5,12 @@ extern crate bincode; extern crate env_logger; #[macro_use] extern crate log; +extern crate solana_metrics; #[macro_use] extern crate solana_sdk; use bincode::deserialize; +use solana_metrics::{influxdb, submit}; use solana_sdk::account::KeyedAccount; use solana_sdk::native_program::ProgramError; use solana_sdk::pubkey::Pubkey; @@ -62,6 +64,11 @@ fn entrypoint( Err(ProgramError::InvalidArgument)?; } debug!("{:?} by {}", vote, keyed_accounts[0].signer_key().unwrap()); + submit( + influxdb::Point::new("vote-native") + .add_field("count", influxdb::Value::Integer(1)) + .to_owned(), + ); let mut vote_state = VoteProgram::deserialize(&keyed_accounts[0].account.userdata)?; diff --git a/src/cluster_info.rs b/src/cluster_info.rs index 9c269b54cf..f4f906efc4 100644 --- a/src/cluster_info.rs +++ b/src/cluster_info.rs @@ -30,6 +30,7 @@ use bincode::{deserialize, serialize}; use log::Level; use rand::{thread_rng, Rng}; use rayon::prelude::*; +use solana_metrics::{influxdb, submit}; use solana_sdk::hash::Hash; use solana_sdk::pubkey::Pubkey; use solana_sdk::signature::{Keypair, KeypairUtil, Signable, Signature}; @@ -520,6 +521,13 @@ impl ClusterInfo { let addr = valid[n].gossip; // send the request to the peer's gossip port let req = Protocol::RequestWindowIndex(self.my_data().clone(), ix); let out = serialize(&req)?; + + submit( + influxdb::Point::new("cluster-info") + .add_field("repair-ix", influxdb::Value::Integer(ix as i64)) + .to_owned(), + ); + Ok((addr, out)) } fn new_pull_requests(&mut self) -> Vec<(SocketAddr, Protocol)> { diff --git a/src/window_service.rs b/src/window_service.rs index d7313ffdea..1289e84b45 100644 --- a/src/window_service.rs +++ b/src/window_service.rs @@ -91,6 +91,12 @@ fn recv_window( (p.index()?, p.meta.size) }; + submit( + influxdb::Point::new("window-service") + .add_field("last-recv", influxdb::Value::Integer(pix as i64)) + .to_owned(), + ); + pixs.push(pix); trace!("{} window pix: {} size: {}", id, pix, meta_size);