Provide recent syncer response lengths as a watch channel (#2602)
* Minimal recent sync lengths implementation Also includes metrics and logging, to make diagnosing bugs easier. * Add logging to check what happens when Zebra reaches the chain tip * Add tests for recent sync lengths - initially empty - pruned to correct length - newest entries go first * Drop a redundant `/` from a Cargo.lock URL This seems to be a nightly or beta Rust change, but hopefully stable just accepts it. * Use metrics histograms to avoid overwriting values * Add detailed syncer monitoring dashboard * Increase the recent sync length to 4 This length makes it easier to distinguish between temporary and sustained errors/syncs. Co-authored-by: Janito Vaqueiro Ferreira Filho <janito.vff@gmail.com>
This commit is contained in:
parent
2ec6ae364f
commit
6f8f4d8987
|
@ -1659,7 +1659,7 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "hyper"
|
||||
version = "0.14.0-dev"
|
||||
source = "git+https://github.com/hyperium/hyper/?rev=ed2b22a7f66899d338691552fbcb6c0f2f4e06b9#ed2b22a7f66899d338691552fbcb6c0f2f4e06b9"
|
||||
source = "git+https://github.com/hyperium/hyper?rev=ed2b22a7f66899d338691552fbcb6c0f2f4e06b9#ed2b22a7f66899d338691552fbcb6c0f2f4e06b9"
|
||||
dependencies = [
|
||||
"bytes 0.5.6",
|
||||
"futures-channel",
|
||||
|
@ -4725,6 +4725,8 @@ dependencies = [
|
|||
"metrics-exporter-prometheus",
|
||||
"once_cell",
|
||||
"pin-project 1.0.7",
|
||||
"proptest",
|
||||
"proptest-derive",
|
||||
"regex",
|
||||
"semver 1.0.3",
|
||||
"sentry",
|
||||
|
|
|
@ -0,0 +1,763 @@
|
|||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": "-- Grafana --",
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "",
|
||||
"editable": true,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
"id": 5,
|
||||
"iteration": 1628659147689,
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": null,
|
||||
"fieldConfig": {
|
||||
"defaults": {},
|
||||
"overrides": []
|
||||
},
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"hiddenSeries": false,
|
||||
"id": 2,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"nullPointMode": "null",
|
||||
"options": {
|
||||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.5.7",
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": "job",
|
||||
"scopedVars": {
|
||||
"job": {
|
||||
"selected": false,
|
||||
"text": "zebrad-mainnet",
|
||||
"value": "zebrad-mainnet"
|
||||
}
|
||||
},
|
||||
"seriesOverrides": [],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "sync_obtain_queued_hash_count{job=\"$job\"}",
|
||||
"interval": "",
|
||||
"legendFormat": "obtain tips",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "sync_extend_queued_hash_count{job=\"$job\"}",
|
||||
"hide": false,
|
||||
"interval": "",
|
||||
"legendFormat": "extend tips",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeRegions": [],
|
||||
"timeShift": null,
|
||||
"title": "Sync Queued Downloads - $job",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"$$hashKey": "object:65",
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"$$hashKey": "object:66",
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
],
|
||||
"yaxis": {
|
||||
"align": false,
|
||||
"alignLevel": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": null,
|
||||
"fieldConfig": {
|
||||
"defaults": {},
|
||||
"overrides": []
|
||||
},
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"hiddenSeries": false,
|
||||
"id": 8,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"nullPointMode": "null",
|
||||
"options": {
|
||||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.5.7",
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeatIteration": 1628659147689,
|
||||
"repeatPanelId": 2,
|
||||
"scopedVars": {
|
||||
"job": {
|
||||
"selected": false,
|
||||
"text": "zebrad-testnet",
|
||||
"value": "zebrad-testnet"
|
||||
}
|
||||
},
|
||||
"seriesOverrides": [],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "sync_obtain_queued_hash_count{job=\"$job\"}",
|
||||
"interval": "",
|
||||
"legendFormat": "obtain tips",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "sync_extend_queued_hash_count{job=\"$job\"}",
|
||||
"hide": false,
|
||||
"interval": "",
|
||||
"legendFormat": "extend tips",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeRegions": [],
|
||||
"timeShift": null,
|
||||
"title": "Sync Queued Downloads - $job",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"$$hashKey": "object:65",
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"$$hashKey": "object:66",
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
],
|
||||
"yaxis": {
|
||||
"align": false,
|
||||
"alignLevel": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"cards": {
|
||||
"cardPadding": null,
|
||||
"cardRound": null
|
||||
},
|
||||
"color": {
|
||||
"cardColor": "#b4ff00",
|
||||
"colorScale": "sqrt",
|
||||
"colorScheme": "interpolateOranges",
|
||||
"exponent": 0.5,
|
||||
"mode": "spectrum"
|
||||
},
|
||||
"dataFormat": "timeseries",
|
||||
"datasource": null,
|
||||
"fieldConfig": {
|
||||
"defaults": {},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 9
|
||||
},
|
||||
"heatmap": {},
|
||||
"hideZeroBuckets": false,
|
||||
"highlightCards": true,
|
||||
"id": 4,
|
||||
"legend": {
|
||||
"show": false
|
||||
},
|
||||
"pluginVersion": "7.5.7",
|
||||
"repeat": "job",
|
||||
"reverseYBuckets": false,
|
||||
"scopedVars": {
|
||||
"job": {
|
||||
"selected": false,
|
||||
"text": "zebrad-mainnet",
|
||||
"value": "zebrad-mainnet"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "sync_obtain_response_hash_count{job=\"$job\"}",
|
||||
"format": "heatmap",
|
||||
"interval": "",
|
||||
"legendFormat": "obtain tips",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "sync_extend_response_hash_count{job=\"$job\"}",
|
||||
"format": "heatmap",
|
||||
"hide": false,
|
||||
"interval": "",
|
||||
"legendFormat": "extend tips",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Sync Peer Responses - $job",
|
||||
"tooltip": {
|
||||
"show": true,
|
||||
"showHistogram": false
|
||||
},
|
||||
"type": "heatmap",
|
||||
"xAxis": {
|
||||
"show": true
|
||||
},
|
||||
"xBucketNumber": null,
|
||||
"xBucketSize": null,
|
||||
"yAxis": {
|
||||
"decimals": null,
|
||||
"format": "short",
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true,
|
||||
"splitFactor": null
|
||||
},
|
||||
"yBucketBound": "auto",
|
||||
"yBucketNumber": null,
|
||||
"yBucketSize": null
|
||||
},
|
||||
{
|
||||
"cards": {
|
||||
"cardPadding": null,
|
||||
"cardRound": null
|
||||
},
|
||||
"color": {
|
||||
"cardColor": "#b4ff00",
|
||||
"colorScale": "sqrt",
|
||||
"colorScheme": "interpolateOranges",
|
||||
"exponent": 0.5,
|
||||
"mode": "spectrum"
|
||||
},
|
||||
"dataFormat": "timeseries",
|
||||
"datasource": null,
|
||||
"fieldConfig": {
|
||||
"defaults": {},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 9
|
||||
},
|
||||
"heatmap": {},
|
||||
"hideZeroBuckets": false,
|
||||
"highlightCards": true,
|
||||
"id": 9,
|
||||
"legend": {
|
||||
"show": false
|
||||
},
|
||||
"pluginVersion": "7.5.7",
|
||||
"repeatIteration": 1628659147689,
|
||||
"repeatPanelId": 4,
|
||||
"reverseYBuckets": false,
|
||||
"scopedVars": {
|
||||
"job": {
|
||||
"selected": false,
|
||||
"text": "zebrad-testnet",
|
||||
"value": "zebrad-testnet"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "sync_obtain_response_hash_count{job=\"$job\"}",
|
||||
"format": "heatmap",
|
||||
"interval": "",
|
||||
"legendFormat": "obtain tips",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "sync_extend_response_hash_count{job=\"$job\"}",
|
||||
"format": "heatmap",
|
||||
"hide": false,
|
||||
"interval": "",
|
||||
"legendFormat": "extend tips",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Sync Peer Responses - $job",
|
||||
"tooltip": {
|
||||
"show": true,
|
||||
"showHistogram": false
|
||||
},
|
||||
"type": "heatmap",
|
||||
"xAxis": {
|
||||
"show": true
|
||||
},
|
||||
"xBucketNumber": null,
|
||||
"xBucketSize": null,
|
||||
"yAxis": {
|
||||
"decimals": null,
|
||||
"format": "short",
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true,
|
||||
"splitFactor": null
|
||||
},
|
||||
"yBucketBound": "auto",
|
||||
"yBucketNumber": null,
|
||||
"yBucketSize": null
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": null,
|
||||
"fieldConfig": {
|
||||
"defaults": {},
|
||||
"overrides": []
|
||||
},
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 18
|
||||
},
|
||||
"hiddenSeries": false,
|
||||
"id": 7,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"nullPointMode": "null",
|
||||
"options": {
|
||||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.5.7",
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": "job",
|
||||
"scopedVars": {
|
||||
"job": {
|
||||
"selected": false,
|
||||
"text": "zebrad-mainnet",
|
||||
"value": "zebrad-mainnet"
|
||||
}
|
||||
},
|
||||
"seriesOverrides": [],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "sync_cancelled_download_count{job=\"$job\"}",
|
||||
"interval": "",
|
||||
"legendFormat": "cancelled downloads",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "sync_cancelled_verify_count{job=\"$job\"}",
|
||||
"hide": false,
|
||||
"interval": "",
|
||||
"legendFormat": "cancelled verifications",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "sync_downloads_in_flight{job=\"$job\"}",
|
||||
"hide": false,
|
||||
"interval": "",
|
||||
"legendFormat": "in-flight downloads",
|
||||
"refId": "C"
|
||||
},
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "sync_prospective_tips_len{job=\"$job\"}",
|
||||
"hide": false,
|
||||
"interval": "",
|
||||
"legendFormat": "prospective tips",
|
||||
"refId": "D"
|
||||
},
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "sync_downloaded_block_count{job=\"$job\"}",
|
||||
"hide": false,
|
||||
"interval": "",
|
||||
"legendFormat": "downloaded blocks",
|
||||
"refId": "E"
|
||||
},
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "sync_verified_block_count{job=\"$job\"}",
|
||||
"hide": false,
|
||||
"interval": "",
|
||||
"legendFormat": "verified blocks",
|
||||
"refId": "F"
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeRegions": [],
|
||||
"timeShift": null,
|
||||
"title": "Sync Status - $job",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"$$hashKey": "object:65",
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"$$hashKey": "object:66",
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
],
|
||||
"yaxis": {
|
||||
"align": false,
|
||||
"alignLevel": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": null,
|
||||
"fieldConfig": {
|
||||
"defaults": {},
|
||||
"overrides": []
|
||||
},
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 18
|
||||
},
|
||||
"hiddenSeries": false,
|
||||
"id": 10,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"nullPointMode": "null",
|
||||
"options": {
|
||||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.5.7",
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeatIteration": 1628659147689,
|
||||
"repeatPanelId": 7,
|
||||
"scopedVars": {
|
||||
"job": {
|
||||
"selected": false,
|
||||
"text": "zebrad-testnet",
|
||||
"value": "zebrad-testnet"
|
||||
}
|
||||
},
|
||||
"seriesOverrides": [],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "sync_cancelled_download_count{job=\"$job\"}",
|
||||
"interval": "",
|
||||
"legendFormat": "cancelled downloads",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "sync_cancelled_verify_count{job=\"$job\"}",
|
||||
"hide": false,
|
||||
"interval": "",
|
||||
"legendFormat": "cancelled verifications",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "sync_downloads_in_flight{job=\"$job\"}",
|
||||
"hide": false,
|
||||
"interval": "",
|
||||
"legendFormat": "in-flight downloads",
|
||||
"refId": "C"
|
||||
},
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "sync_prospective_tips_len{job=\"$job\"}",
|
||||
"hide": false,
|
||||
"interval": "",
|
||||
"legendFormat": "prospective tips",
|
||||
"refId": "D"
|
||||
},
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "sync_downloaded_block_count{job=\"$job\"}",
|
||||
"hide": false,
|
||||
"interval": "",
|
||||
"legendFormat": "downloaded blocks",
|
||||
"refId": "E"
|
||||
},
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "sync_verified_block_count{job=\"$job\"}",
|
||||
"hide": false,
|
||||
"interval": "",
|
||||
"legendFormat": "verified blocks",
|
||||
"refId": "F"
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeRegions": [],
|
||||
"timeShift": null,
|
||||
"title": "Sync Status - $job",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"$$hashKey": "object:65",
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"$$hashKey": "object:66",
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
],
|
||||
"yaxis": {
|
||||
"align": false,
|
||||
"alignLevel": null
|
||||
}
|
||||
}
|
||||
],
|
||||
"refresh": "5s",
|
||||
"schemaVersion": 27,
|
||||
"style": "dark",
|
||||
"tags": [],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": [
|
||||
"All"
|
||||
],
|
||||
"value": [
|
||||
"$__all"
|
||||
]
|
||||
},
|
||||
"datasource": null,
|
||||
"definition": "label_values(zcash_net_in_bytes_total, job)",
|
||||
"description": null,
|
||||
"error": null,
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": null,
|
||||
"multi": true,
|
||||
"name": "job",
|
||||
"options": [],
|
||||
"query": {
|
||||
"query": "label_values(zcash_net_in_bytes_total, job)",
|
||||
"refId": "StandardVariableQuery"
|
||||
},
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-5m",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "Syncer",
|
||||
"uid": "Sl3h19Gnk",
|
||||
"version": 9
|
||||
}
|
|
@ -55,6 +55,10 @@ once_cell = "1.8"
|
|||
regex = "1.4.6"
|
||||
semver = "1.0.3"
|
||||
tempdir = "0.3.7"
|
||||
|
||||
proptest = "0.10"
|
||||
proptest-derive = "0.3"
|
||||
|
||||
zebra-test = { path = "../zebra-test" }
|
||||
|
||||
[features]
|
||||
|
|
|
@ -78,7 +78,8 @@ impl StartCmd {
|
|||
.map_err(|_| eyre!("could not send setup data to inbound service"))?;
|
||||
|
||||
info!("initializing syncer");
|
||||
let syncer = ChainSync::new(&config, peer_set, state, verifier);
|
||||
// TODO: use sync_length_receiver to activate the mempool (#2592)
|
||||
let (syncer, _sync_length_receiver) = ChainSync::new(&config, peer_set, state, verifier);
|
||||
|
||||
syncer.sync().await
|
||||
}
|
||||
|
|
|
@ -5,7 +5,7 @@ use futures::{
|
|||
future::FutureExt,
|
||||
stream::{FuturesUnordered, StreamExt},
|
||||
};
|
||||
use tokio::time::sleep;
|
||||
use tokio::{sync::watch, time::sleep};
|
||||
use tower::{
|
||||
builder::ServiceBuilder, hedge::Hedge, limit::ConcurrencyLimit, retry::Retry, timeout::Timeout,
|
||||
Service, ServiceExt,
|
||||
|
@ -21,10 +21,13 @@ use zebra_state as zs;
|
|||
use crate::{config::ZebradConfig, BoxError};
|
||||
|
||||
mod downloads;
|
||||
mod recent_sync_lengths;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
use downloads::{AlwaysHedge, Downloads};
|
||||
use recent_sync_lengths::RecentSyncLengths;
|
||||
|
||||
/// Controls the number of peers used for each ObtainTips and ExtendTips request.
|
||||
const FANOUT: usize = 4;
|
||||
|
@ -193,6 +196,9 @@ where
|
|||
// Internal sync state
|
||||
/// The tips that the syncer is currently following.
|
||||
prospective_tips: HashSet<CheckedTip>,
|
||||
|
||||
/// The lengths of recent sync responses.
|
||||
recent_syncs: RecentSyncLengths,
|
||||
}
|
||||
|
||||
/// Polls the network to determine whether further blocks are available and
|
||||
|
@ -215,7 +221,14 @@ where
|
|||
/// - peers: the zebra-network peers to contact for downloads
|
||||
/// - state: the zebra-state that stores the chain
|
||||
/// - verifier: the zebra-consensus verifier that checks the chain
|
||||
pub fn new(config: &ZebradConfig, peers: ZN, state: ZS, verifier: ZV) -> Self {
|
||||
///
|
||||
/// Also returns a [`watch::Receiver`] endpoint for receiving recent sync lengths.
|
||||
pub fn new(
|
||||
config: &ZebradConfig,
|
||||
peers: ZN,
|
||||
state: ZS,
|
||||
verifier: ZV,
|
||||
) -> (Self, watch::Receiver<Vec<usize>>) {
|
||||
let tip_network = Timeout::new(peers.clone(), TIPS_RESPONSE_TIMEOUT);
|
||||
// The Hedge middleware is the outermost layer, hedging requests
|
||||
// between two retry-wrapped networks. The innermost timeout
|
||||
|
@ -240,23 +253,30 @@ where
|
|||
0.95,
|
||||
2 * SYNC_RESTART_DELAY,
|
||||
);
|
||||
|
||||
// We apply a timeout to the verifier to avoid hangs due to missing earlier blocks.
|
||||
let verifier = Timeout::new(verifier, BLOCK_VERIFY_TIMEOUT);
|
||||
// Warn the user if we're ignoring their configured lookahead limit
|
||||
|
||||
assert!(
|
||||
config.sync.lookahead_limit >= MIN_LOOKAHEAD_LIMIT,
|
||||
"configured lookahead limit {} too low, must be at least {}",
|
||||
config.sync.lookahead_limit,
|
||||
MIN_LOOKAHEAD_LIMIT
|
||||
);
|
||||
Self {
|
||||
|
||||
let (recent_syncs, sync_length_receiver) = RecentSyncLengths::new();
|
||||
|
||||
let new_syncer = Self {
|
||||
genesis_hash: genesis_hash(config.network.network),
|
||||
lookahead_limit: config.sync.lookahead_limit,
|
||||
tip_network,
|
||||
downloads: Box::pin(Downloads::new(block_network, verifier)),
|
||||
state,
|
||||
prospective_tips: HashSet::new(),
|
||||
}
|
||||
recent_syncs,
|
||||
};
|
||||
|
||||
(new_syncer, sync_length_receiver)
|
||||
}
|
||||
|
||||
#[instrument(skip(self))]
|
||||
|
@ -466,10 +486,9 @@ where
|
|||
let prev_download_len = download_set.len();
|
||||
download_set.extend(unknown_hashes);
|
||||
let new_download_len = download_set.len();
|
||||
tracing::debug!(
|
||||
new_hashes = new_download_len - prev_download_len,
|
||||
"added hashes to download set"
|
||||
);
|
||||
let new_hashes = new_download_len - prev_download_len;
|
||||
tracing::debug!(new_hashes, "added hashes to download set");
|
||||
metrics::histogram!("sync.obtain.response.hash.count", new_hashes as u64);
|
||||
}
|
||||
Ok(_) => unreachable!("network returned wrong response"),
|
||||
// We ignore this error because we made multiple fanout requests.
|
||||
|
@ -486,6 +505,15 @@ where
|
|||
return Err(eyre!("queued download of hash behind our chain tip"));
|
||||
}
|
||||
}
|
||||
|
||||
let new_downloads = download_set.len();
|
||||
tracing::debug!(new_downloads, "queueing new downloads");
|
||||
metrics::gauge!("sync.obtain.queued.hash.count", new_downloads as f64);
|
||||
|
||||
// security: use the actual number of new downloads from all peers,
|
||||
// so a single trailing peer can't toggle our mempool
|
||||
self.recent_syncs.push_obtain_tips_length(new_downloads);
|
||||
|
||||
self.request_blocks(download_set).await?;
|
||||
|
||||
Ok(())
|
||||
|
@ -594,10 +622,9 @@ where
|
|||
let prev_download_len = download_set.len();
|
||||
download_set.extend(unknown_hashes);
|
||||
let new_download_len = download_set.len();
|
||||
tracing::debug!(
|
||||
new_hashes = new_download_len - prev_download_len,
|
||||
"added hashes to download set"
|
||||
);
|
||||
let new_hashes = new_download_len - prev_download_len;
|
||||
tracing::debug!(new_hashes, "added hashes to download set");
|
||||
metrics::histogram!("sync.extend.response.hash.count", new_hashes as u64);
|
||||
}
|
||||
Ok(_) => unreachable!("network returned wrong response"),
|
||||
// We ignore this error because we made multiple fanout requests.
|
||||
|
@ -606,6 +633,14 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
let new_downloads = download_set.len();
|
||||
tracing::debug!(new_downloads, "queueing new downloads");
|
||||
metrics::gauge!("sync.extend.queued.hash.count", new_downloads as f64);
|
||||
|
||||
// security: use the actual number of new downloads from all peers,
|
||||
// so a single trailing peer can't toggle our mempool
|
||||
self.recent_syncs.push_extend_tips_length(new_downloads);
|
||||
|
||||
self.request_blocks(download_set).await?;
|
||||
|
||||
Ok(())
|
||||
|
|
|
@ -0,0 +1,88 @@
|
|||
//! A channel which holds a list of recent syncer response lengths.
|
||||
|
||||
use tokio::sync::watch;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
/// A helper type which holds a list of recent syncer response lengths.
|
||||
/// These sync lengths can be used to work out if Zebra has reached the end of the chain.
|
||||
///
|
||||
/// New lengths are added to the front of the list.
|
||||
/// Old lengths are dropped if the list is longer than `MAX_RECENT_LENGTHS`.
|
||||
//
|
||||
// TODO: disable the mempool if obtain or extend tips return errors?
|
||||
#[derive(Debug)]
|
||||
pub struct RecentSyncLengths {
|
||||
/// A sender for an array of recent sync lengths.
|
||||
sender: watch::Sender<Vec<usize>>,
|
||||
|
||||
/// A local copy of the contents of `sender`.
|
||||
// TODO: Replace with calls to `watch::Sender::borrow` once Tokio is updated to 1.0.0 (#2573)
|
||||
recent_lengths: Vec<usize>,
|
||||
}
|
||||
|
||||
impl RecentSyncLengths {
|
||||
/// The maximum number of lengths sent by `RecentSyncLengths`.
|
||||
///
|
||||
/// Older lengths are dropped.
|
||||
///
|
||||
/// This length was chosen as a tradeoff between:
|
||||
/// * clearing temporary errors and temporary syncs quickly
|
||||
/// * distinguishing between temporary and sustained syncs/errors
|
||||
/// * activating the syncer shortly after reaching the chain tip
|
||||
pub const MAX_RECENT_LENGTHS: usize = 4;
|
||||
|
||||
/// Create a new instance of [`RecentSyncLengths`]
|
||||
/// and a [`watch::Receiver`] endpoint for receiving recent sync lengths.
|
||||
pub fn new() -> (Self, watch::Receiver<Vec<usize>>) {
|
||||
let (sender, receiver) = watch::channel(Vec::new());
|
||||
|
||||
(
|
||||
RecentSyncLengths {
|
||||
sender,
|
||||
recent_lengths: Vec::with_capacity(Self::MAX_RECENT_LENGTHS),
|
||||
},
|
||||
receiver,
|
||||
)
|
||||
}
|
||||
|
||||
// We skip the genesis block download, because it just uses the genesis hash directly,
|
||||
// rather than asking peers for the next blocks in the chain.
|
||||
// (And if genesis downloads kept failing, we could accidentally activate the mempool.)
|
||||
|
||||
/// Insert a sync length from [`ChainSync::obtain_tips`] at the front of the list.
|
||||
#[instrument(skip(self), fields(self.recent_lengths))]
|
||||
pub fn push_obtain_tips_length(&mut self, sync_length: usize) {
|
||||
// currently, we treat lengths from obtain and extend tips exactly the same,
|
||||
// but we might want to ignore some obtain tips lengths
|
||||
//
|
||||
// See "Response Lengths During Sync -> Details" in:
|
||||
// https://github.com/ZcashFoundation/zebra/issues/2592#issuecomment-897304684
|
||||
self.update(sync_length)
|
||||
}
|
||||
|
||||
/// Insert a sync length from [`ChainSync::extend_tips`] at the front of the list.
|
||||
#[instrument(skip(self), fields(self.recent_lengths))]
|
||||
pub fn push_extend_tips_length(&mut self, sync_length: usize) {
|
||||
self.update(sync_length)
|
||||
}
|
||||
|
||||
/// Sends a list update to listeners.
|
||||
///
|
||||
/// Prunes recent lengths, if the list is longer than `MAX_RECENT_LENGTHS`.
|
||||
fn update(&mut self, sync_length: usize) {
|
||||
self.recent_lengths.insert(0, sync_length);
|
||||
|
||||
self.recent_lengths.truncate(Self::MAX_RECENT_LENGTHS);
|
||||
|
||||
// TODO: remove or downgrade this log to debug
|
||||
tracing::info!(
|
||||
recent_lengths = ?self.recent_lengths,
|
||||
"sending recent sync lengths"
|
||||
);
|
||||
|
||||
// ignore dropped receivers
|
||||
let _ = self.sender.send(self.recent_lengths.clone());
|
||||
}
|
||||
}
|
|
@ -0,0 +1,4 @@
|
|||
//! Tests for recent sync lengths.
|
||||
|
||||
mod prop;
|
||||
mod vectors;
|
|
@ -0,0 +1,66 @@
|
|||
//! Randomised property tests for recent sync lengths.
|
||||
|
||||
use std::cmp::min;
|
||||
|
||||
use proptest::prelude::*;
|
||||
use proptest_derive::Arbitrary;
|
||||
|
||||
use super::super::RecentSyncLengths;
|
||||
|
||||
#[derive(Arbitrary, Clone, Copy, Debug)]
|
||||
enum SyncLengthUpdate {
|
||||
ObtainTips(usize),
|
||||
ExtendTips(usize),
|
||||
}
|
||||
|
||||
use SyncLengthUpdate::*;
|
||||
|
||||
proptest! {
|
||||
#[test]
|
||||
fn max_recent_lengths(
|
||||
sync_updates in any::<Vec<SyncLengthUpdate>>(),
|
||||
) {
|
||||
let (mut recent_sync_lengths, receiver) = RecentSyncLengths::new();
|
||||
|
||||
for (count, update) in sync_updates.into_iter().enumerate() {
|
||||
match update {
|
||||
ObtainTips(sync_length) => recent_sync_lengths.push_obtain_tips_length(sync_length),
|
||||
ExtendTips(sync_length) => recent_sync_lengths.push_extend_tips_length(sync_length),
|
||||
}
|
||||
|
||||
prop_assert_eq!(
|
||||
receiver.borrow().len(),
|
||||
min(count + 1, RecentSyncLengths::MAX_RECENT_LENGTHS),
|
||||
"recent sync lengths: {:?}",
|
||||
*receiver.borrow(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn latest_first(
|
||||
sync_updates in any::<Vec<SyncLengthUpdate>>(),
|
||||
) {
|
||||
let (mut recent_sync_lengths, receiver) = RecentSyncLengths::new();
|
||||
|
||||
for update in sync_updates {
|
||||
let latest_sync_length = match update {
|
||||
ObtainTips(sync_length) => {
|
||||
recent_sync_lengths.push_obtain_tips_length(sync_length);
|
||||
sync_length
|
||||
}
|
||||
ExtendTips(sync_length) => {
|
||||
recent_sync_lengths.push_extend_tips_length(sync_length);
|
||||
sync_length
|
||||
}
|
||||
};
|
||||
|
||||
prop_assert_eq!(
|
||||
receiver.borrow().first().cloned(),
|
||||
Some(latest_sync_length),
|
||||
"recent sync lengths: {:?}",
|
||||
*receiver.borrow(),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,10 @@
|
|||
//! Hard-coded tests for recent sync lengths.
|
||||
|
||||
use super::super::RecentSyncLengths;
|
||||
|
||||
#[test]
|
||||
fn recent_sync_lengths_are_initially_empty() {
|
||||
let (_recent_sync_lengths, receiver) = RecentSyncLengths::new();
|
||||
|
||||
assert_eq!(receiver.borrow().len(), 0);
|
||||
}
|
|
@ -119,7 +119,7 @@ fn request_genesis_is_rate_limited() {
|
|||
);
|
||||
|
||||
// start the sync
|
||||
let mut chain_sync = ChainSync::new(
|
||||
let (mut chain_sync, _) = ChainSync::new(
|
||||
&ZebradConfig::default(),
|
||||
peer_service,
|
||||
state_service,
|
||||
|
|
Loading…
Reference in New Issue