ton_cpp_exporter/ton_metrics_push.py

222 lines
9.2 KiB
Python

#!/usr/bin/env python3.6
# Quick and dirty metrics exporter for the TON C++ node.
import subprocess
import argparse
import tempfile
import os
import sys
import json
import base64
import binascii
import re
import pathlib
# Command-line interface. Every option is mandatory, so argparse rejects an
# incomplete invocation before any subprocess is started.
_REQUIRED_OPTIONS = (
    ('--output', "node_exporter collector directory"),
    ('--engine-console-binary', "Binary path for validator-engine-console"),
    ('--lite-client-binary', "Binary path for lite-client"),
    ('--validator-client-key', "Validator client key"),
    ('--validator-server-pub', "Validator server pubkey"),
    ('--wallet-address', "Validator wallet address (0x...)"),
    ('--liteserver-pub', "Lite server pubkey"),
    ('--election-dir', "Election data directory generated by validator_msig.sh"),
    ('--validator-config', "Path to validator config.json"),
)

parser = argparse.ArgumentParser()
for _flag, _help_text in _REQUIRED_OPTIONS:
    parser.add_argument(_flag, help=_help_text, required=True)
args = parser.parse_args()

# Common prefix of every lite-client invocation below: the binary, the
# liteserver address on localhost port 3031, and the liteserver public key.
LITE_CLIENT = [args.lite_client_binary]
LITE_CLIENT += ['-a', '127.0.0.1:3031']
LITE_CLIENT += ['-p', args.liteserver_pub]
# Request validator stats (local and chain time and height) from the
# validator engine console listening on 127.0.0.1:3030.
output = subprocess.check_output([
    args.engine_console_binary,
    '-a', '127.0.0.1:3030',
    '-k', args.validator_client_key,
    '-p', args.validator_server_pub,
    '-c', 'getstats',
    '-c', 'quit'],
    # https://github.com/ton-blockchain/ton/issues/292
    stdin=subprocess.PIPE
)

# Stats that are exported verbatim as "ton_<name>" metrics.
want = [
    # Timestamp of local node.
    b'unixtime',
    # Timestamp of latest masterchain block seen.
    b'masterchainblocktime',
    # Block heights of masterchain.
    b'stateserializermasterchainseqno',
    b'shardclientmasterchainseqno',
]

# getstats prints one tab-separated "name<TAB>...<TAB>value" record per line;
# the first field is the stat name and the last field is its value.
values = {}
# Stays None unless getstats reports a 'masterchainblock' line. It is needed
# later for the creatorstats query, so its absence is detected right here
# instead of surfacing as a NameError far away from the cause.
masterchainblock = None
for line in output.split(b'\n'):
    parts = line.split(b'\t')
    if parts[0] in want:
        values[parts[0]] = parts[-1]
    elif parts[0] == b'masterchainblock':
        # TODO: Parse and collect *masterchainblock metrics
        masterchainblock = parts[-1]

# Assert that all metrics are present. The script needs to either return *all*
# metrics, or fail completely. Otherwise, we could silently miss error states.
missing = [k for k in want if k not in values]
if masterchainblock is None:
    missing.append(b'masterchainblock')
if missing:
    # sys.exit(str) prints the message to stderr and exits with status 1.
    sys.exit("Error: getstats output is missing: %s"
             % b', '.join(missing).decode())
# Both values below come from get-methods of the contract at -1:33..33 and
# are printed by lite-client as "result: [ <value> ]".
RE_INT_RESULT = re.compile(rb'result: \[ (.*) \]')


def _run_election_method(method_call):
    """Run ``runmethod <contract> <method_call>`` through lite-client.

    Returns the raw stdout bytes of the lite-client process; raises
    subprocess.CalledProcessError if it exits with a non-zero status.
    """
    command = 'runmethod -1:3333333333333333333333333333333333333333333333333333333333333333 ' + method_call
    return subprocess.check_output(LITE_CLIENT + ['-rc', command], stdin=subprocess.PIPE)


# Get election ID (i.e. timestamp).
output = _run_election_method('active_election_id')
active_election_id = int(RE_INT_RESULT.findall(output)[0])

# Fetch amount of returned stake during election
output = _run_election_method('compute_returned_stake ' + args.wallet_address)
returned_stake_amount = int(RE_INT_RESULT.findall(output)[0])
# Get block heights of individual shards.
# Produces a list of (shard_id, height) tuples, one for every line of the
# "allshards" output that starts with "shard #<n> : (".
_RE_SHARD_LINE = re.compile(rb'^shard #(\d+) : \(\d,\d+,(\d+)\)', re.M)
_allshards_cmd = LITE_CLIENT + ['-rc', 'allshards']
output = subprocess.check_output(_allshards_cmd, stdin=subprocess.PIPE)
allshards = [shard.groups() for shard in _RE_SHARD_LINE.finditer(output)]
# Validator sets are read from two config parameters:
#   34 - active validators,
#   36 - next validators (elected and soon-to-be active).
# Each set is a list of (hex pubkey, stake weight, hex adnl_addr) tuples.
RE_VAL_CONFIG = re.compile(
    rb'value:\(validator_addr\n\s+'
    rb'public_key:\(ed25519_pubkey pubkey:x([A-Z0-9]+)\) '
    rb'weight:(\d+) '
    rb'adnl_addr:x([A-Z0-9]+)\)\)'
)
_validator_sets = []
for _config_param in ('34', '36'):
    output = subprocess.check_output(
        LITE_CLIENT + ['-rc', 'getconfig ' + _config_param],
        stdin=subprocess.PIPE)
    _validator_sets.append(RE_VAL_CONFIG.findall(output))
active_validators, next_validators = _validator_sets
# Get list of validators participating in the current election.
# Only the first output line containing "result:" is parsed; it yields a list
# of (pubkey as big endian int, stake weight) tuples. Without such a line the
# list is empty.
_participants_cmd = LITE_CLIENT + [
    '-rc',
    'runmethod -1:3333333333333333333333333333333333333333333333333333333333333333 participant_list',
]
output = subprocess.check_output(_participants_cmd, stdin=subprocess.PIPE)
_result_line = next(
    (candidate for candidate in output.split(b'\n') if b'result:' in candidate),
    None)
if _result_line is None:
    election_participants = []
else:
    election_participants = re.findall(rb'\[(\d+) (\d+)\]', _result_line)
# Get block creation stats
# For performance reason, the regex will be evaluated later once we know which
# validator is active on the chain.
#
# A stats line consists of a key followed by two identically shaped counters
# ("mc_cnt" and "shard_cnt"), so the pattern is assembled from one template.
def _counter_pattern(prefix):
    """Return the regex fragment for one ``<prefix>_cnt:(counter ...)`` group.

    Named groups are prefixed with ``prefix`` (bytes), e.g. ``mc_total``.
    """
    template = (
        rb'%(p)s_cnt:\(counter '
        rb'last_updated:(?P<%(p)s_last_updated>\d+) '
        rb'total:(?P<%(p)s_total>\d+) '
        rb'cnt2048: (?P<%(p)s_cnt2048>[0-9\.]+) '
        rb'cnt65536: (?P<%(p)s_cnt65536>[0-9\.]+)\)'
    )
    return template % {b'p': prefix}


RE_BLOCK_CREATION_STATS = re.compile(b' '.join([
    rb'(?:[A-Z0-9]+)',
    _counter_pattern(b'mc'),
    _counter_pattern(b'shard'),
]))
# Raw "creatorstats" output for the masterchain block reported by getstats;
# it is only searched later, once the active validator's pubkey is known.
_creatorstats_cmd = LITE_CLIENT + ['-rc', 'creatorstats %s' % masterchainblock.decode()]
creatorstats = subprocess.check_output(_creatorstats_cmd, stdin=subprocess.PIPE)

# Read local validator config (same as running "getconfig" in the validator console).
cfg = json.loads(pathlib.Path(args.validator_config).read_text())
# Each validator has three individual keys:
#
# - Validator public key.
# - Node ID / temporary key (SHA256([0xc6, 0xb4, 0x13, 0x48] + pubkey))
# - ADNL address.
#
# Node ID and ADNL address are set in the validator config file. The public
# key generated during an election, however, is not, so we have to pull it
# out of a log message. Once the validator is elected, it's easy because
# we can just look at the validator set, but we haven't found an easier way
# to acquire the key during an election (I suppose we could look up the
# election transaction, but that would be a lot more effort).
#
# Figure out last/current election's validator pubkey:
#
#   - exactly one "*-request-dump2" file: parse the pubkey out of it,
#   - no such file: election_pubkey is None,
#   - more than one: ambiguous, abort.
#
election_dir = pathlib.Path(args.election_dir)
files = list(election_dir.glob("*-request-dump2"))
if len(files) > 1:
    # The file list has to be %-formatted into the message; passing it as a
    # second print() argument would output a literal "%r".
    print("Error: found multiple election state files: %r" % (files,), file=sys.stderr)
    sys.exit(1)
elif files:
    pubkey_matches = re.findall(
        rb'Provided a valid Ed25519 signature .+? with validator public key ([A-Z0-9]+)',
        files[0].read_bytes())
    if not pubkey_matches:
        # Fail with the offending file name instead of a bare IndexError.
        print("Error: no validator public key found in %s" % files[0], file=sys.stderr)
        sys.exit(1)
    election_pubkey = pubkey_matches[0]
else:
    election_pubkey = None
# Write metrics and atomically replace metrics file.
#
# Everything is written to a temporary file inside the collector directory,
# which is then renamed over "ton.prom". If anything fails before the rename
# completes, the temporary file is removed again so that failed runs do not
# pile up stray files in the collector directory.
fp = tempfile.NamedTemporaryFile(delete=False, dir=args.output)
try:
    with fp:
        # Validator stats (metric names are used verbatim).
        for k, v in values.items():
            fp.write(b'ton_%s %s\n' % (k, v))

        # If the config contains no validators, we won't return any validator metrics!
        for n, validator in enumerate(cfg['validators']):
            # First ADNL address entry of the validator-address type; raises
            # StopIteration if the validator has none.
            adnl_b64 = next(
                addr for addr in validator['adnl_addrs']
                if addr['@type'] == 'engine.validatorAdnlAddress')['id']
            # Config values are base64; lite-client prints upper-case hex, so
            # everything is compared as lower-case hex bytes.
            adnl_addr = binascii.hexlify(base64.b64decode(adnl_b64))
            # Node ID; decoded but not exported by any metric below.
            node_id = binascii.hexlify(base64.b64decode(validator['id']))

            fp.write(b'ton_validator_election_date{index="%d", adnl_addr="%s"} %d\n' % (n, adnl_addr, validator['election_date']))
            fp.write(b'ton_validator_expire_at{index="%d", adnl_addr="%s"} %d\n' % (n, adnl_addr, validator['expire_at']))

            # Is this validator part of the next validator set?
            for pubkey, weight, adnl_addr_ in next_validators:
                if adnl_addr_.lower() == adnl_addr:
                    print("Next validator:", pubkey.lower(), weight, adnl_addr)
                    fp.write(b'ton_validator_next_weight{adnl_addr="%s"} %s\n' % (adnl_addr, weight))
                    is_next = 1
                    break
            else:
                is_next = 0

            # Is this validator part of the active validator set?
            for pubkey, weight, adnl_addr_ in active_validators:
                if adnl_addr_.lower() == adnl_addr:
                    print("Active validator:", pubkey.lower(), weight, adnl_addr)
                    fp.write(b'ton_validator_active_weight{adnl_addr="%s"} %s\n' % (adnl_addr, weight))
                    is_active = 1

                    # Parse creator stats for active validator. A line that
                    # starts with our pubkey but does not match the expected
                    # format raises, which aborts the whole run.
                    for line in creatorstats.split(b'\n'):
                        if line.startswith(pubkey.upper()):
                            for k, v in RE_BLOCK_CREATION_STATS.match(line).groupdict().items():
                                fp.write(b'ton_validator_stats_%s{adnl_addr="%s"} %s\n' % (k.encode(), adnl_addr, v))
                    break
            else:
                is_active = 0

            fp.write(b'ton_validator_is_active{adnl_addr="%s"} %d\n' % (adnl_addr, is_active))
            fp.write(b'ton_validator_is_next{adnl_addr="%s"} %d\n' % (adnl_addr, is_next))

        # Check whether the current election's newly generated validator pubkey
        # shows up in participant_list. Without an election dump file there is
        # no pubkey to look for (election_pubkey is None), so we cannot have
        # participated; calling .lower() on None would crash instead.
        if election_pubkey is None:
            election_participated = 0
        else:
            wanted_pubkey = election_pubkey.lower()
            election_participated = int(any(
                binascii.hexlify(int(num_pubkey).to_bytes(32, 'big')) == wanted_pubkey
                for num_pubkey, _stake in election_participants))

        fp.write(b'ton_election_participated %d\n' % election_participated)
        fp.write(b'ton_election_active_id %d\n' % active_election_id)
        fp.write(b'ton_election_returned_stake_amount %d\n' % returned_stake_amount)

        for id_, height in allshards:
            fp.write(b'ton_shard_height{shard_id="%s"} %s\n' % (id_, height))

        fp.write(b'\n')
        fp.flush()

        # NamedTemporaryFile creates the file readable by its owner only;
        # open it up (0o644) before it is moved into place.
        os.fchmod(fp.fileno(), 0o644)

    # The file is closed at this point; move it over the previous metrics.
    os.rename(fp.name, os.path.join(args.output, 'ton.prom'))
except BaseException:
    # Best-effort cleanup of the partial temporary file, then re-raise so the
    # run still fails loudly.
    try:
        os.unlink(fp.name)
    except OSError:
        pass
    raise