# ton_cpp_exporter/ton_metrics_push.py

#!/usr/bin/env python3.6
# Quick and dirty metrics exporter for the TON C++ node.

import subprocess
import argparse
import tempfile
import os
import sys
import json
import base64
import binascii
import re
import pathlib

# Command-line interface. Every option is mandatory; the table below lists
# (flag, help text) pairs and they are registered in exactly this order.
_REQUIRED_OPTIONS = (
    ('--output', "node_exporter collector directory"),
    ('--engine-console-binary', "Binary path for validator-engine-console"),
    ('--lite-client-binary', "Binary path for lite-client"),
    ('--validator-client-key', "Validator client key"),
    ('--validator-server-pub', "Validator server pubkey"),
    ('--wallet-address', "Validator wallet address (0x...)"),
    ('--liteserver-pub', "Lite server pubkey"),
    ('--election-dir', "Election data directory generated by validator_msig.sh"),
    ('--validator-config', "Path to validator config.json"),
)

parser = argparse.ArgumentParser()
for _flag, _help in _REQUIRED_OPTIONS:
    parser.add_argument(_flag, help=_help, required=True)
args = parser.parse_args()

# Common argv prefix shared by every lite-client invocation in this script:
# the binary itself, the lite server at 127.0.0.1:3031 and its public key.
# Callers append their own '-rc <command>' arguments to a copy of this list.
LITE_CLIENT = [args.lite_client_binary]
LITE_CLIENT += ['-a', '127.0.0.1:3031']
LITE_CLIENT += ['-p', args.liteserver_pub]

# Request validator stats (local and chain time and height) by running
# "getstats" followed by "quit" in the validator engine console.
output = subprocess.check_output(
    [
        args.engine_console_binary,
        '-a', '127.0.0.1:3030',
        '-k', args.validator_client_key,
        '-p', args.validator_server_pub,
        '-c', 'getstats',
        '-c', 'quit',
    ],
    # stdin is a pipe because of
    # https://github.com/ton-blockchain/ton/issues/292
    stdin=subprocess.PIPE
)

# Rows of the getstats output that are exported as metrics.
want = [
    # Timestamp of local node.
    b'unixtime',
    # Timestamp of latest masterchain block seen.
    b'masterchainblocktime',
    # Block heights of masterchain.
    b'stateserializermasterchainseqno',
    b'shardclientmasterchainseqno',
]

# Maps each wanted row name to its raw value (both bytes).
values = {}

# Block identifier reported in the 'masterchainblock' row; it is passed to
# the lite-client "creatorstats" command further down. Stays None if the row
# is absent so that the problem can be reported right here.
masterchainblock = None

# Each row is tab-separated: the first field is the name, the last field
# is the value.
for line in output.split(b'\n'):
    parts = line.split(b'\t')
    if parts[0] in want:
        values[parts[0]] = parts[-1]
    elif parts[0] == b'masterchainblock':
        # TODO: Parse and collect *masterchainblock metrics
        masterchainblock = parts[-1]

# Assert that all metrics are present. The script needs to either return *all* metrics,
# or fail completely. Otherwise, we could silently miss error states.
missing = [name for name in want if name not in values]
if missing:
    raise KeyError("getstats output is missing required rows: %r" % (missing,))

# Without this check a missing row would only surface much later as a
# NameError when the creatorstats command line is built.
if masterchainblock is None:
    raise KeyError("getstats output is missing the 'masterchainblock' row")

# Matches the "result:  [ ... ]" line printed by lite-client's runmethod and
# captures whatever sits between the brackets.
RE_INT_RESULT = re.compile(rb'result:  \[ (.*) \]')

# ID of the active election (i.e. a timestamp).
election_id_cmd = LITE_CLIENT + [
    '-rc',
    'runmethod -1:3333333333333333333333333333333333333333333333333333333333333333 active_election_id',
]
output = subprocess.check_output(election_id_cmd, stdin=subprocess.PIPE)
election_id_matches = RE_INT_RESULT.findall(output)
active_election_id = int(election_id_matches[0])

# Amount of stake returned to the validator wallet during the election.
returned_stake_cmd = LITE_CLIENT + [
    '-rc',
    'runmethod -1:3333333333333333333333333333333333333333333333333333333333333333 compute_returned_stake ' + args.wallet_address,
]
output = subprocess.check_output(returned_stake_cmd, stdin=subprocess.PIPE)
returned_stake_matches = RE_INT_RESULT.findall(output)
returned_stake_amount = int(returned_stake_matches[0])

# Block heights of the individual shards, as a list of (shard_id, height)
# tuples taken from the "shard #N : (..,..,height)" lines of "allshards".
allshards_cmd = LITE_CLIENT + ['-rc', 'allshards']
output = subprocess.check_output(allshards_cmd, stdin=subprocess.PIPE)
allshards = re.compile(rb'^shard #(\d+) : \(\d,\d+,(\d+)\)', re.M).findall(output)

# One validator entry of a validator-set config dump. Yields
# (hex pubkey, stake weight, hex adnl_addr) tuples.
RE_VAL_CONFIG = re.compile(rb'value:\(validator_addr\n\s+public_key:\(ed25519_pubkey pubkey:x([A-Z0-9]+)\) weight:(\d+) adnl_addr:x([A-Z0-9]+)\)\)')

# Currently active validators (config parameter 34).
active_cmd = LITE_CLIENT + ['-rc', 'getconfig 34']
output = subprocess.check_output(active_cmd, stdin=subprocess.PIPE)
active_validators = RE_VAL_CONFIG.findall(output)

# Next validators, i.e. elected and soon-to-be active (config parameter 36).
next_cmd = LITE_CLIENT + ['-rc', 'getconfig 36']
output = subprocess.check_output(next_cmd, stdin=subprocess.PIPE)
next_validators = RE_VAL_CONFIG.findall(output)

# Validators taking part in the current election, as a list of
# (pubkey as big endian int, stake weight) tuples.
participants_cmd = LITE_CLIENT + [
    '-rc',
    'runmethod -1:3333333333333333333333333333333333333333333333333333333333333333 participant_list',
]
output = subprocess.check_output(participants_cmd, stdin=subprocess.PIPE)

# Only the first line mentioning "result:" is parsed; if there is no such
# line the participant list is empty.
result_line = next(
    (candidate for candidate in output.split(b'\n') if b'result:' in candidate),
    None,
)
if result_line is None:
    election_participants = []
else:
    election_participants = re.findall(rb'\[(\d+) (\d+)\]', result_line)

# Block creation statistics.
#
# Only the raw "creatorstats" output for the current masterchain block is
# fetched here. For performance reasons the regex below is applied later,
# once we know which validator is active on the chain.
creatorstats_cmd = LITE_CLIENT + ['-rc', 'creatorstats ' + masterchainblock.decode()]
creatorstats = subprocess.check_output(creatorstats_cmd, stdin=subprocess.PIPE)

# Masterchain counter of one creatorstats line (leading key, then mc_cnt).
_MC_COUNTER = (
    rb'(?:[A-Z0-9]+) mc_cnt:\(counter '
    rb'last_updated:(?P<mc_last_updated>\d+) '
    rb'total:(?P<mc_total>\d+) '
    rb'cnt2048: (?P<mc_cnt2048>[0-9\.]+) '
    rb'cnt65536: (?P<mc_cnt65536>[0-9\.]+)\) '
)
# Shard counter that follows the masterchain counter on the same line.
_SHARD_COUNTER = (
    rb'shard_cnt:\(counter '
    rb'last_updated:(?P<shard_last_updated>\d+) '
    rb'total:(?P<shard_total>\d+) '
    rb'cnt2048: (?P<shard_cnt2048>[0-9\.]+) '
    rb'cnt65536: (?P<shard_cnt65536>[0-9\.]+)\)'
)
# The named groups become the metric name suffixes when the stats are written.
RE_BLOCK_CREATION_STATS = re.compile(_MC_COUNTER + _SHARD_COUNTER)

# Load the local validator config as JSON (same data as running "getconfig"
# in the validator console).
cfg = json.loads(pathlib.Path(args.validator_config).read_text())

# Each validator has three individual keys:
#
#  - Validator public key.
#  - Node ID / temporary key (SHA256([0xc6, 0xb4, 0x13, 0x48] + pubkey))
#  - ADNL address.
#
# Node ID and ADNL address are set in the validator config file, however,
# the public key generated during an election isn't and we have to pull
# it out of a log message. Once the validator is elected, it's easy because
# we can just look at the validator set, but we haven't found an easier way
# to acquire the key during an election (I suppose we could look up the
# election transaction, but that would be a lot more effort).
#
# Figure out last/current election's validator pubkey:
#
#  - exactly one "*-request-dump2" file: take the pubkey from its log line
#    (an IndexError is raised if the file lacks that line),
#  - no such file: election_pubkey is None,
#  - more than one: ambiguous, report the files on stderr and exit with 1.
#
election_dir = pathlib.Path(args.election_dir)
files = list(election_dir.glob("*-request-dump2"))

if len(files) > 1:
    # Interpolate the file list into the message; wrapping it in a tuple
    # keeps %-formatting from treating the list as the argument sequence.
    print("Error: found multiple election state files: %r" % (files,), file=sys.stderr)
    sys.exit(1)
elif files:
    election_pubkey = re.findall(
        rb'Provided a valid Ed25519 signature .+? with validator public key ([A-Z0-9]+)',
        files[0].read_bytes())[0]
else:
    election_pubkey = None

# Write metrics and atomically replace metrics file.
#
# All metrics go to a temporary file inside the collector directory, which is
# then renamed over 'ton.prom'. If anything fails before the rename succeeds,
# the temporary file is removed again so that aborted runs do not pile up
# stray files in the collector directory.
fp = tempfile.NamedTemporaryFile(delete=False, dir=args.output)
published = False
try:
    with fp:
        # Validator stats (metric names are used verbatim).
        for k, v in values.items():
            fp.write(b'ton_%s %s\n' % (k, v))

        # If the config contains no validators, we won't return any validator metrics!
        for n, validator in enumerate(cfg['validators']):
            # First ADNL address entry of the validator-specific type;
            # raises StopIteration if the config has none.
            adnl_b64 = next(
                entry for entry in validator['adnl_addrs']
                if entry['@type'] == 'engine.validatorAdnlAddress')['id']

            # Lowercase hex, used both as label value and for matching
            # against the validator sets fetched from the chain.
            adnl_addr = binascii.hexlify(base64.b64decode(adnl_b64))

            fp.write(b'ton_validator_election_date{index="%d", adnl_addr="%s"} %d\n' % (n, adnl_addr, validator['election_date']))
            fp.write(b'ton_validator_expire_at{index="%d", adnl_addr="%s"} %d\n' % (n, adnl_addr, validator['expire_at']))

            # Is this validator part of the next validator set?
            for pubkey, weight, adnl_addr_ in next_validators:
                if adnl_addr_.lower() == adnl_addr:
                    print("Next validator:", pubkey.lower(), weight, adnl_addr)
                    fp.write(b'ton_validator_next_weight{adnl_addr="%s"} %s\n' % (adnl_addr, weight))
                    is_next = 1
                    break
            else:
                is_next = 0

            # Is this validator part of the active validator set?
            for pubkey, weight, adnl_addr_ in active_validators:
                if adnl_addr_.lower() == adnl_addr:
                    print("Active validator:", pubkey.lower(), weight, adnl_addr)
                    fp.write(b'ton_validator_active_weight{adnl_addr="%s"} %s\n' % (adnl_addr, weight))
                    is_active = 1

                    # Parse creator stats for active validator. A line that
                    # starts with the pubkey but does not match the regex
                    # makes .match() return None and aborts the run.
                    for line in creatorstats.split(b'\n'):
                        if line.startswith(pubkey.upper()):
                            for k, v in RE_BLOCK_CREATION_STATS.match(line).groupdict().items():
                                fp.write(b'ton_validator_stats_%s{adnl_addr="%s"} %s\n' % (k.encode(), adnl_addr, v))

                    break
            else:
                is_active = 0

            fp.write(b'ton_validator_is_active{adnl_addr="%s"} %d\n' % (adnl_addr, is_active))
            fp.write(b'ton_validator_is_next{adnl_addr="%s"} %d\n' % (adnl_addr, is_next))

        # Check whether the current election's newly generated validator pubkey shows up in participant_list.
        # Without an election dump there is no pubkey to look for (election_pubkey is None),
        # which counts as "not participated" instead of crashing on None.lower().
        if election_pubkey is None:
            election_participated = 0
        else:
            wanted_pubkey = election_pubkey.lower()
            election_participated = int(any(
                binascii.hexlify(int(num_pubkey).to_bytes(32, 'big')) == wanted_pubkey
                for num_pubkey, _stake in election_participants))

        fp.write(b'ton_election_participated %d\n' % election_participated)

        fp.write(b'ton_election_active_id %d\n' % active_election_id)
        fp.write(b'ton_election_returned_stake_amount %d\n' % returned_stake_amount)

        for id_, height in allshards:
            fp.write(b'ton_shard_height{shard_id="%s"} %s\n' % (id_, height))

        fp.write(b'\n')
        fp.flush()
        # Set mode 0644 so the published file is readable by other users.
        os.fchmod(fp.fileno(), 0o644)

    # The file is closed at this point; move it into place.
    os.rename(fp.name, os.path.join(args.output, 'ton.prom'))
    published = True
finally:
    if not published:
        # Best-effort cleanup; the original error (if any) keeps propagating.
        try:
            os.unlink(fp.name)
        except OSError:
            pass