Split analytics into multiple modules

Greg Pfeil 2022-10-10 12:55:00 -06:00
parent 0a53fd9237
commit 8877d342af
3 changed files with 765 additions and 741 deletions

analysis/analyze.py (745 changed lines, Executable file → Normal file)

@@ -1,32 +1,19 @@
#!/usr/bin/env python3
# Copyright (c) 2022 The Zcash developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or https://www.opensource.org/licenses/mit-license.php .
"""Simple Transaction Analysis
This contains a class, `Analyzer`, for defining analyses of the blocks and
transactions on the blockchain. It also exposes a function
`analyze_blocks`, which handles applying multiple analyses simultaneously over
some common range of blocks.
This contains a class, `Analysis`, for defining analyses of the blocks and
transactions on the blockchain. It also contains a class `Analyzer` with a
method `analyze_blocks`, which handles applying multiple analyses simultaneously
over some common range of blocks.
"""
import datetime
import itertools
import math
import numpy as np
import os.path
from progress.bar import IncrementalBar
from slickrpc.rpc import Proxy
from statistics import mean
import sys
### TODO: Get host/port from config
if len(sys.argv) > 1:
connection_string = sys.argv[1]
else:
raise Exception(
"%s needs to be provided a connection string, like \"http://user:pass@localhost:port\"."
% sys.argv[0])
class Analysis:
"""
@@ -168,727 +155,3 @@ class Analyzer:
result.append((analysis.name, analysis.aggregate(dict(bucketses)[analysis])))
return result
### Helpers
def identity(x):
return x
def get_shielded_spends(tx):
try:
shielded_spends = len(tx['vShieldedSpend'])
except KeyError:
shielded_spends = 0
return shielded_spends
def get_shielded_outputs(tx):
try:
shielded_outputs = len(tx['vShieldedOutput'])
except KeyError:
shielded_outputs = 0
return shielded_outputs
def get_orchard_actions(tx):
try:
orchard_actions = len(tx['orchard']['actions'])
except KeyError:
orchard_actions = 0
return orchard_actions
def count_inputs(tx):
return len(tx['vin']) + 2 * len(tx['vjoinsplit']) + get_shielded_spends(tx) + get_orchard_actions(tx)
def count_outputs(tx):
return len(tx['vout']) + 2 * len(tx['vjoinsplit']) + get_shielded_outputs(tx) + get_orchard_actions(tx)
def count_ins_and_outs(tx):
return (len(tx['vin'])
+ len(tx['vout'])
+ get_shielded_spends(tx)
+ get_shielded_outputs(tx)
+ 2 * len(tx['vjoinsplit'])
+ 2 * get_orchard_actions(tx))
def count_actions(tx):
return (max(len(tx['vin']), len(tx['vout']))
+ max(get_shielded_spends(tx), get_shielded_outputs(tx))
+ 2 * len(tx['vjoinsplit'])
+ get_orchard_actions(tx))
def expiry_height_delta(block, tx):
"""
Returns -1 if there's no expiry, also returns approximately 35,000 (the
number of blocks in a month) if the expiry is beyond 1 month.
"""
month = blocks_per_hour * 24 * 30
try:
expiry_height = tx['expiryheight']
if expiry_height == 0:
return -1
elif tx['expiryheight'] - block['height'] > month:
return month
else:
return tx['expiryheight'] - block['height']
except KeyError:
# `tx['expiryheight']` is ostensibly an optional field, but it seems
# like `0` is what tends to be used for "don't expire", so this case
# generally isn't hit.
return -1
def tx_type(tx):
"""
Categorizes all tx into one of nine categories: (t)ransparent, (z)shielded,
or (m)ixed for both inputs and outputs. So some possible results are "t-t",
"t-z", "m-z", etc.
"""
if tx['vjoinsplit'] or get_shielded_spends(tx) != 0 or get_orchard_actions(tx) != 0:
if tx['vin']:
ins = "m"
else:
ins = "z"
else:
ins = "t"
if tx['vjoinsplit'] or get_shielded_outputs(tx) != 0 or get_orchard_actions(tx) != 0:
if tx['vout']:
outs = "m"
else:
outs = "z"
else:
outs = "t"
return ins + "-" + outs
def is_orchard_tx(tx):
try:
return tx['orchard']['actions']
except KeyError:
return False
def is_saplingspend_tx(tx):
try:
return tx['vShieldedSpend']
except KeyError:
return False
def orchard_anchorage(cache, block, tx):
"""
Returns -1 if there is no anchor
"""
try:
return block['height'] - cache[tx['orchard']['anchor']]
except KeyError:
return -1
def sapling_anchorage(cache, block, tx):
"""
Returns -1 if there is no anchor
"""
try:
return block['height'] - cache[tx['vShieldedSpend'][0]['anchor']]
except KeyError:
return -1
def is_not_coinbase(tx):
return 'feePaid' in tx
# NB: This requires zcashd to be running with `experimentalfeatures=1`,
# `txindex=1` and `insightexplorer=1`.
def getFeeDiff(proposedFee, tx):
try:
return proposedFee <= tx['feePaid']
except KeyError:
return -1
blocks_per_hour = 48 # half this before NU2?
# start about a month before sandblasting
start_range = blocks_per_hour * 24 * 7 * 206
### Requested Statistics
def storeAnchor(pool, cache, block):
"""
Caches the block height as the value for its anchor hash.
"""
try:
final_root = block[pool]
try:
cache[final_root]
except KeyError:
cache[final_root] = block['height']
except KeyError:
None
return cache
# "how old of anchors are people picking"
# --- https://zcash.slack.com/archives/CP6SKNCJK/p1660103126252979
anchor_age_orchard = Analysis(
"how old of anchors are people picking (for orchard)",
lambda _c, _b, tx: is_orchard_tx(tx),
[(orchard_anchorage, sum)],
lambda *_: 1,
({}, lambda c, b: storeAnchor('finalorchardroot', c, b)),
blocks_per_hour * 24
)
anchor_age_sapling = Analysis(
"how old of anchors are people picking (for sapling)",
lambda _c, _b, tx: is_saplingspend_tx(tx),
[(sapling_anchorage, sum)],
lambda *_: 1,
({}, lambda c, b: storeAnchor('finalsaplingroot', c, b)),
blocks_per_hour * 24
)
# "what's the distribution of expiry height deltas"
# --- https://zcash.slack.com/archives/CP6SKNCJK/p1660103126252979
expiry_height_deltas = Analysis(
"distribution of expiry height deltas",
lambda *_: True,
[(lambda _, b, t: expiry_height_delta(b, t), sum)],
lambda *_: 1
)
tx_type_with_long_expiry = Analysis(
"types of tx with expiries longer than about a month",
lambda _, b, t: expiry_height_delta(b, t) >= blocks_per_hour * 24 * 30,
[# (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
# identity),
(lambda _c, _b, tx: tx_type(tx), sum)],
lambda *_: 1
)
# "does anyone use locktime"
# --- https://zcash.slack.com/archives/CP6SKNCJK/p1660103126252979
locktime_usage = Analysis(
"proportion of tx using locktime",
lambda *_: True,
[(lambda *_: 1,
lambda d: dict(d)[True] / (dict(d)[False] + dict(d)[True])),
(lambda _c, _b, tx: tx['locktime'] != 0, sum)],
lambda *_: 1
)
# "I'm seeing a slightly different pattern to the sandblasting transactions,
# unless I've just missed this before. The transactions I've looked at recently
# have had > 400 sapling outputs. Has this been the case before and I just
# missed it? I thought primarily these transactions had slightly over 100
# outputs in most cases."
# --- https://zcash.slack.com/archives/CP6SKNCJK/p1660195664187769
# "Calculate the POFM threshold for historical transactions on-chain and
# calculate what proportion of those transactions would fall below the POFM
# threshold"
# --- https://docs.google.com/document/d/18wtGFCB2N4FO7SoqDPnEgVudAMlCArHMz0EwhE1HNPY/edit
tx_below_pofm_threshold = Analysis(
"rate of transactions below POFM threshold",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
lambda d: dict(d)[False] / (dict(d)[False] + dict(d)[True])),
(lambda _c, _b, tx: count_ins_and_outs(tx) - 4 > 0, sum)
],
lambda *_: 1
)
tx_below_pofm_threshold_abs = Analysis(
"transactions below POFM threshold",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
lambda d: (dict(d)[False], dict(d)[True])),
(lambda _c, _b, tx: count_ins_and_outs(tx) - 4 > 0, sum)
],
lambda *_: 1
)
outs_below_pofm_threshold_abs = Analysis(
"outputs below POFM threshold",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
lambda d: (dict(d)[False], dict(d)[True])),
(lambda _c, _b, tx: count_ins_and_outs(tx) - 4 > 0, sum)
],
lambda _c, _b, tx: count_outputs(tx)
)
tx_below_pofm_threshold_5 = Analysis(
"rate of transactions below POFM threshold with a grace window of 5",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
lambda d: dict(d)[False] / (dict(d)[False] + dict(d)[True])),
(lambda _c, _b, tx: count_ins_and_outs(tx) - 5 > 0, sum)
],
lambda *_: 1
)
tx_below_pofm_threshold_max = Analysis(
"rate of transactions below POFM threshold with max",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
lambda d: dict(d)[False] / (dict(d)[False] + dict(d)[True])),
(lambda _c, _b, tx: count_actions(tx) - 4 > 0, sum)
],
lambda *_: 1
)
tx_below_pofm_threshold_ins = Analysis(
"rate of transactions below POFM threshold only on inputs",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
lambda d: dict(d)[False] / (dict(d)[False] + dict(d)[True])),
(lambda _c, _b, tx: count_inputs(tx) - 4 > 0, sum)
],
lambda *_: 1
)
### Other Examples
tx_per_day = Analysis(
"count transactions per day (treating block 0 as midnight ZST)",
lambda *_: True,
[(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), sum)],
lambda *_: 1
)
mean_tx_per_day = Analysis(
"mean transactions per day, by block",
lambda *_: True,
[(lambda _c, block, _t: int(block['height'] % (blocks_per_hour * 24)), lambda d: mean([x[1] for x in d])),
(lambda _c, block, _t: int(block['height']/(blocks_per_hour * 24)), sum)
],
lambda *_: 1
)
mean_inout_per_tx_per_day = Analysis(
"mean inputs, outputs per transaction per day, by block",
lambda *_: True,
[(lambda _c, block, _t: int(block['height'] % (blocks_per_hour * 24)), lambda d: mean(itertools.chain(d.values()))),
(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity)
],
lambda _c, _b, tx: (count_inputs(tx), count_outputs(tx))
)
mean_inout_per_tx = Analysis(
"mean inputs, outputs per transaction, by week",
lambda *_: True,
[ ( lambda _c, block, _t: int(block['height']/(blocks_per_hour * 24 * 7)),
lambda d: (mean([x[0] for x in d]), mean([x[1] for x in d]))
)
],
lambda _c, _b, tx: (count_inputs(tx), count_outputs(tx))
)
minimum_pofm_fees_nuttycom = Analysis(
"distribution of fees in ZAT, by day, using nuttycom's pricing",
lambda *_: True,
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: math.ceil(math.log((1000 + 250 * max(0, count_ins_and_outs(tx) - 4)) / 1000, 2)), sum)
],
lambda *_: 1
)
minimum_pofm_fees_nuttycom = Analysis(
"distribution of fees in ZAT, by day, using nuttycom's pricing",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: math.ceil(math.log((1000 + 250 * max(0, count_ins_and_outs(tx) - 4)) / 1000, 2)), sum)
],
lambda *_: 1
)
minimum_pofm_fees_nuttycom2 = Analysis(
"distribution of fees in ZAT, by day, using nuttycom's changed pricing",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: math.ceil(math.log((1000 + 200 * max(0, count_ins_and_outs(tx) - 5)) / 1000, 2)), sum)
],
lambda *_: 1
)
def meh_fees(tx):
fee = tx['feePaid']
if fee == 0:
return -1
else:
result = math.ceil(math.log(tx['feePaid'], 2))
# if result < 0:
# print("negative result: %s, %s" % (fee, tx['txid']))
return result
actual_fees = Analysis(
"actual fees",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, _b, tx: meh_fees(tx), sum)
],
lambda *_: 1
)
proposed_fees = Analysis(
"",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, _b, tx: math.ceil(math.log(5000 * max(2, count_actions(tx)), 2)), sum)
],
lambda *_: 1
)
arity_heat_map = Analysis(
"inputs vs outputs",
lambda *_: True,
[(lambda _c, _b, tx: min(100, count_outputs(tx)), identity),
(lambda _c, _b, tx: min(100, count_inputs(tx)), sum)],
lambda *_: 1
)
input_size_dist = Analysis(
"distribution of input sizes",
lambda *_: True,
[(lambda _c, _b, tx: [len(x['scriptSig']['hex']) for x in tx['vin']], identity)],
lambda *_: 1,
)
# very_high_inout_tx = Analysis(
# "tx with very high in/out counts",
# lambda _c, _b, tx: count_ins_and_outs(tx) > 100,
# [(lambda _c, _b, tx: (count_inputs(tx), count_outputs(tx)), identity)],
# lambda _c, _b, tx: tx['txid']
# )
very_high_inout_tx = Analysis(
"tx with very high in/out counts",
lambda _c, _b, tx: count_ins_and_outs(tx) > 5000,
[],
lambda _c, _b, tx: (tx['txid'], count_ins_and_outs(tx))
)
def track_utxos(cache, block):
for tx in block[tx]:
for vin in tx['vin']:
del cache[(vin['txid'], vin['vout'])]
for vout in tx['vout']:
cache[(tx['txid'], vout['n'])] = vout['valueZat']
return cache
utxo_distribution = Analysis(
"how many UTXOs and how big are they?",
lambda *_: True,
[(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
lambda caches: sum([caches[-1][k] for k in caches[-1]]))],
lambda cache, _b, _t: cache,
({}, track_utxos),
1_000_000_000 # back to block 0, TODO: should be able to say this explicitly
)
def is_sandblasting(tx):
return get_shielded_outputs(tx) > 300
sandblasters_per_day = Analysis(
"how many transactions have >300 Sapling outputs each day?",
lambda _c, _b, tx: is_sandblasting(tx),
[(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), sum)],
lambda *_: 1
)
sandblasters_and_more_per_day = Analysis(
"how many transactions have >300 outputs each day?",
lambda _c, _b, tx: count_outputs(tx) > 300,
[(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), sum)],
lambda *_: 1
)
sandblaster_average_outputs_per_day = Analysis(
"how many outputs do sandblasters have?",
lambda _c, _b, tx: is_sandblasting(tx),
[(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), (lambda txs: sum(txs) / len(txs)))],
lambda _c, _b, tx: count_outputs(tx)
)
nuttycom_fees_vs_actual = Analysis(
"transactions that wouldn't pay more under the new model, by day, using nuttycom's pricing",
lambda *_: True,
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(250 * max(4, count_ins_and_outs(tx)), tx), sum)
],
lambda *_: 1
)
action_fees_vs_actual = Analysis(
"transactions that wouldn't pay more under the new model, by day, using actions",
lambda *_: True,
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(500 * max(3, count_actions(tx)), tx), sum)
],
lambda *_: 1
)
nuttycom_fees_vs_actual_trans = Analysis(
"transparent transactions that wouldn't pay more under the new model, by day, using nuttycom's pricing",
lambda _c, _b, tx: tx_type(tx) == 't-t',
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(250 * max(4, count_ins_and_outs(tx)), tx), sum)
],
lambda *_: 1
)
action_fees_vs_actual_trans = Analysis(
"transparent transactions that wouldn't pay more under the new model, by day, using actions",
lambda _c, _b, tx: tx_type(tx) == 't-t',
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(500 * max(3, count_actions(tx)), tx), sum)
],
lambda *_: 1
)
greg_fees_vs_actual = Analysis(
"transactions that wouldn't pay more under the new model, by day, using actions",
lambda *_: True,
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(250 * max(4, count_actions(tx)), tx), sum)
],
lambda *_: 1
)
greg_fees_vs_actual_trans = Analysis(
"transparent transactions that wouldn't pay more under the new model, by day, using actions",
lambda _c, _b, tx: tx_type(tx) == 't-t',
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(250 * max(4, count_actions(tx)), tx), sum)
],
lambda *_: 1
)
latest_fees_vs_actual = Analysis(
"transactions that wouldn't pay more under the new model, by day, using actions",
lambda *_: True,
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(1000 * max(2, count_actions(tx)), tx), sum)
],
lambda *_: 1
)
latest_fees_vs_actual_trans = Analysis(
"transparent transactions that wouldn't pay more under the new model, by day, using actions",
lambda _c, _b, tx: tx_type(tx) == 't-t',
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(1000 * max(2, count_actions(tx)), tx), sum)
],
lambda *_: 1
)
flat_fees_vs_actual = Analysis(
"transactions that would pass the original 10k ZAT fee, by day",
lambda *_: True,
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(10_000, tx), sum)
],
lambda *_: 1
)
shielding_tx_heat_map = Analysis(
"shielding tx",
lambda _c, _b, tx: is_not_coinbase(tx) and (tx_type(tx) == 't-z' or tx_type(tx) == 'm-z'),
[(lambda _c, _b, tx: min(100, count_outputs(tx)), identity),
(lambda _c, _b, tx: min(100, count_inputs(tx)), sum)],
lambda *_: 1
)
shielding_tx_actions = Analysis(
"shielding tx",
lambda _c, _b, tx: is_not_coinbase(tx) and (tx_type(tx) == 't-z' or tx_type(tx) == 'm-z'),
[(lambda _c, _b, tx: min(100, count_actions(tx)), sum)],
lambda *_: 1
)
fees_from_sandblasting = Analysis(
"fees collected from sandblasting",
lambda _c, _b, tx: is_sandblasting(tx),
[(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), sum)],
lambda _c, _b, tx: 500 * max(2, count_actions(tx))
)
flat_fees_vs_actual_trans = Analysis(
"transparent transactions that would pass the original 10k ZAT fee, by day",
lambda _c, _b, tx: tx_type(tx) == 't-t',
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(10_000, tx), sum)
],
lambda *_: 1
)
transparent_tx_that_would_fail_heat_map = Analysis(
"heat map of transparent tx that would fail under `500 * max(3, |actions|)`",
lambda _c, _b, tx: tx_type(tx) == 't-t' and getFeeDiff(500 * max(3, count_actions(tx)), tx) == False,
[(lambda _c, _b, tx: min(100, count_outputs(tx)), identity),
(lambda _c, _b, tx: min(100, count_inputs(tx)), sum)],
lambda *_: 1
)
historical_fees = Analysis(
"histogram of actual fees paid",
lambda _c, _b, tx: is_not_coinbase(tx),
[(lambda _c, _b, tx: check_fee_paid(tx), sum)],
lambda *_: 1
)
arity_heat_map = Analysis(
"inputs vs outputs",
lambda _c, _b, tx: is_not_coinbase(tx),
[(lambda _c, _b, tx: min(100, count_outputs(tx)), identity),
(lambda _c, _b, tx: min(100, count_inputs(tx)), sum)],
lambda *_: 1
)
transparent_input_histogram = Analysis(
"how many transparent inputs do txs have?",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: min(100, len(tx['vin'])), sum)],
lambda *_: 1
)
nuttycom_fees_vs_10k = Analysis(
"transactions that wouldn't pay more under the new model, by day, using nuttycom's pricing",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: 250 * max(4, count_ins_and_outs(tx)) <= 10_000, sum)
],
lambda *_: 1
)
action_fees_vs_10k = Analysis(
"transactions that wouldn't pay more under the new model, by day, using actions",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: 500 * max(3, count_actions(tx)) <= 10_000, sum)
],
lambda *_: 1
)
latest_fees_vs_10k = Analysis(
"transactions that wouldn't pay more under the new model, by day, using actions",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: 1000 * max(2, count_actions(tx)) <= 10_000, sum)
],
lambda *_: 1
)
a = Analyzer(connection_string)
def make_weekly_range(starting_week, number_of_weeks):
start_of_range = blocks_per_hour * 24 * 7 * starting_week
end_of_range = start_of_range + (blocks_per_hour * 24 * 7 * number_of_weeks)
return range(start_of_range, end_of_range)
# start about a month before sandblasting, overlapping with it
pre_sandblasting_range = make_weekly_range(206, 12)
recent_range = make_weekly_range(220, 1)
# start = datetime.datetime.now()
# for analysis in a.analyze_blocks(some_range,
# [ # sandblaster_average_outputs_per_day,
# # flat_fees_vs_actual,
# # flat_fees_vs_actual_trans,
# # transparent_tx_that_would_fail_heat_map
# nuttycom_fees_vs_actual,
# action_fees_vs_actual,
# nuttycom_fees_vs_actual_trans,
# action_fees_vs_actual_trans,
# greg_fees_vs_actual,
# greg_fees_vs_actual_trans,
# # historical_fees,
# # transparent_input_histogram,
# ]):
# print(analysis)
# print(datetime.datetime.now() - start)
# rerunning old data …
# start = datetime.datetime.now()
# for analysis in a.analyze_blocks(make_weekly_range(206, 1),
# [ actual_fees,
# proposed_fees,
# ]):
# print(analysis)
# print(datetime.datetime.now() - start)
nuttycom_fees_vs_10k2 = Analysis(
"transactions that wouldn't pay more under the new model, by day, using nuttycom's pricing",
lambda _c, _b, tx: is_not_coinbase(tx) and not is_sandblasting(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: 250 * max(4, count_ins_and_outs(tx)) <= 10_000, sum)
],
lambda *_: 1
)
action_fees_vs_10k2 = Analysis(
"transactions that wouldn't pay more under the new model, by day, using actions",
lambda _c, _b, tx: is_not_coinbase(tx) and not is_sandblasting(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: 500 * max(3, count_actions(tx)) <= 10_000, sum)
],
lambda *_: 1
)
latest_fees_vs_10k2 = Analysis(
"transactions that wouldn't pay more under the new model, by day, using actions",
lambda _c, _b, tx: is_not_coinbase(tx) and not is_sandblasting(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: 1000 * max(2, count_actions(tx)) <= 10_000, sum)
],
lambda *_: 1
)
def vin_value(vin):
if 'valueSat' in vin:
return vin['valueSat']
else:
return 0
def tx_pool_movement(tx):
transparent = sum(vout['valueZat'] for vout in tx['vout']) - sum([vin_value(vin) for vin in tx['vin']])
sprout = sum([vjoinsplit['vpub_newZat'] - vjoinsplit['vpub_oldZat'] for vjoinsplit in tx['vjoinsplit']])
sapling = - tx['valueBalanceZat']
if 'orchard' in tx:
orchard = - tx['orchard']['valueBalanceZat']
else:
orchard = 0
# print("(%d, %d, %d, %d) %d -> %d" % (transparent, sprout, sapling, orchard, count_inputs(tx), count_outputs(tx)))
return (transparent, sprout, sapling, orchard)
pool_movement = Analysis(
"how are funds moving between pools?",
lambda *_: True,
[(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), lambda vals: np.sum(np.array(vals), 0))],
lambda _c, _b, tx: tx_pool_movement(tx)
)
start = datetime.datetime.now()
for analysis in a.analyze_blocks(recent_range,
[ pool_movement
]):
print(analysis)
print(datetime.datetime.now() - start)
# start = datetime.datetime.now()
# for analysis in a.analyze_blocks(pre_sandblasting_range,
# [ tx_below_pofm_threshold,
# tx_below_pofm_threshold_5,
# tx_below_pofm_threshold_max,
# tx_below_pofm_threshold_ins,
# tx_below_pofm_threshold_abs,
# outs_below_pofm_threshold_abs,
# arity_heat_map,
# minimum_pofm_fees_nuttycom,
# minimum_pofm_fees_nuttycom2,
# ]):
# print(analysis)
# print(datetime.datetime.now() - start)

analysis/examples.py (600 added lines, new executable file)

@@ -0,0 +1,600 @@
#!/usr/bin/env python3
# Copyright (c) 2022 The Zcash developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or https://www.opensource.org/licenses/mit-license.php .
"""Simple Transaction Analysis
This contains a class, `Analyzer`, for defining analyses of the blocks and
transactions on the blockchain. It also exposes a function
`analyze_blocks`, which handles applying multiple analyses simultaneously over
some common range of blocks.
"""
import datetime
import itertools
import math
import numpy as np
from statistics import mean
import sys
from analyze import Analysis, Analyzer
from helpers import *
### TODO: Get host/port from config
if len(sys.argv) > 1:
connection_string = sys.argv[1]
else:
raise Exception(
"%s needs to be provided a connection string, like \"http://user:pass@localhost:port\"."
% (sys.argv[0],))
blocks_per_hour = 48 # half this before NU2?
# start about a month before sandblasting
start_range = blocks_per_hour * 24 * 7 * 206
### Requested Statistics
def storeAnchor(pool, cache, block):
"""
Caches the block height as the value for its anchor hash.
"""
try:
final_root = block[pool]
try:
cache[final_root]
except KeyError:
cache[final_root] = block['height']
except KeyError:
None
return cache
# "how old of anchors are people picking"
# --- https://zcash.slack.com/archives/CP6SKNCJK/p1660103126252979
anchor_age_orchard = Analysis(
"how old of anchors are people picking (for orchard)",
lambda _c, _b, tx: is_orchard_tx(tx),
[(orchard_anchorage, sum)],
lambda *_: 1,
({}, lambda c, b: storeAnchor('finalorchardroot', c, b)),
blocks_per_hour * 24
)
anchor_age_sapling = Analysis(
"how old of anchors are people picking (for sapling)",
lambda _c, _b, tx: is_saplingspend_tx(tx),
[(sapling_anchorage, sum)],
lambda *_: 1,
({}, lambda c, b: storeAnchor('finalsaplingroot', c, b)),
blocks_per_hour * 24
)
# "what's the distribution of expiry height deltas"
# --- https://zcash.slack.com/archives/CP6SKNCJK/p1660103126252979
expiry_height_deltas = Analysis(
"distribution of expiry height deltas",
lambda *_: True,
[(lambda _, b, t: expiry_height_delta(b, t), sum)],
lambda *_: 1
)
tx_type_with_long_expiry = Analysis(
"types of tx with expiries longer than about a month",
lambda _, b, t: expiry_height_delta(b, t) >= blocks_per_hour * 24 * 30,
[# (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
# identity),
(lambda _c, _b, tx: tx_type(tx), sum)],
lambda *_: 1
)
# "does anyone use locktime"
# --- https://zcash.slack.com/archives/CP6SKNCJK/p1660103126252979
locktime_usage = Analysis(
"proportion of tx using locktime",
lambda *_: True,
[(lambda *_: 1,
lambda d: dict(d)[True] / (dict(d)[False] + dict(d)[True])),
(lambda _c, _b, tx: tx['locktime'] != 0, sum)],
lambda *_: 1
)
# "I'm seeing a slightly different pattern to the sandblasting transactions,
# unless I've just missed this before. The transactions I've looked at recently
# have had > 400 sapling outputs. Has this been the case before and I just
# missed it? I thought primarily these transactions had slightly over 100
# outputs in most cases."
# --- https://zcash.slack.com/archives/CP6SKNCJK/p1660195664187769
# "Calculate the POFM threshold for historical transactions on-chain and
# calculate what proportion of those transactions would fall below the POFM
# threshold"
# --- https://docs.google.com/document/d/18wtGFCB2N4FO7SoqDPnEgVudAMlCArHMz0EwhE1HNPY/edit
tx_below_pofm_threshold = Analysis(
"rate of transactions below POFM threshold",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
lambda d: dict(d)[False] / (dict(d)[False] + dict(d)[True])),
(lambda _c, _b, tx: count_ins_and_outs(tx) - 4 > 0, sum)
],
lambda *_: 1
)
tx_below_pofm_threshold_abs = Analysis(
"transactions below POFM threshold",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
lambda d: (dict(d)[False], dict(d)[True])),
(lambda _c, _b, tx: count_ins_and_outs(tx) - 4 > 0, sum)
],
lambda *_: 1
)
outs_below_pofm_threshold_abs = Analysis(
"outputs below POFM threshold",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
lambda d: (dict(d)[False], dict(d)[True])),
(lambda _c, _b, tx: count_ins_and_outs(tx) - 4 > 0, sum)
],
lambda _c, _b, tx: count_outputs(tx)
)
tx_below_pofm_threshold_5 = Analysis(
"rate of transactions below POFM threshold with a grace window of 5",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
lambda d: dict(d)[False] / (dict(d)[False] + dict(d)[True])),
(lambda _c, _b, tx: count_ins_and_outs(tx) - 5 > 0, sum)
],
lambda *_: 1
)
tx_below_pofm_threshold_max = Analysis(
"rate of transactions below POFM threshold with max",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
lambda d: dict(d)[False] / (dict(d)[False] + dict(d)[True])),
(lambda _c, _b, tx: count_actions(tx) - 4 > 0, sum)
],
lambda *_: 1
)
tx_below_pofm_threshold_ins = Analysis(
"rate of transactions below POFM threshold only on inputs",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
lambda d: dict(d)[False] / (dict(d)[False] + dict(d)[True])),
(lambda _c, _b, tx: count_inputs(tx) - 4 > 0, sum)
],
lambda *_: 1
)
### Other Examples
tx_per_day = Analysis(
"count transactions per day (treating block 0 as midnight ZST)",
lambda *_: True,
[(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), sum)],
lambda *_: 1
)
mean_tx_per_day = Analysis(
"mean transactions per day, by block",
lambda *_: True,
[(lambda _c, block, _t: int(block['height'] % (blocks_per_hour * 24)), lambda d: mean([x[1] for x in d])),
(lambda _c, block, _t: int(block['height']/(blocks_per_hour * 24)), sum)
],
lambda *_: 1
)
mean_inout_per_tx_per_day = Analysis(
"mean inputs, outputs per transaction per day, by block",
lambda *_: True,
[(lambda _c, block, _t: int(block['height'] % (blocks_per_hour * 24)), lambda d: mean(itertools.chain(d.values()))),
(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity)
],
lambda _c, _b, tx: (count_inputs(tx), count_outputs(tx))
)
mean_inout_per_tx = Analysis(
"mean inputs, outputs per transaction, by week",
lambda *_: True,
[ ( lambda _c, block, _t: int(block['height']/(blocks_per_hour * 24 * 7)),
lambda d: (mean([x[0] for x in d]), mean([x[1] for x in d]))
)
],
lambda _c, _b, tx: (count_inputs(tx), count_outputs(tx))
)
minimum_pofm_fees_nuttycom = Analysis(
"distribution of fees in ZAT, by day, using nuttycom's pricing",
lambda *_: True,
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: math.ceil(math.log((1000 + 250 * max(0, count_ins_and_outs(tx) - 4)) / 1000, 2)), sum)
],
lambda *_: 1
)
# NB: redefines the analysis above, now restricted to non-coinbase transactions.
minimum_pofm_fees_nuttycom = Analysis(
"distribution of fees in ZAT, by day, using nuttycom's pricing",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: math.ceil(math.log((1000 + 250 * max(0, count_ins_and_outs(tx) - 4)) / 1000, 2)), sum)
],
lambda *_: 1
)
minimum_pofm_fees_nuttycom2 = Analysis(
"distribution of fees in ZAT, by day, using nuttycom's changed pricing",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: math.ceil(math.log((1000 + 200 * max(0, count_ins_and_outs(tx) - 5)) / 1000, 2)), sum)
],
lambda *_: 1
)
def meh_fees(tx):
fee = tx['feePaid']
if fee == 0:
return -1
else:
result = math.ceil(math.log(tx['feePaid'], 2))
return result
actual_fees = Analysis(
"actual fees",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, _b, tx: meh_fees(tx), sum)
],
lambda *_: 1
)
proposed_fees = Analysis(
"",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, _b, tx: math.ceil(math.log(5000 * max(2, count_actions(tx)), 2)), sum)
],
lambda *_: 1
)
arity_heat_map = Analysis(
"inputs vs outputs",
lambda *_: True,
[(lambda _c, _b, tx: min(100, count_outputs(tx)), identity),
(lambda _c, _b, tx: min(100, count_inputs(tx)), sum)],
lambda *_: 1
)
input_size_dist = Analysis(
"distribution of input sizes",
lambda *_: True,
[(lambda _c, _b, tx: [len(x['scriptSig']['hex']) for x in tx['vin']], identity)],
lambda *_: 1,
)
# very_high_inout_tx = Analysis(
# "tx with very high in/out counts",
# lambda _c, _b, tx: count_ins_and_outs(tx) > 100,
# [(lambda _c, _b, tx: (count_inputs(tx), count_outputs(tx)), identity)],
# lambda _c, _b, tx: tx['txid']
# )
very_high_inout_tx = Analysis(
"tx with very high in/out counts",
lambda _c, _b, tx: count_ins_and_outs(tx) > 5000,
[],
lambda _c, _b, tx: (tx['txid'], count_ins_and_outs(tx))
)
def track_utxos(cache, block):
    """Keeps a cache mapping (txid, output index) to value in zatoshis: spent
    outputs are deleted, newly created outputs are added."""
    for tx in block['tx']:
        for vin in tx['vin']:
            if 'txid' in vin:  # coinbase inputs have no previous output to delete
                del cache[(vin['txid'], vin['vout'])]
        for vout in tx['vout']:
            cache[(tx['txid'], vout['n'])] = vout['valueZat']
    return cache
utxo_distribution = Analysis(
"how many UTXOs and how big are they?",
lambda *_: True,
[(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
lambda caches: sum([caches[-1][k] for k in caches[-1]]))],
lambda cache, _b, _t: cache,
({}, track_utxos),
1_000_000_000 # back to block 0, TODO: should be able to say this explicitly
)
def is_sandblasting(tx):
return get_shielded_outputs(tx) > 300
sandblasters_per_day = Analysis(
"how many transactions have >300 Sapling outputs each day?",
lambda _c, _b, tx: is_sandblasting(tx),
[(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), sum)],
lambda *_: 1
)
sandblasters_and_more_per_day = Analysis(
"how many transactions have >300 outputs each day?",
lambda _c, _b, tx: count_outputs(tx) > 300,
[(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), sum)],
lambda *_: 1
)
sandblaster_average_outputs_per_day = Analysis(
"how many outputs do sandblasters have?",
lambda _c, _b, tx: is_sandblasting(tx),
[(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), (lambda txs: sum(txs) / len(txs)))],
lambda _c, _b, tx: count_outputs(tx)
)
nuttycom_fees_vs_actual = Analysis(
"transactions that wouldn't pay more under the new model, by day, using nuttycom's pricing",
lambda *_: True,
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(250 * max(4, count_ins_and_outs(tx)), tx), sum)
],
lambda *_: 1
)
action_fees_vs_actual = Analysis(
"transactions that wouldn't pay more under the new model, by day, using actions",
lambda *_: True,
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(500 * max(3, count_actions(tx)), tx), sum)
],
lambda *_: 1
)
nuttycom_fees_vs_actual_trans = Analysis(
"transparent transactions that wouldn't pay more under the new model, by day, using nuttycom's pricing",
lambda _c, _b, tx: tx_type(tx) == 't-t',
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(250 * max(4, count_ins_and_outs(tx)), tx), sum)
],
lambda *_: 1
)
action_fees_vs_actual_trans = Analysis(
"transparent transactions that wouldn't pay more under the new model, by day, using actions",
lambda _c, _b, tx: tx_type(tx) == 't-t',
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(500 * max(3, count_actions(tx)), tx), sum)
],
lambda *_: 1
)
greg_fees_vs_actual = Analysis(
"transactions that wouldn't pay more under the new model, by day, using actions",
lambda *_: True,
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(250 * max(4, count_actions(tx)), tx), sum)
],
lambda *_: 1
)
greg_fees_vs_actual_trans = Analysis(
"transparent transactions that wouldn't pay more under the new model, by day, using actions",
lambda _c, _b, tx: tx_type(tx) == 't-t',
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(250 * max(4, count_actions(tx)), tx), sum)
],
lambda *_: 1
)
latest_fees_vs_actual = Analysis(
"transactions that wouldn't pay more under the new model, by day, using actions",
lambda *_: True,
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(1000 * max(2, count_actions(tx)), tx), sum)
],
lambda *_: 1
)
latest_fees_vs_actual_trans = Analysis(
"transparent transactions that wouldn't pay more under the new model, by day, using actions",
lambda _c, _b, tx: tx_type(tx) == 't-t',
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(1000 * max(2, count_actions(tx)), tx), sum)
],
lambda *_: 1
)
flat_fees_vs_actual = Analysis(
"transactions that would pass the original 10k ZAT fee, by day",
lambda *_: True,
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(10_000, tx), sum)
],
lambda *_: 1
)
shielding_tx_heat_map = Analysis(
"shielding tx",
lambda _c, _b, tx: is_not_coinbase(tx) and (tx_type(tx) == 't-z' or tx_type(tx) == 'm-z'),
[(lambda _c, _b, tx: min(100, count_outputs(tx)), identity),
(lambda _c, _b, tx: min(100, count_inputs(tx)), sum)],
lambda *_: 1
)
shielding_tx_actions = Analysis(
"shielding tx",
lambda _c, _b, tx: is_not_coinbase(tx) and (tx_type(tx) == 't-z' or tx_type(tx) == 'm-z'),
[(lambda _c, _b, tx: min(100, count_actions(tx)), sum)],
lambda *_: 1
)
fees_from_sandblasting = Analysis(
"fees collected from sandblasting",
lambda _c, _b, tx: is_sandblasting(tx),
[(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), sum)],
lambda _c, _b, tx: 500 * max(2, count_actions(tx))
)
flat_fees_vs_actual_trans = Analysis(
"transparent transactions that would pass the original 10k ZAT fee, by day",
lambda _c, _b, tx: tx_type(tx) == 't-t',
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(10_000, tx), sum)
],
lambda *_: 1
)
transparent_tx_that_would_fail_heat_map = Analysis(
"heat map of transparent tx that would fail under `500 * max(3, |actions|)`",
lambda _c, _b, tx: tx_type(tx) == 't-t' and getFeeDiff(500 * max(3, count_actions(tx)), tx) == False,
[(lambda _c, _b, tx: min(100, count_outputs(tx)), identity),
(lambda _c, _b, tx: min(100, count_inputs(tx)), sum)],
lambda *_: 1
)
# NB: `check_fee_paid` is not defined in this module or in helpers.py, so this
# analysis needs a fee-bucketing function supplied before it can run.
historical_fees = Analysis(
"histogram of actual fees paid",
lambda _c, _b, tx: is_not_coinbase(tx),
[(lambda _c, _b, tx: check_fee_paid(tx), sum)],
lambda *_: 1
)
# NB: redefines `arity_heat_map` from above, now restricted to non-coinbase transactions.
arity_heat_map = Analysis(
"inputs vs outputs",
lambda _c, _b, tx: is_not_coinbase(tx),
[(lambda _c, _b, tx: min(100, count_outputs(tx)), identity),
(lambda _c, _b, tx: min(100, count_inputs(tx)), sum)],
lambda *_: 1
)
transparent_input_histogram = Analysis(
"how many transparent inputs do txs have?",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: min(100, len(tx['vin'])), sum)],
lambda *_: 1
)
nuttycom_fees_vs_10k = Analysis(
"transactions that wouldn't pay more under the new model, by day, using nuttycom's pricing",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: 250 * max(4, count_ins_and_outs(tx)) <= 10_000, sum)
],
lambda *_: 1
)
action_fees_vs_10k = Analysis(
"transactions that wouldn't pay more under the new model, by day, using actions",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: 500 * max(3, count_actions(tx)) <= 10_000, sum)
],
lambda *_: 1
)
latest_fees_vs_10k = Analysis(
"transactions that wouldn't pay more under the new model, by day, using actions",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: 1000 * max(2, count_actions(tx)) <= 10_000, sum)
],
lambda *_: 1
)
a = Analyzer(connection_string)
def make_weekly_range(starting_week, number_of_weeks):
start_of_range = blocks_per_hour * 24 * 7 * starting_week
end_of_range = start_of_range + (blocks_per_hour * 24 * 7 * number_of_weeks)
return range(start_of_range, end_of_range)
# start about a month before sandblasting, overlapping with it
pre_sandblasting_range = make_weekly_range(206, 12)
# well into sandblasting
recent_range = make_weekly_range(220, 1)
start = datetime.datetime.now()
for analysis in a.analyze_blocks(pre_sandblasting_range,
[ # sandblaster_average_outputs_per_day,
# flat_fees_vs_actual,
# flat_fees_vs_actual_trans,
# transparent_tx_that_would_fail_heat_map
nuttycom_fees_vs_actual,
action_fees_vs_actual,
nuttycom_fees_vs_actual_trans,
action_fees_vs_actual_trans,
greg_fees_vs_actual,
greg_fees_vs_actual_trans,
# historical_fees,
# transparent_input_histogram,
]):
print(analysis)
print(datetime.datetime.now() - start)
# rerunning old data …
start = datetime.datetime.now()
for analysis in a.analyze_blocks(make_weekly_range(206, 1),
[ actual_fees,
proposed_fees,
]):
print(analysis)
print(datetime.datetime.now() - start)
nuttycom_fees_vs_10k2 = Analysis(
"transactions that wouldn't pay more under the new model, by day, using nuttycom's pricing",
lambda _c, _b, tx: is_not_coinbase(tx) and not is_sandblasting(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: 250 * max(4, count_ins_and_outs(tx)) <= 10_000, sum)
],
lambda *_: 1
)
action_fees_vs_10k2 = Analysis(
"transactions that wouldn't pay more under the new model, by day, using actions",
lambda _c, _b, tx: is_not_coinbase(tx) and not is_sandblasting(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: 500 * max(3, count_actions(tx)) <= 10_000, sum)
],
lambda *_: 1
)
latest_fees_vs_10k2 = Analysis(
"transactions that wouldn't pay more under the new model, by day, using actions",
lambda _c, _b, tx: is_not_coinbase(tx) and not is_sandblasting(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: 1000 * max(2, count_actions(tx)) <= 10_000, sum)
],
lambda *_: 1
)
pool_movement = Analysis(
"how are funds moving between pools?",
lambda *_: True,
[(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), lambda vals: np.sum(np.array(vals), 0))],
lambda _c, _b, tx: tx_pool_movement(tx)
)
start = datetime.datetime.now()
for analysis in a.analyze_blocks(recent_range,
[ pool_movement
]):
print(analysis)
print(datetime.datetime.now() - start)
start = datetime.datetime.now()
for analysis in a.analyze_blocks(pre_sandblasting_range,
[ tx_below_pofm_threshold,
tx_below_pofm_threshold_5,
tx_below_pofm_threshold_max,
tx_below_pofm_threshold_ins,
tx_below_pofm_threshold_abs,
outs_below_pofm_threshold_abs,
arity_heat_map,
minimum_pofm_fees_nuttycom,
minimum_pofm_fees_nuttycom2,
]):
print(analysis)
print(datetime.datetime.now() - start)

analysis/helpers.py (161 added lines, new file)

@@ -0,0 +1,161 @@
# Copyright (c) 2022 The Zcash developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or https://www.opensource.org/licenses/mit-license.php .
"""Useful function for transaction analyses
This is a collection of functions that make it easier to write new transaction
analyses.
"""
import datetime
import itertools
import math
# Blocks per hour post-Blossom; `expiry_height_delta` below depends on it.
# (examples.py defines the same value, 48, for its own analyses.)
blocks_per_hour = 48
def identity(x):
return x
def get_shielded_spends(tx):
try:
shielded_spends = len(tx['vShieldedSpend'])
except KeyError:
shielded_spends = 0
return shielded_spends
def get_shielded_outputs(tx):
try:
shielded_outputs = len(tx['vShieldedOutput'])
except KeyError:
shielded_outputs = 0
return shielded_outputs
def get_orchard_actions(tx):
try:
orchard_actions = len(tx['orchard']['actions'])
except KeyError:
orchard_actions = 0
return orchard_actions
def count_inputs(tx):
return len(tx['vin']) + 2 * len(tx['vjoinsplit']) + get_shielded_spends(tx) + get_orchard_actions(tx)
def count_outputs(tx):
return len(tx['vout']) + 2 * len(tx['vjoinsplit']) + get_shielded_outputs(tx) + get_orchard_actions(tx)
def count_ins_and_outs(tx):
return (len(tx['vin'])
+ len(tx['vout'])
+ get_shielded_spends(tx)
+ get_shielded_outputs(tx)
+ 2 * len(tx['vjoinsplit'])
+ 2 * get_orchard_actions(tx))
def count_actions(tx):
return (max(len(tx['vin']), len(tx['vout']))
+ max(get_shielded_spends(tx), get_shielded_outputs(tx))
+ 2 * len(tx['vjoinsplit'])
+ get_orchard_actions(tx))
def expiry_height_delta(block, tx):
"""
    Returns -1 if there's no expiry. If the expiry is more than a month away,
    returns the number of blocks in a month (about 35,000) instead of the delta.
"""
month = blocks_per_hour * 24 * 30
try:
expiry_height = tx['expiryheight']
if expiry_height == 0:
return -1
elif tx['expiryheight'] - block['height'] > month:
return month
else:
return tx['expiryheight'] - block['height']
except KeyError:
# `tx['expiryheight']` is ostensibly an optional field, but it seems
# like `0` is what tends to be used for "don't expire", so this case
# generally isn't hit.
return -1
def tx_type(tx):
"""
Categorizes all tx into one of nine categories: (t)ransparent, (z)shielded,
or (m)ixed for both inputs and outputs. So some possible results are "t-t",
"t-z", "m-z", etc.
"""
if tx['vjoinsplit'] or get_shielded_spends(tx) != 0 or get_orchard_actions(tx) != 0:
if tx['vin']:
ins = "m"
else:
ins = "z"
else:
ins = "t"
if tx['vjoinsplit'] or get_shielded_outputs(tx) != 0 or get_orchard_actions(tx) != 0:
if tx['vout']:
outs = "m"
else:
outs = "z"
else:
outs = "t"
return ins + "-" + outs
def is_orchard_tx(tx):
try:
return tx['orchard']['actions']
except KeyError:
return False
def is_saplingspend_tx(tx):
try:
return tx['vShieldedSpend']
except KeyError:
return False
def orchard_anchorage(cache, block, tx):
"""
Returns -1 if there is no anchor
"""
try:
return block['height'] - cache[tx['orchard']['anchor']]
except KeyError:
return -1
def sapling_anchorage(cache, block, tx):
"""
Returns -1 if there is no anchor
"""
try:
return block['height'] - cache[tx['vShieldedSpend'][0]['anchor']]
except KeyError:
return -1
def is_not_coinbase(tx):
return 'feePaid' in tx
# NB: This requires zcashd to be running with `experimentalfeatures=1`,
# `txindex=1` and `insightexplorer=1` (a minimal connection sketch follows
# `getFeeDiff` below).
def getFeeDiff(proposedFee, tx):
    """True if the tx actually paid at least `proposedFee`; -1 if it has no fee information (e.g. coinbase)."""
try:
return proposedFee <= tx['feePaid']
except KeyError:
return -1
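For completeness, a minimal connectivity sketch in the same spirit: the credentials, host, and port are placeholders (zcashd's default mainnet RPC port is 8232), and the node must be running with the flags noted above for `feePaid` to show up at all:

from slickrpc.rpc import Proxy

rpc = Proxy("http://user:pass@127.0.0.1:8232")  # placeholder credentials/host/port
print(rpc.getblockcount())                      # quick check that the node is reachable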
def vin_value(vin):
if 'valueSat' in vin:
return vin['valueSat']
else:
return 0
def tx_pool_movement(tx):
transparent = sum(vout['valueZat'] for vout in tx['vout']) - sum([vin_value(vin) for vin in tx['vin']])
sprout = sum([vjoinsplit['vpub_newZat'] - vjoinsplit['vpub_oldZat'] for vjoinsplit in tx['vjoinsplit']])
sapling = - tx['valueBalanceZat']
if 'orchard' in tx:
orchard = - tx['orchard']['valueBalanceZat']
else:
orchard = 0
# print("(%d, %d, %d, %d) %d -> %d" % (transparent, sprout, sapling, orchard, count_inputs(tx), count_outputs(tx)))
return (transparent, sprout, sapling, orchard)