Split analytics into multiple modules

Greg Pfeil 2022-10-10 12:55:00 -06:00
parent 0a53fd9237
commit 8877d342af
3 changed files with 765 additions and 741 deletions

analysis/analyze.py (745 changed lines, Executable file → Normal file)

@@ -1,32 +1,19 @@
#!/usr/bin/env python3
# Copyright (c) 2022 The Zcash developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or https://www.opensource.org/licenses/mit-license.php .
"""Simple Transaction Analysis
This contains a class, `Analyzer`, for defining analyses of the blocks and
transactions on the blockchain. It also exposes a function
`analyze_blocks`, which handles applying multiple analyses simultaneously over
some common range of blocks.
This contains a class, `Analysis`, for defining analyses of the blocks and
transactions on the blockchain. It also contains a class `Analyzer` with a
method `analyze_blocks`, which handles applying multiple analyses simultaneously
over some common range of blocks.
"""
import datetime
import itertools
import math
import numpy as np
import os.path
from progress.bar import IncrementalBar
from slickrpc.rpc import Proxy
from statistics import mean
import sys
### TODO: Get host/port from config
if len(sys.argv) > 1:
connection_string = sys.argv[1]
else:
raise Exception(
"%s needs to be provided a connection string, like \"http://user:pass@localhost:port\"."
% sys.argv[0])
class Analysis:
"""
@@ -168,727 +155,3 @@ class Analyzer:
result.append((analysis.name, analysis.aggregate(dict(bucketses)[analysis])))
return result
### Helpers
def identity(x):
return x
def get_shielded_spends(tx):
try:
shielded_spends = len(tx['vShieldedSpend'])
except KeyError:
shielded_spends = 0
return shielded_spends
def get_shielded_outputs(tx):
try:
shielded_outputs = len(tx['vShieldedOutput'])
except KeyError:
shielded_outputs = 0
return shielded_outputs
def get_orchard_actions(tx):
try:
orchard_actions = len(tx['orchard']['actions'])
except KeyError:
orchard_actions = 0
return orchard_actions
def count_inputs(tx):
return len(tx['vin']) + 2 * len(tx['vjoinsplit']) + get_shielded_spends(tx) + get_orchard_actions(tx)
def count_outputs(tx):
return len(tx['vout']) + 2 * len(tx['vjoinsplit']) + get_shielded_outputs(tx) + get_orchard_actions(tx)
def count_ins_and_outs(tx):
return (len(tx['vin'])
+ len(tx['vout'])
+ get_shielded_spends(tx)
+ get_shielded_outputs(tx)
+ 2 * len(tx['vjoinsplit'])
+ 2 * get_orchard_actions(tx))
def count_actions(tx):
return (max(len(tx['vin']), len(tx['vout']))
+ max(get_shielded_spends(tx), get_shielded_outputs(tx))
+ 2 * len(tx['vjoinsplit'])
+ get_orchard_actions(tx))
def expiry_height_delta(block, tx):
"""
Returns -1 if there's no expiry, also returns approximately 35,000 (the
number of blocks in a month) if the expiry is beyond 1 month.
"""
month = blocks_per_hour * 24 * 30
try:
expiry_height = tx['expiryheight']
if expiry_height == 0:
return -1
elif tx['expiryheight'] - block['height'] > month:
return month
else:
return tx['expiryheight'] - block['height']
except KeyError:
# `tx['expiryheight']` is ostensibly an optional field, but it seems
# like `0` is what tends to be used for "don't expire", so this case
# generally isn't hit.
return -1
def tx_type(tx):
"""
Categorizes all tx into one of nine categories: (t)ransparent, (z)shielded,
or (m)ixed for both inputs and outputs. So some possible results are "t-t",
"t-z", "m-z", etc.
"""
if tx['vjoinsplit'] or get_shielded_spends(tx) != 0 or get_orchard_actions(tx) != 0:
if tx['vin']:
ins = "m"
else:
ins = "z"
else:
ins = "t"
if tx['vjoinsplit'] or get_shielded_outputs(tx) != 0 or get_orchard_actions(tx) != 0:
if tx['vout']:
outs = "m"
else:
outs = "z"
else:
outs = "t"
return ins + "-" + outs
def is_orchard_tx(tx):
try:
return tx['orchard']['actions']
except KeyError:
return False
def is_saplingspend_tx(tx):
try:
return tx['vShieldedSpend']
except KeyError:
return False
def orchard_anchorage(cache, block, tx):
"""
Returns -1 if there is no anchor
"""
try:
return block['height'] - cache[tx['orchard']['anchor']]
except KeyError:
return -1
def sapling_anchorage(cache, block, tx):
"""
Returns -1 if there is no anchor
"""
try:
return block['height'] - cache[tx['vShieldedSpend'][0]['anchor']]
except KeyError:
return -1
def is_not_coinbase(tx):
return 'feePaid' in tx
# NB: This requires zcashd to be running with `experimentalfeatures=1`,
# `txindex=1` and `insightexplorer=1`.
def getFeeDiff(proposedFee, tx):
try:
return proposedFee <= tx['feePaid']
except KeyError:
return -1
blocks_per_hour = 48 # half this before NU2?
# start about a month before sandblasting
start_range = blocks_per_hour * 24 * 7 * 206
### Requested Statistics
def storeAnchor(pool, cache, block):
"""
Caches the block height as the value for its anchor hash.
"""
try:
final_root = block[pool]
try:
cache[final_root]
except KeyError:
cache[final_root] = block['height']
except KeyError:
None
return cache
# "how old of anchors are people picking"
# --- https://zcash.slack.com/archives/CP6SKNCJK/p1660103126252979
anchor_age_orchard = Analysis(
"how old of anchors are people picking (for orchard)",
lambda _c, _b, tx: is_orchard_tx(tx),
[(orchard_anchorage, sum)],
lambda *_: 1,
({}, lambda c, b: storeAnchor('finalorchardroot', c, b)),
blocks_per_hour * 24
)
anchor_age_sapling = Analysis(
"how old of anchors are people picking (for sapling)",
lambda _c, _b, tx: is_saplingspend_tx(tx),
[(sapling_anchorage, sum)],
lambda *_: 1,
({}, lambda c, b: storeAnchor('finalsaplingroot', c, b)),
blocks_per_hour * 24
)
# "what's the distribution of expiry height deltas"
# --- https://zcash.slack.com/archives/CP6SKNCJK/p1660103126252979
expiry_height_deltas = Analysis(
"distribution of expiry height deltas",
lambda *_: True,
[(lambda _, b, t: expiry_height_delta(b, t), sum)],
lambda *_: 1
)
tx_type_with_long_expiry = Analysis(
"types of tx with expiries longer than about a month",
lambda _, b, t: expiry_height_delta(b, t) >= blocks_per_hour * 24 * 30,
[# (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
# identity),
(lambda _c, _b, tx: tx_type(tx), sum)],
lambda *_: 1
)
# "does anyone use locktime"
# --- https://zcash.slack.com/archives/CP6SKNCJK/p1660103126252979
locktime_usage = Analysis(
"proportion of tx using locktime",
lambda *_: True,
[(lambda *_: 1,
lambda d: dict(d)[True] / (dict(d)[False] + dict(d)[True])),
(lambda _c, _b, tx: tx['locktime'] != 0, sum)],
lambda *_: 1
)
# "I'm seeing a slightly different pattern to the sandblasting transactions,
# unless I've just missed this before. The transactions I've looked at recently
# have had > 400 sapling outputs. Has this been the case before and I just
# missed it? I thought primarily these transactions had slightly over 100
# outputs in most cases."
# --- https://zcash.slack.com/archives/CP6SKNCJK/p1660195664187769
# "Calculate the POFM threshold for historical transactions on-chain and
# calculate what proportion of those transactions would fall below the POFM
# threshold"
# --- https://docs.google.com/document/d/18wtGFCB2N4FO7SoqDPnEgVudAMlCArHMz0EwhE1HNPY/edit
tx_below_pofm_threshold = Analysis(
"rate of transactions below POFM threshold",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
lambda d: dict(d)[False] / (dict(d)[False] + dict(d)[True])),
(lambda _c, _b, tx: count_ins_and_outs(tx) - 4 > 0, sum)
],
lambda *_: 1
)
tx_below_pofm_threshold_abs = Analysis(
"transactions below POFM threshold",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
lambda d: (dict(d)[False], dict(d)[True])),
(lambda _c, _b, tx: count_ins_and_outs(tx) - 4 > 0, sum)
],
lambda *_: 1
)
outs_below_pofm_threshold_abs = Analysis(
"outputs below POFM threshold",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
lambda d: (dict(d)[False], dict(d)[True])),
(lambda _c, _b, tx: count_ins_and_outs(tx) - 4 > 0, sum)
],
lambda _c, _b, tx: count_outputs(tx)
)
tx_below_pofm_threshold_5 = Analysis(
"rate of transactions below POFM threshold with a grace window of 5",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
lambda d: dict(d)[False] / (dict(d)[False] + dict(d)[True])),
(lambda _c, _b, tx: count_ins_and_outs(tx) - 5 > 0, sum)
],
lambda *_: 1
)
tx_below_pofm_threshold_max = Analysis(
"rate of transactions below POFM threshold with max",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
lambda d: dict(d)[False] / (dict(d)[False] + dict(d)[True])),
(lambda _c, _b, tx: count_actions(tx) - 4 > 0, sum)
],
lambda *_: 1
)
tx_below_pofm_threshold_ins = Analysis(
"rate of transactions below POFM threshold only on inputs",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
lambda d: dict(d)[False] / (dict(d)[False] + dict(d)[True])),
(lambda _c, _b, tx: count_inputs(tx) - 4 > 0, sum)
],
lambda *_: 1
)
### Other Examples
tx_per_day = Analysis(
"count transactions per day (treating block 0 as midnight ZST)",
lambda *_: True,
[(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), sum)],
lambda *_: 1
)
mean_tx_per_day = Analysis(
"mean transactions per day, by block",
lambda *_: True,
[(lambda _c, block, _t: int(block['height'] % (blocks_per_hour * 24)), lambda d: mean([x[1] for x in d])),
(lambda _c, block, _t: int(block['height']/(blocks_per_hour * 24)), sum)
],
lambda *_: 1
)
mean_inout_per_tx_per_day = Analysis(
"mean inputs, outputs per transaction per day, by block",
lambda *_: True,
[(lambda _c, block, _t: int(block['height'] % (blocks_per_hour * 24)), lambda d: mean(itertools.chain(d.values()))),
(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity)
],
lambda _c, _b, tx: (count_inputs(tx), count_outputs(tx))
)
mean_inout_per_tx = Analysis(
"mean inputs, outputs per transaction, by week",
lambda *_: True,
[ ( lambda _c, block, _t: int(block['height']/(blocks_per_hour * 24 * 7)),
lambda d: (mean([x[0] for x in d]), mean([x[1] for x in d]))
)
],
lambda _c, _b, tx: (count_inputs(tx), count_outputs(tx))
)
minimum_pofm_fees_nuttycom = Analysis(
"distribution of fees in ZAT, by day, using nuttycom's pricing",
lambda *_: True,
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: math.ceil(math.log((1000 + 250 * max(0, count_ins_and_outs(tx) - 4)) / 1000, 2)), sum)
],
lambda *_: 1
)
minimum_pofm_fees_nuttycom = Analysis(
"distribution of fees in ZAT, by day, using nuttycom's pricing",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: math.ceil(math.log((1000 + 250 * max(0, count_ins_and_outs(tx) - 4)) / 1000, 2)), sum)
],
lambda *_: 1
)
minimum_pofm_fees_nuttycom2 = Analysis(
"distribution of fees in ZAT, by day, using nuttycom's changed pricing",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: math.ceil(math.log((1000 + 200 * max(0, count_ins_and_outs(tx) - 5)) / 1000, 2)), sum)
],
lambda *_: 1
)
def meh_fees(tx):
fee = tx['feePaid']
if fee == 0:
return -1
else:
result = math.ceil(math.log(tx['feePaid'], 2))
# if result < 0:
# print("negative result: %s, %s" % (fee, tx['txid']))
return result
actual_fees = Analysis(
"actual fees",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, _b, tx: meh_fees(tx), sum)
],
lambda *_: 1
)
proposed_fees = Analysis(
"",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, _b, tx: math.ceil(math.log(5000 * max(2, count_actions(tx)), 2)), sum)
],
lambda *_: 1
)
arity_heat_map = Analysis(
"inputs vs outputs",
lambda *_: True,
[(lambda _c, _b, tx: min(100, count_outputs(tx)), identity),
(lambda _c, _b, tx: min(100, count_inputs(tx)), sum)],
lambda *_: 1
)
input_size_dist = Analysis(
"distribution of input sizes",
lambda *_: True,
[(lambda _c, _b, tx: [len(x['scriptSig']['hex']) for x in tx['vin']], identity)],
lambda *_: 1,
)
# very_high_inout_tx = Analysis(
# "tx with very high in/out counts",
# lambda _c, _b, tx: count_ins_and_outs(tx) > 100,
# [(lambda _c, _b, tx: (count_inputs(tx), count_outputs(tx)), identity)],
# lambda _c, _b, tx: tx['txid']
# )
very_high_inout_tx = Analysis(
"tx with very high in/out counts",
lambda _c, _b, tx: count_ins_and_outs(tx) > 5000,
[],
lambda _c, _b, tx: (tx['txid'], count_ins_and_outs(tx))
)
def track_utxos(cache, block):
for tx in block[tx]:
for vin in tx['vin']:
del cache[(vin['txid'], vin['vout'])]
for vout in tx['vout']:
cache[(tx['txid'], vout['n'])] = vout['valueZat']
return cache
utxo_distribution = Analysis(
"how many UTXOs and how big are they?",
lambda *_: True,
[(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
lambda caches: sum([caches[-1][k] for k in caches[-1]]))],
lambda cache, _b, _t: cache,
({}, track_utxos),
1_000_000_000 # back to block 0, TODO: should be able to say this explicitly
)
def is_sandblasting(tx):
return get_shielded_outputs(tx) > 300
sandblasters_per_day = Analysis(
"how many transactions have >300 Sapling outputs each day?",
lambda _c, _b, tx: is_sandblasting(tx),
[(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), sum)],
lambda *_: 1
)
sandblasters_and_more_per_day = Analysis(
"how many transactions have >300 outputs each day?",
lambda _c, _b, tx: count_outputs(tx) > 300,
[(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), sum)],
lambda *_: 1
)
sandblaster_average_outputs_per_day = Analysis(
"how many outputs do sandblasters have?",
lambda _c, _b, tx: is_sandblasting(tx),
[(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), (lambda txs: sum(txs) / len(txs)))],
lambda _c, _b, tx: count_outputs(tx)
)
nuttycom_fees_vs_actual = Analysis(
"transactions that wouldn't pay more under the new model, by day, using nuttycom's pricing",
lambda *_: True,
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(250 * max(4, count_ins_and_outs(tx)), tx), sum)
],
lambda *_: 1
)
action_fees_vs_actual = Analysis(
"transactions that wouldn't pay more under the new model, by day, using actions",
lambda *_: True,
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(500 * max(3, count_actions(tx)), tx), sum)
],
lambda *_: 1
)
nuttycom_fees_vs_actual_trans = Analysis(
"transparent transactions that wouldn't pay more under the new model, by day, using nuttycom's pricing",
lambda _c, _b, tx: tx_type(tx) == 't-t',
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(250 * max(4, count_ins_and_outs(tx)), tx), sum)
],
lambda *_: 1
)
action_fees_vs_actual_trans = Analysis(
"transparent transactions that wouldn't pay more under the new model, by day, using actions",
lambda _c, _b, tx: tx_type(tx) == 't-t',
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(500 * max(3, count_actions(tx)), tx), sum)
],
lambda *_: 1
)
greg_fees_vs_actual = Analysis(
"transactions that wouldn't pay more under the new model, by day, using actions",
lambda *_: True,
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(250 * max(4, count_actions(tx)), tx), sum)
],
lambda *_: 1
)
greg_fees_vs_actual_trans = Analysis(
"transparent transactions that wouldn't pay more under the new model, by day, using actions",
lambda _c, _b, tx: tx_type(tx) == 't-t',
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(250 * max(4, count_actions(tx)), tx), sum)
],
lambda *_: 1
)
latest_fees_vs_actual = Analysis(
"transactions that wouldn't pay more under the new model, by day, using actions",
lambda *_: True,
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(1000 * max(2, count_actions(tx)), tx), sum)
],
lambda *_: 1
)
latest_fees_vs_actual_trans = Analysis(
"transparent transactions that wouldn't pay more under the new model, by day, using actions",
lambda _c, _b, tx: tx_type(tx) == 't-t',
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(1000 * max(2, count_actions(tx)), tx), sum)
],
lambda *_: 1
)
flat_fees_vs_actual = Analysis(
"transactions that would pass the original 10k ZAT fee, by day",
lambda *_: True,
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(10_000, tx), sum)
],
lambda *_: 1
)
shielding_tx_heat_map = Analysis(
"shielding tx",
lambda _c, _b, tx: is_not_coinbase(tx) and (tx_type(tx) == 't-z' or tx_type(tx) == 'm-z'),
[(lambda _c, _b, tx: min(100, count_outputs(tx)), identity),
(lambda _c, _b, tx: min(100, count_inputs(tx)), sum)],
lambda *_: 1
)
shielding_tx_actions = Analysis(
"shielding tx",
lambda _c, _b, tx: is_not_coinbase(tx) and (tx_type(tx) == 't-z' or tx_type(tx) == 'm-z'),
[(lambda _c, _b, tx: min(100, count_actions(tx)), sum)],
lambda *_: 1
)
fees_from_sandblasting = Analysis(
"fees collected from sandblasting",
lambda _c, _b, tx: is_sandblasting(tx),
[(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), sum)],
lambda _c, _b, tx: 500 * max(2, count_actions(tx))
)
flat_fees_vs_actual_trans = Analysis(
"transparent transactions that would pass the original 10k ZAT fee, by day",
lambda _c, _b, tx: tx_type(tx) == 't-t',
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(10_000, tx), sum)
],
lambda *_: 1
)
transparent_tx_that_would_fail_heat_map = Analysis(
"heat map of transparent tx that would fail under `500 * max(3, |actions|)`",
lambda _c, _b, tx: tx_type(tx) == 't-t' and getFeeDiff(500 * max(3, count_actions(tx)), tx) == False,
[(lambda _c, _b, tx: min(100, count_outputs(tx)), identity),
(lambda _c, _b, tx: min(100, count_inputs(tx)), sum)],
lambda *_: 1
)
historical_fees = Analysis(
"histogram of actual fees paid",
lambda _c, _b, tx: is_not_coinbase(tx),
[(lambda _c, _b, tx: check_fee_paid(tx), sum)],
lambda *_: 1
)
arity_heat_map = Analysis(
"inputs vs outputs",
lambda _c, _b, tx: is_not_coinbase(tx),
[(lambda _c, _b, tx: min(100, count_outputs(tx)), identity),
(lambda _c, _b, tx: min(100, count_inputs(tx)), sum)],
lambda *_: 1
)
transparent_input_histogram = Analysis(
"how many transparent inputs do txs have?",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: min(100, len(tx['vin'])), sum)],
lambda *_: 1
)
nuttycom_fees_vs_10k = Analysis(
"transactions that wouldn't pay more under the new model, by day, using nuttycom's pricing",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: 250 * max(4, count_ins_and_outs(tx)) <= 10_000, sum)
],
lambda *_: 1
)
action_fees_vs_10k = Analysis(
"transactions that wouldn't pay more under the new model, by day, using actions",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: 500 * max(3, count_actions(tx)) <= 10_000, sum)
],
lambda *_: 1
)
latest_fees_vs_10k = Analysis(
"transactions that wouldn't pay more under the new model, by day, using actions",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: 1000 * max(2, count_actions(tx)) <= 10_000, sum)
],
lambda *_: 1
)
a = Analyzer(connection_string)
def make_weekly_range(starting_week, number_of_weeks):
start_of_range = blocks_per_hour * 24 * 7 * starting_week
end_of_range = start_of_range + (blocks_per_hour * 24 * 7 * number_of_weeks)
return range(start_of_range, end_of_range)
# start about a month before sandblasting, overlapping with it
pre_sandblasting_range = make_weekly_range(206, 12)
recent_range = make_weekly_range(220, 1)
# start = datetime.datetime.now()
# for analysis in a.analyze_blocks(some_range,
# [ # sandblaster_average_outputs_per_day,
# # flat_fees_vs_actual,
# # flat_fees_vs_actual_trans,
# # transparent_tx_that_would_fail_heat_map
# nuttycom_fees_vs_actual,
# action_fees_vs_actual,
# nuttycom_fees_vs_actual_trans,
# action_fees_vs_actual_trans,
# greg_fees_vs_actual,
# greg_fees_vs_actual_trans,
# # historical_fees,
# # transparent_input_histogram,
# ]):
# print(analysis)
# print(datetime.datetime.now() - start)
# rerunning old data …
# start = datetime.datetime.now()
# for analysis in a.analyze_blocks(make_weekly_range(206, 1),
# [ actual_fees,
# proposed_fees,
# ]):
# print(analysis)
# print(datetime.datetime.now() - start)
nuttycom_fees_vs_10k2 = Analysis(
"transactions that wouldn't pay more under the new model, by day, using nuttycom's pricing",
lambda _c, _b, tx: is_not_coinbase(tx) and not is_sandblasting(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: 250 * max(4, count_ins_and_outs(tx)) <= 10_000, sum)
],
lambda *_: 1
)
action_fees_vs_10k2 = Analysis(
"transactions that wouldn't pay more under the new model, by day, using actions",
lambda _c, _b, tx: is_not_coinbase(tx) and not is_sandblasting(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: 500 * max(3, count_actions(tx)) <= 10_000, sum)
],
lambda *_: 1
)
latest_fees_vs_10k2 = Analysis(
"transactions that wouldn't pay more under the new model, by day, using actions",
lambda _c, _b, tx: is_not_coinbase(tx) and not is_sandblasting(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: 1000 * max(2, count_actions(tx)) <= 10_000, sum)
],
lambda *_: 1
)
def vin_value(vin):
if 'valueSat' in vin:
return vin['valueSat']
else:
return 0
def tx_pool_movement(tx):
transparent = sum(vout['valueZat'] for vout in tx['vout']) - sum([vin_value(vin) for vin in tx['vin']])
sprout = sum([vjoinsplit['vpub_newZat'] - vjoinsplit['vpub_oldZat'] for vjoinsplit in tx['vjoinsplit']])
sapling = - tx['valueBalanceZat']
if 'orchard' in tx:
orchard = - tx['orchard']['valueBalanceZat']
else:
orchard = 0
# print("(%d, %d, %d, %d) %d -> %d" % (transparent, sprout, sapling, orchard, count_inputs(tx), count_outputs(tx)))
return (transparent, sprout, sapling, orchard)
pool_movement = Analysis(
"how are funds moving between pools?",
lambda *_: True,
[(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), lambda vals: np.sum(np.array(vals), 0))],
lambda _c, _b, tx: tx_pool_movement(tx)
)
start = datetime.datetime.now()
for analysis in a.analyze_blocks(recent_range,
[ pool_movement
]):
print(analysis)
print(datetime.datetime.now() - start)
# start = datetime.datetime.now()
# for analysis in a.analyze_blocks(pre_sandblasting_range,
# [ tx_below_pofm_threshold,
# tx_below_pofm_threshold_5,
# tx_below_pofm_threshold_max,
# tx_below_pofm_threshold_ins,
# tx_below_pofm_threshold_abs,
# outs_below_pofm_threshold_abs,
# arity_heat_map,
# minimum_pofm_fees_nuttycom,
# minimum_pofm_fees_nuttycom2,
# ]):
# print(analysis)
# print(datetime.datetime.now() - start)

analysis/examples.py (600 added lines, new executable file)

@@ -0,0 +1,600 @@
#!/usr/bin/env python3
# Copyright (c) 2022 The Zcash developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or https://www.opensource.org/licenses/mit-license.php .
"""Simple Transaction Analysis
This contains a class, `Analyzer`, for defining analyses of the blocks and
transactions on the blockchain. It also exposes a function
`analyze_blocks`, which handles applying multiple analyses simultaneously over
some common range of blocks.
"""
import datetime
import itertools
import math
import numpy as np
from statistics import mean
import sys
from analyze import Analysis, Analyzer
from helpers import *
### TODO: Get host/port from config
if len(sys.argv) > 1:
connection_string = sys.argv[1]
else:
raise Exception(
"%s needs to be provided a connection string, like \"http://user:pass@localhost:port\"."
% (sys.argv[0],))
blocks_per_hour = 48 # half this before NU2?
# start about a month before sandblasting
start_range = blocks_per_hour * 24 * 7 * 206
### Requested Statistics
def storeAnchor(pool, cache, block):
"""
Caches the block height as the value for its anchor hash.
"""
try:
final_root = block[pool]
try:
cache[final_root]
except KeyError:
cache[final_root] = block['height']
except KeyError:
None
return cache
# "how old of anchors are people picking"
# --- https://zcash.slack.com/archives/CP6SKNCJK/p1660103126252979
anchor_age_orchard = Analysis(
"how old of anchors are people picking (for orchard)",
lambda _c, _b, tx: is_orchard_tx(tx),
[(orchard_anchorage, sum)],
lambda *_: 1,
({}, lambda c, b: storeAnchor('finalorchardroot', c, b)),
blocks_per_hour * 24
)
anchor_age_sapling = Analysis(
"how old of anchors are people picking (for sapling)",
lambda _c, _b, tx: is_saplingspend_tx(tx),
[(sapling_anchorage, sum)],
lambda *_: 1,
({}, lambda c, b: storeAnchor('finalsaplingroot', c, b)),
blocks_per_hour * 24
)
# "what's the distribution of expiry height deltas"
# --- https://zcash.slack.com/archives/CP6SKNCJK/p1660103126252979
expiry_height_deltas = Analysis(
"distribution of expiry height deltas",
lambda *_: True,
[(lambda _, b, t: expiry_height_delta(b, t), sum)],
lambda *_: 1
)
tx_type_with_long_expiry = Analysis(
"types of tx with expiries longer than about a month",
lambda _, b, t: expiry_height_delta(b, t) >= blocks_per_hour * 24 * 30,
[# (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
# identity),
(lambda _c, _b, tx: tx_type(tx), sum)],
lambda *_: 1
)
# "does anyone use locktime"
# --- https://zcash.slack.com/archives/CP6SKNCJK/p1660103126252979
locktime_usage = Analysis(
"proportion of tx using locktime",
lambda *_: True,
[(lambda *_: 1,
lambda d: dict(d)[True] / (dict(d)[False] + dict(d)[True])),
(lambda _c, _b, tx: tx['locktime'] != 0, sum)],
lambda *_: 1
)
# "I'm seeing a slightly different pattern to the sandblasting transactions,
# unless I've just missed this before. The transactions I've looked at recently
# have had > 400 sapling outputs. Has this been the case before and I just
# missed it? I thought primarily these transactions had slightly over 100
# outputs in most cases."
# --- https://zcash.slack.com/archives/CP6SKNCJK/p1660195664187769
# "Calculate the POFM threshold for historical transactions on-chain and
# calculate what proportion of those transactions would fall below the POFM
# threshold"
# --- https://docs.google.com/document/d/18wtGFCB2N4FO7SoqDPnEgVudAMlCArHMz0EwhE1HNPY/edit
tx_below_pofm_threshold = Analysis(
"rate of transactions below POFM threshold",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
lambda d: dict(d)[False] / (dict(d)[False] + dict(d)[True])),
(lambda _c, _b, tx: count_ins_and_outs(tx) - 4 > 0, sum)
],
lambda *_: 1
)
tx_below_pofm_threshold_abs = Analysis(
"transactions below POFM threshold",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
lambda d: (dict(d)[False], dict(d)[True])),
(lambda _c, _b, tx: count_ins_and_outs(tx) - 4 > 0, sum)
],
lambda *_: 1
)
outs_below_pofm_threshold_abs = Analysis(
"outputs below POFM threshold",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
lambda d: (dict(d)[False], dict(d)[True])),
(lambda _c, _b, tx: count_ins_and_outs(tx) - 4 > 0, sum)
],
lambda _c, _b, tx: count_outputs(tx)
)
tx_below_pofm_threshold_5 = Analysis(
"rate of transactions below POFM threshold with a grace window of 5",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
lambda d: dict(d)[False] / (dict(d)[False] + dict(d)[True])),
(lambda _c, _b, tx: count_ins_and_outs(tx) - 5 > 0, sum)
],
lambda *_: 1
)
tx_below_pofm_threshold_max = Analysis(
"rate of transactions below POFM threshold with max",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
lambda d: dict(d)[False] / (dict(d)[False] + dict(d)[True])),
(lambda _c, _b, tx: count_actions(tx) - 4 > 0, sum)
],
lambda *_: 1
)
tx_below_pofm_threshold_ins = Analysis(
"rate of transactions below POFM threshold only on inputs",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
lambda d: dict(d)[False] / (dict(d)[False] + dict(d)[True])),
(lambda _c, _b, tx: count_inputs(tx) - 4 > 0, sum)
],
lambda *_: 1
)
### Other Examples
tx_per_day = Analysis(
"count transactions per day (treating block 0 as midnight ZST)",
lambda *_: True,
[(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), sum)],
lambda *_: 1
)
mean_tx_per_day = Analysis(
"mean transactions per day, by block",
lambda *_: True,
[(lambda _c, block, _t: int(block['height'] % (blocks_per_hour * 24)), lambda d: mean([x[1] for x in d])),
(lambda _c, block, _t: int(block['height']/(blocks_per_hour * 24)), sum)
],
lambda *_: 1
)
mean_inout_per_tx_per_day = Analysis(
"mean inputs, outputs per transaction per day, by block",
lambda *_: True,
[(lambda _c, block, _t: int(block['height'] % (blocks_per_hour * 24)), lambda d: mean(itertools.chain(d.values()))),
(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity)
],
lambda _c, _b, tx: (count_inputs(tx), count_outputs(tx))
)
mean_inout_per_tx = Analysis(
"mean inputs, outputs per transaction, by week",
lambda *_: True,
[ ( lambda _c, block, _t: int(block['height']/(blocks_per_hour * 24 * 7)),
lambda d: (mean([x[0] for x in d]), mean([x[1] for x in d]))
)
],
lambda _c, _b, tx: (count_inputs(tx), count_outputs(tx))
)
minimum_pofm_fees_nuttycom = Analysis(
"distribution of fees in ZAT, by day, using nuttycom's pricing",
lambda *_: True,
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: math.ceil(math.log((1000 + 250 * max(0, count_ins_and_outs(tx) - 4)) / 1000, 2)), sum)
],
lambda *_: 1
)
# NB: redefines the analysis above, now restricted to non-coinbase transactions.
minimum_pofm_fees_nuttycom = Analysis(
"distribution of fees in ZAT, by day, using nuttycom's pricing",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: math.ceil(math.log((1000 + 250 * max(0, count_ins_and_outs(tx) - 4)) / 1000, 2)), sum)
],
lambda *_: 1
)
minimum_pofm_fees_nuttycom2 = Analysis(
"distribution of fees in ZAT, by day, using nuttycom's changed pricing",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: math.ceil(math.log((1000 + 200 * max(0, count_ins_and_outs(tx) - 5)) / 1000, 2)), sum)
],
lambda *_: 1
)
def meh_fees(tx):
fee = tx['feePaid']
if fee == 0:
return -1
else:
result = math.ceil(math.log(tx['feePaid'], 2))
return result
actual_fees = Analysis(
"actual fees",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, _b, tx: meh_fees(tx), sum)
],
lambda *_: 1
)
proposed_fees = Analysis(
"",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, _b, tx: math.ceil(math.log(5000 * max(2, count_actions(tx)), 2)), sum)
],
lambda *_: 1
)
arity_heat_map = Analysis(
"inputs vs outputs",
lambda *_: True,
[(lambda _c, _b, tx: min(100, count_outputs(tx)), identity),
(lambda _c, _b, tx: min(100, count_inputs(tx)), sum)],
lambda *_: 1
)
input_size_dist = Analysis(
"distribution of input sizes",
lambda *_: True,
[(lambda _c, _b, tx: [len(x['scriptSig']['hex']) for x in tx['vin']], identity)],
lambda *_: 1,
)
# very_high_inout_tx = Analysis(
# "tx with very high in/out counts",
# lambda _c, _b, tx: count_ins_and_outs(tx) > 100,
# [(lambda _c, _b, tx: (count_inputs(tx), count_outputs(tx)), identity)],
# lambda _c, _b, tx: tx['txid']
# )
very_high_inout_tx = Analysis(
"tx with very high in/out counts",
lambda _c, _b, tx: count_ins_and_outs(tx) > 5000,
[],
lambda _c, _b, tx: (tx['txid'], count_ins_and_outs(tx))
)
def track_utxos(cache, block):
    """Keeps a cache mapping (txid, output index) to value in zatoshis: spent
    outputs are deleted, newly created outputs are added."""
    for tx in block['tx']:
        for vin in tx['vin']:
            if 'txid' in vin:  # coinbase inputs have no previous output to delete
                del cache[(vin['txid'], vin['vout'])]
        for vout in tx['vout']:
            cache[(tx['txid'], vout['n'])] = vout['valueZat']
    return cache
utxo_distribution = Analysis(
"how many UTXOs and how big are they?",
lambda *_: True,
[(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
lambda caches: sum([caches[-1][k] for k in caches[-1]]))],
lambda cache, _b, _t: cache,
({}, track_utxos),
1_000_000_000 # back to block 0, TODO: should be able to say this explicitly
)
def is_sandblasting(tx):
return get_shielded_outputs(tx) > 300
sandblasters_per_day = Analysis(
"how many transactions have >300 Sapling outputs each day?",
lambda _c, _b, tx: is_sandblasting(tx),
[(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), sum)],
lambda *_: 1
)
sandblasters_and_more_per_day = Analysis(
"how many transactions have >300 outputs each day?",
lambda _c, _b, tx: count_outputs(tx) > 300,
[(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), sum)],
lambda *_: 1
)
sandblaster_average_outputs_per_day = Analysis(
"how many outputs do sandblasters have?",
lambda _c, _b, tx: is_sandblasting(tx),
[(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), (lambda txs: sum(txs) / len(txs)))],
lambda _c, _b, tx: count_outputs(tx)
)
nuttycom_fees_vs_actual = Analysis(
"transactions that wouldn't pay more under the new model, by day, using nuttycom's pricing",
lambda *_: True,
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(250 * max(4, count_ins_and_outs(tx)), tx), sum)
],
lambda *_: 1
)
action_fees_vs_actual = Analysis(
"transactions that wouldn't pay more under the new model, by day, using actions",
lambda *_: True,
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(500 * max(3, count_actions(tx)), tx), sum)
],
lambda *_: 1
)
nuttycom_fees_vs_actual_trans = Analysis(
"transparent transactions that wouldn't pay more under the new model, by day, using nuttycom's pricing",
lambda _c, _b, tx: tx_type(tx) == 't-t',
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(250 * max(4, count_ins_and_outs(tx)), tx), sum)
],
lambda *_: 1
)
action_fees_vs_actual_trans = Analysis(
"transparent transactions that wouldn't pay more under the new model, by day, using actions",
lambda _c, _b, tx: tx_type(tx) == 't-t',
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(500 * max(3, count_actions(tx)), tx), sum)
],
lambda *_: 1
)
greg_fees_vs_actual = Analysis(
"transactions that wouldn't pay more under the new model, by day, using actions",
lambda *_: True,
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(250 * max(4, count_actions(tx)), tx), sum)
],
lambda *_: 1
)
greg_fees_vs_actual_trans = Analysis(
"transparent transactions that wouldn't pay more under the new model, by day, using actions",
lambda _c, _b, tx: tx_type(tx) == 't-t',
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(250 * max(4, count_actions(tx)), tx), sum)
],
lambda *_: 1
)
latest_fees_vs_actual = Analysis(
"transactions that wouldn't pay more under the new model, by day, using actions",
lambda *_: True,
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(1000 * max(2, count_actions(tx)), tx), sum)
],
lambda *_: 1
)
latest_fees_vs_actual_trans = Analysis(
"transparent transactions that wouldn't pay more under the new model, by day, using actions",
lambda _c, _b, tx: tx_type(tx) == 't-t',
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(1000 * max(2, count_actions(tx)), tx), sum)
],
lambda *_: 1
)
flat_fees_vs_actual = Analysis(
"transactions that would pass the original 10k ZAT fee, by day",
lambda *_: True,
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(10_000, tx), sum)
],
lambda *_: 1
)
shielding_tx_heat_map = Analysis(
"shielding tx",
lambda _c, _b, tx: is_not_coinbase(tx) and (tx_type(tx) == 't-z' or tx_type(tx) == 'm-z'),
[(lambda _c, _b, tx: min(100, count_outputs(tx)), identity),
(lambda _c, _b, tx: min(100, count_inputs(tx)), sum)],
lambda *_: 1
)
shielding_tx_actions = Analysis(
"shielding tx",
lambda _c, _b, tx: is_not_coinbase(tx) and (tx_type(tx) == 't-z' or tx_type(tx) == 'm-z'),
[(lambda _c, _b, tx: min(100, count_actions(tx)), sum)],
lambda *_: 1
)
fees_from_sandblasting = Analysis(
"fees collected from sandblasting",
lambda _c, _b, tx: is_sandblasting(tx),
[(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), sum)],
lambda _c, _b, tx: 500 * max(2, count_actions(tx))
)
flat_fees_vs_actual_trans = Analysis(
"transparent transactions that would pass the original 10k ZAT fee, by day",
lambda _c, _b, tx: tx_type(tx) == 't-t',
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: getFeeDiff(10_000, tx), sum)
],
lambda *_: 1
)
transparent_tx_that_would_fail_heat_map = Analysis(
"heat map of transparent tx that would fail under `500 * max(3, |actions|)`",
lambda _c, _b, tx: tx_type(tx) == 't-t' and getFeeDiff(500 * max(3, count_actions(tx)), tx) == False,
[(lambda _c, _b, tx: min(100, count_outputs(tx)), identity),
(lambda _c, _b, tx: min(100, count_inputs(tx)), sum)],
lambda *_: 1
)
# NB: `check_fee_paid` is not defined in this module or in helpers.py, so this
# analysis needs a fee-bucketing function supplied before it can run.
historical_fees = Analysis(
"histogram of actual fees paid",
lambda _c, _b, tx: is_not_coinbase(tx),
[(lambda _c, _b, tx: check_fee_paid(tx), sum)],
lambda *_: 1
)
# NB: redefines `arity_heat_map` from above, now restricted to non-coinbase transactions.
arity_heat_map = Analysis(
"inputs vs outputs",
lambda _c, _b, tx: is_not_coinbase(tx),
[(lambda _c, _b, tx: min(100, count_outputs(tx)), identity),
(lambda _c, _b, tx: min(100, count_inputs(tx)), sum)],
lambda *_: 1
)
transparent_input_histogram = Analysis(
"how many transparent inputs do txs have?",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: min(100, len(tx['vin'])), sum)],
lambda *_: 1
)
nuttycom_fees_vs_10k = Analysis(
"transactions that wouldn't pay more under the new model, by day, using nuttycom's pricing",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: 250 * max(4, count_ins_and_outs(tx)) <= 10_000, sum)
],
lambda *_: 1
)
action_fees_vs_10k = Analysis(
"transactions that wouldn't pay more under the new model, by day, using actions",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: 500 * max(3, count_actions(tx)) <= 10_000, sum)
],
lambda *_: 1
)
latest_fees_vs_10k = Analysis(
"transactions that wouldn't pay more under the new model, by day, using actions",
lambda _c, _b, tx: is_not_coinbase(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: 1000 * max(2, count_actions(tx)) <= 10_000, sum)
],
lambda *_: 1
)
a = Analyzer(connection_string)
def make_weekly_range(starting_week, number_of_weeks):
start_of_range = blocks_per_hour * 24 * 7 * starting_week
end_of_range = start_of_range + (blocks_per_hour * 24 * 7 * number_of_weeks)
return range(start_of_range, end_of_range)
# start about a month before sandblasting, overlapping with it
pre_sandblasting_range = make_weekly_range(206, 12)
# well into sandblasting
recent_range = make_weekly_range(220, 1)
start = datetime.datetime.now()
for analysis in a.analyze_blocks(pre_sandblasting_range,
[ # sandblaster_average_outputs_per_day,
# flat_fees_vs_actual,
# flat_fees_vs_actual_trans,
# transparent_tx_that_would_fail_heat_map
nuttycom_fees_vs_actual,
action_fees_vs_actual,
nuttycom_fees_vs_actual_trans,
action_fees_vs_actual_trans,
greg_fees_vs_actual,
greg_fees_vs_actual_trans,
# historical_fees,
# transparent_input_histogram,
]):
print(analysis)
print(datetime.datetime.now() - start)
# rerunning old data …
start = datetime.datetime.now()
for analysis in a.analyze_blocks(make_weekly_range(206, 1),
[ actual_fees,
proposed_fees,
]):
print(analysis)
print(datetime.datetime.now() - start)
nuttycom_fees_vs_10k2 = Analysis(
"transactions that wouldn't pay more under the new model, by day, using nuttycom's pricing",
lambda _c, _b, tx: is_not_coinbase(tx) and not is_sandblasting(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: 250 * max(4, count_ins_and_outs(tx)) <= 10_000, sum)
],
lambda *_: 1
)
action_fees_vs_10k2 = Analysis(
"transactions that wouldn't pay more under the new model, by day, using actions",
lambda _c, _b, tx: is_not_coinbase(tx) and not is_sandblasting(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: 500 * max(3, count_actions(tx)) <= 10_000, sum)
],
lambda *_: 1
)
latest_fees_vs_10k2 = Analysis(
"transactions that wouldn't pay more under the new model, by day, using actions",
lambda _c, _b, tx: is_not_coinbase(tx) and not is_sandblasting(tx),
[ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
(lambda _c, _b, tx: 1000 * max(2, count_actions(tx)) <= 10_000, sum)
],
lambda *_: 1
)
pool_movement = Analysis(
"how are funds moving between pools?",
lambda *_: True,
[(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), lambda vals: np.sum(np.array(vals), 0))],
lambda _c, _b, tx: tx_pool_movement(tx)
)
start = datetime.datetime.now()
for analysis in a.analyze_blocks(recent_range,
[ pool_movement
]):
print(analysis)
print(datetime.datetime.now() - start)
start = datetime.datetime.now()
for analysis in a.analyze_blocks(pre_sandblasting_range,
[ tx_below_pofm_threshold,
tx_below_pofm_threshold_5,
tx_below_pofm_threshold_max,
tx_below_pofm_threshold_ins,
tx_below_pofm_threshold_abs,
outs_below_pofm_threshold_abs,
arity_heat_map,
minimum_pofm_fees_nuttycom,
minimum_pofm_fees_nuttycom2,
]):
print(analysis)
print(datetime.datetime.now() - start)

analysis/helpers.py (161 added lines, new file)

@@ -0,0 +1,161 @@
# Copyright (c) 2022 The Zcash developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or https://www.opensource.org/licenses/mit-license.php .
"""Useful function for transaction analyses
This is a collection of functions that make it easier to write new transaction
analyses.
"""
import datetime
import itertools
import math
# Blocks per hour post-Blossom; `expiry_height_delta` below depends on it.
# (examples.py defines the same value, 48, for its own analyses.)
blocks_per_hour = 48
def identity(x):
return x
def get_shielded_spends(tx):
try:
shielded_spends = len(tx['vShieldedSpend'])
except KeyError:
shielded_spends = 0
return shielded_spends
def get_shielded_outputs(tx):
try:
shielded_outputs = len(tx['vShieldedOutput'])
except KeyError:
shielded_outputs = 0
return shielded_outputs
def get_orchard_actions(tx):
try:
orchard_actions = len(tx['orchard']['actions'])
except KeyError:
orchard_actions = 0
return orchard_actions
def count_inputs(tx):
return len(tx['vin']) + 2 * len(tx['vjoinsplit']) + get_shielded_spends(tx) + get_orchard_actions(tx)
def count_outputs(tx):
return len(tx['vout']) + 2 * len(tx['vjoinsplit']) + get_shielded_outputs(tx) + get_orchard_actions(tx)
def count_ins_and_outs(tx):
return (len(tx['vin'])
+ len(tx['vout'])
+ get_shielded_spends(tx)
+ get_shielded_outputs(tx)
+ 2 * len(tx['vjoinsplit'])
+ 2 * get_orchard_actions(tx))
def count_actions(tx):
return (max(len(tx['vin']), len(tx['vout']))
+ max(get_shielded_spends(tx), get_shielded_outputs(tx))
+ 2 * len(tx['vjoinsplit'])
+ get_orchard_actions(tx))
def expiry_height_delta(block, tx):
"""
    Returns -1 if there's no expiry. If the expiry is more than a month away,
    returns the number of blocks in a month (about 35,000) instead of the delta.
"""
month = blocks_per_hour * 24 * 30
try:
expiry_height = tx['expiryheight']
if expiry_height == 0:
return -1
elif tx['expiryheight'] - block['height'] > month:
return month
else:
return tx['expiryheight'] - block['height']
except KeyError:
# `tx['expiryheight']` is ostensibly an optional field, but it seems
# like `0` is what tends to be used for "don't expire", so this case
# generally isn't hit.
return -1
def tx_type(tx):
"""
Categorizes all tx into one of nine categories: (t)ransparent, (z)shielded,
or (m)ixed for both inputs and outputs. So some possible results are "t-t",
"t-z", "m-z", etc.
"""
if tx['vjoinsplit'] or get_shielded_spends(tx) != 0 or get_orchard_actions(tx) != 0:
if tx['vin']:
ins = "m"
else:
ins = "z"
else:
ins = "t"
if tx['vjoinsplit'] or get_shielded_outputs(tx) != 0 or get_orchard_actions(tx) != 0:
if tx['vout']:
outs = "m"
else:
outs = "z"
else:
outs = "t"
return ins + "-" + outs
def is_orchard_tx(tx):
try:
return tx['orchard']['actions']
except KeyError:
return False
def is_saplingspend_tx(tx):
try:
return tx['vShieldedSpend']
except KeyError:
return False
def orchard_anchorage(cache, block, tx):
"""
Returns -1 if there is no anchor
"""
try:
return block['height'] - cache[tx['orchard']['anchor']]
except KeyError:
return -1
def sapling_anchorage(cache, block, tx):
"""
Returns -1 if there is no anchor
"""
try:
return block['height'] - cache[tx['vShieldedSpend'][0]['anchor']]
except KeyError:
return -1
def is_not_coinbase(tx):
return 'feePaid' in tx
# NB: This requires zcashd to be running with `experimentalfeatures=1`,
# `txindex=1` and `insightexplorer=1` (a minimal connection sketch follows
# `getFeeDiff` below).
def getFeeDiff(proposedFee, tx):
    """True if the tx actually paid at least `proposedFee`; -1 if it has no fee information (e.g. coinbase)."""
try:
return proposedFee <= tx['feePaid']
except KeyError:
return -1
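For completeness, a minimal connectivity sketch in the same spirit: the credentials, host, and port are placeholders (zcashd's default mainnet RPC port is 8232), and the node must be running with the flags noted above for `feePaid` to show up at all:

from slickrpc.rpc import Proxy

rpc = Proxy("http://user:pass@127.0.0.1:8232")  # placeholder credentials/host/port
print(rpc.getblockcount())                      # quick check that the node is reachable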
def vin_value(vin):
if 'valueSat' in vin:
return vin['valueSat']
else:
return 0
def tx_pool_movement(tx):
transparent = sum(vout['valueZat'] for vout in tx['vout']) - sum([vin_value(vin) for vin in tx['vin']])
sprout = sum([vjoinsplit['vpub_newZat'] - vjoinsplit['vpub_oldZat'] for vjoinsplit in tx['vjoinsplit']])
sapling = - tx['valueBalanceZat']
if 'orchard' in tx:
orchard = - tx['orchard']['valueBalanceZat']
else:
orchard = 0
# print("(%d, %d, %d, %d) %d -> %d" % (transparent, sprout, sapling, orchard, count_inputs(tx), count_outputs(tx)))
return (transparent, sprout, sapling, orchard)