From e2260b711844556abfd602230d736b563fb37c97 Mon Sep 17 00:00:00 2001
From: Greg Pfeil <greg@z.cash>
Date: Mon, 3 Oct 2022 14:08:01 -0600
Subject: [PATCH 1/4] Relocate chain analysis tooling from zcash/zcash.

Was https://github.com/zcash/zcash/pull/6149
---
 analysis/analyze.py | 901 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 901 insertions(+)
 create mode 100755 analysis/analyze.py

diff --git a/analysis/analyze.py b/analysis/analyze.py
new file mode 100755
index 0000000..c74ae26
--- /dev/null
+++ b/analysis/analyze.py
@@ -0,0 +1,901 @@
+#!/usr/bin/env python3
+# Copyright (c) 2022 The Zcash developers
+# Distributed under the MIT software license, see the accompanying
+# file COPYING or https://www.opensource.org/licenses/mit-license.php .
+"""Simple Transaction Analysis
+
+This contains a class, `Analyzer`, for defining analyses of the blocks and
+transactions on the blockchain. It also exposes a function
+`analyze_blocks`, which handles applying multiple analyses simultaneously over
+some common range of blocks.
+"""
+
+import datetime
+import itertools
+import math
+import numpy as np
+import os.path
+from progress.bar import IncrementalBar
+from statistics import mean
+import sys
+
+sys.path.insert(
+    1,
+    os.path.join(os.path.dirname(os.path.abspath(__file__)),
+                 "../../qa/rpc-tests")
+)
+
+from test_framework.authproxy import AuthServiceProxy
+
+### TODO: Get host/port from config
+if len(sys.argv) > 1:
+    connection_string = sys.argv[1]
+else:
+    raise Exception(
+        "%s needs to be provided a connection string, like \"http://user:pass@localhost:port\"."
+        % sys.argv[0])
+
+class Analysis:
+    """
+    An analysis collects a single aggregated data structure from the blockchain.
+
+    If you had a block and a single tx from that block, you could simply
+   `my_analysis.aggregate(my_analysis.extract(block, tx))` to generate the stats
+    for that analysis. However, since we generally want to aggregate across many
+    transactions in many blocks and also because we usually want to collect
+    multiple statistics at once (because re-fetching blocks and tx is slow),
+   `extract` and `aggregate are separated out. See `analyze_blocks` for how to
+    take advantage of this structure.
+    """
+
+    def __init__(self, name, tx_filter, bucketers, extractor, cache = ((), lambda c, _: c), preCache = 0):
+        """It takes various functions to apply to the transactions therein. The functions are typed as follows:
+
+    tx_filter :: cache -> Block -> Tx -> Boolean
+    bucketers :: [ ...,
+                   (cache -> Block -> Tx -> k_n-2, [(k_n-1, a)] -> b),
+                   (cache -> Block -> Tx -> k_n-1, [(k_n, a)] -> b),
+                   (cache -> Block -> Tx -> k_n,   [v] -> a)
+                 ]
+    extractor :: cache -> Block -> Tx -> v
+    cache :: (cache, cache -> Block -> cache)
+    preCache = Natural
+
+    `tx_filter` decides whether the given transaction should be included in the
+                result,
+    `extractor` reduces each transaction to the parts we care about in the
+                results,
+    `bucketers` is a list of pairs of functions -- the first of each pair
+                produces a key for bucketing the results and the second is how
+                to accumulate the values in that bucket. The list allows us to
+                create buckets of buckets.
+    `cache`, if provided, is a tuple of an initial cache value and a function to
+             update it so that later transactions can look at information from
+             previous blocks.
+    `preCache` is how many blocks before the start of our range to start
+               caching. This is generally a _minimum_, don't be suprised if the
+               cache is updated from some much earlier point. Also, it may be
+               truncated if there aren't enough blocks between the beginning of
+               the chain and and the start of the range.
+
+    If no bucketers are provided, this returns a list of all the extracted data
+    in a list, one for each transaction. If there are bucketers, it returns a
+    map, with the keys from the first bucketer in the list and the values from
+    the first accumulator in the list.
+
+        """
+        self.name = name
+        self.__filter = tx_filter
+        self.__bucketers = bucketers
+        self.__extractor = extractor
+        (self.__cache, self.__cacheUpdater) = cache
+        self.preCache = preCache
+        self.__lastCachedBlock = 0
+
+    def updateCache(self, block):
+        """
+        This is exposed in order to handle the "precache", where we need to
+        build up the cache for blocks before the blocks we actually care to have
+        in our results.
+        """
+        if block['height'] > self.__lastCachedBlock:
+            self.__cache = self.__cacheUpdater(self.__cache, block)
+            self.__lastCachedBlock = block['height']
+
+    def extract(self, block, tx):
+        """
+        Extracts all the data from a given transaction (and its block) needed to
+        compute the statistics for this analysis.
+
+        TODO: Allow a bucketer to return multiple keys. This hopefully allows
+              things like sub-transaction extraction. E.g., looking at the sizes
+              of all vouts by day, without caring which ones are in the same tx
+        TODO: Distinguish between streamable and non-streamable analyses. The
+              difference is that a streamable analysis has an outermost bucketer
+              where duplicate keys are adjacent (much like POSIX `uniq`).
+        """
+        self.updateCache(block)
+
+        if self.__filter(self.__cache, block, tx):
+            value = self.__extractor(self.__cache, block, tx)
+            keys = [x[0](self.__cache, block, tx) for x in self.__bucketers]
+            return [(keys, value)]
+        else:
+            return []
+
+    def aggregate(self, kvs):
+        """
+        Given a `[([k_0, k_1, ..., k_n-1], v)]` (where `n` is the length of the
+        bucketer list provided at initialization and `k_*` are the results of
+        each bucketer), this groups and accumulates the results, returning their
+        final form.
+        """
+        kvs.sort(key=lambda x: x[0])
+        return self.__group(kvs, [x[1] for x in self.__bucketers])
+
+    def __group(self, kvs, accumulators):
+        if accumulators:
+            buck = []
+            accum, *remaining_accum = accumulators
+            for k, g in itertools.groupby(kvs, lambda x: x[0].pop(0)):
+                buck.append((k, accum(self.__group(list(g), remaining_accum))))
+            return buck
+        else:
+            return [x[1] for x in kvs]
+
+
+class Analyzer:
+    def __init__(self, node_url):
+        self.node = AuthServiceProxy(node_url)
+
+    def analyze_blocks(self, block_range, analyses):
+        """
+        This function executes multiple analyses over a common range of blocks,
+        returning results keyed by the name of the analysis.
+        """
+        current_height = self.node.getblockchaininfo()['estimatedheight']
+        bounded_range = range(
+            max(0, min(block_range[0], current_height)),
+            max(0, min(block_range[1], current_height))
+        )
+        longest_precache = max([x.preCache for x in analyses])
+        data_start = bounded_range[0]
+        for i in IncrementalBar('Building Cache   ').iter(range(max(0, data_start - longest_precache), data_start)):
+            [x.updateCache(self.node.getblock(str(i), 2)) for x in analyses]
+
+        bucketses = [(x, []) for x in analyses]
+        for block_height in IncrementalBar('Processing Blocks').iter(block_range):
+            block = self.node.getblock(str(block_height), 2)
+            for tx in block['tx']:
+                for analysis in analyses:
+                    dict(bucketses)[analysis].extend(analysis.extract(block, tx))
+
+        result = []
+        for analysis in IncrementalBar('Running Analyses ').iter(analyses):
+            result.append((analysis.name, analysis.aggregate(dict(bucketses)[analysis])))
+
+        return result
+
+### Helpers
+
+def identity(x):
+    return x
+
+def get_shielded_spends(tx):
+    try:
+        shielded_spends = len(tx['vShieldedSpend'])
+    except KeyError:
+        shielded_spends = 0
+
+    return shielded_spends
+
+def get_shielded_outputs(tx):
+    try:
+        shielded_outputs = len(tx['vShieldedOutput'])
+    except KeyError:
+        shielded_outputs = 0
+
+    return shielded_outputs
+
+def get_orchard_actions(tx):
+    try:
+        orchard_actions = len(tx['orchard']['actions'])
+    except KeyError:
+        orchard_actions = 0
+
+    return orchard_actions
+
+def count_inputs(tx):
+    return len(tx['vin']) + 2 * len(tx['vjoinsplit']) + get_shielded_spends(tx) + get_orchard_actions(tx)
+
+def count_outputs(tx):
+    return len(tx['vout']) + 2 * len(tx['vjoinsplit']) + get_shielded_outputs(tx) + get_orchard_actions(tx)
+
+def count_ins_and_outs(tx):
+    return (len(tx['vin'])
+            + len(tx['vout'])
+            + get_shielded_spends(tx)
+            + get_shielded_outputs(tx)
+            + 2 * len(tx['vjoinsplit'])
+            + 2 * get_orchard_actions(tx))
+
+def count_actions(tx):
+    return (max(len(tx['vin']), len(tx['vout']))
+            + max(get_shielded_spends(tx), get_shielded_outputs(tx))
+            + 2 * len(tx['vjoinsplit'])
+            + get_orchard_actions(tx))
+
+def expiry_height_delta(block, tx):
+    """
+    Returns -1 if there's no expiry, also returns approximately 35,000 (the
+    number of blocks in a month) if the expiry is beyond 1 month.
+    """
+    month = blocks_per_hour * 24 * 30
+    try:
+        expiry_height = tx['expiryheight']
+        if expiry_height == 0:
+            return -1
+        elif tx['expiryheight'] - block['height'] > month:
+            return month
+        else:
+            return tx['expiryheight'] - block['height']
+    except KeyError:
+        # `tx['expiryheight']` is ostensibly an optional field, but it seems
+        # like `0` is what tends to be used for "don't expire", so this case
+        # generally isn't hit.
+        return -1
+
+def tx_type(tx):
+    """
+    Categorizes all tx into one of nine categories: (t)ransparent, (z)shielded,
+    or (m)ixed for both inputs and outputs. So some possible results are "t-t",
+    "t-z", "m-z", etc.
+    """
+    if tx['vjoinsplit'] or get_shielded_spends(tx) != 0 or get_orchard_actions(tx) != 0:
+        if tx['vin']:
+            ins = "m"
+        else:
+            ins = "z"
+    else:
+        ins = "t"
+
+    if tx['vjoinsplit'] or get_shielded_outputs(tx) != 0 or get_orchard_actions(tx) != 0:
+        if tx['vout']:
+            outs = "m"
+        else:
+            outs = "z"
+    else:
+        outs = "t"
+
+    return ins + "-" + outs
+
+def is_orchard_tx(tx):
+    try:
+        return tx['orchard']['actions']
+    except KeyError:
+        return False
+
+def is_saplingspend_tx(tx):
+    try:
+        return tx['vShieldedSpend']
+    except KeyError:
+        return False
+
+def orchard_anchorage(cache, block, tx):
+    """
+    Returns -1 if there is no anchor
+    """
+    try:
+        return block['height'] - cache[tx['orchard']['anchor']]
+    except KeyError:
+        return -1
+
+def sapling_anchorage(cache, block, tx):
+    """
+    Returns -1 if there is no anchor
+    """
+    try:
+        return block['height'] - cache[tx['vShieldedSpend'][0]['anchor']]
+    except KeyError:
+        return -1
+
+def is_not_coinbase(tx):
+    return 'feePaid' in tx
+
+# NB: This requires zcashd to be running with `experimentalfeatures=1`,
+#    `txindex=1` and `insightexplorer=1`.
+def getFeeDiff(proposedFee, tx):
+    try:
+        return proposedFee <= tx['feePaid']
+    except KeyError:
+        return -1
+
+blocks_per_hour = 48 # half this before NU2?
+
+# start about a month before sandblasting
+start_range = blocks_per_hour * 24 * 7 * 206
+
+### Requested Statistics
+
+def storeAnchor(pool, cache, block):
+    """
+    Caches the block height as the value for its anchor hash.
+    """
+    try:
+        final_root = block[pool]
+        try:
+            cache[final_root]
+        except KeyError:
+            cache[final_root] = block['height']
+    except KeyError:
+        None
+
+    return cache
+
+# "how old of anchors are people picking"
+# --- https://zcash.slack.com/archives/CP6SKNCJK/p1660103126252979
+anchor_age_orchard = Analysis(
+    "how old of anchors are people picking (for orchard)",
+    lambda _c, _b, tx: is_orchard_tx(tx),
+    [(orchard_anchorage, sum)],
+    lambda *_: 1,
+    ({}, lambda c, b: storeAnchor('finalorchardroot', c, b)),
+    blocks_per_hour * 24
+)
+
+anchor_age_sapling = Analysis(
+    "how old of anchors are people picking (for sapling)",
+    lambda _c, _b, tx: is_saplingspend_tx(tx),
+    [(sapling_anchorage, sum)],
+    lambda *_: 1,
+    ({}, lambda c, b: storeAnchor('finalsaplingroot', c, b)),
+    blocks_per_hour * 24
+)
+
+# "what's the distribution of expiry height deltas"
+# --- https://zcash.slack.com/archives/CP6SKNCJK/p1660103126252979
+expiry_height_deltas = Analysis(
+    "distribution of expiry height deltas",
+    lambda *_: True,
+    [(lambda _, b, t: expiry_height_delta(b, t), sum)],
+    lambda *_: 1
+)
+
+tx_type_with_long_expiry = Analysis(
+    "types of tx with expiries longer than about a month",
+    lambda _, b, t: expiry_height_delta(b, t) >= blocks_per_hour * 24 * 30,
+    [# (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
+     #  identity),
+     (lambda _c, _b, tx: tx_type(tx), sum)],
+    lambda *_: 1
+)
+
+# "does anyone use locktime"
+# --- https://zcash.slack.com/archives/CP6SKNCJK/p1660103126252979
+locktime_usage = Analysis(
+    "proportion of tx using locktime",
+    lambda *_: True,
+    [(lambda *_: 1,
+      lambda d: dict(d)[True] / (dict(d)[False] + dict(d)[True])),
+     (lambda _c, _b, tx: tx['locktime'] != 0, sum)],
+    lambda *_: 1
+)
+
+# "I'm seeing a slightly different pattern to the sandblasting transactions,
+#  unless I've just missed this before. The transactions I've looked at recently
+#  have had > 400 sapling outputs. Has this been the case before and I just
+#  missed it? I thought primarily these transactions had slightly over 100
+#  outputs in most cases."
+# --- https://zcash.slack.com/archives/CP6SKNCJK/p1660195664187769
+
+
+# "Calculate the POFM threshold for historical transactions on-chain and
+#  calculate what proportion of those transactions would fall below the POFM
+#  threshold"
+# --- https://docs.google.com/document/d/18wtGFCB2N4FO7SoqDPnEgVudAMlCArHMz0EwhE1HNPY/edit
+tx_below_pofm_threshold = Analysis(
+    "rate of transactions below POFM threshold",
+    lambda _c, _b, tx: is_not_coinbase(tx),
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
+       lambda d: dict(d)[False] / (dict(d)[False] + dict(d)[True])),
+      (lambda _c, _b, tx: count_ins_and_outs(tx) - 4 > 0, sum)
+    ],
+    lambda *_: 1
+)
+
+tx_below_pofm_threshold_abs = Analysis(
+    "transactions below POFM threshold",
+    lambda _c, _b, tx: is_not_coinbase(tx),
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
+       lambda d: (dict(d)[False], dict(d)[True])),
+      (lambda _c, _b, tx: count_ins_and_outs(tx) - 4 > 0, sum)
+    ],
+    lambda *_: 1
+)
+
+outs_below_pofm_threshold_abs = Analysis(
+    "outputs below POFM threshold",
+    lambda _c, _b, tx: is_not_coinbase(tx),
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
+       lambda d: (dict(d)[False], dict(d)[True])),
+      (lambda _c, _b, tx: count_ins_and_outs(tx) - 4 > 0, sum)
+    ],
+    lambda _c, _b, tx: count_outputs(tx)
+)
+
+tx_below_pofm_threshold_5 = Analysis(
+    "rate of transactions below POFM threshold with a grace window of 5",
+    lambda _c, _b, tx: is_not_coinbase(tx),
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
+       lambda d: dict(d)[False] / (dict(d)[False] + dict(d)[True])),
+      (lambda _c, _b, tx: count_ins_and_outs(tx) - 5 > 0, sum)
+    ],
+    lambda *_: 1
+)
+
+
+tx_below_pofm_threshold_max = Analysis(
+    "rate of transactions below POFM threshold with max",
+    lambda _c, _b, tx: is_not_coinbase(tx),
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
+       lambda d: dict(d)[False] / (dict(d)[False] + dict(d)[True])),
+      (lambda _c, _b, tx: count_actions(tx) - 4 > 0, sum)
+    ],
+    lambda *_: 1
+)
+
+tx_below_pofm_threshold_ins = Analysis(
+    "rate of transactions below POFM threshold only on inputs",
+    lambda _c, _b, tx: is_not_coinbase(tx),
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
+       lambda d: dict(d)[False] / (dict(d)[False] + dict(d)[True])),
+      (lambda _c, _b, tx: count_inputs(tx) - 4 > 0, sum)
+    ],
+    lambda *_: 1
+)
+
+### Other Examples
+
+tx_per_day = Analysis(
+    "count transactions per day (treating block 0 as midnight ZST)",
+    lambda *_: True,
+    [(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), sum)],
+    lambda *_: 1
+)
+
+mean_tx_per_day = Analysis(
+    "mean transactions per day, by block",
+    lambda *_: True,
+    [(lambda _c, block, _t: int(block['height'] % (blocks_per_hour * 24)), lambda d: mean([x[1] for x in d])),
+     (lambda _c, block, _t: int(block['height']/(blocks_per_hour * 24)), sum)
+    ],
+    lambda *_: 1
+)
+
+mean_inout_per_tx_per_day = Analysis(
+    "mean inputs, outputs per transaction per day, by block",
+    lambda *_: True,
+    [(lambda _c, block, _t: int(block['height'] % (blocks_per_hour * 24)), lambda d: mean(itertools.chain(d.values()))),
+     (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity)
+    ],
+    lambda _c, _b, tx: (count_inputs(tx), count_outputs(tx))
+)
+
+mean_inout_per_tx = Analysis(
+    "mean inputs, outputs per transaction, by week",
+    lambda *_: True,
+    [ ( lambda _c, block, _t: int(block['height']/(blocks_per_hour * 24 * 7)),
+        lambda d: (mean([x[0] for x in d]), mean([x[1] for x in d]))
+       )
+     ],
+    lambda _c, _b, tx: (count_inputs(tx), count_outputs(tx))
+)
+
+minimum_pofm_fees_nuttycom = Analysis(
+    "distribution of fees in ZAT, by day, using nuttycom's pricing",
+    lambda *_: True,
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: math.ceil(math.log((1000 + 250 * max(0, count_ins_and_outs(tx) - 4)) / 1000, 2)), sum)
+    ],
+    lambda *_: 1
+)
+
+minimum_pofm_fees_nuttycom = Analysis(
+    "distribution of fees in ZAT, by day, using nuttycom's pricing",
+    lambda _c, _b, tx: is_not_coinbase(tx),
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: math.ceil(math.log((1000 + 250 * max(0, count_ins_and_outs(tx) - 4)) / 1000, 2)), sum)
+    ],
+    lambda *_: 1
+)
+
+minimum_pofm_fees_nuttycom2 = Analysis(
+    "distribution of fees in ZAT, by day, using nuttycom's changed pricing",
+    lambda _c, _b, tx: is_not_coinbase(tx),
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: math.ceil(math.log((1000 + 200 * max(0, count_ins_and_outs(tx) - 5)) / 1000, 2)), sum)
+    ],
+    lambda *_: 1
+)
+
+def meh_fees(tx):
+    fee = tx['feePaid']
+    if fee == 0:
+        return -1
+    else:
+        result = math.ceil(math.log(tx['feePaid'], 2))
+        # if result < 0:
+        #     print("negative result: %s, %s" % (fee, tx['txid']))
+        return result
+
+actual_fees = Analysis(
+    "actual fees",
+    lambda _c, _b, tx: is_not_coinbase(tx),
+    [ (lambda _c, _b, tx: meh_fees(tx), sum)
+    ],
+    lambda *_: 1
+)
+
+proposed_fees = Analysis(
+    "",
+    lambda _c, _b, tx: is_not_coinbase(tx),
+    [ (lambda _c, _b, tx: math.ceil(math.log(5000 * max(2, count_actions(tx)), 2)), sum)
+    ],
+    lambda *_: 1
+)
+
+arity_heat_map = Analysis(
+    "inputs vs outputs",
+    lambda *_: True,
+    [(lambda _c, _b, tx: min(100, count_outputs(tx)), identity),
+     (lambda _c, _b, tx: min(100, count_inputs(tx)), sum)],
+    lambda *_: 1
+)
+
+input_size_dist = Analysis(
+    "distribution of input sizes",
+    lambda *_: True,
+    [(lambda _c, _b, tx: [len(x['scriptSig']['hex']) for x in tx['vin']], identity)],
+    lambda *_: 1,
+)
+
+# very_high_inout_tx = Analysis(
+#     "tx with very high in/out counts",
+#     lambda _c, _b, tx: count_ins_and_outs(tx) > 100,
+#     [(lambda _c, _b, tx: (count_inputs(tx), count_outputs(tx)), identity)],
+#     lambda _c, _b, tx: tx['txid']
+# )
+
+very_high_inout_tx = Analysis(
+    "tx with very high in/out counts",
+    lambda _c, _b, tx: count_ins_and_outs(tx) > 5000,
+    [],
+    lambda _c, _b, tx: (tx['txid'], count_ins_and_outs(tx))
+)
+
+def track_utxos(cache, block):
+    for tx in block[tx]:
+        for vin in tx['vin']:
+            del cache[(vin['txid'], vin['vout'])]
+        for vout in tx['vout']:
+            cache[(tx['txid'], vout['n'])] = vout['valueZat']
+    return cache
+
+utxo_distribution = Analysis(
+    "how many UTXOs and how big are they?",
+    lambda *_: True,
+    [(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
+      lambda caches: sum([caches[-1][k] for k in caches[-1]]))],
+    lambda cache, _b, _t: cache,
+    ({}, track_utxos),
+    1_000_000_000 # back to block 0, TODO: should be able to say this explicitly
+)
+
+def is_sandblasting(tx):
+    return get_shielded_outputs(tx) > 300
+
+sandblasters_per_day = Analysis(
+    "how many transactions have >300 Sapling outputs each day?",
+    lambda _c, _b, tx: is_sandblasting(tx),
+    [(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), sum)],
+    lambda *_: 1
+)
+
+sandblasters_and_more_per_day = Analysis(
+    "how many transactions have >300 outputs each day?",
+    lambda _c, _b, tx: count_outputs(tx) > 300,
+    [(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), sum)],
+    lambda *_: 1
+)
+
+sandblaster_average_outputs_per_day = Analysis(
+    "how many outputs do sandblasters have?",
+    lambda _c, _b, tx: is_sandblasting(tx),
+    [(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), (lambda txs: sum(txs) / len(txs)))],
+    lambda _c, _b, tx: count_outputs(tx)
+)
+
+nuttycom_fees_vs_actual = Analysis(
+    "transactions that wouldn't pay more under the new model, by day, using nuttycom's pricing",
+    lambda *_: True,
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: getFeeDiff(250 * max(4, count_ins_and_outs(tx)), tx), sum)
+    ],
+    lambda *_: 1
+)
+
+action_fees_vs_actual = Analysis(
+    "transactions that wouldn't pay more under the new model, by day, using actions",
+    lambda *_: True,
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: getFeeDiff(500 * max(3, count_actions(tx)), tx), sum)
+    ],
+    lambda *_: 1
+)
+
+nuttycom_fees_vs_actual_trans = Analysis(
+    "transparent transactions that wouldn't pay more under the new model, by day, using nuttycom's pricing",
+    lambda _c, _b, tx: tx_type(tx) == 't-t',
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: getFeeDiff(250 * max(4, count_ins_and_outs(tx)), tx), sum)
+    ],
+    lambda *_: 1
+)
+
+action_fees_vs_actual_trans = Analysis(
+    "transparent transactions that wouldn't pay more under the new model, by day, using actions",
+    lambda _c, _b, tx: tx_type(tx) == 't-t',
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: getFeeDiff(500 * max(3, count_actions(tx)), tx), sum)
+    ],
+    lambda *_: 1
+)
+
+greg_fees_vs_actual = Analysis(
+    "transactions that wouldn't pay more under the new model, by day, using actions",
+    lambda *_: True,
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: getFeeDiff(250 * max(4, count_actions(tx)), tx), sum)
+    ],
+    lambda *_: 1
+)
+
+greg_fees_vs_actual_trans = Analysis(
+    "transparent transactions that wouldn't pay more under the new model, by day, using actions",
+    lambda _c, _b, tx: tx_type(tx) == 't-t',
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: getFeeDiff(250 * max(4, count_actions(tx)), tx), sum)
+    ],
+    lambda *_: 1
+)
+
+latest_fees_vs_actual = Analysis(
+    "transactions that wouldn't pay more under the new model, by day, using actions",
+    lambda *_: True,
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: getFeeDiff(1000 * max(2, count_actions(tx)), tx), sum)
+    ],
+    lambda *_: 1
+)
+
+latest_fees_vs_actual_trans = Analysis(
+    "transparent transactions that wouldn't pay more under the new model, by day, using actions",
+    lambda _c, _b, tx: tx_type(tx) == 't-t',
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: getFeeDiff(1000 * max(2, count_actions(tx)), tx), sum)
+    ],
+    lambda *_: 1
+)
+
+flat_fees_vs_actual = Analysis(
+    "transactions that would pass the original 10k ZAT fee, by day",
+    lambda *_: True,
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: getFeeDiff(10_000, tx), sum)
+    ],
+    lambda *_: 1
+)
+
+shielding_tx_heat_map = Analysis(
+    "shielding tx",
+    lambda _c, _b, tx: is_not_coinbase(tx) and (tx_type(tx) == 't-z' or tx_type(tx) == 'm-z'),
+    [(lambda _c, _b, tx: min(100, count_outputs(tx)), identity),
+     (lambda _c, _b, tx: min(100, count_inputs(tx)), sum)],
+    lambda *_: 1
+)
+
+shielding_tx_actions = Analysis(
+    "shielding tx",
+    lambda _c, _b, tx: is_not_coinbase(tx) and (tx_type(tx) == 't-z' or tx_type(tx) == 'm-z'),
+    [(lambda _c, _b, tx: min(100, count_actions(tx)), sum)],
+    lambda *_: 1
+)
+
+fees_from_sandblasting = Analysis(
+    "fees collected from sandblasting",
+    lambda _c, _b, tx: is_sandblasting(tx),
+    [(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), sum)],
+    lambda _c, _b, tx: 500 * max(2, count_actions(tx))
+)
+
+flat_fees_vs_actual_trans = Analysis(
+    "transparent transactions that would pass the original 10k ZAT fee, by day",
+    lambda _c, _b, tx: tx_type(tx) == 't-t',
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: getFeeDiff(10_000, tx), sum)
+    ],
+    lambda *_: 1
+)
+
+transparent_tx_that_would_fail_heat_map = Analysis(
+    "heat map of transparent tx that would fail under `500 * max(3, |actions|)`",
+    lambda _c, _b, tx: tx_type(tx) == 't-t' and getFeeDiff(500 * max(3, count_actions(tx)), tx) == False,
+    [(lambda _c, _b, tx: min(100, count_outputs(tx)), identity),
+     (lambda _c, _b, tx: min(100, count_inputs(tx)), sum)],
+    lambda *_: 1
+)
+
+historical_fees = Analysis(
+    "histogram of actual fees paid",
+    lambda _c, _b, tx: is_not_coinbase(tx),
+    [(lambda _c, _b, tx: check_fee_paid(tx), sum)],
+    lambda *_: 1
+)
+
+arity_heat_map = Analysis(
+    "inputs vs outputs",
+    lambda _c, _b, tx: is_not_coinbase(tx),
+    [(lambda _c, _b, tx: min(100, count_outputs(tx)), identity),
+     (lambda _c, _b, tx: min(100, count_inputs(tx)), sum)],
+    lambda *_: 1
+)
+
+transparent_input_histogram = Analysis(
+    "how many transparent inputs do txs have?",
+    lambda _c, _b, tx: is_not_coinbase(tx),
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: min(100, len(tx['vin'])), sum)],
+    lambda *_: 1
+)
+
+nuttycom_fees_vs_10k = Analysis(
+    "transactions that wouldn't pay more under the new model, by day, using nuttycom's pricing",
+    lambda _c, _b, tx: is_not_coinbase(tx),
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: 250 * max(4, count_ins_and_outs(tx)) <= 10_000, sum)
+    ],
+    lambda *_: 1
+)
+
+action_fees_vs_10k = Analysis(
+    "transactions that wouldn't pay more under the new model, by day, using actions",
+    lambda _c, _b, tx: is_not_coinbase(tx),
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: 500 * max(3, count_actions(tx)) <= 10_000, sum)
+    ],
+    lambda *_: 1
+)
+
+latest_fees_vs_10k = Analysis(
+    "transactions that wouldn't pay more under the new model, by day, using actions",
+    lambda _c, _b, tx: is_not_coinbase(tx),
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: 1000 * max(2, count_actions(tx)) <= 10_000, sum)
+    ],
+    lambda *_: 1
+)
+
+
+a = Analyzer(connection_string)
+
+def make_weekly_range(starting_week, number_of_weeks):
+    start_of_range = blocks_per_hour * 24 * 7 * starting_week
+    end_of_range = start_of_range + (blocks_per_hour * 24 * 7 * number_of_weeks)
+    return range(start_of_range, end_of_range)
+
+
+# start about a month before sandblasting, overlapping with it
+
+pre_sandblasting_range = make_weekly_range(206, 12)
+recent_range = make_weekly_range(220, 1)
+
+# start = datetime.datetime.now()
+# for analysis in a.analyze_blocks(some_range,
+#                        [ # sandblaster_average_outputs_per_day,
+#                            # flat_fees_vs_actual,
+#                            # flat_fees_vs_actual_trans,
+#                            # transparent_tx_that_would_fail_heat_map
+#                          nuttycom_fees_vs_actual,
+#                          action_fees_vs_actual,
+#                          nuttycom_fees_vs_actual_trans,
+#                          action_fees_vs_actual_trans,
+#                          greg_fees_vs_actual,
+#                          greg_fees_vs_actual_trans,
+#                          # historical_fees,
+#                          # transparent_input_histogram,
+#                        ]):
+#     print(analysis)
+# print(datetime.datetime.now() - start)
+
+# rerunning old data …
+# start = datetime.datetime.now()
+# for analysis in a.analyze_blocks(make_weekly_range(206, 1),
+#                        [ actual_fees,
+#                          proposed_fees,
+#                        ]):
+#     print(analysis)
+# print(datetime.datetime.now() - start)
+
+nuttycom_fees_vs_10k2 = Analysis(
+    "transactions that wouldn't pay more under the new model, by day, using nuttycom's pricing",
+    lambda _c, _b, tx: is_not_coinbase(tx) and not is_sandblasting(tx),
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: 250 * max(4, count_ins_and_outs(tx)) <= 10_000, sum)
+    ],
+    lambda *_: 1
+)
+
+action_fees_vs_10k2 = Analysis(
+    "transactions that wouldn't pay more under the new model, by day, using actions",
+    lambda _c, _b, tx: is_not_coinbase(tx) and not is_sandblasting(tx),
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: 500 * max(3, count_actions(tx)) <= 10_000, sum)
+    ],
+    lambda *_: 1
+)
+
+latest_fees_vs_10k2 = Analysis(
+    "transactions that wouldn't pay more under the new model, by day, using actions",
+    lambda _c, _b, tx: is_not_coinbase(tx) and not is_sandblasting(tx),
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: 1000 * max(2, count_actions(tx)) <= 10_000, sum)
+    ],
+    lambda *_: 1
+)
+
+def vin_value(vin):
+    if 'valueSat' in vin:
+        return vin['valueSat']
+    else:
+        return 0
+
+def tx_pool_movement(tx):
+    transparent = sum(vout['valueZat'] for vout in tx['vout']) - sum([vin_value(vin) for vin in tx['vin']])
+    sprout = sum([vjoinsplit['vpub_newZat'] - vjoinsplit['vpub_oldZat'] for vjoinsplit in tx['vjoinsplit']])
+    sapling = - tx['valueBalanceZat']
+    if 'orchard' in tx:
+        orchard = - tx['orchard']['valueBalanceZat']
+    else:
+        orchard = 0
+    # print("(%d, %d, %d, %d) – %d -> %d" % (transparent, sprout, sapling, orchard, count_inputs(tx), count_outputs(tx)))
+    return (transparent, sprout, sapling, orchard)
+
+pool_movement = Analysis(
+    "how are funds moving between pools?",
+    lambda *_: True,
+    [(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), lambda vals: np.sum(np.array(vals), 0))],
+    lambda _c, _b, tx: tx_pool_movement(tx)
+)
+
+
+start = datetime.datetime.now()
+for analysis in a.analyze_blocks(recent_range,
+                       [ pool_movement
+                       ]):
+    print(analysis)
+print(datetime.datetime.now() - start)
+
+# start = datetime.datetime.now()
+# for analysis in a.analyze_blocks(pre_sandblasting_range,
+#                        [ tx_below_pofm_threshold,
+#                          tx_below_pofm_threshold_5,
+#                          tx_below_pofm_threshold_max,
+#                          tx_below_pofm_threshold_ins,
+#                          tx_below_pofm_threshold_abs,
+#                          outs_below_pofm_threshold_abs,
+#                          arity_heat_map,
+#                          minimum_pofm_fees_nuttycom,
+#                          minimum_pofm_fees_nuttycom2,
+#                        ]):
+#     print(analysis)
+# print(datetime.datetime.now() - start)

From 0a53fd923796e2898461044c9b0d172e8ecd654e Mon Sep 17 00:00:00 2001
From: Greg Pfeil <greg@z.cash>
Date: Tue, 4 Oct 2022 14:06:20 -0600
Subject: [PATCH 2/4] Use slickrpc instead of the authproxy from rpc-tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Removes the cross-repo dependency.

The README change doesn’t indicate a new dependency, the existing code was
already depending on slick-bitcoinrpc.
---
 README.md           |  4 ++++
 analysis/analyze.py | 11 ++---------
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index 8a9becf..a627481 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,6 @@
 # zcash-graphs
 A collection of scripts for collecting and graphing data about the Zcash chain
+
+## dependencies
+
+- `slick-bitcoinrpc` (which depends on `libssl-dev` and `libcurl4-openssl-dev`)
diff --git a/analysis/analyze.py b/analysis/analyze.py
index c74ae26..8c23ee9 100755
--- a/analysis/analyze.py
+++ b/analysis/analyze.py
@@ -16,17 +16,10 @@ import math
 import numpy as np
 import os.path
 from progress.bar import IncrementalBar
+from slickrpc.rpc import Proxy
 from statistics import mean
 import sys
 
-sys.path.insert(
-    1,
-    os.path.join(os.path.dirname(os.path.abspath(__file__)),
-                 "../../qa/rpc-tests")
-)
-
-from test_framework.authproxy import AuthServiceProxy
-
 ### TODO: Get host/port from config
 if len(sys.argv) > 1:
     connection_string = sys.argv[1]
@@ -146,7 +139,7 @@ class Analysis:
 
 class Analyzer:
     def __init__(self, node_url):
-        self.node = AuthServiceProxy(node_url)
+        self.node = Proxy(node_url)
 
     def analyze_blocks(self, block_range, analyses):
         """

From 8877d342af00fe8b972b58cd50bf691f15f56b98 Mon Sep 17 00:00:00 2001
From: Greg Pfeil <greg@z.cash>
Date: Mon, 10 Oct 2022 12:55:00 -0600
Subject: [PATCH 3/4] Split analytics into multiple modules

---
 analysis/analyze.py  | 745 +------------------------------------------
 analysis/examples.py | 600 ++++++++++++++++++++++++++++++++++
 analysis/helpers.py  | 161 ++++++++++
 3 files changed, 765 insertions(+), 741 deletions(-)
 mode change 100755 => 100644 analysis/analyze.py
 create mode 100755 analysis/examples.py
 create mode 100644 analysis/helpers.py

diff --git a/analysis/analyze.py b/analysis/analyze.py
old mode 100755
new mode 100644
index 8c23ee9..bc82c89
--- a/analysis/analyze.py
+++ b/analysis/analyze.py
@@ -1,32 +1,19 @@
-#!/usr/bin/env python3
 # Copyright (c) 2022 The Zcash developers
 # Distributed under the MIT software license, see the accompanying
 # file COPYING or https://www.opensource.org/licenses/mit-license.php .
 """Simple Transaction Analysis
 
-This contains a class, `Analyzer`, for defining analyses of the blocks and
-transactions on the blockchain. It also exposes a function
-`analyze_blocks`, which handles applying multiple analyses simultaneously over
-some common range of blocks.
+This contains a class, `Analysis`, for defining analyses of the blocks and
+transactions on the blockchain. It also contains a class `Analyzer` with a
+method `analyze_blocks`, which handles applying multiple analyses simultaneously
+over some common range of blocks.
 """
 
 import datetime
 import itertools
 import math
-import numpy as np
-import os.path
 from progress.bar import IncrementalBar
 from slickrpc.rpc import Proxy
-from statistics import mean
-import sys
-
-### TODO: Get host/port from config
-if len(sys.argv) > 1:
-    connection_string = sys.argv[1]
-else:
-    raise Exception(
-        "%s needs to be provided a connection string, like \"http://user:pass@localhost:port\"."
-        % sys.argv[0])
 
 class Analysis:
     """
@@ -168,727 +155,3 @@ class Analyzer:
             result.append((analysis.name, analysis.aggregate(dict(bucketses)[analysis])))
 
         return result
-
-### Helpers
-
-def identity(x):
-    return x
-
-def get_shielded_spends(tx):
-    try:
-        shielded_spends = len(tx['vShieldedSpend'])
-    except KeyError:
-        shielded_spends = 0
-
-    return shielded_spends
-
-def get_shielded_outputs(tx):
-    try:
-        shielded_outputs = len(tx['vShieldedOutput'])
-    except KeyError:
-        shielded_outputs = 0
-
-    return shielded_outputs
-
-def get_orchard_actions(tx):
-    try:
-        orchard_actions = len(tx['orchard']['actions'])
-    except KeyError:
-        orchard_actions = 0
-
-    return orchard_actions
-
-def count_inputs(tx):
-    return len(tx['vin']) + 2 * len(tx['vjoinsplit']) + get_shielded_spends(tx) + get_orchard_actions(tx)
-
-def count_outputs(tx):
-    return len(tx['vout']) + 2 * len(tx['vjoinsplit']) + get_shielded_outputs(tx) + get_orchard_actions(tx)
-
-def count_ins_and_outs(tx):
-    return (len(tx['vin'])
-            + len(tx['vout'])
-            + get_shielded_spends(tx)
-            + get_shielded_outputs(tx)
-            + 2 * len(tx['vjoinsplit'])
-            + 2 * get_orchard_actions(tx))
-
-def count_actions(tx):
-    return (max(len(tx['vin']), len(tx['vout']))
-            + max(get_shielded_spends(tx), get_shielded_outputs(tx))
-            + 2 * len(tx['vjoinsplit'])
-            + get_orchard_actions(tx))
-
-def expiry_height_delta(block, tx):
-    """
-    Returns -1 if there's no expiry, also returns approximately 35,000 (the
-    number of blocks in a month) if the expiry is beyond 1 month.
-    """
-    month = blocks_per_hour * 24 * 30
-    try:
-        expiry_height = tx['expiryheight']
-        if expiry_height == 0:
-            return -1
-        elif tx['expiryheight'] - block['height'] > month:
-            return month
-        else:
-            return tx['expiryheight'] - block['height']
-    except KeyError:
-        # `tx['expiryheight']` is ostensibly an optional field, but it seems
-        # like `0` is what tends to be used for "don't expire", so this case
-        # generally isn't hit.
-        return -1
-
-def tx_type(tx):
-    """
-    Categorizes all tx into one of nine categories: (t)ransparent, (z)shielded,
-    or (m)ixed for both inputs and outputs. So some possible results are "t-t",
-    "t-z", "m-z", etc.
-    """
-    if tx['vjoinsplit'] or get_shielded_spends(tx) != 0 or get_orchard_actions(tx) != 0:
-        if tx['vin']:
-            ins = "m"
-        else:
-            ins = "z"
-    else:
-        ins = "t"
-
-    if tx['vjoinsplit'] or get_shielded_outputs(tx) != 0 or get_orchard_actions(tx) != 0:
-        if tx['vout']:
-            outs = "m"
-        else:
-            outs = "z"
-    else:
-        outs = "t"
-
-    return ins + "-" + outs
-
-def is_orchard_tx(tx):
-    try:
-        return tx['orchard']['actions']
-    except KeyError:
-        return False
-
-def is_saplingspend_tx(tx):
-    try:
-        return tx['vShieldedSpend']
-    except KeyError:
-        return False
-
-def orchard_anchorage(cache, block, tx):
-    """
-    Returns -1 if there is no anchor
-    """
-    try:
-        return block['height'] - cache[tx['orchard']['anchor']]
-    except KeyError:
-        return -1
-
-def sapling_anchorage(cache, block, tx):
-    """
-    Returns -1 if there is no anchor
-    """
-    try:
-        return block['height'] - cache[tx['vShieldedSpend'][0]['anchor']]
-    except KeyError:
-        return -1
-
-def is_not_coinbase(tx):
-    return 'feePaid' in tx
-
-# NB: This requires zcashd to be running with `experimentalfeatures=1`,
-#    `txindex=1` and `insightexplorer=1`.
-def getFeeDiff(proposedFee, tx):
-    try:
-        return proposedFee <= tx['feePaid']
-    except KeyError:
-        return -1
-
-blocks_per_hour = 48 # half this before NU2?
-
-# start about a month before sandblasting
-start_range = blocks_per_hour * 24 * 7 * 206
-
-### Requested Statistics
-
-def storeAnchor(pool, cache, block):
-    """
-    Caches the block height as the value for its anchor hash.
-    """
-    try:
-        final_root = block[pool]
-        try:
-            cache[final_root]
-        except KeyError:
-            cache[final_root] = block['height']
-    except KeyError:
-        None
-
-    return cache
-
-# "how old of anchors are people picking"
-# --- https://zcash.slack.com/archives/CP6SKNCJK/p1660103126252979
-anchor_age_orchard = Analysis(
-    "how old of anchors are people picking (for orchard)",
-    lambda _c, _b, tx: is_orchard_tx(tx),
-    [(orchard_anchorage, sum)],
-    lambda *_: 1,
-    ({}, lambda c, b: storeAnchor('finalorchardroot', c, b)),
-    blocks_per_hour * 24
-)
-
-anchor_age_sapling = Analysis(
-    "how old of anchors are people picking (for sapling)",
-    lambda _c, _b, tx: is_saplingspend_tx(tx),
-    [(sapling_anchorage, sum)],
-    lambda *_: 1,
-    ({}, lambda c, b: storeAnchor('finalsaplingroot', c, b)),
-    blocks_per_hour * 24
-)
-
-# "what's the distribution of expiry height deltas"
-# --- https://zcash.slack.com/archives/CP6SKNCJK/p1660103126252979
-expiry_height_deltas = Analysis(
-    "distribution of expiry height deltas",
-    lambda *_: True,
-    [(lambda _, b, t: expiry_height_delta(b, t), sum)],
-    lambda *_: 1
-)
-
-tx_type_with_long_expiry = Analysis(
-    "types of tx with expiries longer than about a month",
-    lambda _, b, t: expiry_height_delta(b, t) >= blocks_per_hour * 24 * 30,
-    [# (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
-     #  identity),
-     (lambda _c, _b, tx: tx_type(tx), sum)],
-    lambda *_: 1
-)
-
-# "does anyone use locktime"
-# --- https://zcash.slack.com/archives/CP6SKNCJK/p1660103126252979
-locktime_usage = Analysis(
-    "proportion of tx using locktime",
-    lambda *_: True,
-    [(lambda *_: 1,
-      lambda d: dict(d)[True] / (dict(d)[False] + dict(d)[True])),
-     (lambda _c, _b, tx: tx['locktime'] != 0, sum)],
-    lambda *_: 1
-)
-
-# "I'm seeing a slightly different pattern to the sandblasting transactions,
-#  unless I've just missed this before. The transactions I've looked at recently
-#  have had > 400 sapling outputs. Has this been the case before and I just
-#  missed it? I thought primarily these transactions had slightly over 100
-#  outputs in most cases."
-# --- https://zcash.slack.com/archives/CP6SKNCJK/p1660195664187769
-
-
-# "Calculate the POFM threshold for historical transactions on-chain and
-#  calculate what proportion of those transactions would fall below the POFM
-#  threshold"
-# --- https://docs.google.com/document/d/18wtGFCB2N4FO7SoqDPnEgVudAMlCArHMz0EwhE1HNPY/edit
-tx_below_pofm_threshold = Analysis(
-    "rate of transactions below POFM threshold",
-    lambda _c, _b, tx: is_not_coinbase(tx),
-    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
-       lambda d: dict(d)[False] / (dict(d)[False] + dict(d)[True])),
-      (lambda _c, _b, tx: count_ins_and_outs(tx) - 4 > 0, sum)
-    ],
-    lambda *_: 1
-)
-
-tx_below_pofm_threshold_abs = Analysis(
-    "transactions below POFM threshold",
-    lambda _c, _b, tx: is_not_coinbase(tx),
-    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
-       lambda d: (dict(d)[False], dict(d)[True])),
-      (lambda _c, _b, tx: count_ins_and_outs(tx) - 4 > 0, sum)
-    ],
-    lambda *_: 1
-)
-
-outs_below_pofm_threshold_abs = Analysis(
-    "outputs below POFM threshold",
-    lambda _c, _b, tx: is_not_coinbase(tx),
-    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
-       lambda d: (dict(d)[False], dict(d)[True])),
-      (lambda _c, _b, tx: count_ins_and_outs(tx) - 4 > 0, sum)
-    ],
-    lambda _c, _b, tx: count_outputs(tx)
-)
-
-tx_below_pofm_threshold_5 = Analysis(
-    "rate of transactions below POFM threshold with a grace window of 5",
-    lambda _c, _b, tx: is_not_coinbase(tx),
-    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
-       lambda d: dict(d)[False] / (dict(d)[False] + dict(d)[True])),
-      (lambda _c, _b, tx: count_ins_and_outs(tx) - 5 > 0, sum)
-    ],
-    lambda *_: 1
-)
-
-
-tx_below_pofm_threshold_max = Analysis(
-    "rate of transactions below POFM threshold with max",
-    lambda _c, _b, tx: is_not_coinbase(tx),
-    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
-       lambda d: dict(d)[False] / (dict(d)[False] + dict(d)[True])),
-      (lambda _c, _b, tx: count_actions(tx) - 4 > 0, sum)
-    ],
-    lambda *_: 1
-)
-
-tx_below_pofm_threshold_ins = Analysis(
-    "rate of transactions below POFM threshold only on inputs",
-    lambda _c, _b, tx: is_not_coinbase(tx),
-    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
-       lambda d: dict(d)[False] / (dict(d)[False] + dict(d)[True])),
-      (lambda _c, _b, tx: count_inputs(tx) - 4 > 0, sum)
-    ],
-    lambda *_: 1
-)
-
-### Other Examples
-
-tx_per_day = Analysis(
-    "count transactions per day (treating block 0 as midnight ZST)",
-    lambda *_: True,
-    [(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), sum)],
-    lambda *_: 1
-)
-
-mean_tx_per_day = Analysis(
-    "mean transactions per day, by block",
-    lambda *_: True,
-    [(lambda _c, block, _t: int(block['height'] % (blocks_per_hour * 24)), lambda d: mean([x[1] for x in d])),
-     (lambda _c, block, _t: int(block['height']/(blocks_per_hour * 24)), sum)
-    ],
-    lambda *_: 1
-)
-
-mean_inout_per_tx_per_day = Analysis(
-    "mean inputs, outputs per transaction per day, by block",
-    lambda *_: True,
-    [(lambda _c, block, _t: int(block['height'] % (blocks_per_hour * 24)), lambda d: mean(itertools.chain(d.values()))),
-     (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity)
-    ],
-    lambda _c, _b, tx: (count_inputs(tx), count_outputs(tx))
-)
-
-mean_inout_per_tx = Analysis(
-    "mean inputs, outputs per transaction, by week",
-    lambda *_: True,
-    [ ( lambda _c, block, _t: int(block['height']/(blocks_per_hour * 24 * 7)),
-        lambda d: (mean([x[0] for x in d]), mean([x[1] for x in d]))
-       )
-     ],
-    lambda _c, _b, tx: (count_inputs(tx), count_outputs(tx))
-)
-
-minimum_pofm_fees_nuttycom = Analysis(
-    "distribution of fees in ZAT, by day, using nuttycom's pricing",
-    lambda *_: True,
-    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
-      (lambda _c, _b, tx: math.ceil(math.log((1000 + 250 * max(0, count_ins_and_outs(tx) - 4)) / 1000, 2)), sum)
-    ],
-    lambda *_: 1
-)
-
-minimum_pofm_fees_nuttycom = Analysis(
-    "distribution of fees in ZAT, by day, using nuttycom's pricing",
-    lambda _c, _b, tx: is_not_coinbase(tx),
-    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
-      (lambda _c, _b, tx: math.ceil(math.log((1000 + 250 * max(0, count_ins_and_outs(tx) - 4)) / 1000, 2)), sum)
-    ],
-    lambda *_: 1
-)
-
-minimum_pofm_fees_nuttycom2 = Analysis(
-    "distribution of fees in ZAT, by day, using nuttycom's changed pricing",
-    lambda _c, _b, tx: is_not_coinbase(tx),
-    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
-      (lambda _c, _b, tx: math.ceil(math.log((1000 + 200 * max(0, count_ins_and_outs(tx) - 5)) / 1000, 2)), sum)
-    ],
-    lambda *_: 1
-)
-
-def meh_fees(tx):
-    fee = tx['feePaid']
-    if fee == 0:
-        return -1
-    else:
-        result = math.ceil(math.log(tx['feePaid'], 2))
-        # if result < 0:
-        #     print("negative result: %s, %s" % (fee, tx['txid']))
-        return result
-
-actual_fees = Analysis(
-    "actual fees",
-    lambda _c, _b, tx: is_not_coinbase(tx),
-    [ (lambda _c, _b, tx: meh_fees(tx), sum)
-    ],
-    lambda *_: 1
-)
-
-proposed_fees = Analysis(
-    "",
-    lambda _c, _b, tx: is_not_coinbase(tx),
-    [ (lambda _c, _b, tx: math.ceil(math.log(5000 * max(2, count_actions(tx)), 2)), sum)
-    ],
-    lambda *_: 1
-)
-
-arity_heat_map = Analysis(
-    "inputs vs outputs",
-    lambda *_: True,
-    [(lambda _c, _b, tx: min(100, count_outputs(tx)), identity),
-     (lambda _c, _b, tx: min(100, count_inputs(tx)), sum)],
-    lambda *_: 1
-)
-
-input_size_dist = Analysis(
-    "distribution of input sizes",
-    lambda *_: True,
-    [(lambda _c, _b, tx: [len(x['scriptSig']['hex']) for x in tx['vin']], identity)],
-    lambda *_: 1,
-)
-
-# very_high_inout_tx = Analysis(
-#     "tx with very high in/out counts",
-#     lambda _c, _b, tx: count_ins_and_outs(tx) > 100,
-#     [(lambda _c, _b, tx: (count_inputs(tx), count_outputs(tx)), identity)],
-#     lambda _c, _b, tx: tx['txid']
-# )
-
-very_high_inout_tx = Analysis(
-    "tx with very high in/out counts",
-    lambda _c, _b, tx: count_ins_and_outs(tx) > 5000,
-    [],
-    lambda _c, _b, tx: (tx['txid'], count_ins_and_outs(tx))
-)
-
-def track_utxos(cache, block):
-    for tx in block[tx]:
-        for vin in tx['vin']:
-            del cache[(vin['txid'], vin['vout'])]
-        for vout in tx['vout']:
-            cache[(tx['txid'], vout['n'])] = vout['valueZat']
-    return cache
-
-utxo_distribution = Analysis(
-    "how many UTXOs and how big are they?",
-    lambda *_: True,
-    [(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
-      lambda caches: sum([caches[-1][k] for k in caches[-1]]))],
-    lambda cache, _b, _t: cache,
-    ({}, track_utxos),
-    1_000_000_000 # back to block 0, TODO: should be able to say this explicitly
-)
-
-def is_sandblasting(tx):
-    return get_shielded_outputs(tx) > 300
-
-sandblasters_per_day = Analysis(
-    "how many transactions have >300 Sapling outputs each day?",
-    lambda _c, _b, tx: is_sandblasting(tx),
-    [(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), sum)],
-    lambda *_: 1
-)
-
-sandblasters_and_more_per_day = Analysis(
-    "how many transactions have >300 outputs each day?",
-    lambda _c, _b, tx: count_outputs(tx) > 300,
-    [(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), sum)],
-    lambda *_: 1
-)
-
-sandblaster_average_outputs_per_day = Analysis(
-    "how many outputs do sandblasters have?",
-    lambda _c, _b, tx: is_sandblasting(tx),
-    [(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), (lambda txs: sum(txs) / len(txs)))],
-    lambda _c, _b, tx: count_outputs(tx)
-)
-
-nuttycom_fees_vs_actual = Analysis(
-    "transactions that wouldn't pay more under the new model, by day, using nuttycom's pricing",
-    lambda *_: True,
-    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
-      (lambda _c, _b, tx: getFeeDiff(250 * max(4, count_ins_and_outs(tx)), tx), sum)
-    ],
-    lambda *_: 1
-)
-
-action_fees_vs_actual = Analysis(
-    "transactions that wouldn't pay more under the new model, by day, using actions",
-    lambda *_: True,
-    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
-      (lambda _c, _b, tx: getFeeDiff(500 * max(3, count_actions(tx)), tx), sum)
-    ],
-    lambda *_: 1
-)
-
-nuttycom_fees_vs_actual_trans = Analysis(
-    "transparent transactions that wouldn't pay more under the new model, by day, using nuttycom's pricing",
-    lambda _c, _b, tx: tx_type(tx) == 't-t',
-    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
-      (lambda _c, _b, tx: getFeeDiff(250 * max(4, count_ins_and_outs(tx)), tx), sum)
-    ],
-    lambda *_: 1
-)
-
-action_fees_vs_actual_trans = Analysis(
-    "transparent transactions that wouldn't pay more under the new model, by day, using actions",
-    lambda _c, _b, tx: tx_type(tx) == 't-t',
-    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
-      (lambda _c, _b, tx: getFeeDiff(500 * max(3, count_actions(tx)), tx), sum)
-    ],
-    lambda *_: 1
-)
-
-greg_fees_vs_actual = Analysis(
-    "transactions that wouldn't pay more under the new model, by day, using actions",
-    lambda *_: True,
-    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
-      (lambda _c, _b, tx: getFeeDiff(250 * max(4, count_actions(tx)), tx), sum)
-    ],
-    lambda *_: 1
-)
-
-greg_fees_vs_actual_trans = Analysis(
-    "transparent transactions that wouldn't pay more under the new model, by day, using actions",
-    lambda _c, _b, tx: tx_type(tx) == 't-t',
-    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
-      (lambda _c, _b, tx: getFeeDiff(250 * max(4, count_actions(tx)), tx), sum)
-    ],
-    lambda *_: 1
-)
-
-latest_fees_vs_actual = Analysis(
-    "transactions that wouldn't pay more under the new model, by day, using actions",
-    lambda *_: True,
-    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
-      (lambda _c, _b, tx: getFeeDiff(1000 * max(2, count_actions(tx)), tx), sum)
-    ],
-    lambda *_: 1
-)
-
-latest_fees_vs_actual_trans = Analysis(
-    "transparent transactions that wouldn't pay more under the new model, by day, using actions",
-    lambda _c, _b, tx: tx_type(tx) == 't-t',
-    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
-      (lambda _c, _b, tx: getFeeDiff(1000 * max(2, count_actions(tx)), tx), sum)
-    ],
-    lambda *_: 1
-)
-
-flat_fees_vs_actual = Analysis(
-    "transactions that would pass the original 10k ZAT fee, by day",
-    lambda *_: True,
-    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
-      (lambda _c, _b, tx: getFeeDiff(10_000, tx), sum)
-    ],
-    lambda *_: 1
-)
-
-shielding_tx_heat_map = Analysis(
-    "shielding tx",
-    lambda _c, _b, tx: is_not_coinbase(tx) and (tx_type(tx) == 't-z' or tx_type(tx) == 'm-z'),
-    [(lambda _c, _b, tx: min(100, count_outputs(tx)), identity),
-     (lambda _c, _b, tx: min(100, count_inputs(tx)), sum)],
-    lambda *_: 1
-)
-
-shielding_tx_actions = Analysis(
-    "shielding tx",
-    lambda _c, _b, tx: is_not_coinbase(tx) and (tx_type(tx) == 't-z' or tx_type(tx) == 'm-z'),
-    [(lambda _c, _b, tx: min(100, count_actions(tx)), sum)],
-    lambda *_: 1
-)
-
-fees_from_sandblasting = Analysis(
-    "fees collected from sandblasting",
-    lambda _c, _b, tx: is_sandblasting(tx),
-    [(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), sum)],
-    lambda _c, _b, tx: 500 * max(2, count_actions(tx))
-)
-
-flat_fees_vs_actual_trans = Analysis(
-    "transparent transactions that would pass the original 10k ZAT fee, by day",
-    lambda _c, _b, tx: tx_type(tx) == 't-t',
-    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
-      (lambda _c, _b, tx: getFeeDiff(10_000, tx), sum)
-    ],
-    lambda *_: 1
-)
-
-transparent_tx_that_would_fail_heat_map = Analysis(
-    "heat map of transparent tx that would fail under `500 * max(3, |actions|)`",
-    lambda _c, _b, tx: tx_type(tx) == 't-t' and getFeeDiff(500 * max(3, count_actions(tx)), tx) == False,
-    [(lambda _c, _b, tx: min(100, count_outputs(tx)), identity),
-     (lambda _c, _b, tx: min(100, count_inputs(tx)), sum)],
-    lambda *_: 1
-)
-
-historical_fees = Analysis(
-    "histogram of actual fees paid",
-    lambda _c, _b, tx: is_not_coinbase(tx),
-    [(lambda _c, _b, tx: check_fee_paid(tx), sum)],
-    lambda *_: 1
-)
-
-arity_heat_map = Analysis(
-    "inputs vs outputs",
-    lambda _c, _b, tx: is_not_coinbase(tx),
-    [(lambda _c, _b, tx: min(100, count_outputs(tx)), identity),
-     (lambda _c, _b, tx: min(100, count_inputs(tx)), sum)],
-    lambda *_: 1
-)
-
-transparent_input_histogram = Analysis(
-    "how many transparent inputs do txs have?",
-    lambda _c, _b, tx: is_not_coinbase(tx),
-    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
-      (lambda _c, _b, tx: min(100, len(tx['vin'])), sum)],
-    lambda *_: 1
-)
-
-nuttycom_fees_vs_10k = Analysis(
-    "transactions that wouldn't pay more under the new model, by day, using nuttycom's pricing",
-    lambda _c, _b, tx: is_not_coinbase(tx),
-    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
-      (lambda _c, _b, tx: 250 * max(4, count_ins_and_outs(tx)) <= 10_000, sum)
-    ],
-    lambda *_: 1
-)
-
-action_fees_vs_10k = Analysis(
-    "transactions that wouldn't pay more under the new model, by day, using actions",
-    lambda _c, _b, tx: is_not_coinbase(tx),
-    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
-      (lambda _c, _b, tx: 500 * max(3, count_actions(tx)) <= 10_000, sum)
-    ],
-    lambda *_: 1
-)
-
-latest_fees_vs_10k = Analysis(
-    "transactions that wouldn't pay more under the new model, by day, using actions",
-    lambda _c, _b, tx: is_not_coinbase(tx),
-    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
-      (lambda _c, _b, tx: 1000 * max(2, count_actions(tx)) <= 10_000, sum)
-    ],
-    lambda *_: 1
-)
-
-
-a = Analyzer(connection_string)
-
-def make_weekly_range(starting_week, number_of_weeks):
-    start_of_range = blocks_per_hour * 24 * 7 * starting_week
-    end_of_range = start_of_range + (blocks_per_hour * 24 * 7 * number_of_weeks)
-    return range(start_of_range, end_of_range)
-
-
-# start about a month before sandblasting, overlapping with it
-
-pre_sandblasting_range = make_weekly_range(206, 12)
-recent_range = make_weekly_range(220, 1)
-
-# start = datetime.datetime.now()
-# for analysis in a.analyze_blocks(some_range,
-#                        [ # sandblaster_average_outputs_per_day,
-#                            # flat_fees_vs_actual,
-#                            # flat_fees_vs_actual_trans,
-#                            # transparent_tx_that_would_fail_heat_map
-#                          nuttycom_fees_vs_actual,
-#                          action_fees_vs_actual,
-#                          nuttycom_fees_vs_actual_trans,
-#                          action_fees_vs_actual_trans,
-#                          greg_fees_vs_actual,
-#                          greg_fees_vs_actual_trans,
-#                          # historical_fees,
-#                          # transparent_input_histogram,
-#                        ]):
-#     print(analysis)
-# print(datetime.datetime.now() - start)
-
-# rerunning old data …
-# start = datetime.datetime.now()
-# for analysis in a.analyze_blocks(make_weekly_range(206, 1),
-#                        [ actual_fees,
-#                          proposed_fees,
-#                        ]):
-#     print(analysis)
-# print(datetime.datetime.now() - start)
-
-nuttycom_fees_vs_10k2 = Analysis(
-    "transactions that wouldn't pay more under the new model, by day, using nuttycom's pricing",
-    lambda _c, _b, tx: is_not_coinbase(tx) and not is_sandblasting(tx),
-    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
-      (lambda _c, _b, tx: 250 * max(4, count_ins_and_outs(tx)) <= 10_000, sum)
-    ],
-    lambda *_: 1
-)
-
-action_fees_vs_10k2 = Analysis(
-    "transactions that wouldn't pay more under the new model, by day, using actions",
-    lambda _c, _b, tx: is_not_coinbase(tx) and not is_sandblasting(tx),
-    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
-      (lambda _c, _b, tx: 500 * max(3, count_actions(tx)) <= 10_000, sum)
-    ],
-    lambda *_: 1
-)
-
-latest_fees_vs_10k2 = Analysis(
-    "transactions that wouldn't pay more under the new model, by day, using actions",
-    lambda _c, _b, tx: is_not_coinbase(tx) and not is_sandblasting(tx),
-    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
-      (lambda _c, _b, tx: 1000 * max(2, count_actions(tx)) <= 10_000, sum)
-    ],
-    lambda *_: 1
-)
-
-def vin_value(vin):
-    if 'valueSat' in vin:
-        return vin['valueSat']
-    else:
-        return 0
-
-def tx_pool_movement(tx):
-    transparent = sum(vout['valueZat'] for vout in tx['vout']) - sum([vin_value(vin) for vin in tx['vin']])
-    sprout = sum([vjoinsplit['vpub_newZat'] - vjoinsplit['vpub_oldZat'] for vjoinsplit in tx['vjoinsplit']])
-    sapling = - tx['valueBalanceZat']
-    if 'orchard' in tx:
-        orchard = - tx['orchard']['valueBalanceZat']
-    else:
-        orchard = 0
-    # print("(%d, %d, %d, %d) – %d -> %d" % (transparent, sprout, sapling, orchard, count_inputs(tx), count_outputs(tx)))
-    return (transparent, sprout, sapling, orchard)
-
-pool_movement = Analysis(
-    "how are funds moving between pools?",
-    lambda *_: True,
-    [(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), lambda vals: np.sum(np.array(vals), 0))],
-    lambda _c, _b, tx: tx_pool_movement(tx)
-)
-
-
-start = datetime.datetime.now()
-for analysis in a.analyze_blocks(recent_range,
-                       [ pool_movement
-                       ]):
-    print(analysis)
-print(datetime.datetime.now() - start)
-
-# start = datetime.datetime.now()
-# for analysis in a.analyze_blocks(pre_sandblasting_range,
-#                        [ tx_below_pofm_threshold,
-#                          tx_below_pofm_threshold_5,
-#                          tx_below_pofm_threshold_max,
-#                          tx_below_pofm_threshold_ins,
-#                          tx_below_pofm_threshold_abs,
-#                          outs_below_pofm_threshold_abs,
-#                          arity_heat_map,
-#                          minimum_pofm_fees_nuttycom,
-#                          minimum_pofm_fees_nuttycom2,
-#                        ]):
-#     print(analysis)
-# print(datetime.datetime.now() - start)
diff --git a/analysis/examples.py b/analysis/examples.py
new file mode 100755
index 0000000..deff6ee
--- /dev/null
+++ b/analysis/examples.py
@@ -0,0 +1,600 @@
+#!/usr/bin/env python3
+# Copyright (c) 2022 The Zcash developers
+# Distributed under the MIT software license, see the accompanying
+# file COPYING or https://www.opensource.org/licenses/mit-license.php .
+"""Simple Transaction Analysis
+
+This contains a class, `Analyzer`, for defining analyses of the blocks and
+transactions on the blockchain. It also exposes a function
+`analyze_blocks`, which handles applying multiple analyses simultaneously over
+some common range of blocks.
+"""
+
+import datetime
+import itertools
+import math
+import numpy as np
+from statistics import mean
+import sys
+
+from analyze import Analysis, Analyzer
+from helpers import *
+
+### TODO: Get host/port from config
+if len(sys.argv) > 1:
+    connection_string = sys.argv[1]
+else:
+    raise Exception(
+        "%s needs to be provided a connection string, like \"http://user:pass@localhost:port\"."
+        % (sys.argv[0],))
+
+blocks_per_hour = 48 # half this before NU2?
+
+# start about a month before sandblasting
+start_range = blocks_per_hour * 24 * 7 * 206
+
+### Requested Statistics
+
+def storeAnchor(pool, cache, block):
+    """
+    Caches the block height as the value for its anchor hash.
+    """
+    try:
+        final_root = block[pool]
+        try:
+            cache[final_root]
+        except KeyError:
+            cache[final_root] = block['height']
+    except KeyError:
+        None
+
+    return cache
+
+# "how old of anchors are people picking"
+# --- https://zcash.slack.com/archives/CP6SKNCJK/p1660103126252979
+anchor_age_orchard = Analysis(
+    "how old of anchors are people picking (for orchard)",
+    lambda _c, _b, tx: is_orchard_tx(tx),
+    [(orchard_anchorage, sum)],
+    lambda *_: 1,
+    ({}, lambda c, b: storeAnchor('finalorchardroot', c, b)),
+    blocks_per_hour * 24
+)
+
+anchor_age_sapling = Analysis(
+    "how old of anchors are people picking (for sapling)",
+    lambda _c, _b, tx: is_saplingspend_tx(tx),
+    [(sapling_anchorage, sum)],
+    lambda *_: 1,
+    ({}, lambda c, b: storeAnchor('finalsaplingroot', c, b)),
+    blocks_per_hour * 24
+)
+
+# "what's the distribution of expiry height deltas"
+# --- https://zcash.slack.com/archives/CP6SKNCJK/p1660103126252979
+expiry_height_deltas = Analysis(
+    "distribution of expiry height deltas",
+    lambda *_: True,
+    [(lambda _, b, t: expiry_height_delta(b, t), sum)],
+    lambda *_: 1
+)
+
+tx_type_with_long_expiry = Analysis(
+    "types of tx with expiries longer than about a month",
+    lambda _, b, t: expiry_height_delta(b, t) >= blocks_per_hour * 24 * 30,
+    [# (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
+     #  identity),
+     (lambda _c, _b, tx: tx_type(tx), sum)],
+    lambda *_: 1
+)
+
+# "does anyone use locktime"
+# --- https://zcash.slack.com/archives/CP6SKNCJK/p1660103126252979
+locktime_usage = Analysis(
+    "proportion of tx using locktime",
+    lambda *_: True,
+    [(lambda *_: 1,
+      lambda d: dict(d)[True] / (dict(d)[False] + dict(d)[True])),
+     (lambda _c, _b, tx: tx['locktime'] != 0, sum)],
+    lambda *_: 1
+)
+
+# "I'm seeing a slightly different pattern to the sandblasting transactions,
+#  unless I've just missed this before. The transactions I've looked at recently
+#  have had > 400 sapling outputs. Has this been the case before and I just
+#  missed it? I thought primarily these transactions had slightly over 100
+#  outputs in most cases."
+# --- https://zcash.slack.com/archives/CP6SKNCJK/p1660195664187769
+
+
+# "Calculate the POFM threshold for historical transactions on-chain and
+#  calculate what proportion of those transactions would fall below the POFM
+#  threshold"
+# --- https://docs.google.com/document/d/18wtGFCB2N4FO7SoqDPnEgVudAMlCArHMz0EwhE1HNPY/edit
+tx_below_pofm_threshold = Analysis(
+    "rate of transactions below POFM threshold",
+    lambda _c, _b, tx: is_not_coinbase(tx),
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
+       lambda d: dict(d)[False] / (dict(d)[False] + dict(d)[True])),
+      (lambda _c, _b, tx: count_ins_and_outs(tx) - 4 > 0, sum)
+    ],
+    lambda *_: 1
+)
+
+tx_below_pofm_threshold_abs = Analysis(
+    "transactions below POFM threshold",
+    lambda _c, _b, tx: is_not_coinbase(tx),
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
+       lambda d: (dict(d)[False], dict(d)[True])),
+      (lambda _c, _b, tx: count_ins_and_outs(tx) - 4 > 0, sum)
+    ],
+    lambda *_: 1
+)
+
+outs_below_pofm_threshold_abs = Analysis(
+    "outputs below POFM threshold",
+    lambda _c, _b, tx: is_not_coinbase(tx),
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
+       lambda d: (dict(d)[False], dict(d)[True])),
+      (lambda _c, _b, tx: count_ins_and_outs(tx) - 4 > 0, sum)
+    ],
+    lambda _c, _b, tx: count_outputs(tx)
+)
+
+tx_below_pofm_threshold_5 = Analysis(
+    "rate of transactions below POFM threshold with a grace window of 5",
+    lambda _c, _b, tx: is_not_coinbase(tx),
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
+       lambda d: dict(d)[False] / (dict(d)[False] + dict(d)[True])),
+      (lambda _c, _b, tx: count_ins_and_outs(tx) - 5 > 0, sum)
+    ],
+    lambda *_: 1
+)
+
+
+tx_below_pofm_threshold_max = Analysis(
+    "rate of transactions below POFM threshold with max",
+    lambda _c, _b, tx: is_not_coinbase(tx),
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
+       lambda d: dict(d)[False] / (dict(d)[False] + dict(d)[True])),
+      (lambda _c, _b, tx: count_actions(tx) - 4 > 0, sum)
+    ],
+    lambda *_: 1
+)
+
+tx_below_pofm_threshold_ins = Analysis(
+    "rate of transactions below POFM threshold only on inputs",
+    lambda _c, _b, tx: is_not_coinbase(tx),
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
+       lambda d: dict(d)[False] / (dict(d)[False] + dict(d)[True])),
+      (lambda _c, _b, tx: count_inputs(tx) - 4 > 0, sum)
+    ],
+    lambda *_: 1
+)
+
+### Other Examples
+
+tx_per_day = Analysis(
+    "count transactions per day (treating block 0 as midnight ZST)",
+    lambda *_: True,
+    [(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), sum)],
+    lambda *_: 1
+)
+
+mean_tx_per_day = Analysis(
+    "mean transactions per day, by block",
+    lambda *_: True,
+    [(lambda _c, block, _t: int(block['height'] % (blocks_per_hour * 24)), lambda d: mean([x[1] for x in d])),
+     (lambda _c, block, _t: int(block['height']/(blocks_per_hour * 24)), sum)
+    ],
+    lambda *_: 1
+)
+
+mean_inout_per_tx_per_day = Analysis(
+    "mean inputs, outputs per transaction per day, by block",
+    lambda *_: True,
+    [(lambda _c, block, _t: int(block['height'] % (blocks_per_hour * 24)), lambda d: mean(itertools.chain(d.values()))),
+     (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity)
+    ],
+    lambda _c, _b, tx: (count_inputs(tx), count_outputs(tx))
+)
+
+mean_inout_per_tx = Analysis(
+    "mean inputs, outputs per transaction, by week",
+    lambda *_: True,
+    [ ( lambda _c, block, _t: int(block['height']/(blocks_per_hour * 24 * 7)),
+        lambda d: (mean([x[0] for x in d]), mean([x[1] for x in d]))
+       )
+     ],
+    lambda _c, _b, tx: (count_inputs(tx), count_outputs(tx))
+)
+
+minimum_pofm_fees_nuttycom = Analysis(
+    "distribution of fees in ZAT, by day, using nuttycom's pricing",
+    lambda *_: True,
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: math.ceil(math.log((1000 + 250 * max(0, count_ins_and_outs(tx) - 4)) / 1000, 2)), sum)
+    ],
+    lambda *_: 1
+)
+
+minimum_pofm_fees_nuttycom = Analysis(
+    "distribution of fees in ZAT, by day, using nuttycom's pricing",
+    lambda _c, _b, tx: is_not_coinbase(tx),
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: math.ceil(math.log((1000 + 250 * max(0, count_ins_and_outs(tx) - 4)) / 1000, 2)), sum)
+    ],
+    lambda *_: 1
+)
+
+minimum_pofm_fees_nuttycom2 = Analysis(
+    "distribution of fees in ZAT, by day, using nuttycom's changed pricing",
+    lambda _c, _b, tx: is_not_coinbase(tx),
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: math.ceil(math.log((1000 + 200 * max(0, count_ins_and_outs(tx) - 5)) / 1000, 2)), sum)
+    ],
+    lambda *_: 1
+)
+
+def meh_fees(tx):
+    fee = tx['feePaid']
+    if fee == 0:
+        return -1
+    else:
+        result = math.ceil(math.log(tx['feePaid'], 2))
+        return result
+
+actual_fees = Analysis(
+    "actual fees",
+    lambda _c, _b, tx: is_not_coinbase(tx),
+    [ (lambda _c, _b, tx: meh_fees(tx), sum)
+    ],
+    lambda *_: 1
+)
+
+proposed_fees = Analysis(
+    "",
+    lambda _c, _b, tx: is_not_coinbase(tx),
+    [ (lambda _c, _b, tx: math.ceil(math.log(5000 * max(2, count_actions(tx)), 2)), sum)
+    ],
+    lambda *_: 1
+)
+
+arity_heat_map = Analysis(
+    "inputs vs outputs",
+    lambda *_: True,
+    [(lambda _c, _b, tx: min(100, count_outputs(tx)), identity),
+     (lambda _c, _b, tx: min(100, count_inputs(tx)), sum)],
+    lambda *_: 1
+)
+
+input_size_dist = Analysis(
+    "distribution of input sizes",
+    lambda *_: True,
+    [(lambda _c, _b, tx: [len(x['scriptSig']['hex']) for x in tx['vin']], identity)],
+    lambda *_: 1,
+)
+
+# very_high_inout_tx = Analysis(
+#     "tx with very high in/out counts",
+#     lambda _c, _b, tx: count_ins_and_outs(tx) > 100,
+#     [(lambda _c, _b, tx: (count_inputs(tx), count_outputs(tx)), identity)],
+#     lambda _c, _b, tx: tx['txid']
+# )
+
+very_high_inout_tx = Analysis(
+    "tx with very high in/out counts",
+    lambda _c, _b, tx: count_ins_and_outs(tx) > 5000,
+    [],
+    lambda _c, _b, tx: (tx['txid'], count_ins_and_outs(tx))
+)
+
+def track_utxos(cache, block):
+    for tx in block[tx]:
+        for vin in tx['vin']:
+            del cache[(vin['txid'], vin['vout'])]
+        for vout in tx['vout']:
+            cache[(tx['txid'], vout['n'])] = vout['valueZat']
+    return cache
+
+utxo_distribution = Analysis(
+    "how many UTXOs and how big are they?",
+    lambda *_: True,
+    [(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)),
+      lambda caches: sum([caches[-1][k] for k in caches[-1]]))],
+    lambda cache, _b, _t: cache,
+    ({}, track_utxos),
+    1_000_000_000 # back to block 0, TODO: should be able to say this explicitly
+)
+
+def is_sandblasting(tx):
+    return get_shielded_outputs(tx) > 300
+
+sandblasters_per_day = Analysis(
+    "how many transactions have >300 Sapling outputs each day?",
+    lambda _c, _b, tx: is_sandblasting(tx),
+    [(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), sum)],
+    lambda *_: 1
+)
+
+sandblasters_and_more_per_day = Analysis(
+    "how many transactions have >300 outputs each day?",
+    lambda _c, _b, tx: count_outputs(tx) > 300,
+    [(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), sum)],
+    lambda *_: 1
+)
+
+sandblaster_average_outputs_per_day = Analysis(
+    "how many outputs do sandblasters have?",
+    lambda _c, _b, tx: is_sandblasting(tx),
+    [(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), (lambda txs: sum(txs) / len(txs)))],
+    lambda _c, _b, tx: count_outputs(tx)
+)
+
+nuttycom_fees_vs_actual = Analysis(
+    "transactions that wouldn't pay more under the new model, by day, using nuttycom's pricing",
+    lambda *_: True,
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: getFeeDiff(250 * max(4, count_ins_and_outs(tx)), tx), sum)
+    ],
+    lambda *_: 1
+)
+
+action_fees_vs_actual = Analysis(
+    "transactions that wouldn't pay more under the new model, by day, using actions",
+    lambda *_: True,
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: getFeeDiff(500 * max(3, count_actions(tx)), tx), sum)
+    ],
+    lambda *_: 1
+)
+
+nuttycom_fees_vs_actual_trans = Analysis(
+    "transparent transactions that wouldn't pay more under the new model, by day, using nuttycom's pricing",
+    lambda _c, _b, tx: tx_type(tx) == 't-t',
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: getFeeDiff(250 * max(4, count_ins_and_outs(tx)), tx), sum)
+    ],
+    lambda *_: 1
+)
+
+action_fees_vs_actual_trans = Analysis(
+    "transparent transactions that wouldn't pay more under the new model, by day, using actions",
+    lambda _c, _b, tx: tx_type(tx) == 't-t',
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: getFeeDiff(500 * max(3, count_actions(tx)), tx), sum)
+    ],
+    lambda *_: 1
+)
+
+greg_fees_vs_actual = Analysis(
+    "transactions that wouldn't pay more under the new model, by day, using actions",
+    lambda *_: True,
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: getFeeDiff(250 * max(4, count_actions(tx)), tx), sum)
+    ],
+    lambda *_: 1
+)
+
+greg_fees_vs_actual_trans = Analysis(
+    "transparent transactions that wouldn't pay more under the new model, by day, using actions",
+    lambda _c, _b, tx: tx_type(tx) == 't-t',
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: getFeeDiff(250 * max(4, count_actions(tx)), tx), sum)
+    ],
+    lambda *_: 1
+)
+
+latest_fees_vs_actual = Analysis(
+    "transactions that wouldn't pay more under the new model, by day, using actions",
+    lambda *_: True,
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: getFeeDiff(1000 * max(2, count_actions(tx)), tx), sum)
+    ],
+    lambda *_: 1
+)
+
+latest_fees_vs_actual_trans = Analysis(
+    "transparent transactions that wouldn't pay more under the new model, by day, using actions",
+    lambda _c, _b, tx: tx_type(tx) == 't-t',
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: getFeeDiff(1000 * max(2, count_actions(tx)), tx), sum)
+    ],
+    lambda *_: 1
+)
+
+flat_fees_vs_actual = Analysis(
+    "transactions that would pass the original 10k ZAT fee, by day",
+    lambda *_: True,
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: getFeeDiff(10_000, tx), sum)
+    ],
+    lambda *_: 1
+)
+
+shielding_tx_heat_map = Analysis(
+    "shielding tx",
+    lambda _c, _b, tx: is_not_coinbase(tx) and (tx_type(tx) == 't-z' or tx_type(tx) == 'm-z'),
+    [(lambda _c, _b, tx: min(100, count_outputs(tx)), identity),
+     (lambda _c, _b, tx: min(100, count_inputs(tx)), sum)],
+    lambda *_: 1
+)
+
+shielding_tx_actions = Analysis(
+    "shielding tx",
+    lambda _c, _b, tx: is_not_coinbase(tx) and (tx_type(tx) == 't-z' or tx_type(tx) == 'm-z'),
+    [(lambda _c, _b, tx: min(100, count_actions(tx)), sum)],
+    lambda *_: 1
+)
+
+fees_from_sandblasting = Analysis(
+    "fees collected from sandblasting",
+    lambda _c, _b, tx: is_sandblasting(tx),
+    [(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), sum)],
+    lambda _c, _b, tx: 500 * max(2, count_actions(tx))
+)
+
+flat_fees_vs_actual_trans = Analysis(
+    "transparent transactions that would pass the original 10k ZAT fee, by day",
+    lambda _c, _b, tx: tx_type(tx) == 't-t',
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: getFeeDiff(10_000, tx), sum)
+    ],
+    lambda *_: 1
+)
+
+transparent_tx_that_would_fail_heat_map = Analysis(
+    "heat map of transparent tx that would fail under `500 * max(3, |actions|)`",
+    lambda _c, _b, tx: tx_type(tx) == 't-t' and getFeeDiff(500 * max(3, count_actions(tx)), tx) == False,
+    [(lambda _c, _b, tx: min(100, count_outputs(tx)), identity),
+     (lambda _c, _b, tx: min(100, count_inputs(tx)), sum)],
+    lambda *_: 1
+)
+
+historical_fees = Analysis(
+    "histogram of actual fees paid",
+    lambda _c, _b, tx: is_not_coinbase(tx),
+    [(lambda _c, _b, tx: check_fee_paid(tx), sum)],
+    lambda *_: 1
+)
+
+arity_heat_map = Analysis(
+    "inputs vs outputs",
+    lambda _c, _b, tx: is_not_coinbase(tx),
+    [(lambda _c, _b, tx: min(100, count_outputs(tx)), identity),
+     (lambda _c, _b, tx: min(100, count_inputs(tx)), sum)],
+    lambda *_: 1
+)
+
+transparent_input_histogram = Analysis(
+    "how many transparent inputs do txs have?",
+    lambda _c, _b, tx: is_not_coinbase(tx),
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: min(100, len(tx['vin'])), sum)],
+    lambda *_: 1
+)
+
+nuttycom_fees_vs_10k = Analysis(
+    "transactions that wouldn't pay more under the new model, by day, using nuttycom's pricing",
+    lambda _c, _b, tx: is_not_coinbase(tx),
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: 250 * max(4, count_ins_and_outs(tx)) <= 10_000, sum)
+    ],
+    lambda *_: 1
+)
+
+action_fees_vs_10k = Analysis(
+    "transactions that wouldn't pay more under the new model, by day, using actions",
+    lambda _c, _b, tx: is_not_coinbase(tx),
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: 500 * max(3, count_actions(tx)) <= 10_000, sum)
+    ],
+    lambda *_: 1
+)
+
+latest_fees_vs_10k = Analysis(
+    "transactions that wouldn't pay more under the new model, by day, using actions",
+    lambda _c, _b, tx: is_not_coinbase(tx),
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: 1000 * max(2, count_actions(tx)) <= 10_000, sum)
+    ],
+    lambda *_: 1
+)
+
+
+a = Analyzer(connection_string)
+
+def make_weekly_range(starting_week, number_of_weeks):
+    start_of_range = blocks_per_hour * 24 * 7 * starting_week
+    end_of_range = start_of_range + (blocks_per_hour * 24 * 7 * number_of_weeks)
+    return range(start_of_range, end_of_range)
+
+
+# start about a month before sandblasting, overlapping with it
+pre_sandblasting_range = make_weekly_range(206, 12)
+
+# well into sandblasting
+recent_range = make_weekly_range(220, 1)
+
+start = datetime.datetime.now()
+for analysis in a.analyze_blocks(pre_sandblasting_range,
+                       [ # sandblaster_average_outputs_per_day,
+                         # flat_fees_vs_actual,
+                         # flat_fees_vs_actual_trans,
+                         # transparent_tx_that_would_fail_heat_map
+                         nuttycom_fees_vs_actual,
+                         action_fees_vs_actual,
+                         nuttycom_fees_vs_actual_trans,
+                         action_fees_vs_actual_trans,
+                         greg_fees_vs_actual,
+                         greg_fees_vs_actual_trans,
+                         # historical_fees,
+                         # transparent_input_histogram,
+                       ]):
+    print(analysis)
+print(datetime.datetime.now() - start)
+
+# rerunning old data …
+start = datetime.datetime.now()
+for analysis in a.analyze_blocks(make_weekly_range(206, 1),
+                       [ actual_fees,
+                         proposed_fees,
+                       ]):
+    print(analysis)
+print(datetime.datetime.now() - start)
+
+nuttycom_fees_vs_10k2 = Analysis(
+    "transactions that wouldn't pay more under the new model, by day, using nuttycom's pricing",
+    lambda _c, _b, tx: is_not_coinbase(tx) and not is_sandblasting(tx),
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: 250 * max(4, count_ins_and_outs(tx)) <= 10_000, sum)
+    ],
+    lambda *_: 1
+)
+
+action_fees_vs_10k2 = Analysis(
+    "transactions that wouldn't pay more under the new model, by day, using actions",
+    lambda _c, _b, tx: is_not_coinbase(tx) and not is_sandblasting(tx),
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: 500 * max(3, count_actions(tx)) <= 10_000, sum)
+    ],
+    lambda *_: 1
+)
+
+latest_fees_vs_10k2 = Analysis(
+    "transactions that wouldn't pay more under the new model, by day, using actions",
+    lambda _c, _b, tx: is_not_coinbase(tx) and not is_sandblasting(tx),
+    [ (lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), identity),
+      (lambda _c, _b, tx: 1000 * max(2, count_actions(tx)) <= 10_000, sum)
+    ],
+    lambda *_: 1
+)
+
+pool_movement = Analysis(
+    "how are funds moving between pools?",
+    lambda *_: True,
+    [(lambda _c, block, _t: int(block['height'] / (blocks_per_hour * 24)), lambda vals: np.sum(np.array(vals), 0))],
+    lambda _c, _b, tx: tx_pool_movement(tx)
+)
+
+start = datetime.datetime.now()
+for analysis in a.analyze_blocks(recent_range,
+                       [ pool_movement
+                       ]):
+    print(analysis)
+print(datetime.datetime.now() - start)
+
+start = datetime.datetime.now()
+for analysis in a.analyze_blocks(pre_sandblasting_range,
+                       [ tx_below_pofm_threshold,
+                         tx_below_pofm_threshold_5,
+                         tx_below_pofm_threshold_max,
+                         tx_below_pofm_threshold_ins,
+                         tx_below_pofm_threshold_abs,
+                         outs_below_pofm_threshold_abs,
+                         arity_heat_map,
+                         minimum_pofm_fees_nuttycom,
+                         minimum_pofm_fees_nuttycom2,
+                       ]):
+    print(analysis)
+print(datetime.datetime.now() - start)
diff --git a/analysis/helpers.py b/analysis/helpers.py
new file mode 100644
index 0000000..c516c00
--- /dev/null
+++ b/analysis/helpers.py
@@ -0,0 +1,161 @@
+# Copyright (c) 2022 The Zcash developers
+# Distributed under the MIT software license, see the accompanying
+# file COPYING or https://www.opensource.org/licenses/mit-license.php .
+"""Useful function for transaction analyses
+
+This is a collection of functions that make it easier to write new transaction
+analyses.
+"""
+
+import datetime
+import itertools
+import math
+
+def identity(x):
+    return x
+
+def get_shielded_spends(tx):
+    try:
+        shielded_spends = len(tx['vShieldedSpend'])
+    except KeyError:
+        shielded_spends = 0
+
+    return shielded_spends
+
+def get_shielded_outputs(tx):
+    try:
+        shielded_outputs = len(tx['vShieldedOutput'])
+    except KeyError:
+        shielded_outputs = 0
+
+    return shielded_outputs
+
+def get_orchard_actions(tx):
+    try:
+        orchard_actions = len(tx['orchard']['actions'])
+    except KeyError:
+        orchard_actions = 0
+
+    return orchard_actions
+
+def count_inputs(tx):
+    return len(tx['vin']) + 2 * len(tx['vjoinsplit']) + get_shielded_spends(tx) + get_orchard_actions(tx)
+
+def count_outputs(tx):
+    return len(tx['vout']) + 2 * len(tx['vjoinsplit']) + get_shielded_outputs(tx) + get_orchard_actions(tx)
+
+def count_ins_and_outs(tx):
+    return (len(tx['vin'])
+            + len(tx['vout'])
+            + get_shielded_spends(tx)
+            + get_shielded_outputs(tx)
+            + 2 * len(tx['vjoinsplit'])
+            + 2 * get_orchard_actions(tx))
+
+def count_actions(tx):
+    return (max(len(tx['vin']), len(tx['vout']))
+            + max(get_shielded_spends(tx), get_shielded_outputs(tx))
+            + 2 * len(tx['vjoinsplit'])
+            + get_orchard_actions(tx))
+
+def expiry_height_delta(block, tx):
+    """
+    Returns -1 if there's no expiry, also returns approximately 35,000 (the
+    number of blocks in a month) if the expiry is beyond 1 month.
+    """
+    month = blocks_per_hour * 24 * 30
+    try:
+        expiry_height = tx['expiryheight']
+        if expiry_height == 0:
+            return -1
+        elif tx['expiryheight'] - block['height'] > month:
+            return month
+        else:
+            return tx['expiryheight'] - block['height']
+    except KeyError:
+        # `tx['expiryheight']` is ostensibly an optional field, but it seems
+        # like `0` is what tends to be used for "don't expire", so this case
+        # generally isn't hit.
+        return -1
+
+def tx_type(tx):
+    """
+    Categorizes all tx into one of nine categories: (t)ransparent, (z)shielded,
+    or (m)ixed for both inputs and outputs. So some possible results are "t-t",
+    "t-z", "m-z", etc.
+    """
+    if tx['vjoinsplit'] or get_shielded_spends(tx) != 0 or get_orchard_actions(tx) != 0:
+        if tx['vin']:
+            ins = "m"
+        else:
+            ins = "z"
+    else:
+        ins = "t"
+
+    if tx['vjoinsplit'] or get_shielded_outputs(tx) != 0 or get_orchard_actions(tx) != 0:
+        if tx['vout']:
+            outs = "m"
+        else:
+            outs = "z"
+    else:
+        outs = "t"
+
+    return ins + "-" + outs
+
+def is_orchard_tx(tx):
+    try:
+        return tx['orchard']['actions']
+    except KeyError:
+        return False
+
+def is_saplingspend_tx(tx):
+    try:
+        return tx['vShieldedSpend']
+    except KeyError:
+        return False
+
+def orchard_anchorage(cache, block, tx):
+    """
+    Returns -1 if there is no anchor
+    """
+    try:
+        return block['height'] - cache[tx['orchard']['anchor']]
+    except KeyError:
+        return -1
+
+def sapling_anchorage(cache, block, tx):
+    """
+    Returns -1 if there is no anchor
+    """
+    try:
+        return block['height'] - cache[tx['vShieldedSpend'][0]['anchor']]
+    except KeyError:
+        return -1
+
+def is_not_coinbase(tx):
+    return 'feePaid' in tx
+
+# NB: This requires zcashd to be running with `experimentalfeatures=1`,
+#    `txindex=1` and `insightexplorer=1`.
+def getFeeDiff(proposedFee, tx):
+    try:
+        return proposedFee <= tx['feePaid']
+    except KeyError:
+        return -1
+
+def vin_value(vin):
+    if 'valueSat' in vin:
+        return vin['valueSat']
+    else:
+        return 0
+
+def tx_pool_movement(tx):
+    transparent = sum(vout['valueZat'] for vout in tx['vout']) - sum([vin_value(vin) for vin in tx['vin']])
+    sprout = sum([vjoinsplit['vpub_newZat'] - vjoinsplit['vpub_oldZat'] for vjoinsplit in tx['vjoinsplit']])
+    sapling = - tx['valueBalanceZat']
+    if 'orchard' in tx:
+        orchard = - tx['orchard']['valueBalanceZat']
+    else:
+        orchard = 0
+    # print("(%d, %d, %d, %d) – %d -> %d" % (transparent, sprout, sapling, orchard, count_inputs(tx), count_outputs(tx)))
+    return (transparent, sprout, sapling, orchard)

From d50c6b0268b86838349e744c12891bdec69167df Mon Sep 17 00:00:00 2001
From: Greg Pfeil <greg@z.cash>
Date: Mon, 10 Oct 2022 17:51:23 -0600
Subject: [PATCH 4/4] Minor cleanup

- renamed `a` to `analyzer`
- moved `blocks_per_hour` to helpers
---
 analysis/examples.py | 15 +++++----------
 analysis/helpers.py  |  2 ++
 2 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/analysis/examples.py b/analysis/examples.py
index deff6ee..5e135b9 100755
--- a/analysis/examples.py
+++ b/analysis/examples.py
@@ -28,10 +28,7 @@ else:
         "%s needs to be provided a connection string, like \"http://user:pass@localhost:port\"."
         % (sys.argv[0],))
 
-blocks_per_hour = 48 # half this before NU2?
-
-# start about a month before sandblasting
-start_range = blocks_per_hour * 24 * 7 * 206
+analyzer = Analyzer(connection_string)
 
 ### Requested Statistics
 
@@ -502,8 +499,6 @@ latest_fees_vs_10k = Analysis(
 )
 
 
-a = Analyzer(connection_string)
-
 def make_weekly_range(starting_week, number_of_weeks):
     start_of_range = blocks_per_hour * 24 * 7 * starting_week
     end_of_range = start_of_range + (blocks_per_hour * 24 * 7 * number_of_weeks)
@@ -517,7 +512,7 @@ pre_sandblasting_range = make_weekly_range(206, 12)
 recent_range = make_weekly_range(220, 1)
 
 start = datetime.datetime.now()
-for analysis in a.analyze_blocks(pre_sandblasting_range,
+for analysis in analyzer.analyze_blocks(pre_sandblasting_range,
                        [ # sandblaster_average_outputs_per_day,
                          # flat_fees_vs_actual,
                          # flat_fees_vs_actual_trans,
@@ -536,7 +531,7 @@ print(datetime.datetime.now() - start)
 
 # rerunning old data …
 start = datetime.datetime.now()
-for analysis in a.analyze_blocks(make_weekly_range(206, 1),
+for analysis in analyzer.analyze_blocks(make_weekly_range(206, 1),
                        [ actual_fees,
                          proposed_fees,
                        ]):
@@ -578,14 +573,14 @@ pool_movement = Analysis(
 )
 
 start = datetime.datetime.now()
-for analysis in a.analyze_blocks(recent_range,
+for analysis in analyzer.analyze_blocks(recent_range,
                        [ pool_movement
                        ]):
     print(analysis)
 print(datetime.datetime.now() - start)
 
 start = datetime.datetime.now()
-for analysis in a.analyze_blocks(pre_sandblasting_range,
+for analysis in analyzer.analyze_blocks(pre_sandblasting_range,
                        [ tx_below_pofm_threshold,
                          tx_below_pofm_threshold_5,
                          tx_below_pofm_threshold_max,
diff --git a/analysis/helpers.py b/analysis/helpers.py
index c516c00..9e6cfe9 100644
--- a/analysis/helpers.py
+++ b/analysis/helpers.py
@@ -11,6 +11,8 @@ import datetime
 import itertools
 import math
 
+blocks_per_hour = 48 # half this before NU2?
+
 def identity(x):
     return x