# developers/zcash-issue-dag.py

#!/usr/bin/env python3

# ZenHub issue dependency graph generator for the ECC core team.
# Author: jack@electriccoin.co
# Last updated: 2021-05-07

import networkx as nx

from str2bool import str2bool as strtobool
import mimetypes
import os
from textwrap import wrap
from urllib.parse import urlparse

from sgqlc.endpoint.http import HTTPEndpoint
from sgqlc.operation import Operation
from github_schema import github_schema as schema
from zenhub_schema import zenhub_schema

# API credentials, read from the environment (None when unset).
GITHUB_TOKEN = os.getenv('GITHUB_TOKEN')
ZENHUB_TOKEN = os.getenv('ZENHUB_TOKEN')

# Name of the repo set to render; must be a key of REPO_SETS. Defaults to 'core'.
DAG_VIEW = os.getenv('DAG_VIEW', 'core')

# Each *_REPOS table below maps a numeric GitHub repository ID to an
# (owner, name) pair. Tables built with `**` are unions of the tables they
# unpack, so a repository may appear in several views.
#
# To get the id of a repo, see <https://stackoverflow.com/a/47223479/393146>.

# Repositories for the halo2 view.
HALO2_REPOS = {
    290019239: ('zcash', 'halo2'),
    344239327: ('zcash', 'pasta_curves'),
}

# Repositories for the core view (includes everything in HALO2_REPOS).
CORE_REPOS = {
    26987049: ('zcash', 'zcash'),
    47279130: ('zcash', 'zips'),
    48303644: ('zcash', 'incrementalmerkletree'),
    85334928: ('zcash', 'librustzcash'),
    133857578: ('zcash-hackworks', 'zcash-test-vectors'),
    111058300: ('zcash', 'sapling-crypto'),
    **HALO2_REPOS,
    305835578: ('zcash', 'orchard'),
}

# Repositories for the tfl view.
TFL_REPOS = {
    642135348: ('Electric-Coin-Company', 'tfl-book'),
    725179873: ('Electric-Coin-Company', 'zebra-tfl'),
    695805989: ('zcash', 'simtfl'),
}

# Repositories for the wallet-android view.
ANDROID_REPOS = {
    390808594: ('Electric-Coin-Company', 'zashi-android'),
    151763639: ('Electric-Coin-Company', 'zcash-android-wallet-sdk'),
    719178328: ('Electric-Coin-Company', 'zashi'),
}

# Repositories for the wallet-ios view. The 'zashi' repository is listed in
# both this table and ANDROID_REPOS.
IOS_REPOS = {
    387551125: ('Electric-Coin-Company', 'zashi-ios'),
    185480114: ('Electric-Coin-Company', 'zcash-swift-wallet-sdk'),
    270825987: ('Electric-Coin-Company', 'MnemonicSwift'),
    439137887: ('Electric-Coin-Company', 'zcash-light-client-ffi'),
    719178328: ('Electric-Coin-Company', 'zashi'),
}

# Repositories for the wallet view (includes both mobile tables).
WALLET_REPOS = {
    85334928: ('zcash', 'librustzcash'),
    159714694: ('zcash', 'lightwalletd'),
    **ANDROID_REPOS,
    **IOS_REPOS,
}

# Repositories for the ecc view: core, tfl and wallet plus 'infrastructure'.
ECC_REPOS = {
    **CORE_REPOS,
    **TFL_REPOS,
    **WALLET_REPOS,
    65419597: ('Electric-Coin-Company', 'infrastructure'),
}

# Repositories for the zf view.
ZF_REPOS = {
    205255683: ('ZcashFoundation', 'zebra'),
    225479018: ('ZcashFoundation', 'redjubjub'),
    235651437: ('ZcashFoundation', 'ed25519-zebra'),
    279422254: ('ZcashFoundation', 'zcash_script'),
}

# Repositories for the zf-frost view.
ZF_FROST_REPOS = {
    437862440: ('ZcashFoundation', 'frost'),
}

# Repositories for the zcashd-deprecation view.
ZCASHD_DEPRECATION_REPOS = {
    26987049: ('zcash', 'zcash'),
    47279130: ('zcash', 'zips'),
    85334928: ('zcash', 'librustzcash'),
    863610221: ('zcash', 'wallet'),
    159714694: ('zcash', 'lightwalletd'),
}

# Repositories shared by the sprout-deprecation and transparent-deprecation
# views: the union of the core and wallet tables.
POOL_DEPRECATION_REPOS = {
    **CORE_REPOS,
    **WALLET_REPOS,
}

# Maps each accepted DAG_VIEW value to its repository table.
REPO_SETS = {
    'core': CORE_REPOS,
    'halo2': HALO2_REPOS,
    'tfl': TFL_REPOS,
    'wallet': WALLET_REPOS,
    'wallet-ios': IOS_REPOS,
    'wallet-android': ANDROID_REPOS,
    'ecc': ECC_REPOS,
    'zf': ZF_REPOS,
    'zf-frost': ZF_FROST_REPOS,
    'zf-devops': {**ZF_REPOS, **ZF_FROST_REPOS},
    'zcashd-deprecation': ZCASHD_DEPRECATION_REPOS,
    'sprout-deprecation': POOL_DEPRECATION_REPOS,
    'transparent-deprecation': POOL_DEPRECATION_REPOS,
}

# The repository table for the selected view. An unrecognised DAG_VIEW raises
# KeyError here, at import time.
REPOS = REPO_SETS[DAG_VIEW]

# Maps each ZenHub workspace ID to the GitHub repo IDs associated with it here.
# NOTE(review): repo tables that appear in none of these entries (for example
# TFL_REPOS) produce no workspace to query -- confirm whether entries are
# missing.
WORKSPACE_SETS = {
    # ecc-core
    '5dc1fd615862290001229f21': CORE_REPOS.keys(),
    # ecc-wallet
    '5db8aa0244512d0001e0968e': WALLET_REPOS.keys(),
    # zf
    '5fb24d9264a3e8000e666a9e': ZF_REPOS.keys(),
    # zf-frost
    '607d75e0169bd50011d5410f': ZF_FROST_REPOS.keys(),
}

# WORKSPACE_SETS narrowed to the repos of the selected view (REPOS). Every
# workspace keeps its key; a workspace with no repos in the view maps to an
# empty list.
WORKSPACES = {
    workspace_id: [repo_id for repo_id in repos if repo_id in REPOS]
    for (workspace_id, repos) in WORKSPACE_SETS.items()
}

# Category names understood by GitHubIssue.any_cat.
SUPPORTED_CATEGORIES = {'releases', 'targets'}


def cats(s):
    """Split a comma-separated string into a set of non-empty, stripped items."""
    stripped = (piece.strip() for piece in s.split(','))
    return {piece for piece in stripped if piece != ''}

# When non-empty, the graph is cut down to the listed issues plus their
# ancestors; every other issue and PR is dropped. Useful for rendering a
# sub-graph focused on one area.
#
# Format is ORG/REPO#ISSUE[,ORG/REPO#ISSUE[, ..]]
TERMINATE_AT = cats(os.getenv('TERMINATE_AT', ''))

# Comma-separated categories ('releases', 'targets'). When set, only issues in
# those categories are kept in the graph.
ONLY_INCLUDE = cats(os.getenv('ONLY_INCLUDE', ''))

# Keep disconnected subgraphs even when every issue and PR in them is closed.
INCLUDE_FINISHED = strtobool(os.getenv('INCLUDE_FINISHED', 'false'))

# Controls removal of closed issues and PRs that are not downstream of open
# ones. With 'targets' or 'releases', only issues upstream of a closed target
# or release issue are removed. Kept as the raw string because it is not a
# plain boolean.
PRUNE_FINISHED = os.getenv('PRUNE_FINISHED', 'true')

# Group issues and PRs into clusters by milestone.
SHOW_MILESTONES = strtobool(os.getenv('SHOW_MILESTONES', 'false'))

# Group issues and PRs into clusters by ZenHub epic.
SHOW_EPICS = strtobool(os.getenv('SHOW_EPICS', 'false'))


class GitHubIssue:
    """An issue or pull request used as a node in the dependency graph.

    Identity is the pair ``(repo_id, issue_number)``: equality and hashing use
    only that pair, never the fetched details.
    """

    def __init__(self, repo_id, issue_number, data):
        """Populate the node from fetched issue data.

        Args:
            repo_id: Numeric GitHub repository ID.
            issue_number: Issue or PR number within that repository.
            data: Mapping-like object providing 'labels' (with a 'nodes' list
                of objects that have a 'name'), 'title', 'url' and 'state',
                and optionally 'milestone' and 'merged'. Pass None when the
                details could not be (or should not be) fetched.
        """
        self.repo_id = repo_id
        self.issue_number = issue_number
        self.milestone = None

        if data is not None:
            labels = [label['name'] for label in data['labels']['nodes']]
            self.title = data['title']
            self.is_release = 'C-release' in labels
            self.is_target = 'C-target' in labels
            # Only pull requests carry a 'merged' field.
            self.is_pr = 'merged' in data
            self.is_committed = 'S-committed' in labels
            self.is_in_progress = 'S-in-progress' in labels
            self.waiting_on_review = 'S-waiting-on-review' in labels
            self.url = data['url']
            self.state = 'closed' if data['state'] in ['CLOSED', 'MERGED'] else 'open'
            if 'milestone' in data and data['milestone']:
                self.milestone = data['milestone']['title']
        else:
            # If we can't fetch issue data, assume we don't care.
            self.title = ''
            self.url = None
            self.is_release = False
            self.is_target = False
            self.is_pr = False
            self.is_committed = False
            self.is_in_progress = False
            self.waiting_on_review = False
            self.state = 'closed'

    def __repr__(self):
        """Return 'OWNER/REPO#NUMBER', or 'Unknown' for repos outside REPOS."""
        if self.repo_id in REPOS:
            repo = REPOS[self.repo_id]
            # Shorten the representation of long repo names.
            if repo[0] == 'Electric-Coin-Company':
                repo = ('ECC', repo[1])
            repo = '/'.join(repo)
            return '%s#%d' % (repo, self.issue_number)
        else:
            return 'Unknown'

    def __eq__(self, other):
        """Compare by (repo_id, issue_number); other types are never equal."""
        # __hash__ matches the hash of the bare (repo_id, issue_number) tuple,
        # so a set or dict holding both kinds of key will call this with a
        # tuple. Defer to Python's default handling instead of raising
        # AttributeError on the missing attributes.
        if not isinstance(other, GitHubIssue):
            return NotImplemented
        return (self.repo_id, self.issue_number) == (other.repo_id, other.issue_number)

    def __hash__(self):
        return hash((self.repo_id, self.issue_number))

    def any_cat(self, categories):
        """Return True if this issue belongs to any of the given categories.

        Args:
            categories: Container of category names; only 'releases' and
                'targets' are recognised, anything else is ignored.
        """
        return (
            ('releases' in categories and self.is_release)
            or ('targets' in categories and self.is_target)
        )

def fetch_issues(op, issues):
    """Add selections to ``op`` for every ``(repo_id, issue_number)`` in ``issues``.

    Each repository becomes a ``repo<ID>`` alias and each issue within it an
    ``issue<NUMBER>`` alias, selecting labels, state, milestone, title and URL
    (plus ``merged`` for pull requests). Every repo ID must be a key of REPOS.
    """
    repo_ids = {repo for (repo, _) in issues}

    for repo_id in repo_ids:
        numbers = [number for (r, number) in issues if r == repo_id]
        owner, name = REPOS[repo_id]
        conn = op.repository(
            owner=owner,
            name=name,
            __alias__='repo%d' % repo_id,
        )

        for number in numbers:
            res = conn.issue_or_pull_request(number=number, __alias__='issue%d' % number)
            # The result may be either type, so select the fields on both.
            for typ in [schema.Issue, schema.PullRequest]:
                node = res.__as__(typ)
                node.labels(first=50).nodes().name()
                node.state()
                node.milestone().title()
                node.title()
                node.url()
                if typ == schema.PullRequest:
                    node.merged()

def download_issues(endpoint, nodes):
    """Fetch issue details for graph nodes and wrap them as GitHubIssue objects.

    Args:
        endpoint: Callable that executes an operation against the GitHub API.
        nodes: Iterable of ``(repo_id, issue_number)`` pairs.

    Returns:
        Dict mapping every ``(repo_id, issue_number)`` in ``nodes`` to a
        GitHubIssue. Nodes whose repo is not in REPOS, and issues missing from
        the response, get a placeholder built with no data.
    """
    batch_size = 50

    known = []
    for (repo, issue) in nodes:
        if repo in REPOS:
            known.append((repo, issue))

    result = {}

    # Graph nodes from ZenHub that live outside the repos we care about still
    # get default entries, which keeps later graph manipulation simple.
    for (repo, issue) in nodes:
        if repo not in REPOS:
            result[(repo, issue)] = GitHubIssue(repo, issue, None)

    # Query in fixed-size batches rather than one request for everything.
    for start in range(0, len(known), batch_size):
        batch = known[start:start + batch_size]

        op = Operation(schema.Query)
        fetch_issues(op, batch)
        data = op + endpoint(op)

        for (repo, issue) in batch:
            repo_data = data['repo%d' % repo]
            issue_key = 'issue%d' % issue
            # If GITHUB_TOKEN doesn't have permission to read from a particular
            # private repository in REPOS, GitHub returns an empty repo_data
            # section.
            if issue_key in repo_data:
                issue_data = repo_data[issue_key]
            else:
                issue_data = None
            result[(repo, issue)] = GitHubIssue(repo, issue, issue_data)

    return result

def fetch_workspace_graph(op, workspace_id, repos, cursor):
    """Select one page of a workspace's issue dependencies on ``op``.

    Args:
        op: Operation to add the selections to.
        workspace_id: ZenHub workspace ID to query.
        repos: Currently unused; see the TODO below.
        cursor: Passed as ``after``; callers pass None for the first page.
    """
    dependencies = op.workspace(id=workspace_id).issue_dependencies(
        # TODO: This causes a 500 Internal Server Error. We need the ZenHub repo IDs here,
        # not the GitHub repo IDs (which the previous REST API used).
        # repository_ids=repos,
        first=100,
        after=cursor,
    )
    # For each dependency: its id, plus the number and GitHub repo id of both
    # the blocked and the blocking issue.
    dependencies.nodes.id()
    dependencies.nodes.blocked_issue.number()
    dependencies.nodes.blocked_issue.repository.gh_id()
    dependencies.nodes.blocking_issue.number()
    dependencies.nodes.blocking_issue.repository.gh_id()
    # Pagination state, read by the caller to request the next page.
    dependencies.page_info.has_next_page()
    dependencies.page_info.end_cursor()

def get_dependency_graph(endpoint, workspace_id, repos):
    """Download all issue dependencies of a workspace as a directed graph.

    Pages through the results, printing a '.' for each additional page and a
    newline once done. Nodes are ``(gh_repo_id, issue_number)`` pairs and each
    edge points from the blocking issue to the blocked issue.
    """
    edges = []
    cursor = None
    has_more = True

    while has_more:
        op = Operation(zenhub_schema.Query)
        fetch_workspace_graph(op, workspace_id, repos, cursor)

        page = (op + endpoint(op)).workspace.issue_dependencies
        for dep in page.nodes:
            blocking = dep.blocking_issue
            blocked = dep.blocked_issue
            edges.append((
                (blocking.repository.gh_id, blocking.number),
                (blocked.repository.gh_id, blocked.number),
            ))

        has_more = page.page_info.has_next_page
        if has_more:
            cursor = page.page_info.end_cursor
            print('.', end='', flush=True)

    print()
    return nx.DiGraph(edges)

def fetch_epics(op, workspace_id, repos, cursor):
    """Select one page of a workspace's epics on ``op``.

    Args:
        op: Operation to add the selections to.
        workspace_id: ZenHub workspace ID to query.
        repos: Currently unused; see the TODO below.
        cursor: Passed as ``after``; callers pass None for the first page.
    """
    epics = op.workspace(id=workspace_id).epics(
        # TODO: This causes a 500 Internal Server Error. We need the ZenHub repo IDs here,
        # not the GitHub repo IDs (which the previous REST API used).
        # repository_ids=repos,
        first=100,
        after=cursor,
    )
    # For each epic: its id, plus the number and GitHub repo id of its issue.
    epics.nodes.id()
    epics.nodes.issue.number()
    epics.nodes.issue.repository.gh_id()
    # Pagination state, read by the caller to request the next page.
    epics.page_info.has_next_page()
    epics.page_info.end_cursor()

def get_epics(endpoint, workspace_id, repos):
    """Download every epic of a workspace.

    Pages through the results, printing a '.' for each additional page and a
    newline once done.

    Returns:
        List of ``(epic_id, (gh_repo_id, issue_number))`` tuples.
    """
    found = []
    cursor = None
    has_more = True

    while has_more:
        op = Operation(zenhub_schema.Query)
        fetch_epics(op, workspace_id, repos, cursor)

        page = (op + endpoint(op)).workspace.epics
        found.extend(
            (node.id, (node.issue.repository.gh_id, node.issue.number))
            for node in page.nodes
        )

        has_more = page.page_info.has_next_page
        if has_more:
            cursor = page.page_info.end_cursor
            print('.', end='', flush=True)

    print()
    return found

def fetch_epic_issues(op, workspace_id, epic_id, cursor):
    """Select one page of the child issues of a single epic on ``op``.

    Args:
        op: Operation to add the selections to.
        workspace_id: ZenHub workspace ID to query.
        epic_id: ID of the epic, passed as the only element of ``ids``.
        cursor: Passed as ``after``; callers pass None for the first page.
    """
    epic = op.workspace(id=workspace_id).epics(ids=[epic_id])
    child_issues = epic.nodes.child_issues(
        first=100,
        after=cursor,
    )
    # For each child issue: its number and GitHub repo id.
    child_issues.nodes.number()
    child_issues.nodes.repository.gh_id()
    # Pagination state, read by the caller to request the next page.
    child_issues.page_info.has_next_page()
    child_issues.page_info.end_cursor()

def get_epic_issues(endpoint, workspace_id, epic_id):
    """Download every child issue of one epic.

    Pages through the results, printing a '.' for each additional page and a
    newline once done. The first epic node of each response is used.

    Returns:
        List of ``(gh_repo_id, issue_number)`` tuples.
    """
    children = []
    cursor = None
    has_more = True

    while has_more:
        op = Operation(zenhub_schema.Query)
        fetch_epic_issues(op, workspace_id, epic_id, cursor)

        epic = (op + endpoint(op)).workspace.epics.nodes[0]
        page = epic.child_issues
        children.extend(
            (node.repository.gh_id, node.number)
            for node in page.nodes
        )

        has_more = page.page_info.has_next_page
        if has_more:
            cursor = page.page_info.end_cursor
            print('.', end='', flush=True)

    print()
    return children

def main():
    """Build the issue dependency DAG for DAG_VIEW and render it as SVG and HTML.

    Steps:
      1. Fetch dependency edges from ZenHub for every workspace that has repos
         in the selected view, and merge them into one directed graph.
      2. If SHOW_EPICS, fetch the open epics and their child issues.
      3. If TERMINATE_AT is set, keep only those issues and their ancestors.
      4. Fetch issue / PR details from GitHub and drop nodes outside REPOS.
      5. Apply the ONLY_INCLUDE, INCLUDE_FINISHED and PRUNE_FINISHED filters.
      6. Style the nodes, cluster them by milestone / epic if requested, and
         write public/zcash-<view>-dag.svg and public/zcash-<view>-dag.html.
    """
    gapi = HTTPEndpoint(
        'https://api.github.com/graphql',
        {'Authorization': 'bearer %s' % GITHUB_TOKEN},
    )
    zapi = HTTPEndpoint(
        'https://api.zenhub.com/public/graphql',
        {'Authorization': 'Bearer %s' % ZENHUB_TOKEN},
    )

    # Build the full dependency graph from ZenHub's per-workspace API.
    print('Fetching graph')
    workspace_graphs = [
        get_dependency_graph(zapi, workspace_id, repos)
        for (workspace_id, repos) in WORKSPACES.items()
        if len(repos) > 0
    ]
    # nx.compose_all raises ValueError for an empty list, which happens when
    # none of the view's repos belong to a configured workspace. Start from an
    # empty graph in that case instead of crashing.
    dg = nx.compose_all(workspace_graphs) if workspace_graphs else nx.DiGraph()

    print('Rendering DAG')

    issues_by_epic = {}
    if SHOW_EPICS:
        # Remember which workspace reported each epic, so that its child issues
        # are requested from that same workspace. Guessing the workspace from
        # the repo is ambiguous for repos shared between workspaces and fails
        # for repos that are in none of them. The first workspace wins if an
        # issue is reported as an epic more than once.
        epic_refs = {}
        for (workspace_id, repos) in WORKSPACES.items():
            if len(repos) > 0:
                for (epic_id, gh_ref) in get_epics(zapi, workspace_id, repos):
                    epic_refs.setdefault(gh_ref, (workspace_id, epic_id))

        epics_mapping = download_issues(gapi, list(epic_refs))
        # Only open epics are shown. Epics outside REPOS come back as closed
        # placeholders, so this drops them as well.
        epics_mapping = {k: v for (k, v) in epics_mapping.items() if v.state != 'closed'}
        for (gh_ref, epic) in epics_mapping.items():
            (workspace_id, epic_id) = epic_refs[gh_ref]
            issues = set(get_epic_issues(zapi, workspace_id, epic_id))
            issues_by_epic[epic] = issues
            # The dependency listing only returns nodes that have some
            # connection, but we'd like to show all issues from epics even if
            # they are disconnected.
            dg.add_nodes_from(issues)

    if len(TERMINATE_AT) > 0:
        # Look up the repo IDs for the given terminating issues.
        reverse_repos = {v: k for (k, v) in REPOS.items()}
        terminate_at = set()
        for ref in TERMINATE_AT:
            (repo, number) = ref.split('#')
            terminate_at.add((reverse_repos[tuple(repo.split('/', 1))], int(number)))

        # Replace the graph with the subgraph that only includes the terminating
        # issues and their ancestors.
        ancestors = [nx.ancestors(dg, n) for n in terminate_at]
        dg = nx.subgraph(dg, terminate_at.union(*ancestors))

    # Fetch the issues within the graph.
    mapping = download_issues(gapi, dg.nodes)

    # Relabel the graph, replacing (repo_id, number) pairs with GitHubIssue
    # objects. This also produces a fresh, mutable graph.
    dg = nx.relabel_nodes(dg, mapping)

    # Filter out unknown issues
    unknown = [n for n in dg if n.repo_id not in REPOS]
    if len(unknown) > 0:
        dg.remove_nodes_from(unknown)

    if len(ONLY_INCLUDE) > 0 and ONLY_INCLUDE.issubset(SUPPORTED_CATEGORIES):
        # Insert direct edges for all transitive paths in the graph. This creates edges
        # between target issues that were not previously directly connected, but were
        # "reachable".
        tc = nx.transitive_closure_dag(dg)

        # Remove non-target issues. This also removes their involved edges, leaving behind
        # the transitive closure of the target issues.
        tc.remove_nodes_from([n for n in dg.nodes if not n.any_cat(ONLY_INCLUDE)])

        # Reduce to the minimum number of edges representing the same transitive paths.
        # This is unique for a DAG.
        dg = nx.transitive_reduction(tc)

    if not INCLUDE_FINISHED:
        # Identify the disconnected subgraphs comprised entirely of closed
        # issues. The list is built before anything is removed from dg.
        finished = [
            component
            for component in nx.connected_components(dg.to_undirected())
            if all(n.state == 'closed' for n in component)
        ]

        # Remove fully-closed subgraphs.
        for component in finished:
            dg.remove_nodes_from(component)

    # Prune nodes that are not downstream of any open issues.
    prune_cats = cats(PRUNE_FINISHED)
    if prune_cats.issubset(SUPPORTED_CATEGORIES):
        closed_targets = [n for n in dg.nodes if n.any_cat(prune_cats) and n.state == 'closed']
        for target in closed_targets:
            # Check that the target (and by extension its ancestors) wasn't already
            # removed for being the ancestor of another closed target.
            if target in dg:
                ancestors = nx.ancestors(dg, target)
                if all(n.state == 'closed' for n in ancestors):
                    # Only prune ancestors, not the closed target node, so that
                    # we see the most recently-closed target nodes in the DAG.
                    dg.remove_nodes_from(ancestors)

    elif PRUNE_FINISHED in ['true', 'all']:
        # - It would be nice to keep the most recently-closed issues on the DAG, but
        #   dg.out_degree seems to be broken...
        # Repeatedly strip closed root nodes until every root is open.
        to_prune = [n for (n, degree) in dg.in_degree() if degree == 0 and n.state == 'closed']
        while len(to_prune) > 0:
            dg.remove_nodes_from(to_prune)
            to_prune = [n for (n, degree) in dg.in_degree() if degree == 0 and n.state == 'closed']

    # Apply property annotations. This must come after the filters above:
    # nx.transitive_reduction returns a graph without edge data, so annotating
    # earlier loses 'is_open' whenever ONLY_INCLUDE is in effect, and every
    # edge then counts with the default weight of 1 (i.e. as open).
    for (source, sink) in dg.edges:
        dg.edges[source, sink]['is_open'] = 0 if source.state == 'closed' else 1

    # Open issues with no open blockers are the ones that can be worked on next.
    do_next = {
        n for (n, degree) in dg.in_degree(weight='is_open')
        if degree == 0 and n.state != 'closed'
    }

    # Apply style annotations.
    for n in dg:
        attrs = dg.nodes[n]
        if n.title:
            attrs['label'] = '\n'.join(['%s' % n] + wrap(n.title, 25))
        if n.state == 'closed':
            attrs['class'] = 'closed'
            attrs['fillcolor'] = '#fad8c7'
        elif n.waiting_on_review:
            attrs['class'] = 'needs-review'
            attrs['fillcolor'] = '#dfc150'
        elif n.is_committed or n.is_in_progress:
            attrs['class'] = 'committed'
            attrs['fillcolor'] = '#a6cfff'
        else:
            attrs['class'] = 'open'
            attrs['fillcolor'] = '#c2e0c6'
        attrs['penwidth'] = 2 if n in do_next else 1
        if n.is_target:
            attrs['shape'] = 'folder'
        elif n.is_pr:
            attrs['shape'] = 'component'
        else:
            attrs['shape'] = 'box'
        attrs['style'] = 'filled'
        if n.url:
            attrs['URL'] = n.url
            attrs['target'] = '_blank'

    ag = nx.nx_agraph.to_agraph(dg)

    clusters = 0
    if SHOW_MILESTONES:
        # Identify milestone nbunches, skipping issues without a milestone.
        milestones = {}
        for n in dg:
            if n.milestone is not None:
                milestones.setdefault(n.milestone, []).append(n)
        for (milestone, nodes) in milestones.items():
            ag.add_subgraph(nodes, 'cluster_%d' % clusters, label=milestone, color='blue')
            clusters += 1

    if SHOW_EPICS:
        for (epic, issues) in issues_by_epic.items():
            # Only cluster the child issues that survived the filters above.
            issues = [n for n in dg if (n.repo_id, n.issue_number) in issues]
            if issues:
                ag.add_subgraph(issues, 'cluster_%d' % clusters, label=epic.title, color='blue')
                clusters += 1

    # Draw the result!
    svg_path = 'public/zcash-%s-dag.svg' % DAG_VIEW
    html_path = 'public/zcash-%s-dag.html' % DAG_VIEW
    ag.graph_attr['rankdir'] = 'LR'
    ag.graph_attr['stylesheet'] = 'zcash-dag.css'
    ag.layout(prog='dot')
    os.makedirs('public', exist_ok=True)
    ag.draw(svg_path)

    # Render the HTML version!
    # Read and write as UTF-8 explicitly so that non-ASCII issue titles do not
    # depend on the locale's default encoding.
    with open(svg_path, encoding='utf-8') as f:
        svg_data = f.read()
    # Embed only the <svg> element, dropping the XML prolog before it.
    svg_start = svg_data.find('<svg')
    html_data = '''<!DOCTYPE html>
<html>
  <head>
    <meta charset="utf-8">
    <title>Zcash %s DAG</title>

    <!-- Pan/zoom SVGs -->
    <script src="https://bumbu.me/svg-pan-zoom/dist/svg-pan-zoom.min.js"></script>

    <link rel="stylesheet" href="zcash-dag.css">
    <style>
      @media (prefers-color-scheme: dark) {
        body {
          /* Material dark theme surface colour */
          background-color: #121212;
        }
      }
    </style>
  </head>
  <body>
    <div id="dag">%s</div>

    <script>
      svgPanZoom('#dag > svg', {
        zoomScaleSensitivity: 0.4
      });
    </script>
  </body>
</html>
''' % (DAG_VIEW, svg_data[svg_start:])
    with open(html_path, 'w', encoding='utf-8') as f:
        f.write(html_data)


if __name__ == '__main__':
    # Both API tokens are required; without them only print a hint.
    if not (GITHUB_TOKEN and ZENHUB_TOKEN):
        print('Please set the GITHUB_TOKEN and ZENHUB_TOKEN environment variables.')
    else:
        main()