#!/usr/bin/env python3
# Provenance: extracted from git patch 1f041f955a2896a81add36e2b67983788e9de5fb,
# "Add links_and_dests.py." by Daira Hopwood (Mon, 5 Apr 2021 21:39:51 +0100),
# Signed-off-by: Daira Hopwood.
# Path in the originating repository: protocol/links_and_dests.py (mode 100755).
"""Print the outgoing links and named destinations of a PDF.

This can be used to print outgoing links and targets in the PDF, and to
detect a subset of errors (non-https links, and links into the protocol
specification whose fragment is not a named destination of the PDF).

It depends on the PyPDF2 library (`pip3 install PyPDF2`).
"""

from collections import deque
import sys

# Links under this prefix are expected to point back into the PDF being
# checked, so their "#fragment" must be one of its named destinations.
SPEC_URL_PREFIX = "https://zips.z.cash/protocol/"


def _resolve(obj):
    """Dereference a PDF indirect reference; return other values unchanged."""
    # Newer PyPDF2 releases spell this get_object(); older ones only offer
    # getObject(), which PyPDF2 3.x rejects. Try the new name first so the
    # legacy one is touched only when nothing else exists.
    for name in ("get_object", "getObject"):
        method = getattr(obj, name, None)
        if callable(method):
            return method()
    return obj


def _open_reader(f):
    """Open the binary stream *f* with PyPDF2 and return the reader object."""
    # Imported lazily so that the usage message in main() does not require
    # PyPDF2 to be installed.
    try:
        import PyPDF2
    except ImportError:
        print("Please install the PyPDF2 library using `pip3 install PyPDF2`.\n")
        raise

    # PdfFileReader is the legacy class name; prefer PdfReader when present.
    reader_class = getattr(PyPDF2, "PdfReader", None) or PyPDF2.PdfFileReader
    return reader_class(f)


def _pages(reader):
    """Return an iterable over the pages of *reader* (new or legacy API)."""
    pages = getattr(reader, "pages", None)
    if pages is not None:
        return pages
    return (reader.getPage(i) for i in range(reader.getNumPages()))


def _named_destinations(reader):
    """Return the named-destination mapping of *reader* (new or legacy API)."""
    dests = getattr(reader, "named_destinations", None)
    if dests is None:
        dests = reader.getNamedDestinations()
    return dests


def get_links_and_destinations(f, reader_factory=None):
    """Collect the URI links and named destinations of a PDF and check them.

    Args:
        f: the PDF, as a file object opened in binary mode. It is handed
            unchanged to the reader factory.
        reader_factory: optional callable taking *f* and returning a
            PyPDF2-style reader. It must expose either `pages` and
            `named_destinations`, or `getNumPages()`, `getPage(i)` and
            `getNamedDestinations()`. Defaults to PyPDF2's own reader class.

    Returns:
        A tuple `(links, dests, errors)`:
        * links: deque of the distinct `/URI` link targets, in order of
          first appearance;
        * dests: the reader's named-destination mapping;
        * errors: deque of human-readable problem descriptions.

    Raises:
        ImportError: if no *reader_factory* is given and PyPDF2 is missing.
    """
    # Based on an external example; the reference URL did not survive in this
    # copy of the source.
    if reader_factory is None:
        reader_factory = _open_reader
    reader = reader_factory(f)

    # dict keys give insertion-ordered de-duplication with O(1) membership,
    # instead of a linear scan of the collected links for every annotation.
    unique_links = {}
    for page in _pages(reader):
        page_dict = _resolve(page)
        # '/Annots' and '/A' may be stored as indirect references, which a
        # plain dict.get() does not dereference; resolve them explicitly.
        for annotation in _resolve(page_dict.get('/Annots', [])):
            action = _resolve(_resolve(annotation).get('/A', {}))
            uri = _resolve(action.get('/URI', None))
            if uri is not None:
                unique_links.setdefault(uri, None)

    links = deque(unique_links)
    dests = _named_destinations(reader)
    errors = deque()

    for link in links:
        if not link.startswith("https:"):
            errors.append("Insecure or unrecognized protocol in link: " + link)

        if link.startswith(SPEC_URL_PREFIX):
            fragment = link.partition("#")[2]
            if fragment and fragment not in dests:
                errors.append("Missing link target: " + link)

    return (links, dests, errors)


def main(args):
    """Command-line entry point.

    Args:
        args: the argument vector, with the program name at index 0 and the
            path of the PDF to inspect at index 1.

    Returns:
        1 if no PDF path was given, otherwise 0. Note that 0 is returned
        even when errors were found and printed.
    """
    if len(args) < 2:
        print("Usage: ./links_and_dests.py <file.pdf>")
        return 1

    # The PDF is parsed while the file is still open.
    with open(args[1], 'rb') as f:
        (links, dests, errors) = get_links_and_destinations(f)

    print("Links:")
    for link in links:
        print(link)

    print("\nDestinations:")
    for dest in dests:
        print(dest)

    if errors:
        print("\nErrors:")
        for error in errors:
            print(error)

    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))