mirror of https://github.com/zcash/zips.git
Add links_and_dests.py.
This can be used to print outgoing links and targets in the PDF, and detect a subset of errors. It depends on the PyPDF2 library (pip3 install PyPDF2). Signed-off-by: Daira Hopwood <daira@jacaranda.org>
This commit is contained in:
parent
4f50d5e515
commit
1f041f955a
|
@ -0,0 +1,66 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
try:
|
||||||
|
from PyPDF2 import PdfFileReader
|
||||||
|
except ImportError:
|
||||||
|
print("Please install the PyPDF2 library using `pip3 install PyPDF2`.\n")
|
||||||
|
raise
|
||||||
|
|
||||||
|
from collections import deque
|
||||||
|
import sys
|
||||||
|
|
||||||
|
def get_links_and_destinations(f):
|
||||||
|
# Based on <https://stackoverflow.com/a/5978161/393146>
|
||||||
|
pdf = PdfFileReader(f)
|
||||||
|
|
||||||
|
links = deque()
|
||||||
|
dests = deque()
|
||||||
|
errors = deque()
|
||||||
|
|
||||||
|
for pg in range(pdf.getNumPages()):
|
||||||
|
obj = pdf.getPage(pg).getObject()
|
||||||
|
|
||||||
|
for annotation in obj.get('/Annots', []):
|
||||||
|
uri = annotation.getObject().get('/A', {}).get('/URI', None)
|
||||||
|
if uri is not None and uri not in links:
|
||||||
|
links.append(uri)
|
||||||
|
|
||||||
|
dests = pdf.getNamedDestinations()
|
||||||
|
|
||||||
|
for l in links:
|
||||||
|
if not l.startswith("https:"):
|
||||||
|
errors.append("Insecure or unrecognized protocol in link: " + l)
|
||||||
|
|
||||||
|
if l.startswith("https://zips.z.cash/protocol/"):
|
||||||
|
fragment = l.partition("#")[2]
|
||||||
|
if fragment and fragment not in dests:
|
||||||
|
errors.append("Missing link target: " + l)
|
||||||
|
|
||||||
|
return (links, dests, errors)
|
||||||
|
|
||||||
|
|
||||||
|
def main(args):
|
||||||
|
if len(args) < 2:
|
||||||
|
print("Usage: ./links_and_dests.py <file.pdf>")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
with open(args[1], 'rb') as f:
|
||||||
|
(links, dests, errors) = get_links_and_destinations(f)
|
||||||
|
|
||||||
|
print("Links:")
|
||||||
|
for l in links:
|
||||||
|
print(l)
|
||||||
|
|
||||||
|
print("\nDestinations:")
|
||||||
|
for d in dests:
|
||||||
|
print(d)
|
||||||
|
|
||||||
|
if errors:
|
||||||
|
print("\nErrors:")
|
||||||
|
for e in errors:
|
||||||
|
print(e)
|
||||||
|
|
||||||
|
return 0
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
sys.exit(main(sys.argv))
|
Loading…
Reference in New Issue