mirror of https://github.com/zcash/zips.git
Add support for link checking to protocol/links_and_dests.py and protocol/Makefile.
Signed-off-by: Daira Hopwood <daira@jacaranda.org>
This commit is contained in:
parent
65ebb2266d
commit
1df0f60deb
|
@@ -36,6 +36,10 @@ else
|
||||||
git push --tags origin HEAD:master
|
git push --tags origin HEAD:master
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
.PHONY: linkcheck
|
||||||
|
linkcheck:
|
||||||
|
./links_and_dests.py --check nu5.pdf protocol.pdf heartwood.pdf blossom.pdf sapling.pdf
|
||||||
|
|
||||||
.Makefile.uptodate: Makefile
|
.Makefile.uptodate: Makefile
|
||||||
$(MAKE) clean
|
$(MAKE) clean
|
||||||
touch .Makefile.uptodate
|
touch .Makefile.uptodate
|
||||||
|
|
|
@@ -1,4 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from PyPDF2 import PdfFileReader
|
from PyPDF2 import PdfFileReader
|
||||||
|
@@ -6,6 +7,9 @@ except ImportError:
|
||||||
print("Please install the PyPDF2 library using `pip3 install PyPDF2`.\n")
|
print("Please install the PyPDF2 library using `pip3 install PyPDF2`.\n")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
from urllib.request import urlopen, Request
|
||||||
|
from urllib.error import URLError
|
||||||
|
from os.path import basename
|
||||||
from collections import deque
|
from collections import deque
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
@@ -13,45 +17,79 @@ def get_links_and_destinations(f):
|
||||||
# Based on <https://stackoverflow.com/a/5978161/393146>
|
# Based on <https://stackoverflow.com/a/5978161/393146>
|
||||||
pdf = PdfFileReader(f)
|
pdf = PdfFileReader(f)
|
||||||
|
|
||||||
links = deque()
|
links = set()
|
||||||
dests = deque()
|
|
||||||
errors = deque()
|
|
||||||
|
|
||||||
for pg in range(pdf.getNumPages()):
|
for pg in range(pdf.getNumPages()):
|
||||||
obj = pdf.getPage(pg).getObject()
|
obj = pdf.getPage(pg).getObject()
|
||||||
|
|
||||||
for annotation in obj.get('/Annots', []):
|
for annotation in obj.get('/Annots', []):
|
||||||
uri = annotation.getObject().get('/A', {}).get('/URI', None)
|
uri = annotation.getObject().get('/A', {}).get('/URI', None)
|
||||||
if uri is not None and uri not in links:
|
if uri is not None and uri not in links:
|
||||||
links.append(uri)
|
links.add(uri)
|
||||||
|
|
||||||
dests = pdf.getNamedDestinations()
|
dests = pdf.getNamedDestinations()
|
||||||
|
|
||||||
for l in links:
|
return (links, dests)
|
||||||
if not l.startswith("https:"):
|
|
||||||
errors.append("Insecure or unrecognized protocol in link: " + l)
|
|
||||||
|
|
||||||
if l.startswith("https://zips.z.cash/protocol/"):
|
|
||||||
fragment = l.partition("#")[2]
|
|
||||||
if fragment and fragment not in dests:
|
|
||||||
errors.append("Missing link target: " + l)
|
|
||||||
|
|
||||||
return (links, dests, errors)
|
|
||||||
|
|
||||||
|
|
||||||
def main(args):
|
def main(args):
|
||||||
if len(args) < 2:
|
if len(args) < 2:
|
||||||
print("Usage: ./links_and_dests.py <file.pdf>")
|
print("Usage: ./links_and_dests.py [--check] [--print-dests] <file.pdf>")
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
with open(args[1], 'rb') as f:
|
check = '--check' in args[1:]
|
||||||
(links, dests, errors) = get_links_and_destinations(f)
|
print_dests = '--print-dests' in args[1:]
|
||||||
|
paths = [arg for arg in args[1:] if not arg.startswith('--')]
|
||||||
|
|
||||||
|
all_links = {} # url -> pdf_paths
|
||||||
|
all_dests = {} # url -> dests
|
||||||
|
|
||||||
|
for pdf_path in paths:
|
||||||
|
with open(pdf_path, 'rb') as f:
|
||||||
|
(links, dests) = get_links_and_destinations(f)
|
||||||
|
|
||||||
|
for l in links:
|
||||||
|
refs = all_links.get(l, None)
|
||||||
|
if refs is None:
|
||||||
|
all_links[l] = refs = deque()
|
||||||
|
refs.append(pdf_path)
|
||||||
|
|
||||||
|
all_dests["https://zips.z.cash/protocol/" + basename(pdf_path)] = dests
|
||||||
|
|
||||||
|
errors = deque()
|
||||||
|
|
||||||
print("Links:")
|
print("Links:")
|
||||||
for l in links:
|
for (l, p) in sorted(all_links.items()):
|
||||||
print(l)
|
print(l, end=" ")
|
||||||
|
sys.stdout.flush()
|
||||||
|
what = "%s (occurs in %s)" % (l, " and ".join(p)) if len(paths) > 1 else l
|
||||||
|
status = ""
|
||||||
|
|
||||||
print("\nDestinations:")
|
if not l.startswith("https:"):
|
||||||
|
errors.append("Insecure or unrecognized protocol in link: " + what)
|
||||||
|
status = "❌"
|
||||||
|
else:
|
||||||
|
(url, _, fragment) = l.partition("#")
|
||||||
|
if url in all_dests:
|
||||||
|
if fragment and fragment not in all_dests[url]:
|
||||||
|
errors.append("Missing link target: " + what)
|
||||||
|
status = "❌"
|
||||||
|
else:
|
||||||
|
status = "✓"
|
||||||
|
elif check:
|
||||||
|
try:
|
||||||
|
headers = {"User-Agent": "Mozilla/5.0"}
|
||||||
|
res = urlopen(Request(url=l, headers=headers))
|
||||||
|
res.read()
|
||||||
|
status = "✓"
|
||||||
|
except URLError as e:
|
||||||
|
errors.append("Could not open link: %s due to %r" % (what, e))
|
||||||
|
status = "❌"
|
||||||
|
|
||||||
|
print(status)
|
||||||
|
|
||||||
|
if print_dests:
|
||||||
|
for dests in all_dests:
|
||||||
|
print("\nDestinations for %s:" % (dests,))
|
||||||
for d in dests:
|
for d in dests:
|
||||||
print(d)
|
print(d)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue