zips/links_and_dests.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from urllib.request import build_opener, HTTPCookieProcessor, HTTPSHandler, Request
from urllib.error import URLError, HTTPError
from os.path import relpath
from collections import deque
import sys
from time import sleep
import ssl
from io import BytesIO

try:
    from bs4 import BeautifulSoup
    import html5lib
    import certifi
except ImportError:
    print("Please install the BeautifulSoup, html5lib, and certifi libraries using `pip install bs4 html5lib certifi`.\n")
    raise

if [int(v) for v in certifi.__version__.split('.')] < [2021, 5, 30]:
    print("Please upgrade certifi using `pip install --upgrade certifi`.\n")
    sys.exit(1)

def get_links_and_destinations_from_pdf(f):
    try:
        from PyPDF2 import PdfFileReader
    except ImportError:
        print("Please install the PyPDF2 library using `pip install PyPDF2`.\n")
        raise

    # Based on <https://stackoverflow.com/a/5978161/393146>
    pdf = PdfFileReader(f)

    links = set()
    for pg in range(pdf.getNumPages()):
        obj = pdf.getPage(pg).getObject()

        for annotation in obj.get('/Annots', []):
            uri = annotation.getObject().get('/A', {}).get('/URI', None)
            if uri is not None and uri not in links:
                links.add(uri)

    dests = pdf.getNamedDestinations().keys()

    return (links, dests)


def get_links_and_destinations_from_html(f):
    links = set()
    internal = set()
    dests = set()

    soup = BeautifulSoup(f.read(), "html5lib")
    for link in soup.find_all('a'):
        if link.has_attr('href'):
           url = link['href']
           (internal if url.startswith('#') else links).add(url)

        if link.has_attr('name'):
           dests.add(link['name'])

    for link in soup.find_all(id=True):
        dests.add(link['id'])
        # GitHub's rendering of .mediawiki files puts 'id="user-content-<ANCHOR>"' in the source
        # and dynamically creates a corresponding link #<ANCHOR>.
        if link['id'].startswith("user-content-"):
            dests.add(link['id'][13:])

    internal.difference_update(['#' + d for d in dests])  # ignore internal links satisfied by a dest
    links.update(internal)
    return (links, dests)


def main(args):
    if len(args) < 2:
        print("Usage: ./links_and_dests.py [--check] [--print-dests] <file.pdf|html|xhtml>")
        return 1

    check = '--check' in args[1:]
    print_dests = '--print-dests' in args[1:]
    paths = [arg for arg in args[1:] if not arg.startswith('--')]

    all_links = {}  # url -> pdf_paths
    all_dests = {}  # url -> dests

    errors = deque()

    print("Reading files...")
    for path in paths:
        print(path, end=" ")
        sys.stdout.flush()

        with open(path, 'rb') as f:
            if path.endswith(".html") or path.endswith(".xhtml"):
                (links, dests) = get_links_and_destinations_from_html(f)
            elif path.endswith(".pdf"):
                (links, dests) = get_links_and_destinations_from_pdf(f)
            else:
                errors.append("Unrecognized file type: " + path)
                continue

        path = relpath(path)
        for l in links:
            refs = all_links.get(l, None)
            if refs is None:
                all_links[l] = refs = deque()
            refs.append(path)

        all_dests["https://zips.z.cash/" + path] = dests
        if path.endswith(".html"):
            all_dests["https://zips.z.cash/" + path[:-5]] = dests

    print("\n")
    print("Links:")

    last_url = None
    content = None
    content_type = None
    dests = None

    for (l, p) in sorted(all_links.items()):
        print(l, end=" ")
        sys.stdout.flush()
        what = "%s (occurs in %s)" % (l, " and ".join(p)) if len(paths) > 1 else l
        status = ""

        if ":" not in l:
            l = "https://zips.z.cash/" + l

        if l.startswith("mailto:"):
            status = "(not checked)"
        elif l.startswith("https:") or l.startswith("HTTP:"):  # use uppercase HTTP: for links with no https: equivalent
            (url, _, fragment) = l.partition("#")

            if url in all_dests:
                if fragment and fragment not in all_dests[url]:
                    errors.append("Missing link target: " + what)
                    status = "❌"
                else:
                    status = "✓"
            elif check:
                # If url == last_url, there is no need to refetch content. This is an optimization when
                # checking URLs with the same site but different fragments (which will be sorted together).
                if url != last_url:
                    headers = {"User-Agent": "Mozilla/5.0"}
                    https_handler = HTTPSHandler(context=ssl.create_default_context(cafile=certifi.where()))

                    # Some DOI links (i.e. to https://doi.org/) redirect to link.springer.com
                    # in a way that requires cookies (booo!). We allow this for DOI links,
                    # but for all other links we simulate a client that never sets cookies.
                    if l.startswith("https://doi.org/"):
                        opener = build_opener(HTTPCookieProcessor(), https_handler)
                    else:
                        opener = build_opener(https_handler)

                    for retry in range(2):
                        try:
                            response = opener.open(Request(url=l, headers=headers))
                            content_type = response.info().get_content_type()
                            content = response.read()
                            last_url = url
                        except URLError as e:
                            if retry == 0 and isinstance(e, HTTPError) and e.code == 429:
                                try:
                                    delay = int(e.headers['Retry-After'], 10) + 1
                                except Exception:
                                    delay = 60

                                print("(waiting %ds due to rate limiting)" % (delay,), end=" ")
                                sys.stdout.flush()
                                sleep(delay)
                                continue

                            errors.append("Could not open link: %s due to %r" % (what, e))
                            status = "❌"
                            content_type = None
                            content = None
                            last_url = None

                        dests = None
                        break

                if content is not None:
                    if fragment:
                        if dests is None:
                            if content_type in ('text/html', 'application/xhtml+xml'):
                                (_, dests) = get_links_and_destinations_from_html(BytesIO(content))
                            elif content_type == 'application/pdf':
                                (_, dests) = get_links_and_destinations_from_pdf(BytesIO(content))

                        if dests is None:
                            print("(link target not checked)", end=" ")
                            status = "✓"
                        elif fragment not in dests:
                            errors.append("Missing link target: " + what)
                            status = "❌"
                        else:
                            status = "✓"
                    else:
                        status = "✓"
        else:
            errors.append("Insecure or unrecognized protocol in link: " + what)
            status = "❌"

        print(status)

    if print_dests:
        for (path, dests) in all_dests.items():
            if path + ".html" not in all_dests:  # avoid duplication
                print("\nDestinations for %s:" % (path,))
                for d in dests:
                    print(d)

    if errors:
        print("\nErrors:")
        for e in errors:
            print(e)

    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
Add links_and_dests.py. This can be used to print outgoing links and targets in the PDF, and detect a subset of errors. It depends on the PyPDF2 library (pip3 install PyPDF2). Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-04-05 13:39:51 -07:00			`#!/usr/bin/env python3`
Add support for link checking to protocol/links_and_dests.py and protocol/Makefile. Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-04-08 04:04:48 -07:00			`# -- coding: utf-8 --`
Add links_and_dests.py. This can be used to print outgoing links and targets in the PDF, and detect a subset of errors. It depends on the PyPDF2 library (pip3 install PyPDF2). Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-04-05 13:39:51 -07:00
Update links_and_dests.py to support HTML files and rate limiting (part 1). Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-09-09 06:57:51 -07:00			`from urllib.request import build_opener, HTTPCookieProcessor, HTTPSHandler, Request`
			`from urllib.error import URLError, HTTPError`
			`from os.path import relpath`
			`from collections import deque`
			`import sys`
			`from time import sleep`
			`import ssl`
			`from io import BytesIO`

Add links_and_dests.py. This can be used to print outgoing links and targets in the PDF, and detect a subset of errors. It depends on the PyPDF2 library (pip3 install PyPDF2). Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-04-05 13:39:51 -07:00			`try:`
Update links_and_dests.py to support HTML files and rate limiting (part 1). Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-09-09 06:57:51 -07:00			`from bs4 import BeautifulSoup`
			`import html5lib`
			`import certifi`
Add links_and_dests.py. This can be used to print outgoing links and targets in the PDF, and detect a subset of errors. It depends on the PyPDF2 library (pip3 install PyPDF2). Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-04-05 13:39:51 -07:00			`except ImportError:`
Update links_and_dests.py to support HTML files and rate limiting (part 1). Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-09-09 06:57:51 -07:00			print("Please install the BeautifulSoup, html5lib, and certifi libraries using `pip install bs4 html5lib certifi`.\n")
Add links_and_dests.py. This can be used to print outgoing links and targets in the PDF, and detect a subset of errors. It depends on the PyPDF2 library (pip3 install PyPDF2). Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-04-05 13:39:51 -07:00			`raise`

Update links_and_dests.py to support HTML files and rate limiting (part 1). Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-09-09 06:57:51 -07:00			`if [int(v) for v in certifi.__version__.split('.')] < [2021, 5, 30]:`
			print("Please upgrade certifi using `pip install --upgrade certifi`.\n")
			`sys.exit(1)`

			`def get_links_and_destinations_from_pdf(f):`
			`try:`
			`from PyPDF2 import PdfFileReader`
			`except ImportError:`
			print("Please install the PyPDF2 library using `pip install PyPDF2`.\n")
			`raise`
Add links_and_dests.py. This can be used to print outgoing links and targets in the PDF, and detect a subset of errors. It depends on the PyPDF2 library (pip3 install PyPDF2). Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-04-05 13:39:51 -07:00
			`# Based on <https://stackoverflow.com/a/5978161/393146>`
			`pdf = PdfFileReader(f)`

Add support for link checking to protocol/links_and_dests.py and protocol/Makefile. Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-04-08 04:04:48 -07:00			`links = set()`
Add links_and_dests.py. This can be used to print outgoing links and targets in the PDF, and detect a subset of errors. It depends on the PyPDF2 library (pip3 install PyPDF2). Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-04-05 13:39:51 -07:00			`for pg in range(pdf.getNumPages()):`
			`obj = pdf.getPage(pg).getObject()`

			`for annotation in obj.get('/Annots', []):`
			`uri = annotation.getObject().get('/A', {}).get('/URI', None)`
			`if uri is not None and uri not in links:`
Add support for link checking to protocol/links_and_dests.py and protocol/Makefile. Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-04-08 04:04:48 -07:00			`links.add(uri)`
Add links_and_dests.py. This can be used to print outgoing links and targets in the PDF, and detect a subset of errors. It depends on the PyPDF2 library (pip3 install PyPDF2). Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-04-05 13:39:51 -07:00
Update links_and_dests.py to support HTML files and rate limiting (part 1). Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-09-09 06:57:51 -07:00			`dests = pdf.getNamedDestinations().keys()`

			`return (links, dests)`

Add links_and_dests.py. This can be used to print outgoing links and targets in the PDF, and detect a subset of errors. It depends on the PyPDF2 library (pip3 install PyPDF2). Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-04-05 13:39:51 -07:00
Update links_and_dests.py to support HTML files and rate limiting (part 1). Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-09-09 06:57:51 -07:00			`def get_links_and_destinations_from_html(f):`
			`links = set()`
			`internal = set()`
			`dests = set()`

			`soup = BeautifulSoup(f.read(), "html5lib")`
			`for link in soup.find_all('a'):`
			`if link.has_attr('href'):`
			`url = link['href']`
			`(internal if url.startswith('#') else links).add(url)`

			`if link.has_attr('name'):`
			`dests.add(link['name'])`

			`for link in soup.find_all(id=True):`
			`dests.add(link['id'])`
links_and_dests.py: fix false positive "Missing link target" errors for links into rendered BIPs. Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-12-03 06:54:22 -08:00			`# GitHub's rendering of .mediawiki files puts 'id="user-content-<ANCHOR>"' in the source`
			`# and dynamically creates a corresponding link #<ANCHOR>.`
			`if link['id'].startswith("user-content-"):`
			`dests.add(link['id'][13:])`
Update links_and_dests.py to support HTML files and rate limiting (part 1). Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-09-09 06:57:51 -07:00
			`internal.difference_update(['#' + d for d in dests]) # ignore internal links satisfied by a dest`
			`links.update(internal)`
Add support for link checking to protocol/links_and_dests.py and protocol/Makefile. Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-04-08 04:04:48 -07:00			`return (links, dests)`
Add links_and_dests.py. This can be used to print outgoing links and targets in the PDF, and detect a subset of errors. It depends on the PyPDF2 library (pip3 install PyPDF2). Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-04-05 13:39:51 -07:00

			`def main(args):`
			`if len(args) < 2:`
Update links_and_dests.py to support HTML files and rate limiting (part 1). Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-09-09 06:57:51 -07:00			`print("Usage: ./links_and_dests.py [--check] [--print-dests] <file.pdf\|html\|xhtml>")`
Add links_and_dests.py. This can be used to print outgoing links and targets in the PDF, and detect a subset of errors. It depends on the PyPDF2 library (pip3 install PyPDF2). Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-04-05 13:39:51 -07:00			`return 1`

Add support for link checking to protocol/links_and_dests.py and protocol/Makefile. Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-04-08 04:04:48 -07:00			`check = '--check' in args[1:]`
			`print_dests = '--print-dests' in args[1:]`
			`paths = [arg for arg in args[1:] if not arg.startswith('--')]`

			`all_links = {} # url -> pdf_paths`
			`all_dests = {} # url -> dests`

Update links_and_dests.py to support HTML files and rate limiting (part 1). Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-09-09 06:57:51 -07:00			`errors = deque()`

			`print("Reading files...")`
			`for path in paths:`
			`print(path, end=" ")`
			`sys.stdout.flush()`

			`with open(path, 'rb') as f:`
			`if path.endswith(".html") or path.endswith(".xhtml"):`
			`(links, dests) = get_links_and_destinations_from_html(f)`
			`elif path.endswith(".pdf"):`
			`(links, dests) = get_links_and_destinations_from_pdf(f)`
			`else:`
			`errors.append("Unrecognized file type: " + path)`
			`continue`
Add support for link checking to protocol/links_and_dests.py and protocol/Makefile. Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-04-08 04:04:48 -07:00
Update links_and_dests.py to support HTML files and rate limiting (part 1). Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-09-09 06:57:51 -07:00			`path = relpath(path)`
Add support for link checking to protocol/links_and_dests.py and protocol/Makefile. Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-04-08 04:04:48 -07:00			`for l in links:`
			`refs = all_links.get(l, None)`
			`if refs is None:`
			`all_links[l] = refs = deque()`
Update links_and_dests.py to support HTML files and rate limiting (part 1). Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-09-09 06:57:51 -07:00			`refs.append(path)`
Add support for link checking to protocol/links_and_dests.py and protocol/Makefile. Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-04-08 04:04:48 -07:00
Update links_and_dests.py to support HTML files and rate limiting (part 1). Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-09-09 06:57:51 -07:00			`all_dests["https://zips.z.cash/" + path] = dests`
			`if path.endswith(".html"):`
			`all_dests["https://zips.z.cash/" + path[:-5]] = dests`
Add links_and_dests.py. This can be used to print outgoing links and targets in the PDF, and detect a subset of errors. It depends on the PyPDF2 library (pip3 install PyPDF2). Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-04-05 13:39:51 -07:00
Update links_and_dests.py to support HTML files and rate limiting (part 1). Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-09-09 06:57:51 -07:00			`print("\n")`
Add links_and_dests.py. This can be used to print outgoing links and targets in the PDF, and detect a subset of errors. It depends on the PyPDF2 library (pip3 install PyPDF2). Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-04-05 13:39:51 -07:00			`print("Links:")`
Update links_and_dests.py to support HTML files and rate limiting (part 1). Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-09-09 06:57:51 -07:00
			`last_url = None`
			`content = None`
			`content_type = None`
			`dests = None`

Add support for link checking to protocol/links_and_dests.py and protocol/Makefile. Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-04-08 04:04:48 -07:00			`for (l, p) in sorted(all_links.items()):`
			`print(l, end=" ")`
			`sys.stdout.flush()`
			`what = "%s (occurs in %s)" % (l, " and ".join(p)) if len(paths) > 1 else l`
			`status = ""`
Add links_and_dests.py. This can be used to print outgoing links and targets in the PDF, and detect a subset of errors. It depends on the PyPDF2 library (pip3 install PyPDF2). Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-04-05 13:39:51 -07:00
Update links_and_dests.py to support HTML files and rate limiting (part 1). Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-09-09 06:57:51 -07:00			`if ":" not in l:`
			`l = "https://zips.z.cash/" + l`

			`if l.startswith("mailto:"):`
			`status = "(not checked)"`
			`elif l.startswith("https:") or l.startswith("HTTP:"): # use uppercase HTTP: for links with no https: equivalent`
Add support for link checking to protocol/links_and_dests.py and protocol/Makefile. Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-04-08 04:04:48 -07:00			`(url, _, fragment) = l.partition("#")`
Update links_and_dests.py to support HTML files and rate limiting (part 1). Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-09-09 06:57:51 -07:00
Add support for link checking to protocol/links_and_dests.py and protocol/Makefile. Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-04-08 04:04:48 -07:00			`if url in all_dests:`
			`if fragment and fragment not in all_dests[url]:`
			`errors.append("Missing link target: " + what)`
			`status = "❌"`
			`else:`
			`status = "✓"`
			`elif check:`
Update links_and_dests.py to support HTML files and rate limiting (part 1). Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-09-09 06:57:51 -07:00			`# If url == last_url, there is no need to refetch content. This is an optimization when`
			`# checking URLs with the same site but different fragments (which will be sorted together).`
			`if url != last_url:`
Add support for link checking to protocol/links_and_dests.py and protocol/Makefile. Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-04-08 04:04:48 -07:00			`headers = {"User-Agent": "Mozilla/5.0"}`
Update links_and_dests.py to support HTML files and rate limiting (part 1). Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-09-09 06:57:51 -07:00			`https_handler = HTTPSHandler(context=ssl.create_default_context(cafile=certifi.where()))`

protocol/links_and_dests.py: Some DOI links (i.e. to https://doi.org/) redirect to link.springer.com in a way that requires cookies (booo!). We allow this for DOI links, but for all other links we simulate a client that never sets cookies. Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-09-01 05:11:10 -07:00			`# Some DOI links (i.e. to https://doi.org/) redirect to link.springer.com`
			`# in a way that requires cookies (booo!). We allow this for DOI links,`
			`# but for all other links we simulate a client that never sets cookies.`
			`if l.startswith("https://doi.org/"):`
Update links_and_dests.py to support HTML files and rate limiting (part 1). Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-09-09 06:57:51 -07:00			`opener = build_opener(HTTPCookieProcessor(), https_handler)`
protocol/links_and_dests.py: Some DOI links (i.e. to https://doi.org/) redirect to link.springer.com in a way that requires cookies (booo!). We allow this for DOI links, but for all other links we simulate a client that never sets cookies. Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-09-01 05:11:10 -07:00			`else:`
Update links_and_dests.py to support HTML files and rate limiting (part 1). Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-09-09 06:57:51 -07:00			`opener = build_opener(https_handler)`

			`for retry in range(2):`
			`try:`
			`response = opener.open(Request(url=l, headers=headers))`
			`content_type = response.info().get_content_type()`
			`content = response.read()`
			`last_url = url`
			`except URLError as e:`
			`if retry == 0 and isinstance(e, HTTPError) and e.code == 429:`
			`try:`
			`delay = int(e.headers['Retry-After'], 10) + 1`
			`except Exception:`
			`delay = 60`

			`print("(waiting %ds due to rate limiting)" % (delay,), end=" ")`
			`sys.stdout.flush()`
			`sleep(delay)`
			`continue`

			`errors.append("Could not open link: %s due to %r" % (what, e))`
			`status = "❌"`
			`content_type = None`
			`content = None`
			`last_url = None`

			`dests = None`
			`break`

			`if content is not None:`
			`if fragment:`
			`if dests is None:`
links_and_dests.py: fix false positive "Missing link target" errors for links into rendered BIPs. Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-12-03 06:54:22 -08:00			`if content_type in ('text/html', 'application/xhtml+xml'):`
Update links_and_dests.py to support HTML files and rate limiting (part 1). Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-09-09 06:57:51 -07:00			`(_, dests) = get_links_and_destinations_from_html(BytesIO(content))`
			`elif content_type == 'application/pdf':`
			`(_, dests) = get_links_and_destinations_from_pdf(BytesIO(content))`

			`if dests is None:`
			`print("(link target not checked)", end=" ")`
			`status = "✓"`
			`elif fragment not in dests:`
			`errors.append("Missing link target: " + what)`
			`status = "❌"`
			`else:`
			`status = "✓"`
			`else:`
			`status = "✓"`
			`else:`
			`errors.append("Insecure or unrecognized protocol in link: " + what)`
			`status = "❌"`
Add support for link checking to protocol/links_and_dests.py and protocol/Makefile. Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-04-08 04:04:48 -07:00
			`print(status)`

			`if print_dests:`
Update links_and_dests.py to support HTML files and rate limiting (part 1). Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-09-09 06:57:51 -07:00			`for (path, dests) in all_dests.items():`
			`if path + ".html" not in all_dests: # avoid duplication`
			`print("\nDestinations for %s:" % (path,))`
			`for d in dests:`
			`print(d)`
Add links_and_dests.py. This can be used to print outgoing links and targets in the PDF, and detect a subset of errors. It depends on the PyPDF2 library (pip3 install PyPDF2). Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-04-05 13:39:51 -07:00
			`if errors:`
			`print("\nErrors:")`
			`for e in errors:`
			`print(e)`

			`return 0`

Update links_and_dests.py to support HTML files and rate limiting (part 1). Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-09-09 06:57:51 -07:00
Add links_and_dests.py. This can be used to print outgoing links and targets in the PDF, and detect a subset of errors. It depends on the PyPDF2 library (pip3 install PyPDF2). Signed-off-by: Daira Hopwood <daira@jacaranda.org> 2021-04-05 13:39:51 -07:00			`if __name__ == '__main__':`
			`sys.exit(main(sys.argv))`