diff --git a/protocol/links_and_dests.py b/protocol/links_and_dests.py
index a6242929..fa113a74 100755
--- a/protocol/links_and_dests.py
+++ b/protocol/links_and_dests.py
@@ -1,19 +1,34 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
-try:
-    from PyPDF2 import PdfFileReader
-except ImportError:
-    print("Please install the PyPDF2 library using `pip3 install PyPDF2`.\n")
-    raise
-
-from urllib.request import build_opener, HTTPCookieProcessor, Request
-from urllib.error import URLError
-from os.path import basename
+from urllib.request import build_opener, HTTPCookieProcessor, HTTPSHandler, Request
+from urllib.error import URLError, HTTPError
+from os.path import relpath
 from collections import deque
 import sys
+from time import sleep
+import ssl
+from io import BytesIO
+
+try:
+    from bs4 import BeautifulSoup
+    import html5lib
+    import certifi
+except ImportError:
+    print("Please install the BeautifulSoup, html5lib, and certifi libraries using `pip install bs4 html5lib certifi`.\n")
+    raise
+
+if [int(v) for v in certifi.__version__.split('.')] < [2021, 5, 30]:
+    print("Please upgrade certifi using `pip install --upgrade certifi`.\n")
+    sys.exit(1)
+
+def get_links_and_destinations_from_pdf(f):
+    try:
+        from PyPDF2 import PdfFileReader
+    except ImportError:
+        print("Please install the PyPDF2 library using `pip install PyPDF2`.\n")
+        raise
 
 
-def get_links_and_destinations(f):
     # Based on
     pdf = PdfFileReader(f)
@@ -26,14 +41,36 @@ def get_links_and_destinations(f):
 
             if uri is not None and uri not in links:
                 links.add(uri)
 
-    dests = pdf.getNamedDestinations()
+    dests = pdf.getNamedDestinations().keys()
     return (links, dests)
 
 
+def get_links_and_destinations_from_html(f):
+    links = set()
+    internal = set()
+    dests = set()
+
+    soup = BeautifulSoup(f.read(), "html5lib")
+    for link in soup.find_all('a'):
+        if link.has_attr('href'):
+            url = link['href']
+            (internal if url.startswith('#') else links).add(url)
+
+        if link.has_attr('name'):
+            dests.add(link['name'])
+
+    for link in soup.find_all(id=True):
+        dests.add(link['id'])
+
+    internal.difference_update(['#' + d for d in dests]) # ignore internal links satisfied by a dest
+    links.update(internal)
+    return (links, dests)
+
+
 def main(args):
     if len(args) < 2:
-        print("Usage: ./links_and_dests.py [--check] [--print-dests] <pdf file>...")
+        print("Usage: ./links_and_dests.py [--check] [--print-dests] <file>...")
         return 1
 
     check = '--check' in args[1:]
@@ -43,32 +80,55 @@ def main(args):
     all_links = {} # url -> pdf_paths
     all_dests = {} # url -> dests
 
-    for pdf_path in paths:
-        with open(pdf_path, 'rb') as f:
-            (links, dests) = get_links_and_destinations(f)
+    errors = deque()
+    print("Reading files...")
+    for path in paths:
+        print(path, end=" ")
+        sys.stdout.flush()
+
+        with open(path, 'rb') as f:
+            if path.endswith(".html") or path.endswith(".xhtml"):
+                (links, dests) = get_links_and_destinations_from_html(f)
+            elif path.endswith(".pdf"):
+                (links, dests) = get_links_and_destinations_from_pdf(f)
+            else:
+                errors.append("Unrecognized file type: " + path)
+                continue
+
+        path = relpath(path)
 
         for l in links:
            refs = all_links.get(l, None)
            if refs is None:
                all_links[l] = refs = deque()
-           refs.append(pdf_path)
+           refs.append(path)
 
-        all_dests["https://zips.z.cash/protocol/" + basename(pdf_path)] = dests
-
-    errors = deque()
+        all_dests["https://zips.z.cash/" + path] = dests
+        if path.endswith(".html"):
+            all_dests["https://zips.z.cash/" + path[:-5]] = dests
 
+    print("\n")
     print("Links:")
+
+    last_url = None
+    content = None
+    content_type = None
+    dests = None
+
     for (l, p) in sorted(all_links.items()):
         print(l, end=" ")
         sys.stdout.flush()
 
         what = "%s (occurs in %s)" % (l, " and ".join(p)) if len(paths) > 1 else l
         status = ""
-        if not l.startswith("https:"):
-            errors.append("Insecure or unrecognized protocol in link: " + what)
-            status = "❌"
-        else:
+        if ":" not in l:
+            l = "https://zips.z.cash/" + l
+
+        if l.startswith("mailto:"):
+            status = "(not checked)"
+        elif l.startswith("https:") or l.startswith("HTTP:"): # use uppercase HTTP: for links with no https: equivalent
             (url, _, fragment) = l.partition("#")
+
             if url in all_dests:
                 if fragment and fragment not in all_dests[url]:
                     errors.append("Missing link target: " + what)
@@ -76,29 +136,77 @@
                 else:
                     status = "✓"
             elif check:
-                try:
+                # If url == last_url, there is no need to refetch content. This is an optimization when
+                # checking URLs with the same site but different fragments (which will be sorted together).
+                if url != last_url:
                     headers = {"User-Agent": "Mozilla/5.0"}
+                    https_handler = HTTPSHandler(context=ssl.create_default_context(cafile=certifi.where()))
+
                     # Some DOI links (i.e. to https://doi.org/) redirect to link.springer.com
                     # in a way that requires cookies (booo!). We allow this for DOI links,
                     # but for all other links we simulate a client that never sets cookies.
                     if l.startswith("https://doi.org/"):
-                        opener = build_opener(HTTPCookieProcessor())
+                        opener = build_opener(HTTPCookieProcessor(), https_handler)
                     else:
-                        opener = build_opener()
-                    response = opener.open(Request(url=l, headers=headers))
-                    response.read()
-                    status = "✓"
-                except URLError as e:
-                    errors.append("Could not open link: %s due to %r" % (what, e))
-                    status = "❌"
+                        opener = build_opener(https_handler)
+
+                    for retry in range(2):
+                        try:
+                            response = opener.open(Request(url=l, headers=headers))
+                            content_type = response.info().get_content_type()
+                            content = response.read()
+                            last_url = url
+                        except URLError as e:
+                            if retry == 0 and isinstance(e, HTTPError) and e.code == 429:
+                                try:
+                                    delay = int(e.headers['Retry-After'], 10) + 1
+                                except Exception:
+                                    delay = 60
+
+                                print("(waiting %ds due to rate limiting)" % (delay,), end=" ")
+                                sys.stdout.flush()
+                                sleep(delay)
+                                continue
+
+                            errors.append("Could not open link: %s due to %r" % (what, e))
+                            status = "❌"
+                            content_type = None
+                            content = None
+                            last_url = None
+
+                        dests = None
+                        break
+
+                if content is not None:
+                    if fragment:
+                        if dests is None:
+                            if content_type == 'text/html':
+                                (_, dests) = get_links_and_destinations_from_html(BytesIO(content))
+                            elif content_type == 'application/pdf':
+                                (_, dests) = get_links_and_destinations_from_pdf(BytesIO(content))
+
+                        if dests is None:
+                            print("(link target not checked)", end=" ")
+                            status = "✓"
+                        elif fragment not in dests:
+                            errors.append("Missing link target: " + what)
+                            status = "❌"
+                        else:
+                            status = "✓"
+                    else:
+                        status = "✓"
+        else:
+            errors.append("Insecure or unrecognized protocol in link: " + what)
+            status = "❌"
 
         print(status)
 
     if print_dests:
-        for dests in all_dests:
-            print("\nDestinations for %s:" % (dests,))
-            for d in dests:
-                print(d)
+        for (path, dests) in all_dests.items():
+            if path + ".html" not in all_dests: # avoid duplication
+                print("\nDestinations for %s:" % (path,))
+                for d in dests:
+                    print(d)
 
     if errors:
         print("\nErrors:")
@@ -107,5 +215,6 @@
 
     return 0
 
+
 if __name__ == '__main__':
     sys.exit(main(sys.argv))
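
Reviewer note (not part of the patch): a minimal, self-contained sketch of what the
new get_links_and_destinations_from_html() returns, with its logic inlined so it can
be run without the script. The sample HTML is hypothetical; it only illustrates the
patch's semantics, in which fragment-only hrefs that are satisfied by a destination
on the same page are dropped, and the rest are reported as links to check.

    from io import BytesIO
    from bs4 import BeautifulSoup

    def get_links_and_destinations_from_html(f):
        # Inlined copy of the patched function's logic, for demonstration only.
        links = set()
        internal = set()
        dests = set()

        soup = BeautifulSoup(f.read(), "html5lib")
        for link in soup.find_all('a'):
            if link.has_attr('href'):
                url = link['href']
                (internal if url.startswith('#') else links).add(url)

            if link.has_attr('name'):
                dests.add(link['name'])

        for link in soup.find_all(id=True):
            dests.add(link['id'])

        internal.difference_update(['#' + d for d in dests])
        links.update(internal)
        return (links, dests)

    html = b'''<html><body>
      <h1 id="intro">Intro</h1>
      <a name="legacy-anchor"></a>
      <a href="#intro">internal link satisfied by a dest</a>
      <a href="#missing">dangling internal link</a>
      <a href="https://zips.z.cash/protocol/protocol.pdf">external link</a>
    </body></html>'''

    (links, dests) = get_links_and_destinations_from_html(BytesIO(html))
    assert dests == {'intro', 'legacy-anchor'}
    assert links == {'#missing', 'https://zips.z.cash/protocol/protocol.pdf'}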
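
Reviewer note (not part of the patch): how the new all_dests bookkeeping resolves a
fragment link against a local file. The file name zip-0000.html and the destination
ids are hypothetical. Each file is registered under https://zips.z.cash/ plus its
relative path, and .html files are additionally registered under the extensionless
URL (path[:-5]), presumably because the site serves both forms; a link's fragment is
then checked against the dests recorded for its URL.

    all_dests = {}
    dests = {'abstract', 'motivation'}   # destination ids found in the (hypothetical) file
    path = 'zip-0000.html'

    all_dests["https://zips.z.cash/" + path] = dests
    if path.endswith(".html"):
        all_dests["https://zips.z.cash/" + path[:-5]] = dests

    # A link to the extensionless form with a fragment resolves without any fetch.
    (url, _, fragment) = "https://zips.z.cash/zip-0000#abstract".partition("#")
    assert url in all_dests and fragment in all_dests[url]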
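
Reviewer note (not part of the patch): the retry loop's Retry-After handling in
isolation. A 429 response is retried once, sleeping for the server-suggested delay
plus one second, or 60 seconds when the header is missing or unparseable; the helper
name and header values below are illustrative only.

    def retry_delay(retry_after_header):
        # Mirrors the patch: integer seconds + 1, falling back to 60 on any parse failure.
        try:
            return int(retry_after_header, 10) + 1
        except Exception:
            return 60

    assert retry_delay('30') == 31
    assert retry_delay(None) == 60              # missing header
    assert retry_delay('Wed, 21 Oct') == 60     # HTTP-date form is not parsed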