From a5c4f139c9eb1d2e4fb409de25ee82996e59194a Mon Sep 17 00:00:00 2001 From: Daira Hopwood Date: Wed, 1 Sep 2021 13:11:10 +0100 Subject: [PATCH] protocol/links_and_dests.py: Some DOI links (i.e. to https://doi.org/) redirect to link.springer.com in a way that requires cookies (booo!). We allow this for DOI links, but for all other links we simulate a client that never sets cookies. Signed-off-by: Daira Hopwood --- protocol/links_and_dests.py | 13 ++++++++++--- protocol/protocol.tex | 2 ++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/protocol/links_and_dests.py b/protocol/links_and_dests.py index 1267c753..a6242929 100755 --- a/protocol/links_and_dests.py +++ b/protocol/links_and_dests.py @@ -7,7 +7,7 @@ except ImportError: print("Please install the PyPDF2 library using `pip3 install PyPDF2`.\n") raise -from urllib.request import urlopen, Request +from urllib.request import build_opener, HTTPCookieProcessor, Request from urllib.error import URLError from os.path import basename from collections import deque @@ -78,8 +78,15 @@ def main(args): elif check: try: headers = {"User-Agent": "Mozilla/5.0"} - res = urlopen(Request(url=l, headers=headers)) - res.read() + # Some DOI links (i.e. to https://doi.org/) redirect to link.springer.com + # in a way that requires cookies (booo!). We allow this for DOI links, + # but for all other links we simulate a client that never sets cookies. + if l.startswith("https://doi.org/"): + opener = build_opener(HTTPCookieProcessor()) + else: + opener = build_opener() + response = opener.open(Request(url=l, headers=headers)) + response.read() status = "✓" except URLError as e: errors.append("Could not open link: %s due to %r" % (what, e)) diff --git a/protocol/protocol.tex b/protocol/protocol.tex index 8b19daf3..34549355 100644 --- a/protocol/protocol.tex +++ b/protocol/protocol.tex @@ -14511,6 +14511,8 @@ Peter Newell's illustration of the Jubjub bird, from \cite{Carroll1902}. } %nufive \item Fix a reference to nonexistent version 2019.0-beta-40 of this specification (in \crossref{diffadjustment}) that should be \historyref{2019.0.0}. + \item Improve \texttt{protocol/links\_and\_dests.py} to eliminate false positives + when checking DOI links. \end{itemize}