protocol/links_and_dests.py: Some DOI links (i.e. to https://doi.org/) redirect to link.springer.com

in a way that requires cookies (booo!). We allow cookies for DOI links, but for all other links we
simulate a client that never accepts cookies.

Signed-off-by: Daira Hopwood <daira@jacaranda.org>
This commit is contained in:
Daira Hopwood 2021-09-01 13:11:10 +01:00
parent a918bbc6d7
commit a5c4f139c9
2 changed files with 12 additions and 3 deletions

View File

@ -7,7 +7,7 @@ except ImportError:
print("Please install the PyPDF2 library using `pip3 install PyPDF2`.\n")
raise
from urllib.request import urlopen, Request
from urllib.request import build_opener, HTTPCookieProcessor, Request
from urllib.error import URLError
from os.path import basename
from collections import deque
@ -78,8 +78,15 @@ def main(args):
elif check:
try:
headers = {"User-Agent": "Mozilla/5.0"}
res = urlopen(Request(url=l, headers=headers))
res.read()
# Some DOI links (i.e. to https://doi.org/) redirect to link.springer.com
# in a way that requires cookies (booo!). We allow this for DOI links,
# but for all other links we simulate a client that never sets cookies.
if l.startswith("https://doi.org/"):
opener = build_opener(HTTPCookieProcessor())
else:
opener = build_opener()
response = opener.open(Request(url=l, headers=headers))
response.read()
status = ""
except URLError as e:
errors.append("Could not open link: %s due to %r" % (what, e))

View File

@ -14511,6 +14511,8 @@ Peter Newell's illustration of the Jubjub bird, from \cite{Carroll1902}.
} %nufive
\item Fix a reference to nonexistent version 2019.0-beta-40 of this specification
(in \crossref{diffadjustment}) that should be \historyref{2019.0.0}.
\item Improve \texttt{protocol/links\_and\_dests.py} to eliminate false positives
when checking DOI links.
\end{itemize}