Add option to test external links

This commit is contained in:
Luca Prete 2022-03-28 18:53:32 +02:00
parent ba30ceb5c1
commit 603732f7e8
2 changed files with 22 additions and 6 deletions

View File

@ -21,29 +21,42 @@ destinations. Its main use is in CI pipelines triggered by pull requests.
import collections
import pathlib
import requests
import urllib.parse
import click
import marko
# HTTP status codes that mark an external link as broken.
# NOTE(review): the previous list contained 401 twice; 400 may have been
# intended for one of the entries -- confirm before adding it.
BAD_STATUS_CODES = [401, 403, 404, 500]
# Parent of the directory that contains this file; directory names passed on
# the command line are resolved against it.
BASEDIR = pathlib.Path(__file__).resolve().parents[1]
# Record describing one checked document and the links found in it.
DOC = collections.namedtuple('DOC', 'path relpath links')
# Record describing one checked link: its destination and whether it is valid.
LINK = collections.namedtuple('LINK', 'dest valid')
def check_link(link, readme_path, external=False):
    """Check whether a link element has a valid destination.

    Args:
        link: link element exposing its target in the ``dest`` attribute.
        readme_path: path of the file containing the link; destinations
            without a URL scheme are resolved against its parent directory.
        external: when true, ``http``/``https`` destinations are fetched and
            judged by their HTTP status code. When false (the default),
            every destination that carries a scheme is assumed valid.

    Returns:
        A ``LINK(dest, valid)`` tuple.
    """
    url = urllib.parse.urlparse(link.dest)
    # Echo each destination as it is examined.
    print(link.dest)

    # No scheme: the destination is a path relative to the containing file.
    if not url.scheme:
        link_valid = (readme_path.parent / url.path).exists()
        return LINK(link.dest, link_valid)

    # Destinations with a scheme are considered valid unless external
    # checking was requested. Schemes that cannot be fetched over HTTP
    # (mailto:, ftp:, ...) keep that default instead of making requests
    # raise InvalidSchema.
    if not external or url.scheme not in ('http', 'https'):
        return LINK(link.dest, True)

    user_agent = {'User-Agent': 'Chrome/51.0.2704.64'}
    try:
        # A timeout keeps a single unresponsive host from hanging the run.
        response = requests.get(link.dest, headers=user_agent, timeout=10)
    except requests.exceptions.RequestException:
        # Connection failures, timeouts, redirect loops, malformed URLs:
        # report the link as broken rather than aborting the whole check.
        return LINK(link.dest, False)

    link_valid = response.status_code not in BAD_STATUS_CODES
    return LINK(link.dest, link_valid)
def check_docs(dir_name):
def check_docs(dir_name, external):
'Traverses dir_name and checks for all Markdown files.'
dir_path = BASEDIR / dir_name
parser = marko.parser.Parser()
@ -57,7 +70,7 @@ def check_docs(dir_name):
while elements:
el = elements.popleft()
if isinstance(el, marko.inline.Link):
links.append(check_link(el, readme_path))
links.append(check_link(el, readme_path, external))
elif hasattr(el, 'children'):
elements.extend(el.children)
@ -66,12 +79,14 @@ def check_docs(dir_name):
@click.command()
@click.argument('dirs', type=str, nargs=-1)
def main(dirs):
@click.option('-e', '--external', is_flag=True, default=False,
help='Whether to test external links.')
def main(dirs, external):
'Checks links in Markdown files contained in dirs.'
errors = 0
for dir_name in dirs:
print(f'----- {dir_name} -----')
for doc in check_docs(dir_name):
for doc in check_docs(dir_name, external):
state = '' if all(l.valid for l in doc.links) else ''
print(f'[{state}] {doc.relpath} ({len(doc.links)})')
if state == '':

View File

@ -1,4 +1,5 @@
click
marko
requests
yamale
yapf