#!/usr/bin/env bash
##################################################################################################
# brokenlinks.sh #
# 02/18/2021 #
# Written By David Holdeman #
# Searches for broken links in a GitHub Wiki repo, and suggests and applies corrections. #
# Usage: brokenlinks.sh [-s non-interactive] [-d debug] <optional file(s)> ... #
##################################################################################################
# These two functions are used to escape variables for use in a sed command
# Passed a single string
# Turn arbitrary text into a sed search pattern that matches it literally.
# Every character except '^' is wrapped in its own bracket expression, and
# '^' is backslash-escaped instead.
# Arguments: $1 - the text to match literally
# Outputs:   the escaped pattern on stdout
escape() {
  local text="$1"
  printf '%s\n' "$text" \
    | sed -e 's/[^^]/[&]/g' -e 's/\^/\\^/g'
}
export -f escape
# Make a string safe to use as the replacement part of a sed 's/…/…/' command
# by backslash-escaping '&', '/' and '\'.
# Arguments: $1 - the replacement text
# Outputs:   the escaped text on stdout
escapeReplace() {
  local text="$1"
  printf '%s\n' "$text" | sed -e 's/[&/\]/\\&/g'
}
export -f escapeReplace
# Check one link target taken from a markdown file and, if it looks wrong,
# offer the user replacement candidates.
#
# Globals:
#   SCRIPT (read) - when 1 or more, never prompt; problems are only reported
#   LIST   (read) - newline-separated paths of the known .md pages ("./Home.md")
# Arguments:
#   $1 - path of the markdown file that contains the link
#   $2 - link target without its #fragment
#   $3 - the #fragment including the leading '#', or an empty string
# Outputs:
#   stdout - the link, when it refers to a .md page (for use in checkhash)
#   stderr - the report shown to the user and the interactive prompt
# Return status:
#   0: link is a good .md page
#   1: link is bad
#   2: link is not a .md page
checkurl() {
  local file="$1"
  local link="$2"
  local hash="$3"
  local interactive=1
  local oldlink pick page search candidates count target is_md pattern replacement

  if [[ "${SCRIPT:-0}" -ge 1 ]]; then
    interactive=0
  fi

  # If it's an internet link, ignore it.
  # That's beyond the scope of this tool.
  if [[ "$link" == http* ]]; then
    return 2
  fi

  # At some point in this script's development, fixed links to files/images
  # were given the './' prefix. That didn't really hurt anything, but it's not
  # idiomatic, so offer to strip a leading './' or '/'.
  if [[ "$link" == /* || "$link" == ./* ]]; then
    oldlink="$link"
    # Correct the link
    link=$(printf '%s\n' "$link" | sed 's/^.\{0,1\}\///')
    (
      # Serialise terminal interaction between parallel workers.
      flock -x 200
      # Print the file and the old link
      printf 'In %s:\n' "$file" >&2
      printf '%s\n' "$oldlink" >&2
      # Print the option as a numbered list to have the same UI as the other
      # type of correction.
      printf '%s\n' "$link" | cat -n >&2
      if ((interactive)); then
        echo "Type a number, then hit return to select an alternative, or just hit return to skip fixing:" >&2
        # Read the user input
        read -r pick
        # An empty or non-numeric answer means "skip"; comparing it with
        # `[ … -eq 1 ]` would raise an "integer expression expected" error.
        if [[ "$pick" =~ ^[0-9]+$ ]] && ((10#$pick == 1)); then
          # Parentheses are placed around both the old link and the new one to
          # ensure we replace the link, and not some other place in the file
          # that happens to use the same words.
          pattern=$(escape "(${oldlink}${hash})")
          replacement=$(escapeReplace "${link}${hash}")
          sed -i "s/$pattern/\($replacement\)/" "$file"
        fi
        # We don't return here because the link we fixed might be broken.
      fi
    ) 200>brokenlinks.lock
  fi

  # Skip links that are to an .md file and aren't broken.
  # The link is compared literally against the end of each known page path;
  # using it as an unanchored regex would accept "ome" because of "Home.md".
  while IFS= read -r page; do
    if [[ "$page" == */"${link}.md" ]]; then
      # print the URL for use in checkhash
      printf '%s\n' "$link"
      return 0
    fi
  done <<<"${LIST:-}"

  # Skip non-md links if they're not broken.
  if [[ "$link" != *.md && -e "$link" ]]; then
    return 2
  fi

  # Build the search term we will look for.
  # All hyphens, underscores and spaces are replaced with asterisks, so we
  # can find files with mismatched hyphens or underscores.
  search="*$(basename -- "$link" | sed 's/[-_ ]/*/g')*"
  # Search for matching files.
  candidates=$(find . -iname "$search")

  (
    flock -x 200
    # Print the filename and the broken link.
    printf 'In %s:\n' "$file" >&2
    printf '%s\n' "$link" >&2
    # If there are no files, skip to next link.
    if [[ -z "$candidates" ]]; then
      echo "Could not find" >&2
      exit 1
    fi
    # List the potential files, with numbers.
    printf '%s\n' "$candidates" | cat -n >&2
    if ! ((interactive)); then
      exit 1
    fi
    echo "Type a number, then hit return to select an alternative, or just hit return to skip fixing:" >&2
    # Read the user input
    read -r pick
    # If the selection isn't a number, skip to the next link.
    if ! [[ "$pick" =~ ^[0-9]+$ ]]; then
      exit 1
    fi
    # A number outside the list is treated like "skip": 0 would otherwise
    # select nothing and rewrite the link to "()".
    count=$(printf '%s\n' "$candidates" | wc -l)
    if ((10#$pick < 1 || 10#$pick > count)); then
      exit 1
    fi
    # Get the selected file path, without the preceding ./
    target=$(printf '%s\n' "$candidates" | sed -n "$((10#$pick))p")
    target="${target#./}"
    is_md=0
    if [[ "$target" == *.md ]]; then
      is_md=1
      # Pages are linked by name only, without directory or extension.
      target=$(basename -- "$target" .md)
    fi
    # Replace the old link with the new one, parentheses included (see above).
    pattern=$(escape "(${link}${hash})")
    replacement=$(escapeReplace "${target}${hash}")
    sed -i "s/$pattern/\($replacement\)/" "$file"
    # print the URL for use in checkhash
    # NOTE(review): this prints the link as it was before the fix, not the
    # newly selected target - confirm which one checkhash should receive.
    printf '%s\n' "$link"
    if ((is_md)); then
      exit 0
    fi
    exit 2
  ) 200>brokenlinks.lock
  return $?
}
export -f checkurl
# Placeholder for validating the #fragment part of a link.
# It inspects none of its arguments yet and always reports success.
# Returns: 0
checkhash() {
  # TODO check hash fragment validity
  true
}
export -f checkhash
# Main processing function
# Extracts every markdown link target "](…)" from a file, passes it to
# checkurl and, when the link has a #fragment and checkurl returned 0, to
# checkhash.
# Globals:
#   DEBUG (read) - when 1, a timestamped line is printed for every link
# Arguments:
#   $1 - path to a .md file
# Returns:
#   0 when no link was reported bad, 1 otherwise
searchfile() {
  local status=0
  local link url hash url_status

  # This loops for every link in the file.
  # See the end of the function for the grep that finds the links in the file.
  # We use file descriptor 3, because if we used stdin, the read calls made
  # while handling a link would read from that instead of reading the user's
  # input.
  while IFS= read -r -u 3 link; do
    if [[ "$link" == *'#'* ]]; then
      # Split at the first '#'; the fragment keeps any further '#' so that
      # url + fragment is still exactly the text that appears in the file.
      url="${link%%#*}"
      hash="#${link#*#}"
    else
      url="$link"
      hash=""
    fi

    url_status=0
    if [[ -n "$url" ]]; then
      # checkurl prints the link to hand on to checkhash.
      url=$(checkurl "$1" "$url" "$hash")
      url_status=$?
      if [[ "$url_status" -eq 1 ]]; then
        status=1
      fi
    fi

    if [[ -n "$hash" && "$url_status" -eq 0 ]]; then
      if ! checkhash "$1" "$hash" "$url"; then
        status=1
      fi
    fi

    if [[ "${DEBUG:-0}" -eq 1 ]]; then
      echo "$(date +%T.%N) $1 $url $hash"
    fi
    # The regex below finds the links in the file that is passed to searchfile.
    # An optional "title" and surrounding whitespace are removed first, then
    # the optional <angle brackets> around the target.
    # Results are fed to file descriptor 3 for the reasons previously explained.
  done 3< <(
    grep -oP -- '(?<=\]\().*?(?=[\)])' "$1" \
      | cut -d '"' -f1 \
      | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' -e 's/^<//' -e 's/>$//'
  )
  return "$status"
}
export -f searchfile
# Entry point: parse the command line, collect the wiki pages and check links.
#   -s  non-interactive: report problems, never prompt (exports SCRIPT=1)
#   -d  debug: print a line for every link processed (exports DEBUG=1)
#   anything else is taken as a file to check
FILES=()
export SCRIPT=0
export DEBUG=0
for arg in "$@"; do
  case "$arg" in
    -s) SCRIPT=1 ;;
    -d) DEBUG=1 ;;
    *) FILES+=("$arg") ;;
  esac
done

# Every .md page in the repo, except those whose name starts with '_'.
# Exported because the functions run in child bash processes under xargs.
LIST=$(find . -iname "*.md" ! -name '_*')
export LIST

if [[ "${#FILES[@]}" -gt 0 ]]; then
  # Check every file named on the command line; fail if any of them had a
  # bad link.
  status=0
  for f in "${FILES[@]}"; do
    searchfile "$f" || status=1
  done
  exit "$status"
elif [[ -n "$LIST" ]]; then
  # run searchfile on every .md file in the repo, one worker per processor.
  # The names are passed with -a rather than on stdin; per the GNU xargs
  # manual this leaves stdin unchanged for the commands it runs, which the
  # interactive prompts rely on.
  xargs -0 -P "$(nproc --all)" -a <(printf '%s\n' "$LIST" | tr '\n' '\0') -I {} bash -c 'searchfile "$@"' _ {}
fi