#!/usr/bin/env bash

##################################################################################################
#                                         brokenlinks.sh                                         #
#                                           02/18/2021                                           #
#                                   Written By David Holdeman                                    #
#     Searches for broken links in a Github Wiki repo, and suggests and applies corrections.     #
##################################################################################################

# These two functions are used to escape variables for use in a sed command.
# Each one is passed a single string.

#######################################
# Turns a literal string into a sed regular expression that matches exactly
# that string.
# Every character except ^ is wrapped in its own bracket expression ([x]) so
# that it loses any special meaning. A caret cannot be neutralised that way,
# so it is backslash-escaped instead.
# Arguments: $1 - the literal string
# Outputs:   the escaped pattern on stdout
#######################################
escape() {
  sed -e 's/[^^]/[&]/g' -e 's/\^/\\^/g' <<<"$1"
}
# Exported so that a child bash process can call it.
export -f escape

#######################################
# Escapes a literal string for use as the replacement half of a sed
# s/pattern/replacement/ command.
# The characters &, / and \ are each prefixed with a backslash; everything
# else is passed through unchanged.
# Arguments: $1 - the literal replacement text
# Outputs:   the escaped text on stdout
#######################################
escapeReplace() {
  sed -e 's/[&/\]/\\&/g' <<<"$1"
}
# Exported so that a child bash process can call it.
export -f escapeReplace

#######################################
# Main processing function.
# Goes through every Markdown link target "](target)" in one .md file, skips
# the targets that are external or that already resolve, and for each
# remaining one lists candidate files and asks the user to pick a
# replacement. An accepted replacement is written back to the file in place.
#
# Github/gollum treat the two kinds of link differently:
#   - a link to a page (.md file) must be the file name without ".md";
#   - a link to anything else must be the path relative to the repo root.
#
# Paths are resolved against the current directory, which is expected to be
# the root of the wiki repo.
# Globals:   none (calls the escape and escapeReplace functions)
# Arguments: $1 - path to a .md file
# Inputs:    the user's numeric choices, one per prompt, on stdin
# Outputs:   the file name, the broken link and a numbered candidate list
#            on stdout
#######################################
searchfile() {
  local file=$1
  local link newlink pick search found chosen target old_re new_txt
  local -a candidates
  # A URL scheme ("https://", "ftp://", "mailto:") or a leading slash.
  local -r external_re='^([A-Za-z][A-Za-z0-9+.-]*://|mailto:|/)'
  local -r number_re='^[0-9]+$'

  # This loops for every link in the file; the pipeline that finds the links
  # is attached to the end of the loop.
  # The links arrive on file descriptor 3 because the read calls inside the
  # loop need stdin for the user's input.
  while IFS= read -r -u 3 link; do
    # Nothing to check for an empty target.
    [[ -n $link ]] || continue

    # If it's an internet link or an absolute path, ignore it.
    # That's beyond the scope of this tool.
    if [[ $link =~ $external_re ]]; then
      continue
    fi

    # Links carrying a needless "./" prefix: offer to strip it.
    if [[ $link == ./?* ]]; then
      newlink=${link#./}
      echo "In $file:"
      printf '%s\n' "$link"
      # Shown as a one-item numbered list so the UI matches the other prompts.
      printf '%s\n' "$newlink" | cat -n
      pick=
      read -r pick
      if [[ $pick =~ $number_re ]] && ((10#$pick == 1)); then
        # Parentheses are placed around both the old link and the new one so
        # that only the link is replaced, and not some other place in the
        # file that happens to use the same words.
        old_re=$(escape "(${link})")
        new_txt=$(escapeReplace "$newlink")
        sed -i -e "s/${old_re}/(${new_txt})/" -- "$file"
        # The file now contains the stripped link, so the checks below must
        # look at (and, if needed, replace) that text.
        link=$newlink
      fi
      # No continue here: the link we just fixed might still be broken.
    fi

    # Skip links that are to an .md file and aren't broken.
    if [[ -n $(find . -name "${link}.md" -print -quit 2>/dev/null) ]]; then
      continue
    fi

    # Skip links that are to a hash fragment.
    if [[ $link == *'#'* ]]; then
      continue
    fi

    # Skip links to non-.md files that exist at the given path.
    if [[ $link != *.md && -e $link ]]; then
      continue
    fi

    # Print the filename and the broken link.
    echo "In $file:"
    printf '%s\n' "$link"

    # Build the search term. Hyphens, underscores and spaces are replaced
    # with asterisks so files with mismatched separators are still found.
    search="*$(basename -- "$link" | sed 's/[-_ ]/*/g')*"

    # Search for matching files.
    candidates=()
    while IFS= read -r found; do
      candidates+=("$found")
    done < <(find . -iname "$search")

    # If there are no files, skip to the next link.
    if ((${#candidates[@]} == 0)); then
      echo "Could not find"
      continue
    fi

    # List the potential files, with numbers, and read the user's choice.
    printf '%s\n' "${candidates[@]}" | cat -n
    pick=
    read -r pick

    # Anything that is not a number in the listed range means "skip".
    # Without the range check, 0 would rewrite the link to "()".
    if ! [[ $pick =~ $number_re ]]; then
      continue
    fi
    if ((10#$pick < 1 || 10#$pick > ${#candidates[@]})); then
      continue
    fi

    # Get the selected file path, without the preceding ./
    chosen=${candidates[$((10#$pick - 1))]}
    chosen=${chosen#./}

    # Pages are linked by bare name, everything else by repo-relative path.
    if [[ $chosen == *.md ]]; then
      target=$(basename -- "$chosen" .md)
    else
      target=$chosen
    fi

    # Replace the old link with the new one, parentheses included (see above).
    old_re=$(escape "(${link})")
    new_txt=$(escapeReplace "$target")
    sed -i -e "s/${old_re}/(${new_txt})/" -- "$file"

  # This pipeline finds the links in the file: grep extracts the text between
  # "](" and the next ")", then surrounding <>, anything from the first
  # double quote onward, and surrounding whitespace are removed.
  done 3< <(grep -oP '(?<=\]\().*?(?=[\)])' "$file" \
    | sed -e 's/^<//g' -e 's/>$//g' \
    | cut -d '"' -f1 \
    | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')
}
# Exported so that a child bash process can call it.
export -f searchfile

# Run searchfile on every .md file in the repo.
# find starts a child bash for each file; that child can call searchfile
# because the function is exported with `export -f`. The "_" fills $0 so
# that the file path arrives as $1.
find . -iname "*.md" -exec bash -c 'searchfile "$1"' _ {} \;