Improve the linkchecker script
The linkchecker script does not work the same way as `scripts/lsync.sh`: - The path must start with '/docs', which is not implied in any way. - The language could be deduced when the user provides a full path to a markdown file, e.g. `content/en/docs/concepts/security/controlling-access.md`. - The path parameter could be a positional argument for ease of use. This PR improves the user experience for the tool. pull/33134/head
parent
285affba7e
commit
95257a2edd
|
@ -1,13 +1,12 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
#
|
#
|
||||||
# This a link checker for Kubernetes documentation website.
|
# This a link checker for Kubernetes documentation website.
|
||||||
# - We cover the following cases for the language you provide via `-l`, which
|
#
|
||||||
# defaults to 'en'.
|
# If the language to check is not English (`en`), we check if you are actually
|
||||||
# - If the language specified is not English (`en`), we check if you are
|
# using the localized links. For example, if you checking
|
||||||
# actually using the localized links. For example, if you specify `zh` as
|
# `content/zh/docs/foo/bar`, we check if the English version exists AND if the
|
||||||
# the language, and for link target `/docs/foo/bar`, we check if the English
|
# Chinese version exists as well. A checking record is produced if the link
|
||||||
# version exists AND if the Chinese version exists as well. A checking record
|
# can use the localized version.
|
||||||
# is produced if the link can use the localized version.
|
|
||||||
#
|
#
|
||||||
# Usage: linkchecker.py -h
|
# Usage: linkchecker.py -h
|
||||||
#
|
#
|
||||||
|
@ -64,12 +63,16 @@ BAD_LINK_TYPES = {
|
||||||
C_RED = "\033[31m"
|
C_RED = "\033[31m"
|
||||||
C_GREEN = "\033[32m"
|
C_GREEN = "\033[32m"
|
||||||
C_YELLOW = "\033[33m"
|
C_YELLOW = "\033[33m"
|
||||||
C_GRAY = "\033[90m"
|
C_GRAY = "\033[90m"
|
||||||
C_CYAN = "\033[36m"
|
C_CYAN = "\033[36m"
|
||||||
C_END = "\033[0m"
|
C_END = "\033[0m"
|
||||||
|
|
||||||
# Command line arguments shared across functions
|
# Command line arguments shared across functions
|
||||||
ARGS = None
|
ARGS = None
|
||||||
|
# Command line parser
|
||||||
|
PARSER = None
|
||||||
|
# Language as parsed from the file path
|
||||||
|
LANG = None
|
||||||
# Global result dictionary keyed by page examined
|
# Global result dictionary keyed by page examined
|
||||||
RESULT = {}
|
RESULT = {}
|
||||||
# Cached redirect entries
|
# Cached redirect entries
|
||||||
|
@ -77,6 +80,7 @@ REDIRECTS = {}
|
||||||
# Cached anchors in target pages
|
# Cached anchors in target pages
|
||||||
ANCHORS = {}
|
ANCHORS = {}
|
||||||
|
|
||||||
|
|
||||||
def new_record(level, message, target):
|
def new_record(level, message, target):
|
||||||
"""Create new checking record.
|
"""Create new checking record.
|
||||||
|
|
||||||
|
@ -89,7 +93,7 @@ def new_record(level, message, target):
|
||||||
global ARGS
|
global ARGS
|
||||||
|
|
||||||
# Skip info when verbose
|
# Skip info when verbose
|
||||||
if ARGS.verbose == False and level == "INFO":
|
if ARGS.verbose is False and level == "INFO":
|
||||||
return None
|
return None
|
||||||
|
|
||||||
result = None
|
result = None
|
||||||
|
@ -98,9 +102,9 @@ def new_record(level, message, target):
|
||||||
else:
|
else:
|
||||||
target = C_GRAY + target + C_END
|
target = C_GRAY + target + C_END
|
||||||
if level == "INFO":
|
if level == "INFO":
|
||||||
result = target + ": " + C_GREEN + message + C_END
|
result = target + ": " + C_GREEN + message + C_END
|
||||||
elif level == "WARNING":
|
elif level == "WARNING":
|
||||||
result = target + ": " + C_YELLOW+ message + C_END
|
result = target + ": " + C_YELLOW + message + C_END
|
||||||
else: # default to error
|
else: # default to error
|
||||||
result = target + ": " + C_RED + message + C_END
|
result = target + ": " + C_RED + message + C_END
|
||||||
|
|
||||||
|
@ -286,7 +290,7 @@ def check_target(page, anchor, target):
|
||||||
|
|
||||||
# link to English or localized page
|
# link to English or localized page
|
||||||
if (target.startswith("/docs/") or
|
if (target.startswith("/docs/") or
|
||||||
target.startswith("/" + ARGS.lang + "/docs/")):
|
target.startswith("/" + LANG + "/docs/")):
|
||||||
|
|
||||||
# target is shared reference (kubectl or kubernetes-api?
|
# target is shared reference (kubectl or kubernetes-api?
|
||||||
if (target.find("/docs/reference/generated/kubectl/") >= 0 or
|
if (target.find("/docs/reference/generated/kubectl/") >= 0 or
|
||||||
|
@ -305,22 +309,22 @@ def check_target(page, anchor, target):
|
||||||
if ok:
|
if ok:
|
||||||
# We do't do additional checks for English site even if it has
|
# We do't do additional checks for English site even if it has
|
||||||
# links to a non-English page
|
# links to a non-English page
|
||||||
if ARGS.lang == "en":
|
if LANG == "en":
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# If we are already checking localized link, fine
|
# If we are already checking localized link, fine
|
||||||
if target.startswith("/" + ARGS.lang + "/docs/"):
|
if target.startswith("/" + LANG + "/docs/"):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# additional check for localization even if English target exists
|
# additional check for localization even if English target exists
|
||||||
base = os.path.join(ROOT, "content", ARGS.lang)
|
base = os.path.join(ROOT, "content", LANG)
|
||||||
found = check_file_exists(base, target)
|
found = check_file_exists(base, target)
|
||||||
if not found:
|
if not found:
|
||||||
# Still to be translated
|
# Still to be translated
|
||||||
return None
|
return None
|
||||||
msg = ("Localized page detected, please append '/%s' to the target"
|
msg = ("Localized page detected, please append '/%s' to the target"
|
||||||
% ARGS.lang)
|
% LANG)
|
||||||
return new_record("ERROR", "Link not using localized page", target)
|
return new_record("ERROR", msg, target)
|
||||||
|
|
||||||
# taget might be a redirect entry
|
# taget might be a redirect entry
|
||||||
real_target = get_redirect(target)
|
real_target = get_redirect(target)
|
||||||
|
@ -333,15 +337,16 @@ def check_target(page, anchor, target):
|
||||||
msg = "Link may be wrong for the anchor [%s]" % anchor
|
msg = "Link may be wrong for the anchor [%s]" % anchor
|
||||||
return new_record("WARNING", msg, target)
|
return new_record("WARNING", msg, target)
|
||||||
|
|
||||||
def check_anchor(target_page, anchor):
|
|
||||||
|
def check_anchor(target, anchor):
|
||||||
"""Check if an anchor is defined in the target page
|
"""Check if an anchor is defined in the target page
|
||||||
|
|
||||||
:param target_page: The target page to check
|
:param target: The target page to check
|
||||||
:param anchor: Anchor string to find in the target page
|
:param anchor: Anchor string to find in the target page
|
||||||
"""
|
"""
|
||||||
if target_page not in ANCHORS:
|
if target not in ANCHORS:
|
||||||
try:
|
try:
|
||||||
with open(target_page, "r") as f:
|
with open(target, "r") as f:
|
||||||
data = f.readlines()
|
data = f.readlines()
|
||||||
except Exception as ex:
|
except Exception as ex:
|
||||||
print("[Error] failed in reading markdown file: " + str(ex))
|
print("[Error] failed in reading markdown file: " + str(ex))
|
||||||
|
@ -351,8 +356,9 @@ def check_anchor(target_page, anchor):
|
||||||
regex1 = re.compile(anchor_pattern1)
|
regex1 = re.compile(anchor_pattern1)
|
||||||
anchor_pattern2 = r"{#(.*?)}"
|
anchor_pattern2 = r"{#(.*?)}"
|
||||||
regex2 = re.compile(anchor_pattern2)
|
regex2 = re.compile(anchor_pattern2)
|
||||||
ANCHORS[target_page] = regex1.findall(content) + regex2.findall(content)
|
ANCHORS[target] = regex1.findall(content) + regex2.findall(content)
|
||||||
return anchor in ANCHORS[target_page]
|
return anchor in ANCHORS[target]
|
||||||
|
|
||||||
|
|
||||||
def check_apiref_target(target, anchor):
|
def check_apiref_target(target, anchor):
|
||||||
"""Check a link to an API reference page.
|
"""Check a link to an API reference page.
|
||||||
|
@ -360,7 +366,8 @@ def check_apiref_target(target, anchor):
|
||||||
:param target: The link target string to check
|
:param target: The link target string to check
|
||||||
:param anchor: Anchor string from the content page
|
:param anchor: Anchor string from the content page
|
||||||
"""
|
"""
|
||||||
base = os.path.join(ROOT, "content", "en", "docs", "reference", "kubernetes-api")
|
base = os.path.join(ROOT, "content", "en", "docs", "reference",
|
||||||
|
"kubernetes-api")
|
||||||
ok = check_file_exists(base + "/", target)
|
ok = check_file_exists(base + "/", target)
|
||||||
if not ok:
|
if not ok:
|
||||||
return new_record("ERROR", "API reference page not found", target)
|
return new_record("ERROR", "API reference page not found", target)
|
||||||
|
@ -370,7 +377,9 @@ def check_apiref_target(target, anchor):
|
||||||
|
|
||||||
target_page = os.path.join(base, target)+".md"
|
target_page = os.path.join(base, target)+".md"
|
||||||
if not check_anchor(target_page, anchor):
|
if not check_anchor(target_page, anchor):
|
||||||
return new_record("ERROR", "Anchor not found in API reference page", target+"#"+anchor)
|
return new_record("ERROR", "Anchor not found in API reference page",
|
||||||
|
target+"#"+anchor)
|
||||||
|
|
||||||
|
|
||||||
def validate_links(page):
|
def validate_links(page):
|
||||||
"""Find and validate links on a content page.
|
"""Find and validate links on a content page.
|
||||||
|
@ -398,8 +407,8 @@ def validate_links(page):
|
||||||
records.append(r)
|
records.append(r)
|
||||||
|
|
||||||
# searches for pattern: {{< api-reference page="" anchor=""
|
# searches for pattern: {{< api-reference page="" anchor=""
|
||||||
apiref_pattern = r"{{ *< *api-reference page=\"([^\"]*?)\" *anchor=\"(.*?)\""
|
apiref_re = r"{{ *< *api-reference page=\"([^\"]*?)\" *anchor=\"(.*?)\""
|
||||||
regex = re.compile(apiref_pattern)
|
regex = re.compile(apiref_re)
|
||||||
|
|
||||||
matches = regex.findall(content)
|
matches = regex.findall(content)
|
||||||
for m in matches:
|
for m in matches:
|
||||||
|
@ -408,8 +417,8 @@ def validate_links(page):
|
||||||
records.append(r)
|
records.append(r)
|
||||||
|
|
||||||
# searches for pattern: {{< api-reference page=""
|
# searches for pattern: {{< api-reference page=""
|
||||||
apiref_pattern = r"{{ *< *api-reference page=\"([^\"]*?)\""
|
apiref_re = r"{{ *< *api-reference page=\"([^\"]*?)\""
|
||||||
regex = re.compile(apiref_pattern)
|
regex = re.compile(apiref_re)
|
||||||
|
|
||||||
matches = regex.findall(content)
|
matches = regex.findall(content)
|
||||||
for m in matches:
|
for m in matches:
|
||||||
|
@ -426,31 +435,38 @@ def parse_arguments():
|
||||||
|
|
||||||
Result is returned and saved into global variable ARGS.
|
Result is returned and saved into global variable ARGS.
|
||||||
"""
|
"""
|
||||||
parser = argparse.ArgumentParser(description="Links checker for docs.")
|
global PARSER
|
||||||
parser.add_argument("-l", dest="lang", default="en", metavar="<LANG>",
|
|
||||||
help=("two letter language code, e.g. 'zh'. "
|
|
||||||
"(default='en')"))
|
|
||||||
parser.add_argument("-v", dest="verbose", action="store_true",
|
|
||||||
help="switch on verbose level")
|
|
||||||
parser.add_argument("-f", dest="filter", default="/docs/**/*.md",
|
|
||||||
metavar="<FILTER>",
|
|
||||||
help=("File pattern to scan, e.g. '/docs/foo.md'. "
|
|
||||||
"(default='/docs/**/*.md')"))
|
|
||||||
parser.add_argument("-n", "--no-color", action="store_true",
|
|
||||||
help="Suppress colored printing.")
|
|
||||||
|
|
||||||
return parser.parse_args()
|
PARSER = argparse.ArgumentParser(description="Links checker for docs.")
|
||||||
|
PARSER.add_argument("-v", dest="verbose", action="store_true",
|
||||||
|
help="switch on verbose level")
|
||||||
|
PARSER.add_argument("-n", "--no-color", action="store_true",
|
||||||
|
help="Suppress colored printing.")
|
||||||
|
PARSER.add_argument("-f", dest="filter", default="content/en/docs/**/*.md",
|
||||||
|
metavar="<FILTER>",
|
||||||
|
help=("File pattern to scan. "
|
||||||
|
"(default='content/en/docs/**/*.md')"))
|
||||||
|
|
||||||
|
return PARSER.parse_args()
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""The main entry of the program."""
|
"""The main entry of the program."""
|
||||||
global ARGS, ROOT, REDIRECTS
|
global ARGS, ROOT, REDIRECTS, PARSER, LANG
|
||||||
|
|
||||||
ARGS = parse_arguments()
|
ARGS = parse_arguments()
|
||||||
print("Language: " + ARGS.lang)
|
|
||||||
ROOT = os.path.join(os.path.dirname(__file__), '..')
|
ROOT = os.path.join(os.path.dirname(__file__), '..')
|
||||||
content_dir = os.path.join(ROOT, 'content')
|
|
||||||
lang_dir = os.path.join(content_dir, ARGS.lang)
|
print(ARGS.filter)
|
||||||
|
parts = ARGS.filter.split("/", 2)
|
||||||
|
if len(parts) != 3 or parts[0] != "content":
|
||||||
|
print("ERROR:\nPlease specify file pattern in the format "
|
||||||
|
"'content/<lang>/<path-pattern>', for example:\n"
|
||||||
|
"'content/zh/docs/concepts/**/*.md'\n")
|
||||||
|
PARSER.print_help()
|
||||||
|
sys.exit(-1)
|
||||||
|
|
||||||
|
LANG = parts[1]
|
||||||
|
|
||||||
# read redirects data
|
# read redirects data
|
||||||
redirects_fn = os.path.join(ROOT, "static", "_redirects")
|
redirects_fn = os.path.join(ROOT, "static", "_redirects")
|
||||||
|
@ -473,7 +489,7 @@ def main():
|
||||||
print("[Error] failed in reading redirects file: " + str(ex))
|
print("[Error] failed in reading redirects file: " + str(ex))
|
||||||
return
|
return
|
||||||
|
|
||||||
folders = [f for f in glob.glob(lang_dir + ARGS.filter, recursive=True)]
|
folders = [f for f in glob.glob(ARGS.filter, recursive=True)]
|
||||||
for page in folders:
|
for page in folders:
|
||||||
validate_links(page)
|
validate_links(page)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue