Update script for Python3, PyYAML 5.1.2 (#17127)

-Update to use argparse.ArgumentParser() for command line args
-Use tempfile library to create tmp work directory

-Remove shutil.rmtree because it was throwing permission errors and
not deleting the directory. Also no longer needed after switching
to tempfile.

-Enhance error handling
-Add check for Go installation
-Enhance check for PyYAML installation
-Change deprecated PyYAML load function to full_load
-Add comments at top of file
-Modify to comply with PEP8
-Update kubernetes-components.md with software versions to match script

Signed-off-by: Aimee Ukasick <aimeeu.opensource@gmail.com>
pull/17130/head
aimeeu 2019-10-22 16:04:18 -05:00 committed by Kubernetes Prow Robot
parent 07431dd930
commit 04127a3acb
3 changed files with 267 additions and 192 deletions

View File

@ -15,16 +15,17 @@ reference documentation for tools and components in the
* You need a machine that is running Linux or macOS.
* You need to have this software installed:
* Install the following:
* [Python 2.7.16](https://www.python.org/downloads/)
* [Python](https://www.python.org/downloads/) v3.7.x
* [Git](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git)
* [Golang](https://golang.org/doc/install) version 1.12 for Kubernetes 1.14+; Go 1.13 [is not supported](https://github.com/kubernetes/community/blob/master/contributors/devel/development.md#go)
* [PyYAML](https://pyyaml.org/) v3.13
* [Pip](https://pypi.org/project/pip/) used to install PyYAML
* [PyYAML](https://pyyaml.org/) v5.1.2
* [make](https://www.gnu.org/software/make/)
* [gcc compiler/linker](https://gcc.gnu.org/)
* The Go binary must be in your path; **do not** set your `$GOPATH`. The `update-imported-docs` tool sets your GOPATH.
* The `Go` binary must be in your path. The `update-imported-docs` tool sets your GOPATH.
* You need to know how to create a pull request to a GitHub repository.
This involves creating your own fork of the repository. For more
@ -46,11 +47,11 @@ git clone git@github.com:<your_github_username>/website.git
Determine the base directory of your clone. For example, if you followed the
preceding step to get the repository, your base directory is
`$github.com/website.` The remaining steps refer to your base directory as
`github.com/website`. The remaining steps refer to your base directory as
`<web-base>`.
The reference documentation for the Kubernetes components and tools is generated
from the Kubernetes source code. The `update-imported-docs` tool automatically
The `update-imported-docs` tool generates the reference documentation for the
Kubernetes components from the Kubernetes source code. The tool automatically
clones the `kubernetes/kubernetes` repository. If you want to change the
reference documentation, please follow [this
guide](/docs/contribute/generate-ref-docs/contribute-upstream).
@ -75,9 +76,10 @@ to `kubernetes/website`.
## Configuration file format
Each config file may contain multiple repos that will be imported together.
When necessary, you can customize the configuration file by manually editing
it. You may create new config files for importing other groups of documents. Imported documents must follow these guidelines:
Each config file may contain multiple repos that will be imported together. When
necessary, you can customize the configuration file by manually editing it. You
may create new config files for importing other groups of documents. Imported
documents must follow these guidelines:
1. Adhere to the [Documentation Style Guide](/docs/contribute/style/style-guide/).

View File

@ -1,182 +0,0 @@
#!/usr/bin/env python
import glob
import os
import re
import shutil
import subprocess
import sys
# PyYAML is the only third-party dependency of this script. When it cannot
# be imported, print installation instructions and stop right away.
try:
    import yaml
except Exception:
    _yaml_install_hint = (
        "Please ensure PyYAML package is installed. This can be done, for "
        "example, by executing the following command:\n\n"
        " pip install pyyaml\n"
    )
    print(_yaml_install_hint)
    sys.exit(-1)
def processLinks(content, remotePrefix, subPath):
    """Re-root relative markdown links and drop the leading H1 heading.

    Every ``[text](target)`` link whose target is not already absolute
    (``http://``, ``https://``, ``mailto:``) and is not an in-page ``#``
    anchor is rewritten:

    * a target starting with ``/`` becomes ``<remotePrefix>/<target>``
    * any other target becomes ``<remotePrefix>/<subPath>/<target>``

    After the links are rewritten, a ``# ...`` heading line (or an empty
    line) at the very start of the content is removed.

    :param content: The markdown text to process.
    :param remotePrefix: Prefix joined in front of every rewritten target.
    :param subPath: Path placed between the prefix and a relative target.
    :return: The processed markdown text.
    """
    # Targets starting with one of these are left untouched.
    keepPrefixes = ("http://", "https://", "mailto:", "#")

    def analyze(matchObj):
        ankor = matchObj.group('ankor')
        target = matchObj.group('target')
        if not target.startswith(keepPrefixes):
            if target.startswith("/"):
                target = "/".join((remotePrefix, target[1:]))
            else:
                target = "/".join((remotePrefix, subPath, target))
        return "[%s](%s)" % (ankor, target)

    # Links are in the form '[text](url)'. Both groups are non-greedy so
    # that several links on one line are handled one by one instead of
    # being swallowed as a single oversized match.
    linkRegex = re.compile(r"\[(?P<ankor>.*?)\]\((?P<target>.*?)\)")
    content = linkRegex.sub(analyze, content)

    # Without re.MULTILINE '^' anchors only at the start of the content,
    # so at most the first line is removed.
    h1Regex = re.compile(r"^(# .*)?\n")
    return h1Regex.sub("", content)
def processKubectlLinks(content):
    """Update markdown links found in the SeeAlso section of kubectl page.

    A link whose target starts with ``kubectl`` and ends with ``.md`` is
    pointed at the generated kubectl command reference, using the text
    that follows ``kubectl `` in the link text as the fragment.
    Example:[kubectl annotate](/docs/reference/generated/kubectl/kubectl-commands#annotate)

    Links whose text does not contain ``kubectl `` are left unchanged.

    :param content: The markdown text to process.
    :return: The markdown text with the kubectl links rewritten.
    """
    commandsPage = "/docs/reference/generated/kubectl/kubectl-commands"

    def analyze(matchObj):
        ankor = matchObj.group('ankor')
        target = matchObj.group('target')
        if target.endswith(".md") and target.startswith("kubectl"):
            ankorList = ankor.split("kubectl ")
            # A link text without "kubectl " yields a single-element list;
            # keep the original target instead of raising IndexError.
            if len(ankorList) > 1:
                target = commandsPage + "#" + ankorList[1]
        return "[%s](%s)" % (ankor, target)

    # Links are in the form '[text](url)'. The text group is non-greedy so
    # that two links on the same line are not merged into one match.
    linkRegex = re.compile(r"\[(?P<ankor>.*?)\]\((?P<target>.*?)\)")
    return linkRegex.sub(analyze, content)
def processFile(src, dst, repoPath, repoDir, rootDir, genAbsoluteLinks):
    """Copy the files matching one 'files' entry into the website tree.

    :param src: A string containing the relative path of a source file. The
                string may contain wildcard characters such as '*' or '?'.
    :param dst: The path for the destination file. The string can be a
                directory name (ending in '/') or a file name.
    :param repoPath: Path of the cloned repo, relative to repoDir.
    :param repoDir: Directory that contains the cloned repos.
    :param rootDir: Directory that dst is relative to.
    :param genAbsoluteLinks: When true, links are rewritten with
                processLinks before the file is written.
    """
    targetBase = os.path.join(rootDir, dst)
    targetIsDir = targetBase.endswith("/")

    for srcFile in glob.glob(os.path.join(repoDir, repoPath, src)):
        # we don't dive into subdirectories
        if not os.path.isfile(srcFile):
            print("[Error] skipping non-regular path %s" % srcFile)
            continue

        try:
            with open(srcFile, "r") as reader:
                text = reader.read()
        except Exception as ex:
            print("[Error] failed in reading source file: " + str(ex))
            continue

        # A directory destination keeps the source file's base name.
        if targetIsDir:
            target = os.path.join(targetBase, os.path.basename(srcFile))
        else:
            target = targetBase

        try:
            print("Writing doc: " + target)
            with open(target, "w") as writer:
                if genAbsoluteLinks:
                    text = processLinks(text, repoPath + "/tree/master",
                                        os.path.dirname(srcFile))
                if target.endswith("kubectl.md"):
                    print("Processing kubectl links")
                    text = processKubectlLinks(text)
                writer.write(text)
        except Exception as ex:
            print("[Error] failed in writing target file '%s': %s"
                  "" % (target, str(ex)))
def main():
    """The main entry of the program.

    Loads the YAML config file named by the first command line argument,
    recreates the work directory, then for every entry in the config's
    'repos' list: clones the repo, optionally runs its 'generate-command'
    with GOPATH pointing at the work directory, and copies the listed
    files into the website tree with processFile.

    A repo that is misconfigured or fails to clone/generate is reported
    and skipped; the remaining repos are still processed.

    :return: -1 when no config file was given, -2 when the config file
             could not be loaded, otherwise None.
    """
    if len(sys.argv) < 2:
        print("[Error] Please specify a config file")
        return -1

    configFile = sys.argv[1]
    currDir = os.path.dirname(__file__)
    rootDir = os.path.realpath(os.path.join(currDir, '..'))

    try:
        # The with-block closes the config file even when parsing fails.
        # NOTE(review): yaml.load can construct arbitrary Python objects;
        # only run this against config files you trust.
        with open(configFile, 'r') as configStream:
            configData = yaml.load(configStream)
    except Exception as ex:
        print("[Error] failed in loading config file - %s" % str(ex))
        return -2

    os.chdir(rootDir)
    workDir = "/tmp/update_docs"
    shutil.rmtree(workDir, True)
    # '0o750' is valid on Python 2.6+ and Python 3; the bare '0750'
    # literal is a syntax error on Python 3.
    os.mkdir(workDir, 0o750)

    remoteRegex = re.compile(r"^https://(?P<prefix>.*)\.git$")

    for repo in configData["repos"]:
        if "name" not in repo:
            print("[Error] repo missing name")
            continue
        repoName = repo["name"]

        if "remote" not in repo:
            print("[Error] repo '%s' missing repo path" % repoName)
            continue
        repoRemote = repo["remote"]

        matches = remoteRegex.search(repoRemote)
        if not matches:
            print("[Error] repo path for '%s' is invalid" % repoName)
            continue

        # The clone lands in <workDir>/src/<host>/<org>/<repo>.
        repoPath = os.path.join("src", matches.group('prefix'))

        os.chdir(workDir)
        print("Cloning repo %s..." % repoName)
        cmd = "git clone --depth=1 -b {0} {1} {2}".format(
            repo["branch"], repoRemote, repoPath)
        res = subprocess.call(cmd, shell=True)
        if res != 0:
            print("[Error] failed in cloning repo '%s'" % repoName)
            continue

        os.chdir(repoPath)
        if "generate-command" in repo:
            genCmd = repo["generate-command"]
            genCmd = "export GOPATH=" + workDir + "\n" + genCmd
            print("Generating docs for %s with %s" % (repoName, genCmd))
            res = subprocess.call(genCmd, shell=True)
            if res != 0:
                print("[Error] failed in generating docs for '%s'" % repoName)
                continue

        os.chdir(rootDir)
        for f in repo["files"]:
            processFile(f['src'], f['dst'], repoPath, workDir, rootDir,
                        "gen-absolute-links" in repo)

    print("Completed docs update. Now run the following command to commit:\n\n"
          " git add .\n"
          " git commit -m <comment>\n"
          " git push\n")


if __name__ == '__main__':
    sys.exit(main())

View File

@ -0,0 +1,255 @@
#!/usr/bin/env python3
##
# This script was tested with Python 3.7.4, Go 1.12, and PyYAML 5.1.2
# installed in a virtual environment.
# This script assumes you have the Python package manager 'pip' installed.
#
# This script updates the generated reference documentation.
# See https://kubernetes.io/docs/contribute/generate-ref-docs/kubernetes-components/
# for further details.
#
# This script checks to make sure Go and PyYAML have been installed.
# The reference docs are generated by a Go command so the Go binary must be
# in your PATH.
#
# A temp "work_dir" is created and is the path where repos will be cloned.
# The work_dir is printed out so you can remove it
# when you no longer need the contents.
# This work_dir will temporarily become the GOPATH.
#
# To execute the script from the website/update-imported-docs directory:
# ./update-imported-docs.py <config_file>
# Config files:
# reference.yml use this to update the reference docs
# release.yml use this to auto-generate/import release notes
##
import argparse
import glob
import os
import re
import shutil
import subprocess
import sys
import tempfile
# Problems found while checking the prerequisites. main() prints these and
# exits when the list is not empty.
error_msgs = []

# Import PyYAML directly instead of parsing the output of `pip freeze`:
# that subprocess call raises when pip itself is missing, and matching on
# the distribution name misses installs that pip does not list as 'PyYAML'.
try:
    import yaml
except ImportError:
    # pip is only needed to install the missing package.
    if not shutil.which('pip'):
        error_msgs.append(
            "Install pip so you can install PyYAML. "
            "https://pip.pypa.io/en/stable/installing")
    error_msgs.append(
        "Please ensure the PyYAML package is installed; "
        "see https://pypi.org/project/PyYAML")

# The reference docs are generated by a Go command.
if not shutil.which('go'):
    error_msgs.append(
        "Go must be installed. See https://golang.org/doc/install")
def process_links(content, remote_prefix, sub_path):
    """Re-root relative markdown links and drop the leading H1 heading.

    Every ``[text](target)`` link whose target is not already absolute
    (``http://``, ``https://``, ``mailto:``) and is not an in-page ``#``
    anchor is rewritten:

    * a target starting with ``/`` becomes ``<remote_prefix>/<target>``
    * any other target becomes ``<remote_prefix>/<sub_path>/<target>``

    After the links are rewritten, a ``# ...`` heading line (or an empty
    line) at the very start of the content is removed.

    :param content: The markdown text to process.
    :param remote_prefix: Prefix joined in front of every rewritten target.
    :param sub_path: Path placed between the prefix and a relative target.
    :return: The processed markdown text.
    """
    # Targets starting with one of these are left untouched.
    keep_prefixes = ("http://", "https://", "mailto:", "#")

    def analyze(match_obj):
        ankor = match_obj.group('ankor')
        target = match_obj.group('target')
        if not target.startswith(keep_prefixes):
            if target.startswith("/"):
                target = "/".join((remote_prefix, target[1:]))
            else:
                target = "/".join((remote_prefix, sub_path, target))
        return "[%s](%s)" % (ankor, target)

    # Links are in the form '[text](url)'. Both groups are non-greedy so
    # that several links on one line are handled one by one instead of
    # being swallowed as a single oversized match.
    link_regex = re.compile(r"\[(?P<ankor>.*?)\]\((?P<target>.*?)\)")
    content = link_regex.sub(analyze, content)

    # Without re.MULTILINE '^' anchors only at the start of the content,
    # so at most the first line is removed.
    h1_regex = re.compile(r"^(# .*)?\n")
    return h1_regex.sub("", content)
def process_kubectl_links(content):
    """Update markdown links found in the SeeAlso section of kubectl page.

    A link whose target starts with ``kubectl`` and ends with ``.md`` is
    pointed at the generated kubectl command reference, using the text
    that follows ``kubectl `` in the link text as the fragment.
    Example:[kubectl annotate](/docs/reference/generated/kubectl/kubectl-commands#annotate)

    Links whose text does not contain ``kubectl `` are left unchanged.

    :param content: The markdown text to process.
    :return: The markdown text with the kubectl links rewritten.
    """
    commands_page = "/docs/reference/generated/kubectl/kubectl-commands"

    def analyze(match_obj):
        ankor = match_obj.group('ankor')
        target = match_obj.group('target')
        if target.endswith(".md") and target.startswith("kubectl"):
            ankor_list = ankor.split("kubectl ")
            # A link text without "kubectl " yields a single-element list;
            # keep the original target instead of raising IndexError.
            if len(ankor_list) > 1:
                target = commands_page + "#" + ankor_list[1]
        return "[%s](%s)" % (ankor, target)

    # Links are in the form '[text](url)'. The text group is non-greedy so
    # that two links on the same line are not merged into one match.
    link_regex = re.compile(r"\[(?P<ankor>.*?)\]\((?P<target>.*?)\)")
    return link_regex.sub(analyze, content)
def process_file(src, dst, repo_path, repo_dir, root_dir, gen_absolute_links):
    """Copy the files matching one 'files' entry into the website tree.

    Files that cannot be read or written are reported on stdout and
    skipped; the remaining matches are still processed.

    :param src: A string containing the relative path of a source file. The
                string may contain wildcard characters such as '*' or '?'.
    :param dst: The path for the destination file. The string can be a
                directory name (ending in '/') or a file name.
    :param repo_path: Path of the cloned repo, relative to repo_dir.
    :param repo_dir: Directory that contains the cloned repos.
    :param root_dir: Directory that dst is relative to.
    :param gen_absolute_links: When true, links are rewritten with
                process_links before the file is written.
    """
    pattern = os.path.join(repo_dir, repo_path, src)
    dst_path = os.path.join(root_dir, dst)

    for src_file in glob.glob(pattern):
        # we don't dive into subdirectories
        if not os.path.isfile(src_file):
            print("[Error] skipping non-regular path {}".format(src_file))
            continue

        try:
            with open(src_file, "r", encoding="utf-8") as reader:
                content = reader.read()
        except Exception as ex:
            print("[Error] failed in reading source file: {}".format(ex))
            continue

        # A directory destination keeps the source file's base name.
        dst_file = dst_path
        if dst_path.endswith("/"):
            dst_file = os.path.join(dst_path, os.path.basename(src_file))

        try:
            print("Writing doc: " + dst_file)
            # Transform the content before opening the destination so a
            # failure here does not leave a truncated file behind.
            if gen_absolute_links:
                # NOTE(review): src_dir comes from the globbed path, so it
                # still carries the repo_dir/repo_path prefix - confirm
                # that this is the intended sub path for rewritten links.
                src_dir = os.path.dirname(src_file)
                remote_prefix = repo_path + "/tree/master"
                content = process_links(content, remote_prefix, src_dir)
            if dst_file.endswith("kubectl.md"):
                print("Processing kubectl links")
                content = process_kubectl_links(content)
            with open(dst_file, "w", encoding="utf-8") as writer:
                writer.write(content)
        except Exception as ex:
            print("[Error] failed in writing target file {}: {}".format(
                dst_file, ex))
def parse_input_args():
    """
    Read the command line.

    The script takes exactly one positional argument, 'config_file',
    which should name one of the YAML files in this same directory.

    :return: the argparse namespace holding 'config_file'
    """
    help_text = ("reference.yml to generate reference docs; "
                 "release.yml to generate release notes")
    cli = argparse.ArgumentParser()
    cli.add_argument('config_file', type=str, help=help_text)
    parsed = cli.parse_args()
    return parsed
def main():
    """The main entry of the program.

    Reports any prerequisite problems collected in error_msgs, loads the
    YAML config file named on the command line, creates a temp work_dir,
    then for every entry in the config's 'repos' list: clones the repo,
    optionally runs its 'generate-command' with GOPATH pointing at the
    work_dir, and copies the listed files into the website tree with
    process_file.

    A repo that is misconfigured or fails to clone/generate is reported
    and skipped; the remaining repos are still processed. The work_dir is
    not removed; its path is printed at the end.

    :return: -2 when a prerequisite is missing, the config file cannot be
             loaded, or the work_dir cannot be created; otherwise None.
    """
    if error_msgs:
        for msg in error_msgs:
            print(msg + "\n")
        return -2

    # first parse input argument
    in_args = parse_input_args()
    config_file = in_args.config_file
    print("config_file is {}".format(config_file))

    curr_dir = os.path.dirname(__file__)
    print("curr_dir {}".format(curr_dir))
    root_dir = os.path.realpath(os.path.join(curr_dir, '..'))
    print("root_dir {}".format(root_dir))

    try:
        # The with-block closes the config file even when parsing fails.
        with open(config_file, 'r') as config_stream:
            config_data = yaml.full_load(config_stream)
    except Exception as ex:
        # to catch when a user specifies a file that does not exist
        print("[Error] failed in loading config file - {}".format(str(ex)))
        return -2

    os.chdir(root_dir)

    # create the temp work_dir
    try:
        print("Making temp work_dir")
        work_dir = tempfile.mkdtemp()
    except OSError as ose:
        # work_dir is unassigned when mkdtemp() fails, so the message must
        # not reference it.
        print("[Error] Unable to create temp work_dir; error: {}".format(ose))
        return -2

    remote_regex = re.compile(r"^https://(?P<prefix>.*)\.git$")

    for repo in config_data["repos"]:
        if "name" not in repo:
            print("[Error] repo missing name")
            continue
        repo_name = repo["name"]

        if "remote" not in repo:
            print("[Error] repo {} missing repo path".format(repo_name))
            continue
        repo_remote = repo["remote"]

        matches = remote_regex.search(repo_remote)
        if not matches:
            print("[Error] repo path for {} is invalid".format(repo_name))
            continue

        # The clone lands in <work_dir>/src/<host>/<org>/<repo>.
        repo_path = os.path.join("src", matches.group('prefix'))

        os.chdir(work_dir)
        print("Cloning repo {}".format(repo_name))
        cmd = "git clone --depth=1 -b {0} {1} {2}".format(
            repo["branch"], repo_remote, repo_path)
        res = subprocess.call(cmd, shell=True)
        if res != 0:
            print("[Error] failed in cloning repo {}".format(repo_name))
            continue

        os.chdir(repo_path)
        if "generate-command" in repo:
            gen_cmd = repo["generate-command"]
            gen_cmd = "export GOPATH=" + work_dir + "\n" + gen_cmd
            print("Generating docs for {} with {}".format(repo_name, gen_cmd))
            res = subprocess.call(gen_cmd, shell=True)
            if res != 0:
                print("[Error] failed in generating docs for {}".format(
                    repo_name))
                continue

        os.chdir(root_dir)
        for f in repo["files"]:
            process_file(f['src'], f['dst'], repo_path, work_dir, root_dir,
                         "gen-absolute-links" in repo)

    print("Completed docs update. Now run the following command to commit:\n\n"
          " git add .\n"
          " git commit -m <comment>\n"
          " git push\n"
          " delete temp dir {} when done ".format(work_dir))


if __name__ == '__main__':
    sys.exit(main())