Add script for detecting bad characters.
Co-authored-by: Shu Muto <shu.mutow@nec.com>pull/27315/head
parent
6d252624b2
commit
7fde0426df
|
@ -11,6 +11,7 @@
|
|||
| `linkchecker.py` | This is a link checker for the Kubernetes documentation website. |
|
||||
| `lsync.sh` | This script checks if the English version of a page has changed since a localized page has been committed. |
|
||||
| `replace-capture.sh` | This script sets K8S_WEBSITE in your env to your docs website root or rely on this script to determine it automatically |
|
||||
| `check-ctrlcode.py` | This script finds control codes (0x00-0x1f) in text files. |
|
||||
|
||||
|
||||
|
||||
|
@ -152,3 +153,28 @@ The following command checks a subdirectory:
|
|||
|
||||
./scripts/lsync.sh content/zh/docs/concepts/
|
||||
|
||||
## check-ctrlcode.py
|
||||
|
||||
This script finds control codes (0x00-0x1f, except tab, line feed, and carriage return) in text files.
|
||||
Such characters are displayed as illegal characters in a browser.
|
||||
|
||||
```
|
||||
Usage: ./check-ctrlcode.py <dir> <ext>
|
||||
|
||||
<dir> Specify the directory to check.
|
||||
<ext> Specify the extension to check.
|
||||
|
||||
For example, run the script as follows:
|
||||
|
||||
./check-ctrlcode.py ../content/en/ .md
|
||||
|
||||
The output has the following format:
|
||||
|
||||
"{0} <L{1}:{2}:{3}>: {4}"
|
||||
|
||||
{0} : The path of the file that contains a control code.
|
||||
{1} : The line number where the control code was found.
|
||||
{2} : The column number where the control code was found.
|
||||
{3} : The found control-code.
|
||||
{4} : The content of the line that contains the control code.
|
||||
```
|
|
@ -0,0 +1,54 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import re
|
||||
|
||||
def main():
    """Validate the command line and scan a directory tree for control codes.

    Expects exactly two command-line arguments: the directory to check and
    the file extension to match. Prints a usage message and exits with
    status 1 when the argument count is wrong, and prints
    "Directory not found." and exits with status 1 when the first argument
    is not an existing directory. Otherwise hands the absolute directory
    path and the extension to ``check_dir``.
    """
    args = sys.argv
    if len(args) != 3:
        print("Usage: ./check-ctrlcode.py <dir> <ext>")
        sys.exit(1)

    dirpath, ext = args[1], args[2]

    # os.path.splitext() always yields an extension that is either empty or
    # starts with ".", so a bare "md" could never match anything. Accept it
    # as shorthand for ".md"; an empty extension is left untouched.
    if ext and not ext.startswith("."):
        ext = "." + ext

    fullpath = os.path.abspath(dirpath)
    if not os.path.isdir(fullpath):
        print("Directory not found.")
        sys.exit(1)

    check_dir(fullpath, ext)
|
||||
|
||||
def check_dir(path, ext):
    """Recursively check every file under *path* whose extension is *ext*.

    Entries whose name starts with "." are skipped, whether they are files
    or directories. Subdirectories are descended into; every other entry
    whose extension (as returned by ``os.path.splitext``, i.e. including the
    leading dot) equals *ext* is passed to ``check_ctrlcode``.

    Args:
        path: Directory to scan.
        ext: Extension to match, including the leading dot (e.g. ".md").
    """
    # os.listdir() returns entries in arbitrary order; sort them so the
    # report is reproducible from run to run and across file systems.
    for name in sorted(os.listdir(path)):
        if name.startswith("."):
            continue
        fullpath = os.path.join(path, name)
        if os.path.isdir(fullpath):
            check_dir(fullpath, ext)
            continue
        if os.path.splitext(name)[1] != ext:
            continue
        check_ctrlcode(fullpath)
|
||||
|
||||
# Matches 0x00-0x1f except 0x09 (HT), 0x0a (LF) and 0x0d (CR).
# Compiled once at import time rather than once per line read.
_CTRL_CODE_PATTERN = re.compile('[\u0000-\u0008\u000b\u000c\u000e-\u001f]')


def check_ctrlcode(filepath):
    """Report each line of *filepath* that contains a control code.

    The file is read as UTF-8 text, line by line. For every line containing
    a control code, one record is printed in the form
    ``<path> <L<line>:<column>:<code>>: <line text>`` followed by a blank
    line. Only the first control code on a line is reported.

    Args:
        filepath: Path of the text file to check.
    """
    with open(filepath, encoding='utf-8') as f:
        for lineno, text in enumerate(f, start=1):
            m = _CTRL_CODE_PATTERN.search(text)
            if m is None:
                continue
            # The match is a single character, so its end offset equals the
            # 1-based column of that character.
            column = m.end()
            ctrl = m.group().encode("utf-8")
            print("{0} <L{1}:{2}:{3}>: {4}\n".format(filepath, lineno, column, ctrl, text.replace('\n', '')))
|
||||
|
||||
|
||||
# Run the scan only when executed as a script, so that importing this module
# (e.g. to reuse the checker functions) does not parse sys.argv or exit.
if __name__ == "__main__":
    main()
|
Loading…
Reference in New Issue