GHA CI: Switch to pre-commit framework for checking file health

Now users are able to run the same checks on their local development
environment.
https://pre-commit.com/
This commit is contained in:
Chocobo1 2021-10-10 01:17:25 +08:00
parent 45a1c25a29
commit 3467358663
No known key found for this signature in database
GPG key ID: 210D9C873253A68C
4 changed files with 125 additions and 90 deletions

68
.github/workflows/check_translation_tag.py vendored Executable file
View file

@ -0,0 +1,68 @@
#!/usr/bin/env python3
# A pre-commit hook for detecting problematic <translation> tags
# Copyright (C) 2021 Mike Tzou (Chocobo1)
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
# In addition, as a special exception, the copyright holders give permission to
# link this program with the OpenSSL project's "OpenSSL" library (or with
# modified versions of it that use the same license as the "OpenSSL" library),
# and distribute the linked executables. You must obey the GNU General Public
# License in all respects for all of the code used other than "OpenSSL". If you
# modify file(s), you may extend this exception to your version of the file(s),
# but you are not obligated to do so. If you do not wish to do so, delete this
# exception statement from your version.
from typing import Optional, Sequence
import argparse
import re
def main(argv: Optional[Sequence[str]] = None) -> int:
    """Report lines that begin with a closing ``</translation>`` tag.

    Every file named on the command line is scanned line by line. A line is
    reported when it starts (after optional whitespace) with
    ``</translation>``.

    Args:
        argv: Command-line arguments (the filenames to check). ``None`` lets
            argparse fall back to ``sys.argv[1:]``.

    Returns:
        1 when at least one defect was found (the report is printed to
        stdout), otherwise 0.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('filenames', nargs='*', help='Filenames to check')
    args = parser.parse_args(argv)

    regex = re.compile(r"\s*</translation>")
    reports = []

    for filename in args.filenames:
        file_reports = []
        try:
            # Decode explicitly as UTF-8 so the outcome does not depend on the
            # locale encoding of the machine running the hook.
            with open(filename, encoding="utf-8") as file:
                for line_number, line in enumerate(file, start=1):
                    if (match := regex.match(line)) is not None:
                        file_reports.append(
                            f"Defect file: \"{filename}\"\n"
                            f"Line: {line_number}\n"
                            f"Column span: {match.span()}\n"
                            f"Part: \"{match.group()}\"\n\n")
        except UnicodeDecodeError:
            # not a text file, skip (findings gathered so far for this file
            # are dropped as well)
            continue
        reports.extend(file_reports)

    if reports:
        print("".join(reports))
        return 1
    return 0
if __name__ == '__main__':
    # Raise SystemExit directly: the bare exit() helper is added by the site
    # module for interactive use and is missing when site is disabled
    # (e.g. `python -S`).
    raise SystemExit(main())

View file

@ -11,10 +11,7 @@ jobs:
# NOTE(review): this hunk is shown without +/- markers, so removed and added
# lines appear interleaved. Presumably the "Install tools" step now uses
# actions/setup-python@v2 in place of the apt commands, and the "Check files"
# step uses pre-commit/action@v2.0.3 in place of running file_health.sh;
# confirm against the actual workflow file.
uses: actions/checkout@v2
- name: Install tools
run: |
sudo apt update
sudo apt install zsh
uses: actions/setup-python@v2
- name: Check files
run: |
./.github/workflows/file_health.sh
uses: pre-commit/action@v2.0.3

View file

@ -1,85 +0,0 @@
#!/usr/bin/env zsh
# File health check: runs a series of find/grep based checks over the working
# tree, prints the offending files for each check and exits with 1 when any
# check left a non-zero status, 0 otherwise.
# Treat expansion of an unset variable as an error.
set -o nounset
# Assumption: file names don't contain `:` (for the `cut` invocation).
# Safe to assume, as such a character in a filename would cause trouble on Windows, a platform we support
# any regression turn this non-zero
regressions=0
# exclusions (these are just grep extended regular expressions to match against paths relative to the root of the repository)
# NOTE(review): `(*.ts)` starts a group with `*`, which is not a well-defined
# extended regular expression; presumably `.*\.ts` was intended. Confirm.
exclusions_nonutf8='(.*(7z|gif|ic(ns|o)|png|qm|zip))'
exclusions_bom='src/base/unicodestrings.h'
exclusions_tw='(*.ts)|src/webui/www/private/scripts/lib/*'
exclusions_trailing_newline='configure'
exclusions_no_lf='(*.ts)|(.*svg)|compile_commands.json|src/webui/www/private/scripts/lib/*'
# Every check below has the same shape:
#   1. a find/grep pipeline produces the list of offending files,
#   2. `tee >(...)` prints a summary line with the number of listed lines,
#   3. the final xargs/bash stage echoes the list and runs a `test` that only
#      succeeds when the text it received contains no newline,
#   4. the pipeline's exit status (`$?`) is added to `regressions`.
# NOTE(review): step 3 appears to rely on `xargs -0` treating the whole
# newline-separated list as a single item; confirm against the xargs manual.
# Check 1: files whose `file --mime` charset is neither us-ascii nor utf-8
# (./build and ./.git are pruned; paths matching exclusions_nonutf8 are dropped).
echo -e "\n*** Detect files not encoded in UTF-8 ***\n"
find . -path ./build -prune -false -o -path ./.git -prune -false -o -type f -exec file --mime {} \; | sort \
| grep -v -e "charset=us-ascii" -e "charset=utf-8" | cut -d ":" -f 1 \
| grep -E -v -e "${exclusions_nonutf8}" \
| tee >(echo -e "--> Files not encoded in UTF-8: found" "$(wc -l < /dev/stdin)" "regression(s)\n") \
| xargs -I my_input -0 bash -c 'echo "my_input"; test "$(echo -n "my_input" | wc -l)" -eq 0'
regressions=$((regressions+$?))
# Check 2: text files containing the byte sequence EF BB BF (the UTF-8 BOM),
# minus paths matching exclusions_bom.
echo -e "\n*** Detect files encoded in UTF-8 with BOM ***\n"
grep --exclude-dir={.git,build} -rIl $'\xEF\xBB\xBF' | sort \
| grep -E -v -e "${exclusions_bom}" \
| tee >(echo -e "--> Files encoded in UTF-8 with BOM: found" "$(wc -l < /dev/stdin)" "regression(s)\n") \
| xargs -I my_input -0 bash -c 'echo "my_input"; test "$(echo -n "my_input" | wc -l)" -eq 0'
regressions=$((regressions+$?))
# Check 3: text files containing a carriage return byte (0x0D); no exclusions.
echo -e "\n*** Detect usage of CR byte ***\n"
grep --exclude-dir={.git,build} -rIlU $'\x0D' | sort \
| tee >(echo -e "--> Usage of CR byte: found" "$(wc -l < /dev/stdin)" "regression(s)\n") \
| xargs -I my_input -0 bash -c 'echo "my_input"; test "$(echo -n "my_input" | wc -l)" -eq 0'
regressions=$((regressions+$?))
# Check 4: text files with a blank character at the end of any line,
# minus paths matching exclusions_tw.
echo -e "\n*** Detect trailing whitespace in lines ***\n"
grep --exclude-dir={.git,build} -rIl "[[:blank:]]$" | sort \
| grep -E -v -e "${exclusions_tw}" \
| tee >(echo -e "--> Trailing whitespace in lines: found" "$(wc -l < /dev/stdin)" "regression(s)\n") \
| xargs -I my_input -0 bash -c 'echo "my_input"; test "$(echo -n "my_input" | wc -l)" -eq 0';
regressions=$((regressions+$?))
# Check 5: us-ascii/utf-8 files whose last two bytes are both 0x0a,
# minus paths matching exclusions_trailing_newline.
echo -e "\n*** Detect too many trailing newlines ***\n"
find . -path ./build -prune -false -o -path ./.git -prune -false -o -type f -exec file --mime {} \; | sort \
| grep -e "charset=us-ascii" -e "charset=utf-8" | cut -d ":" -f 1 \
| grep -E -v -e "${exclusions_trailing_newline}" \
| xargs -L1 -I my_input bash -c 'test "$(tail -q -c2 "my_input" | hexdump -C | grep "0a 0a")" && echo "my_input"' \
| tee >(echo -e "--> Too many trailing newlines: found" "$(wc -l < /dev/stdin)" "regression(s)\n") \
| xargs -I my_input -0 bash -c 'echo "my_input"; test "$(echo -n "my_input" | wc -l)" -eq 0'
regressions=$((regressions+$?))
# Check 6: us-ascii/utf-8 files for which grepping the hexdump of the last
# byte for "0a" finds nothing, minus paths matching exclusions_no_lf.
echo -e "\n*** Detect no trailing newline ***\n"
find . -path ./build -prune -false -o -path ./.git -prune -false -o -type f -exec file --mime {} \; | sort \
| grep -e "charset=us-ascii" -e "charset=utf-8" | cut -d ":" -f 1 \
| grep -E -v -e "${exclusions_no_lf}" \
| xargs -L1 -I my_input bash -c 'test "$(tail -q -c1 "my_input" | hexdump -C | grep "0a")" || echo "my_input"' \
| tee >(echo -e "--> No trailing newline: found" "$(wc -l < /dev/stdin)" "regression(s)\n") \
| xargs -I my_input -0 bash -c 'echo "my_input"; test "$(echo -n "my_input" | wc -l)" -eq 0'
regressions=$((regressions+$?))
# Check 7: lines starting with `</translation>` (case-insensitive); the
# `cut` keeps only the first two `:`-separated fields of grep's -n output.
echo -e "\n*** Detect translation closing tag in new line ***\n"
grep --exclude-dir={.git,build} -nri "^</translation>" | sort \
| cut -d ":" -f 1,2 \
| tee >(echo -e "--> Translation closing tag in new line: found" "$(wc -l < /dev/stdin)" "regression(s)\n") \
| xargs -I my_input -0 bash -c 'echo "my_input"; test "$(echo -n "my_input" | wc -l)" -eq 0'
regressions=$((regressions+$?))
# Collapse the accumulated statuses into a 0/1 exit code.
# NOTE(review): the failure message below uses `echo` without `-e`, unlike the
# other messages; presumably zsh's builtin echo still expands `\n`. Confirm.
if [ "$regressions" -ne 0 ]; then
regressions=1
echo "\nFile health regressions found. Please fix them (or add them as exclusions)."
else
echo "All OK, no file health regressions found."
fi
exit $regressions;

55
.pre-commit-config.yaml Normal file
View file

@ -0,0 +1,55 @@
# pre-commit configuration: file health checks, run by the CI workflow
# through pre-commit/action.
repos:
  # Hook implemented by a script kept in this repository.
  - repo: local
    hooks:
      - id: check-translation-tag
        name: Check newline characters in <translation> tag
        entry: .github/workflows/check_translation_tag.py
        language: script
        types_or:
          - ts

  # Generic checks from the upstream pre-commit-hooks collection.
  - repo: https://github.com/pre-commit/pre-commit-hooks.git
    rev: v4.0.1
    hooks:
      - id: check-json
        name: Check JSON files

      - id: check-yaml
        name: Check YAML files

      # The `exclude` values below are regular expressions in verbose (?x)
      # mode, so literal dots in file names are escaped.
      - id: fix-byte-order-marker
        name: Check file encoding (UTF-8 without BOM)
        exclude: |
          (?x)^(
            src/base/unicodestrings\.h
          )$

      - id: mixed-line-ending
        name: Check line ending character (LF)
        args: ["--fix=lf"]
        exclude: |
          (?x)^(
            compile_commands\.json |
            src/webui/www/private/scripts/lib/.*
          )$

      - id: end-of-file-fixer
        name: Check trailing newlines
        exclude: |
          (?x)^(
            compile_commands\.json |
            configure |
            src/webui/www/private/scripts/lib/.*
          )$
        exclude_types:
          - svg
          - ts

      - id: trailing-whitespace
        name: Check trailing whitespaces
        exclude: |
          (?x)^(
            src/webui/www/private/scripts/lib/.*
          )$
        exclude_types:
          - ts