#!/usr/bin/python3
# Copyright (c) 2017-2024, Mudita Sp. z.o.o. All rights reserved.
# For licensing, see https://github.com/mudita/MuditaOS/blob/master/LICENSE.md
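"""Verify (and optionally fix) the translation JSON files.

The script checks the language JSON files for duplicate keys, empty entries
and keys missing from some of the files, and uses ripgrep to find keys that
are not referenced in the code. With --fix it writes cleaned-up, sorted
copies of the files to the destination path. ripgrep searches the parent of
the current working directory, so run the script from within the repository.

Example invocations (paths are placeholders; point --src at the directory
holding the language JSON files):

    ./verify_translations.py --src <path-to-lang-jsons> -v
    ./verify_translations.py --src <path-to-lang-jsons> --dst <output-path> --fix
"""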
import collections
import shutil
from pathlib import Path
import argparse
import json
import subprocess
import sys
import logging
import textwrap
# Log to stdout; the level is set in __main__ depending on --verbose.
logger = logging.getLogger(__name__)
handler = logging.StreamHandler(sys.stdout)
handler.setFormatter(logging.Formatter('%(levelname)s: %(message)s'))
logger.addHandler(handler)
# Note: ripgrep (rg) must be installed and available on PATH; it is used to search the code for translation keys.
def detect_duplicate_keys(list_of_pairs):
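    """Raise ValueError if a (key, value) pair list contains duplicated keys.

    The comma-separated list of duplicated keys is carried in the exception
    message so that check_duplicates() can report it.
    """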
key_count = collections.Counter(key for key, value in list_of_pairs)
duplicate_keys = [key for key, count in key_count.items() if count > 1]
if duplicate_keys:
raise ValueError(", ".join(duplicate_keys))
def copy_folder_contents(src_folder: Path, dst_folder: Path):
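    """Copy all regular files from src_folder to dst_folder (created if needed)."""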
dst_folder.mkdir(parents=True, exist_ok=True)
for file_path in src_folder.glob("*"):
if file_path.is_file():
shutil.copy2(file_path, dst_folder / file_path.name)
def write_all_keys_to_file(json_path: Path, output_path: Path):
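    """Write every top-level key of the JSON file to output_path, one key per line."""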
with json_path.open() as json_file:
json_data = json.load(json_file)
keys = json_data.keys()
with output_path.open(mode='w') as output_file:
output_file.write('\n'.join(keys))
def validate_data(list_of_pairs: list):
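    """object_pairs_hook for json.load(): reject duplicate keys, then build a regular dict."""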
detect_duplicate_keys(list_of_pairs)
return dict(list_of_pairs)
def perform_on_files_from_path(json_path: Path, operation):
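    """Apply operation(file_path, open_file) to every *.json file under json_path and OR the returned status codes."""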
json_files = json_path.glob('*.json')
ret = 0
for file_path in json_files:
with file_path.open() as json_file:
ret |= operation(file_path, json_file)
return ret
def check_duplicates(file_path: Path, json_file):
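    """Return 1 and log the duplicated keys if the file contains any, 0 otherwise.

    The duplicate keys are recovered from the ValueError message raised by
    validate_data(); a malformed JSON file also ends up here, since
    json.JSONDecodeError is a ValueError subclass.
    """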
try:
_ = json.load(json_file, object_pairs_hook=validate_data)
except ValueError as e:
duplicate_keys = [key.strip() for key in str(e).split(',') if key.strip()]
logger.debug(f"[{file_path.name}]: duplicate {len(duplicate_keys)}: {', '.join(duplicate_keys)}")
return 1
return 0
def check_empty_entries(file_path: Path, json_file):
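    """Return 1 and log the keys whose translation value is empty (falsy), 0 otherwise."""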
json_data = json.load(json_file)
empty_entries = [entry for entry, value in json_data.items() if not value]
if empty_entries:
logger.debug(f"[{file_path.name}]: empty entries {len(empty_entries)}: {empty_entries}")
return 1
return 0
def get_all_keys_from_path(json_path: Path) -> set[str]:
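    """Return the union of all top-level keys found across the *.json files in json_path."""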
json_keys = set()
for file_path in json_path.glob('*.json'):
with file_path.open() as json_file:
json_data = json.load(json_file)
json_keys |= set(json_data.keys())
return json_keys
def check_missing_entries_from_path(json_path: Path) -> int:
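    """Write the keys missing from each translation file to a sibling .pattern file.

    The .pattern files contain one key per line, the format consumed by
    verify_keys_code_usage(). Returns 1 if any file has missing keys, 0 otherwise.
    """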
all_keys = get_all_keys_from_path(json_path)
ret = 0
for file_path in json_path.glob('*.json'):
with file_path.open() as json_file:
json_data = json.load(json_file)
missing_keys_in_file = all_keys - set(json_data.keys())
if missing_keys_in_file:
with (file_path.with_suffix('.pattern')).open('w') as pattern_file:
pattern_file.write('\n'.join(missing_keys_in_file))
ret = 1
return ret
def fix_json(dst_path: Path):
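    """Rewrite a single JSON file in place with sorted keys and 4-space indentation."""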
with open(dst_path) as dst_file:
json_data = json.load(dst_file)
with open(dst_path, 'w') as dst_file:
json.dump(json_data, dst_file, indent=4, sort_keys=True)
def fix_jsons(json_dst_path: Path):
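    """Normalize (sort and re-indent) every *.json file in json_dst_path."""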
if not json_dst_path.exists():
json_dst_path.mkdir(parents=True)
    for file_path in json_dst_path.glob("*.json"):
        fix_json(file_path)
logger.debug("Translation files fixed")
def verify_keys_code_usage(pattern_src_path: Path, pattern_file=None):
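    """Split the keys listed in the .pattern file(s) into (unused, used) sets.

    Each key is searched for with ripgrep in the parent of the current working
    directory, skipping JSON files (-T json) and the translations directory
    itself (-g). Note that rg treats every pattern line as a regular expression
    and the usage check is a plain substring test on rg's output, so keys that
    are substrings of other keys may be reported as used. The consumed
    .pattern files are deleted afterwards.
    """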
unused_keys = []
used_keys = []
if pattern_file is None:
file_list = list(pattern_src_path.glob("*.pattern"))
else:
pattern_file_path = pattern_src_path / pattern_file
if not pattern_file_path.exists():
raise ValueError(f"Pattern file {pattern_file_path} not found.")
file_list = [pattern_file_path]
for pattern_path in file_list:
with pattern_path.open("r") as file:
lines = [line.strip() for line in file if line.strip()]
rg_result = subprocess.run(
["rg", "-f", str(pattern_path), "-g", f"!{pattern_src_path}", "-T", "json", ".."],
stdout=subprocess.PIPE,
).stdout.decode("UTF-8")
for line in lines:
if line in rg_result:
used_keys.append(line)
else:
unused_keys.append(line)
pattern_path.unlink()
return set(unused_keys), set(used_keys)
def remove_unused_keys(json_dst_path: Path, unused_keys: set):
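    """Remove every key in unused_keys from all *.json files in json_dst_path.

    Each file is rewritten (sorted keys, 4-space indent) via a temporary .tmp file.
    """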
if not json_dst_path.exists():
json_dst_path.mkdir(parents=True)
for file in json_dst_path.glob("*.json"):
with file.open() as json_file:
json_data = json.load(json_file)
for key in unused_keys:
json_data.pop(key, None)
temp_path = file.with_suffix(".tmp")
with temp_path.open(mode='w') as outfile:
json.dump(json_data, outfile, indent=4, sort_keys=True)
shutil.move(str(temp_path), str(file))
logger.debug("Translation files cleaned up from unused keys")
def get_missing_and_used_keys_for_files(json_path: Path, used_keys: set):
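    """Report keys that are used in the code but missing from a translation file.

    Logs the missing keys per file and returns 1 if any were found, 0 otherwise.
    """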
ret = 0
    # iterate over the translation files to find keys that are used in the
    # code but missing from a given file
    for file_path in json_path.glob("*.json"):
        with file_path.open() as json_file:
            json_data = json.load(json_file)
        missing_keys_in_file = used_keys.difference(set(json_data))
        if missing_keys_in_file:
            logger.debug(
                f"[{file_path.name}]: missing and used {len(missing_keys_in_file)}: {sorted(missing_keys_in_file)}")
            ret |= 1
return ret
def main(args):
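    """Run all consistency checks (and, with --fix, rewrite cleaned-up files to the destination); return non-zero on any finding."""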
ret = 0
src_path = Path(args.src)
dst_path = Path(args.dst) if args.dst else None
if args.fix:
copy_folder_contents(src_path, dst_path)
fix_jsons(dst_path)
# check for usage of English.json entries in the code
write_all_keys_to_file(dst_path / "English.json", dst_path / "English.keys")
not_used_keys, _ = verify_keys_code_usage(dst_path, "English.keys")
if not_used_keys:
logger.critical(f"unused english keys: {len(not_used_keys)}: {not_used_keys}")
remove_unused_keys(dst_path, not_used_keys)
missing_not_used_keys, missing_used_keys = verify_keys_code_usage(src_path)
ret |= get_missing_and_used_keys_for_files(src_path, missing_used_keys)
remove_unused_keys(dst_path, missing_not_used_keys)
ret |= perform_on_files_from_path(src_path, check_empty_entries)
ret |= perform_on_files_from_path(src_path, check_duplicates)
ret |= check_missing_entries_from_path(src_path)
for file in src_path.glob("*.pattern"):
file.unlink()
return ret
if __name__ == "__main__":
parser = argparse.ArgumentParser(
prog='verify_translations',
        description='Script for checking the consistency of the language JSON files',
formatter_class=argparse.RawTextHelpFormatter
)
parser.add_argument('-s', '--src', metavar='path', type=Path, help="source path to the json files", required=True)
parser.add_argument('--fix', action='store_true', help=textwrap.dedent('''\
fix the translation files: remove duplicates, remove unused keys and sort
WARNING! this will overwrite your destination files!
Use with caution!'''))
parser.add_argument('-d', '--dst', metavar='path', type=Path, help="destination path for the fixed json files")
parser.add_argument('-v', '--verbose', action='store_true')
args = parser.parse_args()
    if args.fix and not args.dst:
        # parser.error() prints the message and exits with a non-zero status
        parser.error("The destination path must be specified when --fix is used")
if args.verbose:
logger.setLevel(logging.DEBUG)
else:
logger.setLevel(logging.CRITICAL)
error_code = main(args)
if error_code:
        logger.critical("Verification failed")
sys.exit(error_code)