@@ -3,10 +3,11 @@
# For licensing, see https://github.com/mudita/MuditaOS/LICENSE.md
import collections
-import os
-import os.path as path
+import shutil
+from pathlib import Path
import argparse
import json
+import subprocess
import sys
import logging
import textwrap
@@ -17,106 +18,209 @@ handler = logging.StreamHandler(sys.stdout)
logger.addHandler(handler)
-def detect_duplicate_keys(list_of_pairs: list):
- key_count = collections.Counter(k for k, v in list_of_pairs)
- duplicate_keys = ', '.join(k for k, v in key_count.items() if v > 1)
+# note: ripgrep (rg) must be installed and on PATH for the key-usage checks in this tool
+def detect_duplicate_keys(list_of_pairs):
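+    """Raise ValueError with a comma-separated list of keys that occur more than once."""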
+ key_count = collections.Counter(key for key, value in list_of_pairs)
+ duplicate_keys = [key for key, count in key_count.items() if count > 1]
if duplicate_keys:
- raise ValueError(duplicate_keys)
+ raise ValueError(", ".join(duplicate_keys))
+
+
+def copy_folder_contents(src_folder: Path, dst_folder: Path):
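+    """Copy every regular file from src_folder into dst_folder (created if needed); subdirectories are skipped."""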
+ dst_folder.mkdir(parents=True, exist_ok=True)
+
+ for file_path in src_folder.glob("*"):
+ if file_path.is_file():
+ shutil.copy2(file_path, dst_folder / file_path.name)
+
+
+def write_all_keys_to_file(json_path: Path, output_path: Path):
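+    """Write all top-level keys of json_path to output_path, one key per line."""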
+ with json_path.open() as json_file:
+ json_data = json.load(json_file)
+ keys = json_data.keys()
+
+ with output_path.open(mode='w') as output_file:
+ output_file.write('\n'.join(keys))
def validate_data(list_of_pairs: list):
detect_duplicate_keys(list_of_pairs)
- # More detection, each of them will raise exception upon invalid
- # data
return dict(list_of_pairs)
-def perform_on_files_from_path(json_path: path, operation):
- dir_list = os.listdir(json_path)
+def perform_on_files_from_path(json_path: Path, operation):
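+    """Run operation(file_path, open_file) on every *.json file in json_path and OR the return codes."""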
+ json_files = json_path.glob('*.json')
ret = 0
- for file in dir_list:
- file_path = path.join(json_path, file)
- with open(file_path) as json_file:
+ for file_path in json_files:
+ with file_path.open() as json_file:
ret |= operation(file_path, json_file)
+
return ret
-def check_duplicates(file_path: path, json_file):
+def check_duplicates(file_path: Path, json_file):
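+    """Return 1 (and log the offending keys) if json_file contains duplicate keys, 0 otherwise."""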
try:
_ = json.load(json_file, object_pairs_hook=validate_data)
except ValueError as e:
- dup_list = str(e).split(',')
- logger.warning(f"[{path.basename(file_path)}]: duplicate {len(dup_list)}: {dup_list}")
+ duplicate_keys = [key.strip() for key in str(e).split(',') if key.strip()]
+ logger.debug(f"[{file_path.name}]: duplicate {len(duplicate_keys)}: {', '.join(duplicate_keys)}")
return 1
return 0
-def check_empty_entries(file_path: path, json_file):
+def check_empty_entries(file_path: Path, json_file):
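+    """Return 1 (and log the offending entries) if json_file contains entries with empty values, 0 otherwise."""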
json_data = json.load(json_file)
- empty_entries = [entry for entry in json_data if len(str(json_data[entry])) == 0]
+ empty_entries = [entry for entry, value in json_data.items() if not value]
if empty_entries:
- logger.warning(f"[{path.basename(file_path)}]: empty entries {len(empty_entries)}: {empty_entries}")
+ logger.debug(f"[{file_path.name}]: empty entries {len(empty_entries)}: {empty_entries}")
return 1
return 0
-def get_all_keys_from_path(json_path: path):
- dir_list = os.listdir(json_path)
- json_keys = []
-
- # iterate to get all possible keys and check for key duplicates
- for file in dir_list:
- file_path = path.join(json_path, file)
+def get_all_keys_from_path(json_path: Path) -> set[str]:
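+    """Return the union of the top-level keys of all *.json files in json_path."""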
+ json_keys = set()
- with open(file_path) as json_file:
+ for file_path in json_path.glob('*.json'):
+ with file_path.open() as json_file:
json_data = json.load(json_file)
- json_keys.append(set(json_data))
+ json_keys |= set(json_data.keys())
- return set.union(*json_keys)
+ return json_keys
-def check_missing_entries_from_path(json_path: path):
- ret = 0
- dir_list = os.listdir(json_path)
+def check_missing_entries_from_path(json_path: Path) -> int:
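+    """Return 1 if any *.json file lacks keys that are present in the other files.
+
+    For every such file a sibling <name>.pattern file is written, listing the
+    missing keys one per line so they can be fed to ripgrep later.
+    """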
all_keys = get_all_keys_from_path(json_path)
+ ret = 0
- # iterate to find missing keys
- for file in dir_list:
- file_path = path.join(json_path, file)
- with open(file_path) as json_file:
+ for file_path in json_path.glob('*.json'):
+ with file_path.open() as json_file:
json_data = json.load(json_file)
- missing_keys_in_file = all_keys.difference(set(json_data))
+ missing_keys_in_file = all_keys - set(json_data.keys())
+
if missing_keys_in_file:
- logger.warning(f"[{file}]: missing {len(missing_keys_in_file)}: {sorted(missing_keys_in_file)}")
- ret |= 1
+            logger.debug(f"[{file_path.name}]: missing {len(missing_keys_in_file)}: {sorted(missing_keys_in_file)}")
+            with file_path.with_suffix('.pattern').open('w') as pattern_file:
+                pattern_file.write('\n'.join(missing_keys_in_file))
+ ret = 1
+
return ret
-def fix_jsons(json_src_path: path, json_dst_path: path):
- dir_list = os.listdir(json_src_path)
- for file in dir_list:
- src_file_path = path.join(json_src_path, file)
- dst_file_path = path.join(json_dst_path, file)
- if not path.exists(json_dst_path):
- os.makedirs(json_dst_path)
+def fix_json(dst_path: Path):
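+    """Rewrite the JSON file in place with sorted keys and 4-space indentation (duplicate keys collapse to their last value)."""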
+    with dst_path.open() as dst_file:
+        json_data = json.load(dst_file)
+
+    with dst_path.open(mode='w') as dst_file:
+        json.dump(json_data, dst_file, indent=4, sort_keys=True)
+
+
+def fix_jsons(json_dst_path: Path):
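+    """Normalize every *.json file in json_dst_path via fix_json()."""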
+ if not json_dst_path.exists():
+ json_dst_path.mkdir(parents=True)
+
+    for file_path in json_dst_path.glob("*.json"):
+        fix_json(file_path)
+
+ logger.debug("Translation files fixed")
+
+
+def verify_keys_code_usage(pattern_src_path: Path, pattern_file=None):
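+    """Check which keys from the pattern file(s) are referenced anywhere in the code.
+
+    With pattern_file=None every *.pattern file in pattern_src_path is checked;
+    otherwise only the named file is used. Each processed pattern file is deleted.
+    Returns a tuple of (unused_keys, used_keys) sets.
+    """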
+ unused_keys = []
+ used_keys = []
- with open(src_file_path) as json_file, open(dst_file_path, 'w') as outfile:
+ if pattern_file is None:
+ file_list = list(pattern_src_path.glob("*.pattern"))
+ else:
+ pattern_file_path = pattern_src_path / pattern_file
+ if not pattern_file_path.exists():
+ raise ValueError(f"Pattern file {pattern_file_path} not found.")
+ file_list = [pattern_file_path]
+
+ for pattern_path in file_list:
+ with pattern_path.open("r") as file:
+ lines = [line.strip() for line in file if line.strip()]
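+        # rg -f reads one pattern per line from the file; -g "!<dir>" excludes the
+        # directory holding the pattern files, -T json skips JSON files, and ".."
+        # searches the parent of the current working directory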
+ rg_result = subprocess.run(
+ ["rg", "-f", str(pattern_path), "-g", f"!{pattern_src_path}", "-T", "json", ".."],
+ stdout=subprocess.PIPE,
+ ).stdout.decode("UTF-8")
+
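+        # substring match against the raw rg output: a key that is a prefix of a
+        # longer, used key will also be counted as used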
+ for line in lines:
+ if line in rg_result:
+ used_keys.append(line)
+ else:
+ unused_keys.append(line)
+
+ pattern_path.unlink()
+
+ return set(unused_keys), set(used_keys)
+
+
+def remove_unused_keys(json_dst_path: Path, unused_keys: set):
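+    """Remove every key in unused_keys from each *.json file in json_dst_path, rewriting the file through a temporary file."""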
+ if not json_dst_path.exists():
+ json_dst_path.mkdir(parents=True)
+
+ for file in json_dst_path.glob("*.json"):
+ with file.open() as json_file:
json_data = json.load(json_file)
+ for key in unused_keys:
+ json_data.pop(key, None)
+
+ temp_path = file.with_suffix(".tmp")
+ with temp_path.open(mode='w') as outfile:
json.dump(json_data, outfile, indent=4, sort_keys=True)
+ shutil.move(str(temp_path), str(file))
+
+ logger.debug("Translation files cleaned up from unused keys")
+
+
+def get_missing_and_used_keys_for_files(json_path: Path, used_keys: set):
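+    """Return 1 if any translation file lacks a key from used_keys (keys referenced in the code), 0 otherwise."""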
+ ret = 0
-    logger.info("Translation files fixed")
+    # iterate to find keys that are used in the code but missing from a file
+    for file_path in json_path.glob("*.json"):
+        with file_path.open() as json_file:
+            json_data = json.load(json_file)
+        missing_keys_in_file = used_keys.difference(set(json_data))
+
+        if missing_keys_in_file:
+            logger.debug(
+                f"[{file_path.name}]: missing and used {len(missing_keys_in_file)}: {sorted(missing_keys_in_file)}")
+ ret |= 1
+ return ret
def main(args):
ret = 0
+ src_path = Path(args.src)
+ dst_path = Path(args.dst) if args.dst else None
+
+    if args.fix and dst_path is None:
+        logger.critical("--fix requires a destination path (--dst)")
+        return 1
+
if args.fix:
- fix_jsons(args.src, args.dst)
+ copy_folder_contents(src_path, dst_path)
+ fix_jsons(dst_path)
+
+ # check for usage of English.json entries in the code
+ write_all_keys_to_file(dst_path / "English.json", dst_path / "English.keys")
+ not_used_keys, _ = verify_keys_code_usage(dst_path, "English.keys")
+ if not_used_keys:
+ logger.critical(f"unused english keys: {len(not_used_keys)}: {not_used_keys}")
+
+ remove_unused_keys(dst_path, not_used_keys)
+        # the .pattern files produced by check_missing_entries_from_path() are the
+        # input for the code-usage check below, so generate them first
+        ret |= check_missing_entries_from_path(src_path)
+        missing_not_used_keys, missing_used_keys = verify_keys_code_usage(src_path)
+ ret |= get_missing_and_used_keys_for_files(src_path, missing_used_keys)
+ remove_unused_keys(dst_path, missing_not_used_keys)
+
+ ret |= perform_on_files_from_path(src_path, check_empty_entries)
+ ret |= perform_on_files_from_path(src_path, check_duplicates)
+ ret |= check_missing_entries_from_path(src_path)
+
+ for file in src_path.glob("*.pattern"):
+ file.unlink()
- ret |= check_missing_entries_from_path(args.src)
- ret |= perform_on_files_from_path(args.src, check_empty_entries)
- ret |= perform_on_files_from_path(args.src, check_duplicates)
return ret
@@ -124,14 +228,16 @@ if __name__ == "__main__":
parser = argparse.ArgumentParser(
prog='verify_translations',
description='Script for checking the inconsistency of lang jsons',
- formatter_class=argparse.RawTextHelpFormatter)
- parser.add_argument('-s', '--src', metavar='path', help="source path to the json files", required=True)
+ formatter_class=argparse.RawTextHelpFormatter
+ )
+
+ parser.add_argument('-s', '--src', metavar='path', type=Path, help="source path to the json files", required=True)
parser.add_argument('--fix', action='store_true', help=textwrap.dedent('''\
- fix the translation files: remove duplicates and sort
+ fix the translation files: remove duplicates, remove unused keys and sort
WARNING! this will overwrite your destination files!
-
+
Use with caution!'''))
- parser.add_argument('-d', '--dst', metavar='path', help="destination path for the fixed json files")
+ parser.add_argument('-d', '--dst', metavar='path', type=Path, help="destination path for the fixed json files")
parser.add_argument('-v', '--verbose', action='store_true')
args = parser.parse_args()