#!/usr/bin/python3
# Copyright (c) 2017-2023, Mudita Sp. z.o.o. All rights reserved.
# For licensing, see https://github.com/mudita/MuditaOS/LICENSE.md

import collections
import shutil
from pathlib import Path
import argparse
import json
import subprocess
import sys
import logging
import textwrap

# Log to stdout through a single root handler; attaching a second handler to the
# module logger would duplicate every message.
logging.basicConfig(level=logging.DEBUG, format='%(levelname)s: %(message)s', stream=sys.stdout)
logger = logging.getLogger(__name__)


# note: ripgrep (rg) must be available on PATH; it is used to search the code for key usage
def detect_duplicate_keys(list_of_pairs):
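    """Raise ValueError with a comma-separated list of keys that occur more than once."""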
    key_count = collections.Counter(key for key, value in list_of_pairs)
    duplicate_keys = [key for key, count in key_count.items() if count > 1]

    if duplicate_keys:
        raise ValueError(", ".join(duplicate_keys))


def copy_folder_contents(src_folder: Path, dst_folder: Path):
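    """Copy all regular files from src_folder into dst_folder, creating it if needed."""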
    dst_folder.mkdir(parents=True, exist_ok=True)

    for file_path in src_folder.glob("*"):
        if file_path.is_file():
            shutil.copy2(file_path, dst_folder / file_path.name)


def write_all_keys_to_file(json_path: Path, output_path: Path):
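    """Write every top-level key of the JSON file at json_path to output_path, one per line."""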
    with json_path.open() as json_file:
        json_data = json.load(json_file)
        keys = json_data.keys()

    with output_path.open(mode='w') as output_file:
        output_file.write('\n'.join(keys))


def validate_data(list_of_pairs: list):
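    """object_pairs_hook for json.load: reject duplicate keys, then return a dict."""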
    detect_duplicate_keys(list_of_pairs)
    return dict(list_of_pairs)


def perform_on_files_from_path(json_path: Path, operation):
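    """Apply `operation` to every *.json file under json_path and OR the results together."""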
    json_files = json_path.glob('*.json')
    ret = 0

    for file_path in json_files:
        with file_path.open() as json_file:
            ret |= operation(file_path, json_file)

    return ret


def check_duplicates(file_path: Path, json_file):
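    """Return 1 and log the duplicates if the JSON file defines a key twice, else 0."""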
    try:
        _ = json.load(json_file, object_pairs_hook=validate_data)
    except ValueError as e:
        duplicate_keys = [key.strip() for key in str(e).split(',') if key.strip()]
        logger.debug(f"[{file_path.name}]: duplicate {len(duplicate_keys)}: {', '.join(duplicate_keys)}")
        return 1
    return 0


def check_empty_entries(file_path: Path, json_file):
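    """Return 1 and log the entries if the JSON file has keys with empty values, else 0."""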
    json_data = json.load(json_file)
    empty_entries = [entry for entry, value in json_data.items() if not value]
    if empty_entries:
        logger.debug(f"[{file_path.name}]: empty entries {len(empty_entries)}: {empty_entries}")
        return 1
    return 0


def get_all_keys_from_path(json_path: Path) -> set[str]:
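    """Return the union of top-level keys across all *.json files in json_path."""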
    json_keys = set()

    for file_path in json_path.glob('*.json'):
        with file_path.open() as json_file:
            json_data = json.load(json_file)
            json_keys |= set(json_data.keys())

    return json_keys


def check_missing_entries_from_path(json_path: Path) -> int:
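    """Compare each *.json file against the union of keys from all files; write any
    missing keys to a sibling *.pattern file and return 1 if anything is missing."""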
    all_keys = get_all_keys_from_path(json_path)
    ret = 0

    for file_path in json_path.glob('*.json'):
        with file_path.open() as json_file:
            json_data = json.load(json_file)
            missing_keys_in_file = all_keys - set(json_data.keys())

            if missing_keys_in_file:
                with (file_path.with_suffix('.pattern')).open('w') as pattern_file:
                    pattern_file.write('\n'.join(missing_keys_in_file))
                ret = 1

    return ret


def fix_json(dst_path: Path):
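    """Rewrite a JSON file in place: sort keys and indent with 4 spaces. Duplicate
    keys are dropped as a side effect (json.load keeps only the last occurrence)."""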
    with dst_path.open() as dst_file:
        json_data = json.load(dst_file)

    with dst_path.open('w') as dst_file:
        json.dump(json_data, dst_file, indent=4, sort_keys=True)


def fix_jsons(json_dst_path: Path):
    json_dst_path.mkdir(parents=True, exist_ok=True)

    for file_path in json_dst_path.glob("*.json"):
        fix_json(file_path)

    logger.debug("Translation files fixed")


def verify_keys_code_usage(pattern_src_path: Path, pattern_file=None):
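    """Run ripgrep over the parent source tree for every key listed in the given
    *.pattern files (all of them by default), skipping JSON files and the pattern
    directory itself. The pattern files are deleted afterwards.

    Returns a tuple of (unused_keys, used_keys) as sets.
    """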
    unused_keys = []
    used_keys = []

    if pattern_file is None:
        file_list = list(pattern_src_path.glob("*.pattern"))
    else:
        pattern_file_path = pattern_src_path / pattern_file
        if not pattern_file_path.exists():
            raise ValueError(f"Pattern file {pattern_file_path} not found.")
        file_list = [pattern_file_path]

    for pattern_path in file_list:
        with pattern_path.open("r") as file:
            lines = [line.strip() for line in file if line.strip()]
            rg_result = subprocess.run(
                ["rg", "-f", str(pattern_path), "-g", f"!{pattern_src_path}", "-T", "json", ".."],
                stdout=subprocess.PIPE,
            ).stdout.decode("UTF-8")

            for line in lines:
                if line in rg_result:
                    used_keys.append(line)
                else:
                    unused_keys.append(line)

        pattern_path.unlink()

    return set(unused_keys), set(used_keys)


def remove_unused_keys(json_dst_path: Path, unused_keys: set):
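    """Remove every key in unused_keys from each *.json file in json_dst_path,
    rewriting each file through a temporary file."""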
    json_dst_path.mkdir(parents=True, exist_ok=True)

    for file in json_dst_path.glob("*.json"):
        with file.open() as json_file:
            json_data = json.load(json_file)
        for key in unused_keys:
            json_data.pop(key, None)

        temp_path = file.with_suffix(".tmp")
        with temp_path.open(mode='w') as outfile:
            json.dump(json_data, outfile, indent=4, sort_keys=True)
        shutil.move(str(temp_path), str(file))

    logger.debug("Translation files cleaned up from unused keys")


def get_missing_and_used_keys_for_files(json_path: Path, used_keys: set):
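    """Return 1 (logging per file) if any *.json file lacks a key that the code uses."""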
    ret = 0

    # find keys that are used in the code but missing from a translation file
    for file_path in json_path.glob("*.json"):
        with file_path.open() as json_file:
            json_data = json.load(json_file)
            missing_keys_in_file = used_keys.difference(set(json_data))

            if missing_keys_in_file:
                logger.debug(
                    f"[{file_path.name}]: missing and used {len(missing_keys_in_file)}: {sorted(missing_keys_in_file)}")
                ret |= 1
    return ret


def main(args):
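    """Run the consistency checks (and the optional --fix pipeline); return non-zero on findings."""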
    ret = 0
    src_path = Path(args.src)
    dst_path = Path(args.dst) if args.dst else None

    if args.fix:
        copy_folder_contents(src_path, dst_path)
        fix_jsons(dst_path)

        # check for usage of English.json entries in the code
        write_all_keys_to_file(dst_path / "English.json", dst_path / "English.keys")
        not_used_keys, _ = verify_keys_code_usage(dst_path, "English.keys")
        if not_used_keys:
            logger.critical(f"unused english keys:  {len(not_used_keys)}: {not_used_keys}")

        remove_unused_keys(dst_path, not_used_keys)
        missing_not_used_keys, missing_used_keys = verify_keys_code_usage(src_path)
        ret |= get_missing_and_used_keys_for_files(src_path, missing_used_keys)
        remove_unused_keys(dst_path, missing_not_used_keys)

    ret |= perform_on_files_from_path(src_path, check_empty_entries)
    ret |= perform_on_files_from_path(src_path, check_duplicates)
    ret |= check_missing_entries_from_path(src_path)

    for file in src_path.glob("*.pattern"):
        file.unlink()

    return ret


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        prog='verify_translations',
        description='Check language JSON files for inconsistencies (duplicate, empty, and missing entries)',
        formatter_class=argparse.RawTextHelpFormatter
    )

    parser.add_argument('-s', '--src', metavar='path', type=Path, help="source path to the JSON files", required=True)
    parser.add_argument('--fix', action='store_true', help=textwrap.dedent('''\
        fix the translation files: remove duplicates, remove unused keys, and sort
        WARNING! this will overwrite your destination files!

        Use with caution!'''))
    parser.add_argument('-d', '--dst', metavar='path', type=Path, help="destination path for the fixed JSON files")
    parser.add_argument('-v', '--verbose', action='store_true', help="enable debug output")

    args = parser.parse_args()

    if args.fix and not args.dst:
        parser.error("the --fix option requires a destination path (--dst)")

    if args.verbose:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.CRITICAL)

    error_code = main(args)
    if error_code:
        logging.error("Verification failed")
    sys.exit(error_code)