# Spaces: Running  (page-status residue captured along with the file; not part of the script)
import ast | |
import json | |
from collections import OrderedDict | |
import os | |
# Directory containing the locale JSON files; change it to your own path.
locale_path = "./Inference/i18n/locale"

# Folders whose ".py" files are scanned for i18n keys; change to your own paths.
scan_list = ["./Inference/"]

# True: walk each folder in scan_list recursively. False: direct entries only.
scan_subfolders = True

# Alternative configuration kept for reference (flat scan of three folders):
#   locale_path = "./i18n/locale"
#   scan_list = ["./", "GPT_SoVITS/", "tools/"]
#   scan_subfolders = False

# Keys whose zh_CN text is a fixed translation rather than the key itself.
# Any other key found in the code is written back as key == value.
special_words_to_keep = {
    # Language selectors
    "auto": "自动判断",  # detect automatically
    "zh": "中文",  # Chinese
    "en": "英文",  # English
    "ja": "日文",  # Japanese
    "all_zh": "只有中文",  # Chinese only
    "all_ja": "只有日文",  # Japanese only
    # Text-splitting strategies
    "auto_cut": "智能切分",  # smart splitting
    "cut0": "仅凭换行切分",  # split on line breaks only
    "cut1": "凑四句一切",  # split every four sentences
    "cut2": "凑50字一切",  # split every 50 characters
    "cut3": "按中文句号。切",  # split on the Chinese full stop
    "cut4": "按英文句号.切",  # split on the English period
    "cut5": "按标点符号切",  # split on punctuation marks
}
def extract_i18n_strings(node):
    """Collect string-literal arguments of every ``i18n(...)`` call under *node*.

    Only calls whose callee is the bare name ``i18n`` are considered
    (attribute calls such as ``obj.i18n(...)`` are ignored), and only
    positional arguments that are plain string literals are collected.
    Nested calls are found because the whole subtree is visited.

    Requires Python 3.8+, where ``ast.parse`` represents literals as
    ``ast.Constant`` nodes.

    Args:
        node: Any ``ast.AST`` node, typically the module returned by
            ``ast.parse``.

    Returns:
        A list of the literal strings in depth-first, pre-order traversal
        order. Duplicates are kept.
    """
    i18n_strings = []

    if (
        isinstance(node, ast.Call)
        and isinstance(node.func, ast.Name)
        and node.func.id == "i18n"
    ):
        # ast.Str / .s are deprecated since 3.8 and removed in 3.14; a string
        # literal is an ast.Constant whose value is a str.
        i18n_strings.extend(
            arg.value
            for arg in node.args
            if isinstance(arg, ast.Constant) and isinstance(arg.value, str)
        )

    # Always descend, including into the arguments of a matched call, so that
    # i18n(...) calls nested anywhere in the tree are picked up.
    for child_node in ast.iter_child_nodes(node):
        i18n_strings.extend(extract_i18n_strings(child_node))

    return i18n_strings
# Accumulates every i18n key found across all scanned files (duplicates kept).
strings = []


def scan_i18n_strings(filename):
    """Parse one Python file and append its i18n keys to ``strings``.

    The file is read as UTF-8. Files whose text does not contain
    "I18nAuto" are skipped without being parsed. For every other file the
    source is parsed into an AST, its keys are extracted, the file name and
    key count are printed, and the keys are appended to the module-level
    ``strings`` list.

    Args:
        filename: Path of the Python source file to inspect.
    """
    with open(filename, "r", encoding="utf-8") as source_file:
        source = source_file.read()

    # Cheap textual pre-filter; presumably files that use i18n mention
    # I18nAuto somewhere (e.g. when importing it) -- confirm against the project.
    if "I18nAuto" not in source:
        return

    found = extract_i18n_strings(ast.parse(source))
    print(filename, len(found))
    strings.extend(found)
# Feed every ".py" file found under the configured folders to the scanner.
for folder in scan_list:
    if scan_subfolders:
        # Recursive mode: visit the folder and everything below it.
        for dirpath, _dirnames, filenames in os.walk(folder):
            for filename in filenames:
                if filename.endswith(".py"):
                    scan_i18n_strings(os.path.join(dirpath, filename))
    else:
        # Flat mode: only the folder's direct entries are considered.
        for filename in os.listdir(folder):
            if filename.endswith(".py"):
                scan_i18n_strings(os.path.join(folder, filename))
# Deduplicate: the same key can be collected from several call sites.
code_keys = set(strings)

# Appears to be sample console output from the scanning phase
# (file name followed by the number of keys found), kept for reference:
#   n_i18n.py
#   gui_v1.py 26
#   app.py 16
#   infer-web.py 147
#   scan_i18n.py 0
#   i18n.py 0
#   lib/train/process_ckpt.py 1

print()
print("Total unique:", len(code_keys))

# Define the standard file name: zh_CN.json is the reference locale that the
# keys found in the code are compared against.
standard_file = os.path.join(locale_path, "zh_CN.json")
with open(standard_file, "r", encoding="utf-8") as f:
    standard_data = json.load(f, object_pairs_hook=OrderedDict)
standard_keys = set(standard_data.keys())

# Keys present in the locale file but no longer referenced by the code.
# Sorted so the report is identical from run to run (set iteration order of
# strings varies with hash randomisation).
unused_keys = standard_keys - code_keys
print("Unused keys:", len(unused_keys))
for unused_key in sorted(unused_keys):
    print("\t", unused_key)

# Keys referenced by the code but absent from the locale file.
missing_keys = code_keys - standard_keys
print("Missing keys:", len(missing_keys))
for missing_key in sorted(missing_keys):
    print("\t", missing_key)
# Build the key -> text mapping for the reference locale: special words get
# their fixed translation, every other key maps to itself.
code_keys_dict = OrderedDict(
    (key, special_words_to_keep.get(key, key)) for key in strings
)

# write back: overwrite the reference locale file with the keys found in the
# code, sorted by key, keeping non-ASCII text readable, newline-terminated.
with open(standard_file, "w", encoding="utf-8") as f:
    serialized = json.dumps(
        code_keys_dict, ensure_ascii=False, indent=4, sort_keys=True
    )
    f.write(serialized + "\n")