Spaces:

pineconeT94
/

fishspeech2

No application file

App Files Files Community

fishspeech2 / fish_speech /i18n /scan.py

pineconeT94

first commit

8b14bed 4 days ago

raw

history blame contribute delete

3.75 kB

	import ast
	import glob
	import json
	from collections import OrderedDict
	from pathlib import Path

	from loguru import logger

	from .core import DEFAULT_LANGUAGE, I18N_FILE_PATH


	def extract_i18n_strings(node):
	    """Return the string literals passed positionally to ``i18n(...)`` calls.

	    ``node`` and all of its descendants are visited depth-first, and a call
	    is matched only when its callee is the bare name ``i18n`` (attribute
	    calls such as ``obj.i18n("x")`` are ignored). Only positional arguments
	    that are plain string literals are collected; keyword arguments,
	    f-strings, bytes and other expressions are skipped.

	    Args:
	        node: Root ``ast.AST`` node to search (for example the result of
	            ``ast.parse``).

	    Returns:
	        A list of the collected strings. A call's own literals come before
	        any literals found in nested calls among its children; duplicates
	        are kept.
	    """
	    i18n_strings = []

	    # Explicit stack instead of recursion so a deeply nested tree cannot hit
	    # the interpreter's recursion limit.
	    pending = [node]
	    while pending:
	        current = pending.pop()

	        if (
	            isinstance(current, ast.Call)
	            and isinstance(current.func, ast.Name)
	            and current.func.id == "i18n"
	        ):
	            # ast.Str / .s are deprecated aliases that are removed in
	            # Python 3.14; string literals are ast.Constant nodes whose
	            # value is a str.
	            i18n_strings.extend(
	                arg.value
	                for arg in current.args
	                if isinstance(arg, ast.Constant) and isinstance(arg.value, str)
	            )

	        # Push children reversed so they are popped in their original order,
	        # giving the same pre-order traversal as a recursive walk.
	        pending.extend(reversed(list(ast.iter_child_nodes(current))))

	    return i18n_strings


	# Walk each source folder recursively. Every .py file that mentions
	# "i18n(" is parsed into an AST, and the string literals passed to
	# i18n() are gathered into one flat list (duplicates included).

	strings = []
	folders = ["fish_speech", "tools"]
	for folder in folders:
	    for py_file in Path(folder).rglob("*.py"):
	        source_text = py_file.read_text(encoding="utf-8")
	        # Cheap substring check so files without any i18n call are not parsed.
	        if "i18n(" not in source_text:
	            continue
	        found = extract_i18n_strings(ast.parse(source_text))
	        logger.info(f"Found {len(found)} i18n strings in {py_file}")
	        strings.extend(found)

	# De-duplicated view of the keys that are used in code.
	code_keys = set(strings)
	logger.info(f"Total unique: {len(code_keys)}")


	# Locate and load the default-language file; its keys are the reference set.
	standard_file = I18N_FILE_PATH / f"{DEFAULT_LANGUAGE}.json"
	with open(standard_file, "r", encoding="utf-8") as standard_fp:
	    standard_data = json.load(standard_fp, object_pairs_hook=OrderedDict)
	standard_keys = set(standard_data)

	# Report the differences in both directions:
	#   unused  - present in the file but never passed to i18n() in code
	#   missing - passed to i18n() in code but absent from the file
	unused_keys = standard_keys - code_keys
	missing_keys = code_keys - standard_keys
	for label, keys in (("unused", unused_keys), ("missing", missing_keys)):
	    logger.info(f"Found {len(keys)} {label} keys in {standard_file}")
	    for key in keys:
	        logger.info(f"\t{key}")

	# In the default-language file every key maps to itself.
	code_keys_dict = OrderedDict((text, text) for text in strings)

	# Overwrite the file with exactly the keys found in code, sorted by key.
	serialized = json.dumps(code_keys_dict, ensure_ascii=False, indent=4, sort_keys=True)
	with open(standard_file, "w", encoding="utf-8") as standard_fp:
	    standard_fp.write(serialized)
	    standard_fp.write("\n")

	logger.info(f"Updated {standard_file}")


	# Bring every other language file in line with the default-language file:
	# add keys it lacks (marked as untranslated) and drop keys it no longer has.

	# Define the standard file name
	standard_file = I18N_FILE_PATH / f"{DEFAULT_LANGUAGE}.json"

	# Find all JSON files in the directory except the default-language one
	dir_path = I18N_FILE_PATH
	languages = [f for f in dir_path.glob("*.json") if f.stem != DEFAULT_LANGUAGE]

	# Load the standard file
	with open(standard_file, "r", encoding="utf-8") as f:
	    standard_data = json.load(f, object_pairs_hook=OrderedDict)

	# Loop through each language file
	for lang_file in languages:
	    # Load the language file
	    with open(lang_file, "r", encoding="utf-8") as f:
	        lang_data = json.load(f, object_pairs_hook=OrderedDict)

	    # Keys the language file lacks / keys it has that the standard file does
	    # not. Lists in file order (rather than sets) keep the log output
	    # deterministic, and being materialized up front they make it safe to
	    # mutate lang_data below.
	    absent_keys = [key for key in standard_data if key not in lang_data]
	    extra_keys = [key for key in lang_data if key not in standard_data]

	    # Add any missing keys; the "#!" prefix marks the entry as untranslated
	    for key in absent_keys:
	        lang_data[key] = "#!" + key
	        logger.info(f"Added missing key: {key} to {lang_file}")

	    # Delete any extra keys from the language file
	    for key in extra_keys:
	        del lang_data[key]
	        logger.info(f"Del extra key: {key} from {lang_file}")

	    # Both files now hold exactly the same keys, so rebuilding in a single
	    # pass over the standard keys reproduces the standard order without the
	    # per-item list(...).index() lookup (which was quadratic).
	    lang_data = OrderedDict((key, lang_data[key]) for key in standard_data)

	    # Save the updated language file. NOTE: sort_keys=True makes json.dump
	    # emit the keys alphabetically, whatever order lang_data holds them in.
	    with open(lang_file, "w", encoding="utf-8") as f:
	        json.dump(lang_data, f, ensure_ascii=False, indent=4, sort_keys=True)
	        f.write("\n")

	    logger.info(f"Updated {lang_file}")

	logger.info("Done")