Spaces:
Running
Running
import os | |
import sys | |
from flores_codes_map_indic import flores_to_iso | |
def convert_iso_to_flores(data_dir: str):
    """
    Rename ISO-coded language-pair data to FLORES language codes, in place.

    Every subdirectory of ``data_dir`` is treated as a language pair named
    ``<src>-<tgt>`` with ISO language codes. Inside each pair directory, any file
    whose name ends with the source or target ISO code gets that trailing code
    replaced by the matching FLORES code; afterwards the pair directory itself is
    renamed to ``<src_flores>-<tgt_flores>``.

    Entries of ``data_dir`` that are not directories are left untouched.

    Args:
        data_dir (str): path of the directory containing the data files for
            language pairs in ISO language code.

    Raises:
        ValueError: if a subdirectory name does not split into exactly two
            parts on ``-``.
        KeyError: if an ISO code has no entry in the FLORES mapping.
    """
    # flores_to_iso is keyed by FLORES code; invert it for ISO -> FLORES lookups.
    iso_to_flores = {iso: flores for flores, iso in flores_to_iso.items()}

    for pair in os.listdir(data_dir):
        path = os.path.join(data_dir, pair)
        # Stray files sitting next to the pair directories are not language pairs.
        if not os.path.isdir(path):
            continue
        print(pair)

        # Resolve both codes before touching the disk so an unknown code fails
        # before any file of this pair has been renamed.
        src_lang_iso, tgt_lang_iso = pair.split("-")
        src_lang = iso_to_flores[src_lang_iso]
        tgt_lang = iso_to_flores[tgt_lang_iso]

        for fname in os.listdir(path):
            # A file is renamed at most once; the source code wins if both match.
            if fname.endswith(src_lang_iso):
                old_code, new_code = src_lang_iso, src_lang
            elif fname.endswith(tgt_lang_iso):
                old_code, new_code = tgt_lang_iso, tgt_lang
            else:
                continue
            # Swap only the trailing code: a plain str.replace would also rewrite
            # earlier occurrences (e.g. the "te" inside "test.te").
            new_fname = fname[: -len(old_code)] + new_code
            os.rename(os.path.join(path, fname), os.path.join(path, new_fname))

        new_pair = "{}-{}".format(src_lang, tgt_lang)
        os.rename(path, os.path.join(data_dir, new_pair))
if __name__ == "__main__":
    # The first positional argument is the directory holding the ISO-coded
    # language-pair subdirectories; fail with a usage hint instead of a bare
    # IndexError when it is missing.
    if len(sys.argv) < 2:
        sys.exit("Usage: python {} <data_dir>".format(sys.argv[0]))
    convert_iso_to_flores(sys.argv[1])