"""Gradio app exposing the Masakhane benchmark translation models.

At import time the script downloads every model listed in
``available_language_pairs`` from the Hugging Face Hub, keeps the ones that
load successfully, and launches a Gradio interface around them.
"""
import logging

import gradio as gr
import yaml
from huggingface_hub import hf_hub_download
from joeynmt.prediction import load_params_for_prediction, translate_for_hf_space

logger = logging.getLogger(__name__)

# Hub repository holding one "<src>-<trg>/main" folder per language pair.
HUB_REPO_ID = "chrisjay/masakhane_benchmarks"

# Display name -> language code. Insertion order is the dropdown order.
# NOTE(review): the key mapped to 'ish' appears to start with a stray combining
# accent; it is user-facing text, so it is reproduced unchanged — please confirm.
language_map = {
    'English': 'en', 'Swahili': 'sw', 'Fon': 'fon', 'Igbo': 'ig',
    'Arabic': 'ar', 'Shona': 'sn', 'Ẹ̀dó': 'bin', 'Hausa': 'ha',
    'Efik': 'efi', 'Twi': 'twi', 'Afrikaans': 'af', 'Yoruba': 'yo',
    'Urhobo': 'urh', 'Dendi': 'ddn', '̀Ẹ̀sán': 'ish', 'Isoko': 'iso',
    'Kamba': 'kam', 'Luo': 'luo', 'Southern Ndebele': 'nr', 'Tshivenda': 've',
}

# Language code -> display name, built once instead of searching the values of
# ``language_map`` for every lookup (codes are unique in the map above).
code_to_language = {code: name for name, code in language_map.items()}

# List of available languages I worked on.
# ...
available_language_pairs = [
    'en-sw', 'en-af', 'en-ar', 'en-ddn', 'en-ish', 'en-iso', 'en-kam',
    'en-luo', 'en-nr', 'en-ve', 'efi-en', 'en-bin', 'en-ha', 'en-ig',
    'en-fon', 'en-twi', 'sn-en', 'sw-en', 'yo-en', 'en-urh',
]

available_languages = list(language_map)


def load_config(path="configs/default.yaml") -> dict:
    """
    CODE ADAPTED FROM: https://github.com/joeynmt/joeynmt
    Loads and parses a YAML configuration file.

    :param path: path to YAML configuration file
    :return: configuration dictionary
    """
    with open(path, 'r', encoding="utf-8") as ymlfile:
        cfg = yaml.safe_load(ymlfile)
    return cfg


def load_model(source_language, target_language):
    """
    Downloads the files of one language pair and builds its prediction params.

    :param source_language: source language code (e.g. ``'en'``)
    :param target_language: target language code (e.g. ``'sw'``)
    :return: whatever ``load_params_for_prediction`` returns for the pair
    :raises Exception: if any of the four files cannot be downloaded; the
        original error is chained as ``__cause__``
    """
    translation_dir = 'main'
    pair_dir = f"{source_language}-{target_language}/{translation_dir}"
    try:
        # NOTE(review): ``force_filename`` belongs to the legacy cache layout of
        # huggingface_hub and is deprecated in newer releases — confirm against
        # the version pinned for this Space before upgrading.
        file_yaml = hf_hub_download(
            HUB_REPO_ID,
            filename=f"{pair_dir}/config.yaml",
            force_filename='config.yaml',
        )
        src_vocab = hf_hub_download(HUB_REPO_ID, filename=f"{pair_dir}/src_vocab.txt")
        trg_vocab = hf_hub_download(HUB_REPO_ID, filename=f"{pair_dir}/trg_vocab.txt")
        best_ckpt = hf_hub_download(HUB_REPO_ID, filename=f"{pair_dir}/best.ckpt")
    except Exception as err:
        raise Exception(
            f'It seems we do not have a working configuration repo yet for '
            f'{source_language} -> {target_language}. \n You could help us by '
            f'creating it here: https://huggingface.co/chrisjay/masakhane_benchmarks/tree/main'
        ) from err

    parsed_yaml_file = load_config(file_yaml)
    # Point the config at the downloaded vocabulary files.
    parsed_yaml_file['data']['src_vocab'] = src_vocab
    parsed_yaml_file['data']['trg_vocab'] = trg_vocab

    params = load_params_for_prediction(parsed_yaml_file, best_ckpt)
    return params


# Load models of all available language pairs.
model_mapping = {}
# Keep track of models that loaded successfully and display only them in the Examples.
examples_available_models = []

for available_pair in available_language_pairs:
    try:
        src_code, trg_code = available_pair.split('-')
        model_mapping[available_pair] = load_model(src_code, trg_code)
        examples_available_models.append(
            [code_to_language[src_code], code_to_language[trg_code]]
        )
    except Exception:
        # Best effort: a pair that fails to load is skipped, but leave a trace
        # so a missing model is visible in the logs.
        logger.warning(
            "Skipping language pair %s: model could not be loaded",
            available_pair,
            exc_info=True,
        )
        continue

if not examples_available_models:
    raise Exception('Available models for Space cannot be empty!')


def get_translation(source_language, target_language, source_sentence=None, source_file=None):
    """
    This takes a sentence (or an uploaded file) and gets the translation.

    :param source_language: display name of the source language, a key of
        ``language_map``
    :param target_language: display name of the target language, a key of
        ``language_map``
    :param source_sentence: text to translate; used when no file is given
    :param source_file: optional uploaded file object; when given, its
        ``.name`` is passed to the translator instead of the sentence
    :return: ``pred[0]`` for a sentence, the whole ``pred`` for a file, or an
        error message string if the model is missing or translation fails
    :raises KeyError: if either language name is not in ``language_map``
    """
    source_language_ = language_map[source_language]
    target_language_ = language_map[target_language]

    if source_file is not None:
        translation_type = 'file'
        source = source_file.name
    else:
        translation_type = 'sentence'
        source = source_sentence

    try:
        params = model_mapping[f'{source_language_}-{target_language_}']
        pred = translate_for_hf_space(params, source, translation_type)
    except Exception:
        logger.exception(
            "Translation failed for %s -> %s", source_language_, target_language_
        )
        return f'There was an issue loading the translation model for {source_language} -> {target_language}. Try another pair please'

    return pred[0] if source_file is None else pred


title = "Interact with Masakhane Benchmark Models"
description = (
    "This enables you to interact with some of the Masakhane Benchmark Models "
    "and keep up with their improvement. Some of these models undergo "
    "finetuning on a regular basis. This way, you can easily use the best "
    "model with no hassles."
)

iface = gr.Interface(
    fn=get_translation,
    inputs=[
        gr.inputs.Dropdown(choices=available_languages, default='English'),
        gr.inputs.Dropdown(choices=available_languages, default='Swahili'),
        gr.inputs.Textbox(label="Input"),
        gr.inputs.File(
            file_count="single",
            type="file",
            label='Or upload txt file containing sentences',
            optional=True,
        ),
    ],
    outputs=gr.outputs.Textbox(type="auto", label='Translation'),
    title=title,
    description=description,
    examples=examples_available_models,
    enable_queue=True,
    theme='huggingface',
)
iface.launch()