Spaces:
Build error
Build error
File size: 5,023 Bytes
b605eca e26a582 b605eca ace9aa6 37d3de6 379a274 8e94ee2 37d3de6 e586aba ace9aa6 b605eca ace9aa6 b605eca ace9aa6 37fe427 cd1f2cb 5e78dce 1a49d63 ace9aa6 b605eca ace9aa6 25e3777 b605eca ace9aa6 b605eca ace9aa6 379a274 e586aba cd1f2cb e586aba cd1f2cb 447b9ea cd1f2cb 447b9ea cd1f2cb ace9aa6 b605eca cd1f2cb a054096 cd1f2cb ace9aa6 e26a582 ace9aa6 e26a582 ace9aa6 1a49d63 cd1f2cb e26a582 ace9aa6 fa5e9aa ace9aa6 cd1f2cb ace9aa6 b605eca c0a0850 ace9aa6 b605eca ace9aa6 4e441f5 c0a0850 7da89d6 b605eca 447b9ea d76e7c4 2153e72 b605eca 2153e72 8804405 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
import logging

import gradio as gr
import yaml
from huggingface_hub import hf_hub_download
from joeynmt.prediction import load_params_for_prediction,translate_for_hf_space
# Display name (as shown in the dropdowns) -> language code used to build
# the "<source>-<target>" model keys and repository folder names.
language_map = {
    'English': 'en',
    'Swahili': 'sw',
    'Fon': 'fon',
    'Igbo': 'ig',
    'Arabic': 'ar',
    'Shona': 'sn',
    'Ẹ̀dó': 'bin',
    'Hausa': 'ha',
    'Efik': 'efi',
    'Twi': 'twi',
    'Afrikaans': 'af',
    'Yoruba': 'yo',
    'Urhobo': 'urh',
    'Dendi': 'ddn',
    '̀Ẹ̀sán': 'ish',
    'Isoko': 'iso',
    'Kamba': 'kam',
    'Luo': 'luo',
    'Southern Ndebele': 'nr',
    'Tshivenda': 've',
}

# Language pairs ("<source code>-<target code>") for which a model is
# expected to exist; each one is attempted at start-up.
available_language_pairs = [
    'en-sw', 'en-af', 'en-ar', 'en-ddn', 'en-ish',
    'en-iso', 'en-kam', 'en-luo', 'en-nr', 'en-ve',
    'efi-en', 'en-bin', 'en-ha', 'en-ig', 'en-fon',
    'en-twi', 'sn-en', 'sw-en', 'yo-en', 'en-urh',
]

# Display names offered in the source/target dropdowns, in declaration order.
available_languages = list(language_map)
def load_config(path="configs/default.yaml") -> dict:
    """
    Read a YAML configuration file and hand back its parsed contents.

    CODE ADAPTED FROM: https://github.com/joeynmt/joeynmt

    :param path: location of the YAML configuration file
    :return: the configuration as parsed by ``yaml.safe_load``
    """
    # The file is decoded as UTF-8 and closed as soon as parsing finishes.
    with open(path, mode='r', encoding="utf-8") as config_stream:
        return yaml.safe_load(config_stream)
def load_model(source_language,target_language):
    """
    Download the model files for one language pair and build its prediction parameters.

    The config, the source/target vocabularies and the best checkpoint are
    fetched from the ``chrisjay/masakhane_benchmarks`` Hub repository, from the
    folder ``<source_language>-<target_language>/main``. The downloaded
    vocabulary paths are written into the parsed config before it is handed
    to ``load_params_for_prediction``.

    :param source_language: source language code (e.g. 'en')
    :param target_language: target language code (e.g. 'sw')
    :return: the value returned by ``load_params_for_prediction``
    :raises Exception: if any of the four files cannot be downloaded; the
        underlying error is attached as ``__cause__``
    """
    repo_id = "chrisjay/masakhane_benchmarks"
    translation_dir = 'main'
    remote_dir = f"{source_language}-{target_language}/{translation_dir}"

    def fetch(file_name):
        # NOTE(review): ``force_filename`` is intentionally not passed. It is
        # deprecated in newer huggingface_hub releases, and a fixed local name
        # would presumably make every language pair share one cached
        # config.yaml. Only the returned path is used below - confirm against
        # the installed huggingface_hub version.
        return hf_hub_download(repo_id, filename=f"{remote_dir}/{file_name}")

    try:
        file_yaml = fetch("config.yaml")
        src_vocab = fetch("src_vocab.txt")
        trg_vocab = fetch("trg_vocab.txt")
        best_ckpt = fetch("best.ckpt")
    except Exception as err:
        # Chain the original error so the real cause (network, missing file,
        # bad argument, ...) is still visible in the traceback.
        raise Exception(f'It seems we do not have a working configuration repo yet for {source_language} -> {target_language}. \n You could help us by creating it here: https://huggingface.co/chrisjay/masakhane_benchmarks/tree/main') from err

    parsed_yaml_file = load_config(file_yaml)
    # Point the config at the locally downloaded vocabulary files.
    parsed_yaml_file['data']['src_vocab']=src_vocab
    parsed_yaml_file['data']['trg_vocab']=trg_vocab
    return load_params_for_prediction(parsed_yaml_file,best_ckpt)
#Load models of all available language pairs
model_mapping = {}  # "<source code>-<target code>" -> loaded prediction parameters
examples_available_models=[] # Keep track of models that loaded successfully and display only them in the Examples.

# Reverse lookup (language code -> display name), built once instead of
# re-scanning language_map for every pair. setdefault keeps the first name
# when two names share a code, matching a first-match linear search.
code_to_language = {}
for language_name, language_code in language_map.items():
    code_to_language.setdefault(language_code, language_name)

for lang_pair in available_language_pairs:
    try:
        source_code, target_code = lang_pair.split('-')[:2]
        model_mapping[lang_pair] = load_model(source_code, target_code)
        examples_available_models.append([code_to_language[source_code], code_to_language[target_code]])
    except Exception:
        # Best effort: one broken pair must not stop the others from loading,
        # but record why it was skipped instead of failing silently.
        logging.getLogger(__name__).warning("Skipping language pair %r", lang_pair, exc_info=True)
        continue

# Without at least one working model there is nothing to serve.
if not examples_available_models:
    raise Exception('Available models for Space cannot be empty!')
def get_translation(source_language,target_language,source_sentence=None,source_file=None):
    """
    Translate a sentence, or an uploaded file, from one language to another.

    :param source_language: display name of the source language (a key of ``language_map``)
    :param target_language: display name of the target language (a key of ``language_map``)
    :param source_sentence: text to translate; used only when no file is given
    :param source_file: optional uploaded file object; when given, its ``name``
        attribute is passed on as the source and takes precedence over the sentence
    :return: the first element of the prediction for a sentence, the whole
        prediction for a file, or an error message string if no model is
        registered for the pair or the translation fails
    :raises KeyError: if either display name is not in ``language_map``
    """
    source_language_ = language_map[source_language]
    target_language_ = language_map[target_language]

    if source_file is not None:
        translation_type = 'file'
        source = source_file.name
    else:
        translation_type = 'sentence'
        source = source_sentence

    try:
        params = model_mapping[f'{source_language_}-{target_language_}']
        pred = translate_for_hf_space(params,source,translation_type)
    except Exception:
        # The user gets a friendly message; the real cause goes to the log
        # so failures can still be diagnosed.
        logging.getLogger(__name__).exception(
            "Translation failed for %s -> %s", source_language_, target_language_
        )
        return f'There was an issue loading the translation model for {source_language} -> {target_language}. Try another pair please'

    return pred if source_file is not None else pred[0]
# Text shown at the top of the web page.
title = "Interact with Masakhane Benchmark Models"
description = "This enables you to interact with some of the Masakhane Benchmark Models and keep up with their improvement. Some of these models undergo finetuning on a regular basis. This way, you can easily use the best model with no hassles."

# Input widgets, listed in the positional order of get_translation's
# parameters: source language, target language, sentence, optional file.
source_language_input = gr.inputs.Dropdown(choices = available_languages,default='English')
target_language_input = gr.inputs.Dropdown(choices = available_languages,default='Swahili')
sentence_input = gr.inputs.Textbox(label="Input")
file_input = gr.inputs.File(file_count="single", type="file", label='Or upload txt file containing sentences', optional=True)

# Single text box that receives whatever get_translation returns.
translation_output = gr.outputs.Textbox(type="auto", label='Translation')

iface = gr.Interface(
    fn=get_translation,
    inputs=[source_language_input, target_language_input, sentence_input, file_input],
    outputs=translation_output,
    title=title,
    description=description,
    # Only the language pairs whose models loaded are offered as examples.
    examples=examples_available_models,
    enable_queue=True,
    theme='huggingface',
)

# Start serving the app.
iface.launch()
|