File size: 3,512 Bytes
ce1c4dd 6318241 1cf16a5 ce1c4dd 4307124 725ca4d ddbcd26 ce1c4dd d1882a9 8210be8 ce1c4dd 8210be8 ce1c4dd 0101c12 2e7cfe8 0101c12 6d5dae6 340a52c 6d5dae6 340a52c 2923aa8 340a52c 6d5dae6 340a52c ddbcd26 340a52c d1882a9 340a52c 6d5dae6 2e7cfe8 0101c12 d1882a9 0101c12 d1882a9 2923aa8 0101c12 d1882a9 0101c12 d1882a9 76c5f4f d1882a9 0101c12 ce1c4dd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
import torch
# Pick the device string used for both the model and the tokenized inputs:
# the first CUDA GPU when torch reports one, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"
# Maps the language names offered in the UI dropdowns to the language codes
# handed to the tokenizer. Insertion order is the order the dropdowns show.
LANG_CODES = {
    "English": "en",
    "Romanian": "ro",
    "Spanish": "es",
    "Italian": "it",
    "German": "de",
    "Portuguese": "pt",  # was misspelled "Portugese" in the dropdown label
    "French": "fr",
    "Dutch": "nl",
    "Chinese": "zh",
    "Japanese": "ja",
    "Korean": "ko",
    "Russian": "ru",
}
# Single source of truth for the checkpoint id, so the model weights and the
# tokenizer are always loaded from the same checkpoint.
MODEL_NAME = "facebook/m2m100_418M"

# Loaded once at import time and reused by every translate() call; the model
# is moved to the device selected above.
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME).to(device)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
def translate(text, src_lang, tgt_lang, candidates: int):
    """
    Translate the text from source lang to target lang.

    Args:
        text: The text to translate.
        src_lang: Name of the source language; must be a key of
            ``LANG_CODES`` (for example ``"English"``).
        tgt_lang: Name of the target language; must be a key of
            ``LANG_CODES``.
        candidates: How many alternative translations to produce. The same
            value is used as the beam count. It is coerced with ``int()``.

    Returns:
        The decoded candidate translations joined with newlines, in the
        order ``model.generate`` returns them. An empty string when ``text``
        is empty or whitespace-only.

    Raises:
        ValueError: If ``src_lang`` or ``tgt_lang`` is not in ``LANG_CODES``,
            or if ``candidates`` is smaller than 1.
    """
    # Nothing to translate: skip the model call instead of decoding whatever
    # it would generate for an empty prompt.
    if not text or not text.strip():
        return ""

    # Fail early with a readable message rather than letting a None language
    # code surface as an opaque lookup error further down.
    for role, lang in (("source", src_lang), ("target", tgt_lang)):
        if lang not in LANG_CODES:
            raise ValueError(f"Unsupported {role} language: {lang!r}")
    src = LANG_CODES[src_lang]
    tgt = LANG_CODES[tgt_lang]

    # NOTE(review): the slider value may arrive as a float depending on the
    # gradio version; the generation arguments below need an integer.
    candidates = int(candidates)
    if candidates < 1:
        raise ValueError(f"candidates must be at least 1, got {candidates}")

    tokenizer.src_lang = src
    tokenizer.tgt_lang = tgt
    ins = tokenizer(text, return_tensors="pt").to(device)

    outs = model.generate(
        **ins,
        # Only `.sequences` is read below, so scores and hidden states are
        # not requested.
        return_dict_in_generate=True,
        # Exponent applied to the sequence length when scoring beams; 0.0
        # leaves the beam scores without length normalization.
        length_penalty=0.0,
        # One beam per requested candidate, and every beam is returned.
        num_return_sequences=candidates,
        num_beams=candidates,
        # Force the first generated token to be the target-language token.
        forced_bos_token_id=tokenizer.lang_code_to_id[tgt],
    )
    output = tokenizer.batch_decode(outs.sequences, skip_special_tokens=True)
    return "\n".join(output)
# Build the Gradio UI: an intro text, the four translate() inputs, an output
# box, a button wired to translate(), and a few clickable examples.
# NOTE(review): the original text of this block carried no indentation, so
# the nesting of Row/Column and of the widgets after them is a reconstruction
# -- confirm it matches the intended page layout.
with gr.Blocks() as app:
    markdown = """
# Translate any text to ANY language!
### Bună! 💬
This is an English to Any Language / Any Language to English neural machine translation app.
Input your text to translate, a source language and target language, and desired number of return sequences!
Return sequences is formally known as alternative translations.
If the main translation is not good for what tone you expect, you can increase return sequences and retranslate.
It will show a list of alternative translations, alongside the main translation.
Right now, this only supports 12 languages.
I will add more later! So stay tuned!
### Model and Data
This app uses Facebook/Meta AI's M2M100 418M param model for translation.
### This app is a machine and not all translations will be perfect.
"""
    # Both dropdowns offer the same languages; build the list once.
    language_names = list(LANG_CODES)

    with gr.Row():
        gr.Markdown(markdown)
        with gr.Column():
            input_text = gr.components.Textbox(
                label="Input Text",
                value="Hello, world! Have a nice day!",
            )
            source_lang = gr.components.Dropdown(
                label="Source Language",
                value="English",
                choices=language_names,
            )
            target_lang = gr.components.Dropdown(
                label="Target Language",
                value="Romanian",
                choices=language_names,
            )
            return_seqs = gr.Slider(
                label="Number of return sequences",
                value=3,
                minimum=1,
                maximum=256,
                step=1,
            )

    # Order must match the positional parameters of translate().
    inputs = [input_text, source_lang, target_lang, return_seqs]
    outputs = gr.Textbox()

    translate_btn = gr.Button("Translate!")
    translate_btn.click(translate, inputs=inputs, outputs=outputs)

    # Each example row fills the four inputs in the order listed in `inputs`.
    gr.Examples(
        [
            ["Hello! How are you?", "English", "Romanian", 3],
            ["Mă numesc Popa Mihai și am 13 ani.", "Romanian", "English", 3],
            ["Tu vreau cafea.", "Romanian", "Romanian", 3],
            ["Do you needs coffee?", "English", "English", 3],
        ],
        inputs=inputs,
    )

# Start the web server for the app.
app.launch()