# Gradio demo: translate text between a fixed set of languages using the
# facebook/m2m100_418M sequence-to-sequence model.

import gradio as gr
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline

device = "cuda:0" if torch.cuda.is_available() else "cpu"

MODEL_NAME = "facebook/m2m100_418M"

# Display name shown in the dropdowns -> language code passed to the tokenizer.
LANG_CODES = {
    "English": "en",
    "Romanian": "ro",
    "Spanish": "es",
    "Italian": "it",
    "German": "de",
    "Portuguese": "pt",
    "French": "fr",
    "Dutch": "nl",
    "Chinese": "zh",
    "Japanese": "ja",
    "Korean": "ko",
    "Russian": "ru",
}

# Misspelled label used by earlier versions of this app; still accepted so
# that existing API callers sending the old name keep working.
_LEGACY_LANG_NAMES = {"Portugese": "Portuguese"}

model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME).to(device)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)


def _lang_code(name):
    """Return the language code for a display name, or None if unsupported."""
    return LANG_CODES.get(_LEGACY_LANG_NAMES.get(name, name))


def translate(text, src_lang, tgt_lang, candidates: int):
    """Translate ``text`` from the source language to the target language.

    Args:
        text: The text to translate.
        src_lang: Display name of the source language (a key of LANG_CODES).
        tgt_lang: Display name of the target language (a key of LANG_CODES).
        candidates: Number of alternative translations to return. The same
            value is used as the beam width, since beam search cannot return
            more sequences than it has beams.

    Returns:
        The candidate translations joined with newlines, or an empty string
        when ``text`` is empty or only whitespace.

    Raises:
        gr.Error: If either language name is not in LANG_CODES.
    """
    src = _lang_code(src_lang)
    tgt = _lang_code(tgt_lang)
    if src is None or tgt is None:
        # Without this check an unknown/cleared dropdown value would surface
        # as an opaque KeyError from the tokenizer's language lookup.
        raise gr.Error(
            f"Unsupported language selection: {src_lang!r} -> {tgt_lang!r}"
        )

    if not text or not text.strip():
        return ""

    # The slider may deliver a float; generate() needs a positive int.
    num_candidates = max(1, int(candidates))

    tokenizer.src_lang = src
    tokenizer.tgt_lang = tgt

    ins = tokenizer(text, return_tensors="pt").to(device)

    outs = model.generate(
        **ins,
        return_dict_in_generate=True,
        # length_penalty is an exponent applied to the sequence length when
        # scoring beams; 0.0 switches that length normalisation off.
        length_penalty=0.0,
        num_return_sequences=num_candidates,
        num_beams=num_candidates,
        # Force the first generated token to be the target-language tag.
        forced_bos_token_id=tokenizer.get_lang_id(tgt),
    )
    output = tokenizer.batch_decode(outs.sequences, skip_special_tokens=True)
    return "\n".join(output)


# Kept flush-left so Markdown does not treat indented lines as a code block.
markdown = """
# Translate any text to ANY language!

### Bună! 💬

This is an English to Any Language / Any Language to English neural machine translation app.

Input your text to translate, a source language and target language, and desired number of return sequences!

Return sequences is formally known as alternative translations. If the main translation is not good for what tone you expect, you can increase return sequences and retranslate. It will show a list of alternative translations, alongside the main translation.

Right now, this only supports 12 languages. I will add more later! So stay tuned!

### Model and Data

This app uses Facebook/Meta AI's M2M100 418M param model for translation.

### This app is a machine and not all translations will be perfect.
"""

with gr.Blocks() as app:
    with gr.Row():
        # Left: description. Right: the translation form.
        gr.Markdown(markdown)
        with gr.Column():
            input_text = gr.components.Textbox(
                label="Input Text",
                value="Hello, world! Have a nice day!",
            )
            source_lang = gr.components.Dropdown(
                label="Source Language",
                value="English",
                choices=list(LANG_CODES),
            )
            target_lang = gr.components.Dropdown(
                label="Target Language",
                value="Romanian",
                choices=list(LANG_CODES),
            )
            return_seqs = gr.Slider(
                label="Number of return sequences",
                value=3,
                minimum=1,
                maximum=256,
                step=1,
            )

            # Order must match the positional parameters of translate().
            inputs = [input_text, source_lang, target_lang, return_seqs]
            outputs = gr.Textbox()

            translate_btn = gr.Button("Translate!")
            translate_btn.click(translate, inputs=inputs, outputs=outputs)

            gr.Examples(
                [
                    ["Hello! How are you?", "English", "Romanian", 3],
                    ["Mă numesc Popa Mihai și am 13 ani.", "Romanian", "English", 3],
                    ["Tu vreau cafea.", "Romanian", "Romanian", 3],
                    ["Do you needs coffee?", "English", "English", 3],
                ],
                inputs=inputs,
            )

app.launch()