"""Gradio demo that compares three Chinese-to-English translation models.

The three checkpoints are loaded once at import time; the UI sends the same
input sentence to each of them and shows the outputs next to each other.
"""

from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    M2M100ForConditionalGeneration,
    M2M100Tokenizer,
    pipeline,
)
import gradio as gr

# Checkpoint identifiers, shared by the loaders and the UI labels so the two
# can never drift apart.
MODEL_1_NAME = "penpen/novel-zh-en"
MODEL_2_NAME = "facebook/m2m100_1.2B"
MODEL_3_NAME = "Helsinki-NLP/opus-mt-zh-en"

# Inputs longer than this many tokens are truncated by the tokenizers of
# model 2 and model 3 before generation.
MAX_INPUT_TOKENS = 512

# NOTE(review): max_time is passed through to the pipeline unchanged;
# presumably it caps generation at 7 seconds -- confirm against the
# transformers generation documentation.
translator_1 = pipeline("translation", model=MODEL_1_NAME, max_time=7)

translator_2_model = M2M100ForConditionalGeneration.from_pretrained(MODEL_2_NAME)
translator_2_tokenizer = M2M100Tokenizer.from_pretrained(MODEL_2_NAME)

translator_3_model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_3_NAME)
translator_3_tokenizer = AutoTokenizer.from_pretrained(MODEL_3_NAME)


def model_1(text: str) -> str:
    """Translate *text* with the ``translation`` pipeline (model 1)."""
    return translator_1(text)[0]["translation_text"]


def model_2(text: str) -> str:
    """Translate *text* from Chinese to English with M2M100 (model 2)."""
    # The tokenizer is told the source language before encoding, and the
    # target language is selected by forcing the first generated token.
    translator_2_tokenizer.src_lang = "zh"
    encoded_zh = translator_2_tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=MAX_INPUT_TOKENS,
    )
    generated_tokens = translator_2_model.generate(
        **encoded_zh,
        forced_bos_token_id=translator_2_tokenizer.get_lang_id("en"),
    )
    decoded = translator_2_tokenizer.batch_decode(
        generated_tokens, skip_special_tokens=True
    )
    return decoded[0]


def model_3(text: str) -> str:
    """Translate *text* with the Helsinki-NLP opus-mt zh-en model (model 3)."""
    batch = translator_3_tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=MAX_INPUT_TOKENS,
    )
    generated_tokens = translator_3_model.generate(**batch)
    decoded = translator_3_tokenizer.batch_decode(
        generated_tokens, skip_special_tokens=True
    )
    return decoded[0]


def on_click(text: str) -> tuple[str, str, str]:
    """Run all three translators on *text* and return their outputs.

    Each result is also printed to stdout as a simple request log.

    Args:
        text: The sentence typed into the input textbox.

    Returns:
        A ``(model_1, model_2, model_3)`` tuple of translations, in the order
        of the three output textboxes. Empty or whitespace-only input yields
        three empty strings without invoking any model.
    """
    # Nothing to translate: skip three expensive generate() calls, which
    # would otherwise produce meaningless output for a blank prompt.
    if not text or not text.strip():
        return "", "", ""

    print('input: ', text)

    res_1 = model_1(text)
    print('model_1: ', res_1)

    res_2 = model_2(text)
    print('model_2: ', res_2)

    res_3 = model_3(text)
    print('model_3: ', res_3)

    print('----------------------------')
    return res_1, res_2, res_3


with gr.Blocks() as block:
    # Page title; same text as the original multi-line literal, written with
    # explicit newlines so the string is a valid single-line literal.
    gr.Markdown("\n\n中文翻译英文对比\n\n")
    tb_input = gr.Textbox(label="输入", placeholder="输入中文句子", lines=1)
    btn = gr.Button("翻译", variant='primary')
    tb_trans_1 = gr.Textbox(label=f"模型1({MODEL_1_NAME})")
    tb_trans_2 = gr.Textbox(label=f"模型2({MODEL_2_NAME})")
    tb_trans_3 = gr.Textbox(label=f"模型3({MODEL_3_NAME})")
    btn.click(
        fn=on_click,
        inputs=tb_input,
        outputs=[tb_trans_1, tb_trans_2, tb_trans_3],
    )

# Shut down any Gradio apps still running in this process before starting.
gr.close_all()
block.queue(concurrency_count=5)
block.launch()