# CloudNativeDDL / app.py
# Timing0311's picture
# Update app.py
# b849606
from transformers import MT5ForConditionalGeneration, AutoTokenizer, Text2TextGenerationPipeline, AutoModelForSeq2SeqLM
import gradio as gr
import re
# Translation task setup: model, tokenizer and the pipeline wrapping them.
TRANS_CHECKPOINT = "K024/mt5-zh-ja-en-trimmed"
trans_mdl = MT5ForConditionalGeneration.from_pretrained(TRANS_CHECKPOINT)
trans_tokenizer = AutoTokenizer.from_pretrained(TRANS_CHECKPOINT)
trans_pipe = Text2TextGenerationPipeline(
    model=trans_mdl,
    tokenizer=trans_tokenizer,
)

# Summarization task setup: model and tokenizer are used directly (no pipeline).
SUM_CHECKPOINT = "csebuetnlp/mT5_multilingual_XLSum"
sum_mdl = AutoModelForSeq2SeqLM.from_pretrained(SUM_CHECKPOINT)
sum_tokenizer = AutoTokenizer.from_pretrained(SUM_CHECKPOINT)
# Maps each task label shown in the UI to the prompt prefix that is
# prepended to the source text before it is passed to the pipeline.
_JOB_PREFIXES = {
    "中译日": "zh2ja:",
    "中译英": "zh2en:",
    "日译中": "ja2zh:",
    "英译中": "en2zh:",
    "日译英": "ja2en:",
    "英译日": "en2ja:",
}


def translation_job(job, text):
    """Translate ``text`` according to the selected task label.

    Args:
        job: Task label; must be one of the keys of ``_JOB_PREFIXES``
            (for example ``"中译英"``).
        text: Source text to translate.

    Returns:
        The ``generated_text`` of the first pipeline result, or ``""`` when
        ``text`` is ``None``, empty or whitespace only (the model is not
        called in that case).

    Raises:
        gr.Error: If ``job`` is ``None`` or not a known task label. The
            original code raised a bare ``KeyError`` here.
    """
    # Nothing to translate: skip the model call entirely.
    if text is None or not text.strip():
        return ""

    prefix = _JOB_PREFIXES.get(job)
    if prefix is None:
        # Happens when the dropdown was left unselected (job is None).
        raise gr.Error("请先选择翻译任务")

    prompt = prefix + text
    print(prompt)  # kept from the original: logs the full prompt to stdout
    response = trans_pipe(prompt, max_length=100, num_beams=4)
    return response[0]["generated_text"]
def _normalize_whitespace(text):
    """Strip ``text`` and collapse each whitespace run (newlines included) to one space."""
    return re.sub(r"\s+", " ", text.strip())


def sum_job(text):
    """Summarize ``text`` with the summarization model.

    The text is whitespace-normalized, tokenized (padded/truncated to 512
    tokens), passed to ``sum_mdl.generate`` with beam search, and the first
    generated sequence is decoded back to a string.

    Args:
        text: Article text to summarize.

    Returns:
        The decoded summary, or ``""`` when ``text`` is ``None``, empty or
        whitespace only (the model is not called in that case).
    """
    cleaned = _normalize_whitespace(text or "")
    if not cleaned:
        # Avoid generating a "summary" from an all-padding input.
        return ""

    input_ids = sum_tokenizer(
        [cleaned],
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=512,
    )["input_ids"]

    # generate() returns one sequence per input; only one input was supplied.
    output_ids = sum_mdl.generate(
        input_ids=input_ids,
        max_length=84,
        no_repeat_ngram_size=2,
        num_beams=4,
    )[0]

    return sum_tokenizer.decode(
        output_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
# Task labels offered in the translation dropdown.
TRANSLATION_TASKS = ["中译日", "中译英", "日译中", "英译中", "日译英", "英译日"]

with gr.Blocks() as app:
    # Tab 1: Chinese / English / Japanese translation.
    with gr.Tab("中英日三语翻译"):
        job_name = gr.Dropdown(
            choices=TRANSLATION_TASKS,
            label="翻译任务选择",
            info="单选",
        )
        source_text = gr.Textbox(
            label="翻译文本",
            placeholder="请输入要翻译的文本",
            lines=1,
        )
        trans_result = gr.Textbox(label="翻译结果", lines=1)
        trans_btn = gr.Button(value="翻译")

    # Tab 2: multilingual automatic summarization.
    with gr.Tab("多语言自动摘要"):
        article_text = gr.Textbox(
            label="待总结文本",
            placeholder="请输入要进行摘要的文本",
            lines=8,
        )
        sum_result = gr.Textbox(label="摘要结果", lines=2)
        sum_btn = gr.Button(value="摘要")

    # Wire each button to its handler; registered inside the Blocks context.
    trans_btn.click(
        fn=translation_job,
        inputs=[job_name, source_text],
        outputs=trans_result,
    )
    sum_btn.click(fn=sum_job, inputs=article_text, outputs=sum_result)

app.launch()