# Spaces: Runtime error
from transformers import MT5ForConditionalGeneration, AutoTokenizer, Text2TextGenerationPipeline, AutoModelForSeq2SeqLM | |
import gradio as gr | |
import re | |
# Translation task setup: the model and tokenizer come from the same checkpoint.
_TRANS_CHECKPOINT = "K024/mt5-zh-ja-en-trimmed"
trans_tokenizer = AutoTokenizer.from_pretrained(_TRANS_CHECKPOINT)
trans_mdl = MT5ForConditionalGeneration.from_pretrained(_TRANS_CHECKPOINT)
trans_pipe = Text2TextGenerationPipeline(tokenizer=trans_tokenizer, model=trans_mdl)

# Summarization task setup.
_SUM_CHECKPOINT = "csebuetnlp/mT5_multilingual_XLSum"
sum_tokenizer = AutoTokenizer.from_pretrained(_SUM_CHECKPOINT)
sum_mdl = AutoModelForSeq2SeqLM.from_pretrained(_SUM_CHECKPOINT)
# Maps each task label shown in the UI to the prompt prefix that is
# prepended to the source text before it is handed to the pipeline.
_TRANSLATION_PREFIXES = {
    "中译日": "zh2ja:",
    "中译英": "zh2en:",
    "日译中": "ja2zh:",
    "英译中": "en2zh:",
    "日译英": "ja2en:",
    "英译日": "en2ja:",
}


def translation_job(job, text):
    """Translate ``text`` for the selected translation task.

    Args:
        job: Task label; must be a key of ``_TRANSLATION_PREFIXES``.
            Presumably ``None`` when no option has been picked, since the
            dropdown is created without a default value.
        text: Source text to translate.

    Returns:
        The ``generated_text`` of the first pipeline result, or an empty
        string when ``text`` is empty or whitespace-only.

    Raises:
        gr.Error: If ``job`` is not a known task label.
    """
    # Nothing to translate: skip the model call instead of sending a bare prefix.
    if not text or not text.strip():
        return ""

    prefix = _TRANSLATION_PREFIXES.get(job)
    if prefix is None:
        # Report a readable message instead of an opaque KeyError.
        raise gr.Error("请先选择翻译任务")

    prompt = prefix + text
    print(prompt)
    response = trans_pipe(prompt, max_length=100, num_beams=4)
    return response[0]['generated_text']
def sum_job(text):
    """Summarize ``text`` using the module-level ``sum_tokenizer`` and ``sum_mdl``.

    Args:
        text: Article text to summarize.

    Returns:
        The decoded summary string, or an empty string when ``text`` is
        empty or contains only whitespace.
    """
    # Collapse every run of whitespace (newlines included) into one space.
    cleaned = re.sub(r"\s+", " ", text.strip()) if text else ""
    if not cleaned:
        return ""

    # Input is truncated/padded to 512 tokens.
    encoded = sum_tokenizer(
        [cleaned],
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=512,
    )
    # Pass the attention mask explicitly so the padded positions are masked
    # without relying on generate() to infer the mask from pad tokens.
    output_ids = sum_mdl.generate(
        input_ids=encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        max_length=84,
        no_repeat_ngram_size=2,
        num_beams=4,
    )[0]
    return sum_tokenizer.decode(
        output_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
# NOTE(review): nesting below was reconstructed from flattened source — confirm.
with gr.Blocks() as app:
    # Chinese / English / Japanese translation tab.
    with gr.Tab("中英日三语翻译"):
        job_name = gr.Dropdown(
            choices=["中译日", "中译英", "日译中", "英译中", "日译英", "英译日"],
            label="翻译任务选择",
            info="单选",
        )
        source_text = gr.Textbox(
            lines=1,
            label="翻译文本",
            placeholder="请输入要翻译的文本",
        )
        trans_result = gr.Textbox(lines=1, label="翻译结果")
        trans_btn = gr.Button("翻译")

    # Multilingual automatic summarization tab.
    with gr.Tab("多语言自动摘要"):
        article_text = gr.Textbox(
            lines=8,
            label="待总结文本",
            placeholder="请输入要进行摘要的文本",
        )
        sum_result = gr.Textbox(lines=2, label="摘要结果")
        sum_btn = gr.Button("摘要")

    # Wire each button to its handler.
    trans_btn.click(
        fn=translation_job,
        inputs=[job_name, source_text],
        outputs=trans_result,
    )
    sum_btn.click(fn=sum_job, inputs=article_text, outputs=sum_result)

app.launch()