# Spaces: Runtime error
import gradio as gr | |
import operator | |
import torch | |
from transformers import BertTokenizer, BertForMaskedLM | |
# Run inference on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Load the tokenizer and the masked-LM weights from the same checkpoint,
# then place the model on the selected device.
tokenizer = BertTokenizer.from_pretrained("shibing624/macbert4csc-base-chinese")
model = BertForMaskedLM.from_pretrained("shibing624/macbert4csc-base-chinese").to(device)
# Characters that are copied back from the original text instead of being
# compared with the model output. The decoded prediction has its spaces
# stripped; the remaining entries were marked "unk word" by the original
# author (presumably characters the tokenizer cannot represent - TODO confirm).
_PASSTHROUGH_CHARS = frozenset({' ', '“', '”', '‘', '’', '琊', '\n', '…', '—', '擤'})


def _get_errors(corrected_text, origin_text):
    """Align the predicted text with the original and list the substitutions.

    Args:
        corrected_text: Text decoded from the model prediction.
        origin_text: The sentence that was submitted for correction.

    Returns:
        A ``(corrected_text, sub_details)`` tuple. ``corrected_text`` is the
        prediction with pass-through characters and original upper-case
        letters restored. ``sub_details`` is a list of
        ``(original_char, corrected_char, start, end)`` tuples in ascending
        position order.
    """
    sub_details = []
    for i, ori_char in enumerate(origin_text):
        if ori_char in _PASSTHROUGH_CHARS:
            # Re-insert the character so later positions stay aligned.
            corrected_text = corrected_text[:i] + ori_char + corrected_text[i:]
            continue
        if i >= len(corrected_text):
            # The prediction is shorter than the input; nothing to compare.
            continue
        if ori_char != corrected_text[i]:
            if ori_char.lower() == corrected_text[i]:
                # Only the letter case differs: keep the original upper-case
                # character rather than reporting it as a correction.
                corrected_text = corrected_text[:i] + ori_char + corrected_text[i + 1:]
                continue
            sub_details.append((ori_char, corrected_text[i], i, i + 1))
    # Entries are appended while walking the text left to right, so they are
    # already ordered by start position.
    return corrected_text, sub_details


def ai_text(texts):
    """Correct spelling errors in one sentence or a batch of sentences.

    Args:
        texts: A single sentence (``str``) or a list of sentences. A single
            string is treated as a batch of one; without that, pairing the
            logits with ``texts`` would iterate over the string's characters
            and only the first character would be processed.

    Returns:
        A list with one ``(corrected_text, details)`` tuple per sentence,
        where ``details`` is a list of
        ``(original_char, corrected_char, start, end)`` tuples.
    """
    if isinstance(texts, str):
        texts = [texts]
    if not texts:
        # Empty batch: nothing to correct.
        return []

    with torch.no_grad():
        encoded = tokenizer(texts, padding=True, return_tensors='pt').to(device)
        outputs = model(**encoded)

    result = []
    for ids, text in zip(outputs.logits, texts):
        # Take the most likely token at every position and join the decoded
        # tokens without the separating spaces.
        _text = tokenizer.decode(torch.argmax(ids, dim=-1), skip_special_tokens=True).replace(' ', '')
        # Trim the prediction to the length of the input sentence.
        corrected_text = _text[:len(text)]
        corrected_text, details = _get_errors(corrected_text, text)
        print(text, ' => ', corrected_text, details)
        result.append((corrected_text, details))
    print(result)
    return result
# Sample sentences offered below the input box. Each row supplies the value
# for one input component, so every row holds exactly one sentence.
examples = [
    ['真麻烦你了。希望你们好好的跳无'],
    ['少先队员因该为老人让坐'],
    ['机七学习是人工智能领遇最能体现智能的一个分知'],
    ['今天心情很好'],
    ['老是较书。'],
    ['遇到一位很棒的奴生跟我聊天。'],
    ['他的语说的很好,法语也不错'],
    ['他法语说的很好,的语也不错'],
    ['他们的吵翻很不错,再说他们做的咖喱鸡也好吃'],
    ['不过在许多传统国家,女人向未得到平等'],
]
# Describe the output with the "textbox" string shortcut instead of
# gr.outputs.Textbox(): the legacy gr.outputs namespace is not available in
# current Gradio releases, while the shortcut is resolved by Gradio itself.
output_text = "textbox"

# Build the demo (one text input, one text output) and start serving it.
gr.Interface(
    ai_text,
    "textbox",
    output_text,
    title="Chinese Text Correction shibing624/macbert4csc-base-chinese",
    description="Copy or input error Chinese text. Submit and the machine will correct text.",
    examples=examples,
).launch()