File size: 910 Bytes
7c19755 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
import re
import sys
from sacremoses import MosesDetokenizer
# Module-level Moses detokenizers (English and Chinese), created once at
# import time and reused by detokenize2 on every call.
md_en = MosesDetokenizer(lang='en')
md_zh = MosesDetokenizer(lang='zh')
def moses_detokenize(tokens, language='en'):
    """Detokenize *tokens* into a single sentence string with Moses rules.

    Args:
        tokens: The tokens to join; passed unchanged to
            ``MosesDetokenizer.detokenize``.
        language: Language code given to ``MosesDetokenizer``. Defaults to
            ``'en'``.

    Returns:
        The detokenized sentence with leading and trailing whitespace
        stripped.
    """
    # A detokenizer is constructed per call for the requested language.
    detokenizer = MosesDetokenizer(lang=language)
    # Return the processed sentence.
    return detokenizer.detokenize(tokens, return_str=True).strip()
def detokenize(tokens, mode):
    """Turn a token list back into display text for the given mode.

    Args:
        tokens: The tokens to join.
        mode: Translation direction. The literal string "汉译英"
            (Chinese-to-English) selects Moses detokenization; any other
            value selects plain concatenation.

    Returns:
        For "汉译英": the result of ``moses_detokenize`` (default language)
        with every `` n't`` re-attached to the preceding text. Otherwise:
        the tokens concatenated with no separator.
    """
    if mode != "汉译英":
        # Non-English output: simply glue the tokens together.
        return ''.join(tokens)

    sentence = moses_detokenize(tokens)
    # Close the gap left in front of the contraction suffix "n't".
    return re.sub(r" n't", "n't", sentence)
def detokenize2(tokens, mode):
    """Detokenize *tokens* with a shared Moses detokenizer and strip "@@ ".

    Args:
        tokens: The tokens to join.
        mode: Translation direction. The literal string "汉译英"
            (Chinese-to-English) selects the English detokenizer ``md_en``;
            any other value selects the Chinese detokenizer ``md_zh``.

    Returns:
        The detokenized string with every occurrence of "@@ " removed.
    """
    detokenizer = md_en if mode == "汉译英" else md_zh
    joined = detokenizer.detokenize(tokens, return_str=True)
    # Remove "@@ " sequences — presumably BPE continuation markers, so that
    # sub-word pieces are merged back together (confirm against the tokenizer).
    return re.sub(r"@@ ", "", joined)