"""Detokenization helpers that turn translated token lists back into text.

Uses sacremoses' MosesDetokenizer and strips the "@@ " BPE continuation marker.
"""

import re
import sys
from sacremoses import MosesDetokenizer

# Shared Moses detokenizers, built once at import time and used by
# detokenize2(): md_en is configured for English, md_zh for Chinese.
md_en = MosesDetokenizer(lang='en')
md_zh = MosesDetokenizer(lang='zh')

# One MosesDetokenizer per language code, created lazily and then reused so
# repeated calls do not pay the construction cost again.
_DETOKENIZER_CACHE = {}

def _get_detokenizer(language):
    """Return the cached MosesDetokenizer for *language*, creating it on first use."""
    try:
        return _DETOKENIZER_CACHE[language]
    except KeyError:
        detokenizer = MosesDetokenizer(lang=language)
        _DETOKENIZER_CACHE[language] = detokenizer
        return detokenizer

def moses_detokenize(tokens, language='en'):
    """Detokenize *tokens* with the Moses detokenizer for *language*.

    Args:
        tokens: Sequence of token strings to merge into one sentence.
        language: Language code passed to ``MosesDetokenizer`` (default 'en').

    Returns:
        The detokenized sentence with leading/trailing whitespace stripped.
    """
    sentence = _get_detokenizer(language).detokenize(tokens, return_str=True)

    # Return the processed sentence.
    return sentence.strip()

def detokenize(tokens, mode):
    """Merge *tokens* into a single output string according to *mode*.

    When *mode* is "汉译英" (Chinese-to-English) the tokens are passed through
    ``moses_detokenize`` and any space before "n't" is removed. For every
    other mode the tokens are concatenated with no separator.
    """
    if mode != "汉译英":
        # Non-English output: tokens are simply glued together.
        return ''.join(tokens)

    sentence = moses_detokenize(tokens)
    # Re-attach the contraction suffix to the preceding word ("do n't" -> "don't").
    return sentence.replace(" n't", "n't")

def detokenize2(tokens, mode):
    """Detokenize BPE-segmented *tokens* and strip the "@@ " continuation marker.

    The module-level English detokenizer is used when *mode* is "汉译英"
    (Chinese-to-English); the Chinese one is used for any other mode. After
    detokenization every "@@ " occurrence is removed so that subword pieces
    are merged back together.
    """
    # Both modes follow the same pipeline; only the detokenizer differs.
    detokenizer = md_en if mode == "汉译英" else md_zh
    merged = detokenizer.detokenize(tokens, return_str=True)
    return merged.replace("@@ ", "")