|
import re |
|
from text.japanese import japanese_to_romaji_with_accent |
|
from text.k2j import korean2katakana |
|
from text.symbols import symbols |
|
|
|
|
|
# Character class that matches any single character occurring in ``symbols``.
# Each symbol is passed through re.escape so that regex metacharacters are
# taken literally: a '-' cannot form a range, a ']' cannot close the class
# early, a '\\' cannot start an escape and a leading '^' cannot negate the set.
# (The previous pattern joined the raw symbols with '^', which made '^' itself
# a kept character and turned a '-' symbol into the range '^-^'.)
_cleaner_cleans = re.compile('[' + ''.join(re.escape(s) for s in symbols) + ']')
|
|
|
|
|
def japanese_cleaners(text):
    """Romanize ``text`` and normalize the result.

    The input is first passed through ``japanese_to_romaji_with_accent``.
    The returned string is then post-processed in three steps:

    1. if it ends with an ASCII letter, a period is appended after it;
    2. every ``ts`` is replaced by the single character ``ʦ``;
    3. every ``...`` is replaced by the single ellipsis character ``…``.

    Returns the post-processed string.
    """
    romaji = japanese_to_romaji_with_accent(text)
    # Make sure an utterance that ends on a letter is closed by a period.
    romaji = re.sub(r'([A-Za-z])$', r'\1.', romaji)
    romaji = romaji.replace('ts', 'ʦ')
    return romaji.replace('...', '…')
|
|
|
|
|
def japanese_cleaners2(text):
    """Clean text that contains [KO]...[KO] and/or [JA]...[JA] tagged spans.

    Processing order:

    1. Each ``[KO]...[KO]`` span has its content passed through
       ``korean2katakana``; the result, followed by a period, is re-wrapped
       in ``[JA]`` tags so the next step picks it up.
    2. Each ``[JA]...[JA]`` span is replaced by ``japanese_cleaners`` applied
       to its content, followed by a single space.
    3. Only the characters matched by ``_cleaner_cleans`` are kept, and then
       every space is removed.

    Returns the resulting string.
    """
    def _korean_span_to_ja(match):
        # Re-tag the converted span as [JA] so it goes through step 2.
        return '[JA]' + korean2katakana(match.group(1)) + '.[JA]'

    def _clean_ja_span(match):
        return japanese_cleaners(match.group(1)) + ' '

    ja_tagged = re.sub(r'\[KO\](.*?)\[KO\]', _korean_span_to_ja, text)
    cleaned = re.sub(r'\[JA\](.*?)\[JA\]', _clean_ja_span, ja_tagged)
    kept_chars = _cleaner_cleans.findall(cleaned)
    return ''.join(kept_chars).replace(' ', '')