|
from text.cleaners import japanese_to_romaji_with_accent |
|
from text.korean import ( |
|
join_jamos, j2hcj, h2j, |
|
latin_to_hangul, |
|
number_to_hangul, |
|
g2pk, |
|
) |
|
import re |
|
import jaconv |
|
|
|
|
|
# String-to-string replacement table (a dict, despite the "_lst" name).
# korean2katakana walks it with .items() and applies every key -> value pair
# through str.replace on the joined jamo string, so the pairs take effect in
# the dict's iteration order and later pairs see the output of earlier ones.
# NOTE(review): the non-ASCII literals below look mis-encoded in this copy and
# many of them are split across physical lines; the intended characters cannot
# be determined from this text alone. Restore them from a correctly encoded
# original before relying on this table.
repl_lst = { |
|
'γ²': 'γ
', |
|
'γΈ': 'γ
', |
|
'γ
': 'γ
', |
|
'γ
': 'γ
', |
|
'γ
': 'γ
', |
|
|
|
'γ
': 'γ
/γ
', |
|
'γ
': 'γ
/γ
£', |
|
'γ
': 'γ
γ
', |
|
'γ
': 'γ
/γ
', |
|
'γ
': 'γ
/γ
', |
|
'γ
': 'γ
γ
£', |
|
'γ
’': 'γ
γ
£', |
|
|
|
'γ
': 'γ
£γ
', |
|
'γ
': 'γ
', |
|
'γ
': 'γ
£γ
', |
|
|
|
'γ
': 'γ
', |
|
'γ
': 'γ
', |
|
'γ
‘': 'γ
', |
|
|
|
'||//γ
': 'γΉ', |
|
} |
|
|
|
|
|
def get_word_list(text):
    """Return the characters of *text* after Hangul normalisation.

    The input is passed through ``g2pk``, ``latin_to_hangul`` and
    ``number_to_hangul`` (in that order), decomposed with ``h2j`` /
    ``j2hcj``, stripped of its final character, and recomposed with
    ``join_jamos``. The recomposed string is returned as a list of its
    individual characters.
    """
    hangul = number_to_hangul(latin_to_hangul(g2pk(text)))
    jamo = j2hcj(h2j(hangul))

    # When the string ends in a character from U+3131-U+3163, add a '.' after
    # it so that the unconditional "drop the last character" slice below
    # normally removes the dot instead of that jamo.
    jamo = re.sub(r'([\u3131-\u3163])$', r'\1.', jamo)

    # NOTE(review): as written this replace swaps a space for a space and so
    # changes nothing; presumably it once collapsed repeated spaces - confirm
    # against the original source.
    trimmed = jamo.replace(' ', ' ')[:-1]

    return list(join_jamos(trimmed))
|
|
|
|
|
# Convert `text` to a katakana string and return it (the result is also
# printed to stdout just before returning).
#
# Pipeline, as visible in the body:
#   1. get_word_list(text) yields a list of characters.
#   2. Each character is decomposed with j2hcj(h2j(...)); when the
#      decomposition has exactly three elements, the last one may be swapped
#      for a marker string. A '/' separator is appended to the pieces.
#   3. The pieces are joined and every repl_lst pair is applied via str.replace.
#   4. japanese_to_romaji_with_accent output gets a few romaji substitutions,
#      is passed to jaconv.alphabet2kata, and the '/', '|' and '^' markers are
#      removed or replaced.
#
# NOTE(review): indentation was lost in this copy, so the nesting described
# here is inferred from statement order. The non-ASCII string literals also
# look mis-encoded (two of them are split across physical lines); restore them
# from a correctly encoded original before trusting any comparison below.
def korean2katakana(text): |
|
word_lst = get_word_list(text) |
|
# NOTE(review): `text` is rebound here but never read again in this function.
text = '/' + text.replace('/', ' ').replace('|', ' ').replace('^', ' ').replace(' ', ' ').replace(' ', '^') |
|
new_lst = [] |
|
|
|
# `i` from enumerate is not used in the loop body.
for i, s in enumerate(word_lst): |
|
dh = list(j2hcj(h2j(s))) |
|
# Only three-element decompositions have their last element inspected
# (presumably initial / medial / final jamo - confirm).
if len(dh) == 3: |
|
if dh[-1] == 'γ΄': |
|
dh[-1] = 'γ΄' |
|
|
|
elif dh[-1] == 'γ
' or dh[-1] == 'γ
': |
|
dh[-1] = 'γ΄|' |
|
|
|
elif dh[-1] == 'γΉ': |
|
dh[-1] = '||/' |
|
|
|
else: |
|
# Self-assignment: leaves the last element unchanged.
dh[-1] = dh[-1] |
|
|
|
|
|
# '/' acts as a separator between pieces; it is stripped from the final result.
dh.append('/') |
|
new_lst.extend(dh) |
|
|
|
kr = ''.join(new_lst) |
|
|
|
# Apply every replacement pair in the dict's iteration order.
for k, v in repl_lst.items(): |
|
kr = kr.replace(k, v) |
|
# Post-process the romaji before handing it to jaconv.
kr2ro = japanese_to_romaji_with_accent(kr).replace('si', 'shi').replace('c', 'ts') \ |
|
.replace('ti', 'γγ£γΌ').replace('tu', 'γγ₯γΌ') \ |
|
.replace('di', 'γγ£γΌ').replace('du', 'γγ₯γΌ') |
|
result = jaconv.alphabet2kata(kr2ro) |
|
# Drop the '/' and '^' markers and replace each '|' with the literal shown.
result = result.replace('/', '').replace('|', 'γΌ').replace('^', '') |
|
# Side effect: writes the result to stdout on every call.
print(result) |
|
return result |