AronaTTS

Sleeping

App Files Files Community

AronaTTS / text /k2j.py

andhikagg

Update text/k2j.py

3de27d2 verified 8 months ago

raw

history blame contribute delete

2.05 kB

	from text.cleaners import japanese_to_romaji_with_accent
	from text.korean import (
	join_jamos, j2hcj, h2j,
	latin_to_hangul,
	number_to_hangul,
	g2pk,
	)
	import re
	import jaconv


	# Ordered jamo rewrite table. korean2katakana applies the entries one after
	# another with str.replace, so insertion order is significant: for example
	# 'ㅝ' first becomes 'ㅜ/ㅓ' and the later 'ㅓ' rule then rewrites the 'ㅓ'.
	# '/' inside a value is the same syllable separator that korean2katakana
	# appends after every syllable.
	repl_lst = {
	    # Doubled (tense) consonants.
	    'ㄲ': 'ㅋ',
	    'ㄸ': 'ㅌ',
	    'ㅃ': 'ㅍ',
	    'ㅆ': 'ㅅ',
	    'ㅉ': 'ㅊ',

	    # Compound vowels, split into a sequence of simpler vowels.
	    'ㅙ': 'ㅗ/ㅔ',
	    'ㅚ': 'ㅗ/ㅣ',
	    'ㅘ': 'ㅜㅏ',
	    'ㅝ': 'ㅜ/ㅓ',
	    'ㅞ': 'ㅜ/ㅔ',
	    'ㅟ': 'ㅜㅣ',
	    'ㅢ': 'ㅜㅣ',

	    # Y-glide vowels.
	    'ㅒ': 'ㅣㅔ',
	    'ㅕ': 'ㅛ',
	    'ㅖ': 'ㅣㅔ',

	    # Single vowels remapped to another single vowel.
	    'ㅓ': 'ㅗ',
	    'ㅐ': 'ㅔ',
	    'ㅡ': 'ㅜ',

	    # The marker korean2katakana emits for a final 'ㄹ', followed by the
	    # syllable separator and an initial 'ㅎ'. Written as a raw string so
	    # the backslashes are literal instead of invalid escape sequences.
	    # NOTE(review): the backslash is kept to preserve the existing runtime
	    # value; confirm whether a bare '|' marker was intended.
	    r'\|\|//ㅎ': 'ㄹ',
	}


	def get_word_list(text):
	    """Split Korean text into a list of recomposed characters.

	    The text is passed through ``g2pk``, ``latin_to_hangul`` and
	    ``number_to_hangul``, decomposed into compatibility jamo with
	    ``j2hcj(h2j(...))``, trimmed by one trailing character, and recomposed
	    with ``join_jamos``.

	    Args:
	        text: Input string.

	    Returns:
	        A list with one element per character of the recomposed text
	        (spaces included). Empty input yields an empty list.
	    """
	    text = g2pk(text)
	    text = latin_to_hangul(text)
	    text = number_to_hangul(text)
	    jamo = j2hcj(h2j(text))

	    # The last character is dropped unconditionally below. When the text
	    # ends in a jamo (U+3131-U+3163), append a sacrificial '.' first so the
	    # trim removes that instead of the jamo.
	    jamo = re.sub(r'([\u3131-\u3163])$', r'\1.', jamo)

	    # Collapse runs of spaces into a single space. The previous
	    # ``replace(' ', ' ')`` swapped a space for itself and did nothing.
	    jamo = re.sub(r' {2,}', ' ', jamo)

	    return list(join_jamos(jamo[:-1]))


	def korean2katakana(text):
	    """Transliterate Korean text into katakana.

	    Each character from ``get_word_list`` is decomposed into jamo, certain
	    final consonants are rewritten to intermediate markers, and a '/'
	    separator is appended after every syllable. The ``repl_lst`` rewrites
	    are then applied in order, and the result is passed through
	    ``japanese_to_romaji_with_accent`` and ``jaconv.alphabet2kata``.

	    Args:
	        text: Korean input string.

	    Returns:
	        The converted string. It is also printed to stdout.
	    """
	    # Rewrites for the final consonant of a three-jamo syllable. Any other
	    # final consonant is kept unchanged. r'\|' is an intermediate marker
	    # that is turned into the long-vowel mark 'ー' at the very end.
	    # NOTE(review): the backslash is kept to preserve the existing runtime
	    # value; confirm whether a bare '|' marker was intended.
	    final_map = {
	        'ㅁ': r'ㄴ\|',
	        'ㅇ': r'ㄴ\|',
	        'ㄹ': r'\|\|/',
	    }

	    pieces = []
	    for syllable in get_word_list(text):
	        jamo = list(j2hcj(h2j(syllable)))
	        if len(jamo) == 3:
	            jamo[-1] = final_map.get(jamo[-1], jamo[-1])
	        # '/' separates syllables until it is stripped from the result.
	        jamo.append('/')
	        pieces.extend(jamo)

	    kr = ''.join(pieces)

	    # Applied sequentially: later rules see the output of earlier ones.
	    for old, new in repl_lst.items():
	        kr = kr.replace(old, new)

	    # The order of these replacements is significant and must be kept.
	    kr2ro = (
	        japanese_to_romaji_with_accent(kr)
	        .replace('si', 'shi')
	        .replace('c', 'ts')
	        .replace('ti', 'ティー')
	        .replace('tu', 'トゥー')
	        .replace('di', 'ディー')
	        .replace('du', 'ドゥー')
	    )

	    result = jaconv.alphabet2kata(kr2ro)
	    # Drop the helper separators and turn the marker into 'ー'.
	    result = result.replace('/', '').replace(r'\|', 'ー').replace('^', '')
	    print(result)
	    return result