Spaces:
Running
Running
import subprocess as sp | |
import sys | |
import gradio as gr | |
import romkan | |
from fugashi import Tagger | |
# Build the shared tagger once at import time. If construction fails
# (presumably because the UniDic dictionary has not been downloaded yet --
# confirm against fugashi's behavior), download it with the current
# interpreter and retry once.
try:
    tagger = Tagger()
except Exception:
    # `except Exception` rather than a bare `except` so that KeyboardInterrupt
    # and SystemExit are not swallowed. check_call raises CalledProcessError
    # when the download exits non-zero, instead of silently continuing.
    sp.check_call([sys.executable, "-m", "unidic", "download"])
    tagger = Tagger()
# Hiragana and katakana listed in matching order: the character at index i of
# ALL_KATA is paired with the character at index i of ALL_HIRA.
ALL_HIRA = "ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖ"
ALL_KATA = "ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶ"

# Table for str.translate: katakana code point -> hiragana code point.
kata2hira_table = {
    ord(kata_char): ord(hira_char)
    for kata_char, hira_char in zip(ALL_KATA, ALL_HIRA)
}
def kata2hira(token) -> str:
    """Return the hiragana reading of a tagger token.

    Reads ``token.feature.kana`` and maps its katakana characters to hiragana
    through ``kata2hira_table``. When that reading is empty or missing, the
    token's ``surface`` text is returned unchanged.
    """
    reading = token.feature.kana
    return reading.translate(kata2hira_table) if reading else token.surface
def get_hira_roma(kanji):
    """Convert text to hiragana and romaji.

    The text is split into tokens by the module-level ``tagger``; each token
    is turned into hiragana with ``kata2hira`` and then into romaji with
    ``romkan.to_roma``.

    Returns:
        A 3-tuple ``(hira, roma, line)`` where ``hira`` is the per-token
        hiragana joined without separators, ``roma`` is the per-token romaji
        joined with single spaces, and ``line`` is the input, ``hira`` and
        ``roma`` separated by single spaces.
    """
    hira_parts = list(map(kata2hira, tagger(kanji)))
    roma_parts = map(romkan.to_roma, hira_parts)

    hira_text = "".join(hira_parts)
    roma_text = " ".join(roma_parts)
    combined = f"{kanji} {hira_text} {roma_text}"
    return hira_text, roma_text, combined
def main():
    """Build the Gradio interface and launch it.

    The UI has one input textbox and three output textboxes (hiragana,
    romaji, and a combined line); submitting the input runs
    ``get_hira_roma`` and fills the three outputs.
    """
    soft_theme = gr.themes.Soft(font=gr.themes.GoogleFont("NotoSans CJK"))

    # NOTE(review): the widget nesting below is reconstructed; confirm the
    # intended Row/Column layout.
    with gr.Blocks(theme=soft_theme) as app:
        with gr.Row():
            with gr.Column(scale=1):
                source_box = gr.Textbox(
                    label="Text",
                    placeholder="日本語",
                    show_copy_button=True,
                )
                hira_box = gr.Textbox(label="Hiragana", show_copy_button=True)
                roma_box = gr.Textbox(label="Roma", show_copy_button=True)
                copy_box = gr.Textbox(label="Copy", show_copy_button=True)

        # Pressing Enter in the input box triggers the conversion.
        source_box.submit(
            fn=get_hira_roma,
            inputs=source_box,
            outputs=[hira_box, roma_box, copy_box],
            show_progress="hidden",
        )

    app.launch()
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()