import gradio as gr
from toiro import tokenizers

num_input_lines = 3
# Default input text (Japanese): "Enter text here and press Enter."
default_text = "ここにテキストを入力し、Enter を押してください。"

title = "Japanese Tokenizer Comparison"
description = """
This is a demo comparing Japanese tokenizers.
You can compare the tokenization results of tools that are available with just a `pip install` in Python.
"""

article = """
# How to install each library

[Janome](https://github.com/mocobeta/janome):
```
pip install janome
```

[nagisa](https://github.com/taishi-i/nagisa):
```
pip install nagisa
```

[sudachi.rs](https://github.com/WorksApplications/sudachi.rs):
```
pip install sudachipy sudachidict_core
```

[mecab-python3](https://github.com/SamuraiT/mecab-python3):
```
pip install mecab-python3
```

[fugashi_ipadic](https://github.com/polm/fugashi):
```
pip install fugashi ipadic
```

[fugashi_unidic](https://github.com/polm/fugashi):
```
pip install fugashi unidic-lite
```
"""


def tokenize(text):
    # Run the input text through each tokenizer exposed by toiro.
    words_janome = tokenizers.tokenize_janome(text)
    words_nagisa = tokenizers.tokenize_nagisa(text)
    words_sudachirs = tokenizers.tokenize_sudachipy(text)
    words_mecabpython3 = tokenizers.tokenize_mecab(text)
    words_fugashi_ipadic = tokenizers.tokenize_fugashi_ipadic(text)
    words_fugashi_unidic = tokenizers.tokenize_fugashi_unidic(text)

    # Return one result per output textbox, in the same order as `outputs` below.
    return (
        words_janome,
        words_nagisa,
        words_sudachirs,
        words_mecabpython3,
        words_fugashi_ipadic,
        words_fugashi_unidic,
    )


iface = gr.Interface(
    fn=tokenize,
    inputs=gr.Textbox(
        label="Input text",
        lines=num_input_lines,
        value=default_text,
    ),
    title=title,
    description=description,
    article=article,
    outputs=[
        gr.Textbox(label="Janome"),
        gr.Textbox(label="nagisa"),
        gr.Textbox(label="sudachi.rs"),
        gr.Textbox(label="mecab-python3"),
        gr.Textbox(label="fugashi_ipadic"),
        gr.Textbox(label="fugashi_unidic"),
    ],
    live=True,
)

iface.launch()
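
# Usage note (a sketch, not part of the original demo): `iface.launch()` above
# starts a local Gradio server and blocks until it is stopped. The `tokenize`
# function can also be called directly to compare results without the web UI,
# for example (the sample sentence below is an illustrative assumption):
#
#     for label, words in zip(
#         ["Janome", "nagisa", "sudachi.rs", "mecab-python3",
#          "fugashi_ipadic", "fugashi_unidic"],
#         tokenize("吾輩は猫である。"),
#     ):
#         print(label, words)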