|
import gradio as gr |
|
from toiro import tokenizers |
|
|
|
# Height (in text rows) of the input box.
num_input_lines = 3

# Initial contents of the input box.
# English: "Enter text here and press Enter."
# NOTE: this literal had been corrupted by a wrong-charset round trip
# (UTF-8 bytes rendered as ISO-8859-7), which also split it across two
# lines; it is restored here to the Japanese text those bytes encode.
default_text = "ここにテキストを入力し、Enter を押してください。"

# Heading displayed for the demo.
title = "Japanese Tokenizer Comparison"

# Short Markdown blurb passed to the interface as its description.
description = """
This is a demo comparing Japanese tokenizers. You can compare the tokenization results of tools that are available with just a `pip install` in Python.
"""
|
|
|
# Markdown passed to the interface as its article: one install snippet per
# tokenizer. The last section is labelled fugashi_unidic (it installs
# unidic-lite); it was previously a second "fugashi_ipadic" heading.
article = """

# How to install each library

[Janome](https://github.com/mocobeta/janome):
```
pip install janome
```

[nagisa](https://github.com/taishi-i/nagisa):
```
pip install nagisa
```

[sudachi.rs](https://github.com/WorksApplications/sudachi.rs):
```
pip install sudachipy sudachidict_core
```

[mecab-python3](https://github.com/SamuraiT/mecab-python3):
```
pip install mecab-python3
```

[fugashi_ipadic](https://github.com/polm/fugashi):
```
pip install fugashi ipadic
```

[fugashi_unidic](https://github.com/polm/fugashi):
```
pip install fugashi unidic-lite
```

"""
|
|
|
|
|
def tokenize(text):
    """Run every supported tokenizer over ``text``.

    Each result is whatever the corresponding ``toiro.tokenizers`` helper
    returns; the results are handed back unmodified.

    Returns:
        A 6-tuple ordered as: Janome, nagisa, SudachiPy, MeCab,
        fugashi (ipadic), fugashi (unidic).
    """
    # Tuple elements are evaluated left to right, so the tokenizers are
    # invoked in the order listed.
    return (
        tokenizers.tokenize_janome(text),
        tokenizers.tokenize_nagisa(text),
        tokenizers.tokenize_sudachipy(text),
        tokenizers.tokenize_mecab(text),
        tokenizers.tokenize_fugashi_ipadic(text),
        tokenizers.tokenize_fugashi_unidic(text),
    )
|
|
|
|
|
# Assemble the demo UI: a single text input wired to tokenize(), with one
# output box per tokenizer.
iface = gr.Interface(
    fn=tokenize,
    inputs=gr.Textbox(
        value=default_text,
        lines=num_input_lines,
        label="Input text",
    ),
    # Labels are listed in the same order as the tuple returned by tokenize().
    outputs=[
        gr.Textbox(label=tokenizer_label)
        for tokenizer_label in (
            "Janome",
            "nagisa",
            "sudachi.rs",
            "mecab-python3",
            "fugashi_ipadic",
            "fugashi_unidic",
        )
    ],
    title=title,
    description=description,
    article=article,
    live=True,
)

# Start the app as soon as the module is executed.
iface.launch()
|
|