# app.py — Japanese tokenizer comparison demo by taishi-i (commit 8bc03af)
import gradio as gr
from toiro import tokenizers
# UI configuration constants for the Gradio demo below.
num_input_lines = 3  # height (in lines) of the input textbox
default_text = "γ“γ“γ«γƒ†γ‚­γ‚Ήγƒˆγ‚’ε…₯εŠ›γ—γ€Enter γ‚’ζŠΌγ—γ¦γγ γ•γ„γ€‚"
title = "Japanese Tokenizer Comparison"
description = """
This is a demo comparing Japanese tokenizers. You can compare the tokenization results of tools that are available with just a `pip install` in Python.
"""
# Markdown shown below the demo: install instructions for each tokenizer.
article = """
# How to install each library
[Janome](https://github.com/mocobeta/janome):
```
pip install janome
```
[nagisa](https://github.com/taishi-i/nagisa):
```
pip install nagisa
```
[sudachi.rs](https://github.com/WorksApplications/sudachi.rs):
```
pip install sudachipy sudachidict_core
```
[mecab-python3](https://github.com/SamuraiT/mecab-python3):
```
pip install mecab-python3
```
[fugashi_ipadic](https://github.com/polm/fugashi):
```
pip install fugashi ipadic
```
[fugashi_unidic](https://github.com/polm/fugashi):
```
pip install fugashi unidic-lite
```
"""
def tokenize(text):
    """Tokenize *text* with six Japanese tokenizers via toiro.

    Returns a 6-tuple of per-tool results, in the same order as the
    output textboxes of the Gradio interface below:
    Janome, nagisa, sudachi.rs, mecab-python3, fugashi_ipadic, fugashi_unidic.
    """
    tokenize_funcs = (
        tokenizers.tokenize_janome,
        tokenizers.tokenize_nagisa,
        tokenizers.tokenize_sudachipy,
        tokenizers.tokenize_mecab,
        tokenizers.tokenize_fugashi_ipadic,
        tokenizers.tokenize_fugashi_unidic,
    )
    return tuple(fn(text) for fn in tokenize_funcs)
# One output box per tokenizer; order must match the tuple returned by
# tokenize().
output_boxes = [
    gr.Textbox(label="Janome"),
    gr.Textbox(label="nagisa"),
    gr.Textbox(label="sudachi.rs"),
    gr.Textbox(label="mecab-python3"),
    gr.Textbox(label="fugashi_ipadic"),
    gr.Textbox(label="fugashi_unidic"),
]

# Single text input, pre-filled with a Japanese example sentence.
input_box = gr.Textbox(
    label="Input text",
    lines=num_input_lines,
    value=default_text,
)

# live=True re-runs tokenize() on every edit instead of on submit only.
iface = gr.Interface(
    fn=tokenize,
    inputs=input_box,
    outputs=output_boxes,
    title=title,
    description=description,
    article=article,
    live=True,
)
iface.launch()