File size: 2,032 Bytes
6684a21 88b75d2 6684a21 88b75d2 6684a21 8bc03af 6684a21 88b75d2 6684a21 88b75d2 6684a21 f450fdc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
import gradio as gr
from toiro import tokenizers
# Number of visible text rows given to the input textbox.
num_input_lines = 3

# Placeholder sentence pre-filled in the input box
# (roughly: "Enter your text here, then press Enter.").
# The literal must stay on one line and be stored as UTF-8; a mis-decoded copy
# turns the Japanese into Greek-looking mojibake and splits the string in two.
default_text = "ここにテキストを入力し、Enter を押してください。"

# Page heading and short introduction shown above the demo.
title = "Japanese Tokenizer Comparison"
description = """
This is a demo comparing Japanese tokenizers. You can compare the tokenization results of tools that are available with just a `pip install` in Python.
"""
# Markdown rendered with the demo: one install command per compared tokenizer.
# Each heading names the tokenizer exactly as it is labelled in the output
# boxes, so the two fugashi entries are distinguished by their dictionary
# (ipadic vs. unidic-lite).
article = """
# How to install each library
[Janome](https://github.com/mocobeta/janome):
```
pip install janome
```
[nagisa](https://github.com/taishi-i/nagisa):
```
pip install nagisa
```
[sudachi.rs](https://github.com/WorksApplications/sudachi.rs):
```
pip install sudachipy sudachidict_core
```
[mecab-python3](https://github.com/SamuraiT/mecab-python3):
```
pip install mecab-python3
```
[fugashi_ipadic](https://github.com/polm/fugashi):
```
pip install fugashi ipadic
```
[fugashi_unidic](https://github.com/polm/fugashi):
```
pip install fugashi unidic-lite
```
"""
def tokenize(text):
    """Tokenize *text* with every compared tokenizer.

    Each backend is invoked once, in a fixed order, through the matching
    ``tokenizers.tokenize_*`` helper.

    Args:
        text: The sentence to split into words.

    Returns:
        A 6-tuple of tokenizer results ordered as: Janome, nagisa,
        sudachi.rs, mecab-python3, fugashi (ipadic), fugashi (unidic).
    """
    return (
        tokenizers.tokenize_janome(text),
        tokenizers.tokenize_nagisa(text),
        # The "sudachi.rs" slot is filled by the SudachiPy helper.
        tokenizers.tokenize_sudachipy(text),
        # The "mecab-python3" slot is filled by the MeCab helper.
        tokenizers.tokenize_mecab(text),
        tokenizers.tokenize_fugashi_ipadic(text),
        tokenizers.tokenize_fugashi_unidic(text),
    )
# Input component: a multi-line textbox pre-filled with the default prompt.
input_box = gr.Textbox(
    label="Input text",
    lines=num_input_lines,
    value=default_text,
)

# One output textbox per tokenizer. The label order must line up with the
# order of the tuple returned by tokenize().
output_boxes = [
    gr.Textbox(label=tokenizer_label)
    for tokenizer_label in (
        "Janome",
        "nagisa",
        "sudachi.rs",
        "mecab-python3",
        "fugashi_ipadic",
        "fugashi_unidic",
    )
]

# Wire the tokenize() callback to the components and attach the page texts.
iface = gr.Interface(
    fn=tokenize,
    inputs=input_box,
    outputs=output_boxes,
    title=title,
    description=description,
    article=article,
    live=True,
)

# Start serving the demo.
iface.launch()
|