taishi-i committed on
Commit
6684a21
·
1 Parent(s): 1470ffa

add app.py and requirements.txt

Browse files
Files changed (2) hide show
  1. app.py +87 -0
  2. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from toiro import tokenizers
3
+
4
+ num_input_lines = 3
5
+ default_text = "ここにテキストを入力し、Submit を押してください。"
6
+ title = "Japanese Tokenizer Comparison"
7
+ description = """
8
+ This is a demo comparing Japanese tokenizers. You can compare the tokenization results of tools that are available with just a `pip install` in Python.
9
+ """
10
+
11
+ article = """
12
+
13
+ # How to install each library
14
+
15
+ [Janome](https://github.com/mocobeta/janome):
16
+ ```
17
+ pip install janome
18
+ ```
19
+
20
+ [nagisa](https://github.com/taishi-i/nagisa):
21
+ ```
22
+ pip install nagisa
23
+ ```
24
+
25
+ [sudachi.rs](https://github.com/WorksApplications/sudachi.rs):
26
+ ```
27
+ pip install sudachipy sudachidict_core
28
+ ```
29
+
30
+ [mecab-python3](https://github.com/SamuraiT/mecab-python3):
31
+ ```
32
+ pip install mecab-python3
33
+ ```
34
+
35
+ [fugashi_ipadic](https://github.com/polm/fugashi):
36
+ ```
37
+ pip install fugashi ipadic
38
+ ```
39
+
40
+ [fugashi_unidic](https://github.com/polm/fugashi):
41
+ ```
42
+ pip install fugashi unidic-lite
43
+ ```
44
+
45
+ """
46
+
47
+
48
+ def tokenize(text):
49
+ words_janome = tokenizers.tokenize_janome(text)
50
+ words_nagisa = tokenizers.tokenize_nagisa(text)
51
+ words_sudachirs = tokenizers.tokenize_sudachipy(text)
52
+ words_mecabpython3 = tokenizers.tokenize_mecab(text)
53
+ words_fugashi_ipadic = tokenizers.tokenize_fugashi_ipadic(text)
54
+ words_fugashi_unidic = tokenizers.tokenize_fugashi_unidic(text)
55
+
56
+ return (
57
+ words_janome,
58
+ words_nagisa,
59
+ words_sudachirs,
60
+ words_mecabpython3,
61
+ words_fugashi_ipadic,
62
+ words_fugashi_unidic,
63
+ )
64
+
65
+
66
+ iface = gr.Interface(
67
+ fn=tokenize,
68
+ inputs=gr.inputs.Textbox(
69
+ label="Input text",
70
+ lines=num_input_lines,
71
+ default=default_text,
72
+ ),
73
+ title=title,
74
+ description=description,
75
+ article=article,
76
+ outputs=[
77
+ gr.outputs.Textbox(label="Janome"),
78
+ gr.outputs.Textbox(label="nagisa"),
79
+ gr.outputs.Textbox(label="sudachi.rs"),
80
+ gr.outputs.Textbox(label="mecab-python3"),
81
+ gr.outputs.Textbox(label="fugashi_ipadic"),
82
+ gr.outputs.Textbox(label="fugashi_unidic"),
83
+ ],
84
+ )
85
+
86
+
87
+ iface.launch()
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ toiro
2
+ nagisa
3
+ sudachipy
4
+ sudachidict_core
5
+ mecab-python3
6
+ fugashi
7
+ ipadic
8
+ unidic-lite