AlGe committed on
Commit
2024883
·
verified ·
1 Parent(s): f81027f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -0
app.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoTokenizer
3
+
4
# Pretrained tokenizer names the user can pick from.
tokenizers = [
    "bert-base-uncased",
    "gpt2",
    "roberta-base",
    "distilbert-base-uncased",
    "xlnet-base-cased",
]
12
+
13
# Tokenizers already loaded by this process, keyed by tokenizer name.
_TOKENIZER_CACHE = {}


def tokenize_text(text, tokenizer_name):
    """Tokenize *text* with the named pretrained tokenizer.

    Args:
        text: The string to split into tokens.
        tokenizer_name: Identifier passed to ``AutoTokenizer.from_pretrained``.

    Returns:
        The tokens produced by the tokenizer, joined by single spaces.
    """
    # Reuse a previously loaded tokenizer instead of calling from_pretrained
    # again on every request for the same name.
    tokenizer = _TOKENIZER_CACHE.get(tokenizer_name)
    if tokenizer is None:
        tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
        _TOKENIZER_CACHE[tokenizer_name] = tokenizer
    return " ".join(tokenizer.tokenize(text))
17
+
18
def compare_tokenizers(text, selected_tokenizers):
    """Tokenize *text* once per selected tokenizer.

    Returns a dict mapping each name in *selected_tokenizers* to the string
    that ``tokenize_text`` produces for that tokenizer.
    """
    return {name: tokenize_text(text, name) for name in selected_tokenizers}
23
+
24
# Build the Gradio UI: free text and tokenizer checkboxes in, JSON out.
text_box = gr.Textbox(label="Enter text to tokenize")
tokenizer_picker = gr.CheckboxGroup(choices=tokenizers, label="Select tokenizers")
results_panel = gr.JSON(label="Tokenization Results")

iface = gr.Interface(
    fn=compare_tokenizers,
    inputs=[text_box, tokenizer_picker],
    outputs=results_panel,
    title="Tokenizer Comparison",
    description="Compare tokenization results from different tokenizers.",
)

# Start serving the app.
iface.launch()