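# File-viewer header captured together with the source (not part of the program):
# avatar caption "AlGe's picture"; commit "Create app.py" (2024883, verified);
# viewer links: raw, history, blame; file size: 1.01 kB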
from functools import lru_cache

import gradio as gr
from transformers import AutoTokenizer
# Pretrained tokenizer identifiers offered as choices in the UI.
tokenizers = [
    "bert-base-uncased",
    "gpt2",
    "roberta-base",
    "distilbert-base-uncased",
    "xlnet-base-cased",
]
@lru_cache(maxsize=8)
def _load_tokenizer(tokenizer_name):
    """Load the named pretrained tokenizer once and reuse it on later calls."""
    # The cache is bounded so that arbitrary names cannot grow it without limit.
    return AutoTokenizer.from_pretrained(tokenizer_name)


def tokenize_text(text, tokenizer_name):
    """Tokenize *text* with the named tokenizer and return the tokens as one string.

    Args:
        text: The text to split into tokens. Empty or ``None`` text yields
            ``""`` without loading any tokenizer.
        tokenizer_name: Identifier passed to ``AutoTokenizer.from_pretrained``.

    Returns:
        The tokens produced by ``tokenizer.tokenize`` joined by single spaces.
    """
    if not text:
        return ""
    # Loading is memoized: previously every call re-ran ``from_pretrained``.
    tokens = _load_tokenizer(tokenizer_name).tokenize(text)
    return " ".join(tokens)
def compare_tokenizers(text, selected_tokenizers):
    """Tokenize *text* with every selected tokenizer.

    Args:
        text: The text to tokenize.
        selected_tokenizers: Iterable of tokenizer names to run. ``None`` is
            treated as an empty selection.

    Returns:
        A dict mapping each selected tokenizer name to the result of
        ``tokenize_text(text, name)``; empty when nothing is selected.
    """
    # ``or ()`` keeps a missing selection from raising TypeError on iteration.
    return {
        tokenizer_name: tokenize_text(text, tokenizer_name)
        for tokenizer_name in selected_tokenizers or ()
    }
# Build the UI pieces first: a free-text input, a multi-select of tokenizer
# names, and a JSON view for the per-tokenizer results.
_text_input = gr.Textbox(label="Enter text to tokenize")
_tokenizer_picker = gr.CheckboxGroup(choices=tokenizers, label="Select tokenizers")
_results_view = gr.JSON(label="Tokenization Results")

# Wire the components to compare_tokenizers.
iface = gr.Interface(
    fn=compare_tokenizers,
    inputs=[_text_input, _tokenizer_picker],
    outputs=_results_view,
    title="Tokenizer Comparison",
    description="Compare tokenization results from different tokenizers.",
)

# Start serving the interface.
iface.launch()