"""Gradio app that compares tokenization of the same text across several Hugging Face tokenizers."""
from functools import lru_cache

import gradio as gr
from transformers import AutoTokenizer
# Tokenizer checkpoints offered in the UI (referenced by the CheckboxGroup below).
tokenizers = [
    "bert-base-uncased",
    "gpt2",
    "roberta-base",
    "distilbert-base-uncased",
    "xlnet-base-cased",
]
@lru_cache(maxsize=None)  # key space is the small fixed tokenizer list, so unbounded is safe
def _get_tokenizer(tokenizer_name: str):
    """Load and cache a tokenizer by checkpoint name.

    The original code called ``AutoTokenizer.from_pretrained`` on every
    request, re-resolving (and potentially re-downloading) the checkpoint
    each time; caching makes repeated comparisons fast.
    """
    return AutoTokenizer.from_pretrained(tokenizer_name)


def tokenize_text(text: str, tokenizer_name: str) -> str:
    """Tokenize *text* with the named tokenizer and return the tokens space-joined.

    Args:
        text: Raw input string to tokenize.
        tokenizer_name: A Hugging Face checkpoint name (e.g. ``"gpt2"``).

    Returns:
        The subword tokens joined by single spaces.
    """
    tokens = _get_tokenizer(tokenizer_name).tokenize(text)
    return " ".join(tokens)
def compare_tokenizers(text: str, selected_tokenizers: list[str]) -> dict[str, str]:
    """Tokenize *text* with each selected tokenizer.

    Args:
        text: Raw input string to tokenize.
        selected_tokenizers: Checkpoint names chosen in the UI; may be empty.

    Returns:
        Mapping of tokenizer name -> space-joined token string.
    """
    # Dict comprehension replaces the original accumulate-in-a-loop form.
    return {name: tokenize_text(text, name) for name in selected_tokenizers}
# Build the Gradio UI: a free-text input plus a multi-select of tokenizer
# checkpoints; results render as a JSON mapping tokenizer -> token string.
iface = gr.Interface(
    fn=compare_tokenizers,
    inputs=[
        gr.Textbox(label="Enter text to tokenize"),
        gr.CheckboxGroup(choices=tokenizers, label="Select tokenizers"),
    ],
    outputs=gr.JSON(label="Tokenization Results"),
    title="Tokenizer Comparison",
    description="Compare tokenization results from different tokenizers.",
)

if __name__ == "__main__":
    # Launch only when run as a script, so importing this module
    # (e.g. from tests or another app) does not start a server.
    iface.launch()