# Spaces: Running
# File size: 1,012 Bytes
# 2024883
from functools import lru_cache

import gradio as gr
from transformers import AutoTokenizer
# Model identifiers offered as choices in the tokenizer checkbox group.
tokenizers = [
    "bert-base-uncased",
    "gpt2",
    "roberta-base",
    "distilbert-base-uncased",
    "xlnet-base-cased",
]
# Small bound: the UI only offers a handful of tokenizer names, but the
# function itself accepts any name, so the cache must not grow unbounded.
@lru_cache(maxsize=8)
def _load_tokenizer(tokenizer_name):
    """Return the tokenizer for *tokenizer_name*, loading it at most once.

    Loading is presumably expensive (it may read from disk or the
    network), so repeated requests for the same name reuse the cached
    instance instead of calling ``from_pretrained`` again.
    """
    return AutoTokenizer.from_pretrained(tokenizer_name)


def tokenize_text(text, tokenizer_name):
    """Tokenize *text* with the named tokenizer.

    Args:
        text: The string to tokenize.
        tokenizer_name: Identifier passed to
            ``AutoTokenizer.from_pretrained``.

    Returns:
        The tokens produced by ``tokenizer.tokenize(text)``, joined into
        one string with single spaces.
    """
    tokenizer = _load_tokenizer(tokenizer_name)
    tokens = tokenizer.tokenize(text)
    return " ".join(tokens)
def compare_tokenizers(text, selected_tokenizers):
    """Tokenize *text* with every selected tokenizer.

    Args:
        text: The string to tokenize.
        selected_tokenizers: Iterable of tokenizer names. ``None`` is
            treated the same as an empty selection.

    Returns:
        A dict mapping each selected tokenizer name to the result of
        ``tokenize_text(text, name)``. Empty when nothing is selected.
    """
    # Guard against a missing selection so the handler returns an empty
    # result instead of raising TypeError when iterating None.
    names = selected_tokenizers or []
    return {name: tokenize_text(text, name) for name in names}
# Build the Gradio interface: a free-text box plus a multi-select of
# tokenizer names as inputs, and a JSON panel showing the per-tokenizer
# results returned by compare_tokenizers.
iface = gr.Interface(
    fn=compare_tokenizers,
    inputs=[
        gr.Textbox(label="Enter text to tokenize"),
        gr.CheckboxGroup(choices=tokenizers, label="Select tokenizers"),
    ],
    outputs=gr.JSON(label="Tokenization Results"),
    title="Tokenizer Comparison",
    description="Compare tokenization results from different tokenizers.",
)

# Launch the app.
iface.launch()