"""Gradio app that shows how different Hugging Face tokenizers split a text."""

import functools

import gradio as gr
from transformers import AutoTokenizer

# List of available tokenizers
tokenizers = [
    "bert-base-uncased",
    "gpt2",
    "roberta-base",
    "distilbert-base-uncased",
    "xlnet-base-cased",
]


@functools.lru_cache(maxsize=len(tokenizers))
def _load_tokenizer(tokenizer_name):
    """Return the tokenizer for *tokenizer_name*, loading it at most once.

    The cache is bounded by the number of offered tokenizers so repeated
    requests reuse an already-constructed tokenizer instead of calling
    ``AutoTokenizer.from_pretrained`` again on every comparison.
    """
    return AutoTokenizer.from_pretrained(tokenizer_name)


def tokenize_text(text: str, tokenizer_name: str) -> str:
    """Tokenize *text* with the named tokenizer.

    Args:
        text: The text to tokenize.
        tokenizer_name: Identifier passed to ``AutoTokenizer.from_pretrained``.

    Returns:
        The tokens produced by ``tokenizer.tokenize(text)``, joined with
        single spaces.
    """
    tokenizer = _load_tokenizer(tokenizer_name)
    return " ".join(tokenizer.tokenize(text))


def compare_tokenizers(text, selected_tokenizers):
    """Tokenize *text* with every selected tokenizer.

    Args:
        text: The text to tokenize.
        selected_tokenizers: Iterable of tokenizer names; ``None`` or an
            empty selection yields an empty result.

    Returns:
        A dict mapping each selected tokenizer name to its space-joined
        tokens, in selection order.
    """
    # Guard against a missing selection so the UI shows an empty result
    # rather than failing on iteration over None.
    if not selected_tokenizers:
        return {}
    return {name: tokenize_text(text, name) for name in selected_tokenizers}


# Create the Gradio interface
iface = gr.Interface(
    fn=compare_tokenizers,
    inputs=[
        gr.Textbox(label="Enter text to tokenize"),
        gr.CheckboxGroup(choices=tokenizers, label="Select tokenizers"),
    ],
    outputs=gr.JSON(label="Tokenization Results"),
    title="Tokenizer Comparison",
    description="Compare tokenization results from different tokenizers.",
)

# Launch the app
iface.launch()