Spaces:
Running
Running
File size: 992 Bytes
5b879f4 b39e76c 5b879f4 b39e76c 5b879f4 b39e76c 5b879f4 b39e76c 5b879f4 b39e76c 5b879f4 b39e76c 5b879f4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
import functools

import gradio as gr
from transformers import T5TokenizerFast, CLIPTokenizer
@functools.lru_cache(maxsize=None)
def _load_tokenizers():
    """Build the T5 and CLIP tokenizers once and reuse them on later calls.

    Memoized so that ``from_pretrained`` is not re-run for every request;
    if loading raises, nothing is cached and the next call retries.

    Returns:
        A ``(t5_tokenizer, clip_tokenizer)`` tuple.
    """
    t5_tokenizer = T5TokenizerFast.from_pretrained("google/t5-v1_1-xxl", legacy=False)
    clip_tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
    return t5_tokenizer, clip_tokenizer


def count_tokens(text):
    """Count how many tokens ``text`` encodes to under the T5 and CLIP tokenizers.

    ``encode`` is called with its default arguments, so the counts are the
    lengths of exactly what each tokenizer returns by default.

    Args:
        text: The string to tokenize. ``None`` is treated as an empty string.

    Returns:
        A pair of display strings: ``"T5: <n> tokens"`` and
        ``"CLIP: <n> tokens"``.
    """
    if text is None:
        # Guard against a missing value instead of failing inside encode().
        text = ""

    # Tokenizers are loaded on first use and shared by every later call.
    t5_tokenizer, clip_tokenizer = _load_tokenizers()

    # Get token counts directly using the encode method
    t5_count = len(t5_tokenizer.encode(text))
    clip_count = len(clip_tokenizer.encode(text))
    return f"T5: {t5_count} tokens", f"CLIP: {clip_count} tokens"
# Assemble the UI components first (input box, then one result box per tokenizer)
_text_input = gr.Textbox(label="Text", placeholder="Enter text here...")
_t5_result = gr.Textbox(label="T5 Tokenizer")
_clip_result = gr.Textbox(label="CLIP Tokenizer")

# Wire the components to count_tokens: one input, two outputs matching its
# two returned strings
iface = gr.Interface(
    title="Common Diffusion Model Token Counter",
    description="Enter text to count tokens using T5 and CLIP tokenizers, commonly used in diffusion models.",
    fn=count_tokens,
    inputs=[_text_input],
    outputs=[_t5_result, _clip_result],
)

# Start serving the app
iface.launch()
|