import os
from functools import lru_cache

import gradio as gr
import tiktoken

# NOTE(review): an empty cache dir makes tiktoken skip its on-disk cache and
# re-download encoding files each process start — confirm this is intentional
# (it looks like a workaround for read-only deployment filesystems).
os.environ["TIKTOKEN_CACHE_DIR"] = ""

# User-facing model name -> tiktoken encoding name.
# The Radio choices below are derived from this dict, so adding a model here
# is the only change needed to expose it in the UI.
ENC_MAPPING = {
    "gpt-4": "cl100k_base",
    "gpt-3.5-turbo(chatgpt)": "cl100k_base",
    "text-embedding-ada-002": "cl100k_base",
    "Codex": "p50k_base",
    "text-davinci-002": "p50k_base",
    "text-davinci-003": "p50k_base",
    "gpt3": "r50k_base",
    "gpt2": "r50k_base",
}


@lru_cache(maxsize=None)
def _get_encoding(name):
    """Return the tiktoken encoder for *name*, constructing it at most once.

    tiktoken.get_encoding is comparatively expensive (it may fetch and parse
    the BPE vocabulary), so cache one encoder per encoding name. The key
    space is bounded by ENC_MAPPING's values, so an unbounded cache is safe.
    """
    return tiktoken.get_encoding(name)


def tokenize(text, model):
    """Tokenize *text* with the encoding used by *model*.

    Args:
        text: The input string to encode.
        model: A key of ENC_MAPPING selecting which encoding to use.

    Returns:
        A tuple ``(count, token_ids)`` — the number of tokens and the list
        of token ids.

    Raises:
        KeyError: If *model* is not a key of ENC_MAPPING.
    """
    token_ids = _get_encoding(ENC_MAPPING[model]).encode(text)
    return len(token_ids), token_ids


title = "GPT Token"
description = "This demo uses tiktoken to calculate the token number needed for GPT models."

iface = gr.Interface(
    fn=tokenize,
    inputs=[
        gr.Textbox(label="input sequence"),
        gr.Radio(
            # Derived from the mapping so the UI and the lookup stay in sync.
            choices=list(ENC_MAPPING),
            value="gpt-3.5-turbo(chatgpt)",
            label="model",
        ),
    ],
    outputs=[
        gr.Textbox(label="token number"),
        gr.Textbox(label="token sequence"),
    ],
    title=title,
    description=description,
    allow_flagging="never",
)

if __name__ == "__main__":
    iface.launch(share=False, debug=True)