File size: 1,352 Bytes
96ece17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import os

import gradio as gr
import tiktoken


# NOTE(review): an empty TIKTOKEN_CACHE_DIR appears intended to disable
# tiktoken's on-disk cache — confirm against tiktoken's cache handling.
os.environ["TIKTOKEN_CACHE_DIR"] = ""

# Module-level default encoding (cl100k_base is the chat-era BPE).
encoding = tiktoken.get_encoding("cl100k_base")

# Maps each user-facing model label (as shown in the UI radio) to the
# name of the tiktoken encoding that model uses.
enc_mapping = {
    "gpt-4": "cl100k_base",
    "gpt-3.5-turbo(chatgpt)": "cl100k_base",
    "text-embedding-ada-002": "cl100k_base",
    "Codex": "p50k_base",
    "text-davinci-002": "p50k_base",
    "text-davinci-003": "p50k_base",
    "gpt3": "r50k_base",
    "gpt2": "r50k_base",
}


def tokenize(text, model):
    """Tokenize *text* with the encoding mapped to *model*.

    Looks up the tiktoken encoding name for *model* in ``enc_mapping``
    and returns a ``(token_count, token_ids)`` pair.
    """
    bpe = tiktoken.get_encoding(enc_mapping[model])
    token_ids = bpe.encode(text)
    return len(token_ids), token_ids


title = "GPT Token"
description = "This demo uses <a href='https://github.com/openai/tiktoken' target='_blank'>tiktoken</a> to calculate the token number needed for GPT models."

# The selectable models; these labels are the keys of enc_mapping.
model_choices = [
    "gpt-4",
    "gpt-3.5-turbo(chatgpt)",
    "text-embedding-ada-002",
    "Codex",
    "text-davinci-002",
    "text-davinci-003",
    "gpt3",
    "gpt2",
]

# Wire the tokenize function into a simple two-input / two-output UI.
iface = gr.Interface(
    fn=tokenize,
    inputs=[
        gr.Textbox(label="input sequence"),
        gr.Radio(
            choices=model_choices,
            value="gpt-3.5-turbo(chatgpt)",
            label="model",
        ),
    ],
    outputs=[
        gr.Textbox(label="token number"),
        gr.Textbox(label="token sequence"),
    ],
    title=title,
    description=description,
    allow_flagging="never",
)
iface.launch(share=False, debug=True)