Celeste-cj committed on
Commit
96ece17
·
1 Parent(s): c414b20

initial commit

Browse files
Files changed (3) hide show
  1. .gitignore +1 -0
  2. app.py +34 -0
  3. requirements.txt +2 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .vscode
app.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ import gradio as gr
4
+ import tiktoken
5
+
6
+
7
# Set before any encoding is loaded. NOTE(review): an empty value presumably
# turns off tiktoken's on-disk cache of vocabulary files -- confirm against
# tiktoken's loader.
os.environ["TIKTOKEN_CACHE_DIR"] = ""

# Module-level handle on the cl100k_base encoding, created at import time.
encoding = tiktoken.get_encoding("cl100k_base")

# Maps each model label shown in the UI to the name of the tiktoken encoding
# used to tokenize text for that model.
enc_mapping = {
    "gpt-4": "cl100k_base",
    "gpt-3.5-turbo(chatgpt)": "cl100k_base",
    "text-embedding-ada-002": "cl100k_base",
    "Codex": "p50k_base",
    "text-davinci-002": "p50k_base",
    "text-davinci-003": "p50k_base",
    "gpt3": "r50k_base",
    "gpt2": "r50k_base",
}
14
+
15
+
16
def tokenize(text, model):
    """Count the tokens ``text`` occupies under the encoding used by ``model``.

    Args:
        text: The input string to tokenize. ``None`` is treated as an empty
            string.
        model: A key of ``enc_mapping`` naming the model whose encoding
            should be used.

    Returns:
        A ``(count, tokens)`` tuple: the number of tokens and the list of
        token ids produced by the encoding.

    Raises:
        KeyError: If ``model`` is not a key of ``enc_mapping``.
    """
    if text is None:
        text = ""
    encoding = tiktoken.get_encoding(enc_mapping[model])
    # By default ``encode`` raises ValueError when the text contains a
    # special-token string such as "<|endoftext|>". Arbitrary user input may
    # include those strings, so disable the check and encode them as
    # ordinary text instead of failing the request.
    enc = encoding.encode(text, disallowed_special=())
    return len(enc), enc
20
+
21
+
22
# Heading and HTML blurb passed to the Gradio interface below.
title = "GPT Token"
description = "This demo uses <a href='https://github.com/openai/tiktoken' target='_blank'>tiktoken</a> to calculate the token number needed for GPT models."

# Two inputs (free text and a model selector) feed tokenize(); its two return
# values are shown in the "token number" and "token sequence" text boxes.
iface = gr.Interface(
    fn=tokenize,
    inputs=[
        gr.Textbox(label="input sequence"),
        gr.Radio(
            # Derive the choices from enc_mapping so every selectable model
            # is guaranteed to have an encoding when tokenize() looks it up;
            # a hand-maintained copy of the list could drift and cause a
            # KeyError.
            choices=list(enc_mapping),
            value="gpt-3.5-turbo(chatgpt)",
            label="model",
        ),
    ],
    outputs=[
        gr.Textbox(label="token number"),
        gr.Textbox(label="token sequence"),
    ],
    title=title,
    description=description,
    allow_flagging='never',
)
iface.launch(share=False, debug=True)
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ gradio
2
+ tiktoken