Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -10,6 +10,7 @@ def count_tokens(json_file, encoding_name):
|
|
10 |
with open(json_file.name, 'r') as f:
|
11 |
data = json.load(f) if json_file.name.endswith('.json') else [json.loads(line) for line in f.readlines()]
|
12 |
|
|
|
13 |
token_counts = []
|
14 |
for entry in data:
|
15 |
conversation_token_count = 0
|
@@ -18,14 +19,18 @@ def count_tokens(json_file, encoding_name):
|
|
18 |
for message in entry["messages"]:
|
19 |
content = message.get("content", "")
|
20 |
conversation_texts.append(content)
|
21 |
-
|
|
|
22 |
|
|
|
|
|
|
|
23 |
token_counts.append({
|
24 |
'conversation': ' '.join(conversation_texts),
|
25 |
'token_count': conversation_token_count
|
26 |
})
|
27 |
|
28 |
-
return token_counts
|
29 |
|
30 |
# Gradio interface function
|
31 |
def token_counter(json_file, encoding_name):
|
@@ -39,5 +44,7 @@ gr.Interface(
|
|
39 |
gr.File(label="Upload JSON/JSONL File"),
|
40 |
gr.Dropdown(["r50k_base", "p50k_base", "cl100k_base", "o200k_base"], label="Select Encoding", value="cl100k_base")
|
41 |
],
|
42 |
-
outputs=
|
|
|
|
|
43 |
).launch()
|
|
|
10 |
with open(json_file.name, 'r') as f:
|
11 |
data = json.load(f) if json_file.name.endswith('.json') else [json.loads(line) for line in f.readlines()]
|
12 |
|
13 |
+
total_token_count = 0
|
14 |
token_counts = []
|
15 |
for entry in data:
|
16 |
conversation_token_count = 0
|
|
|
19 |
for message in entry["messages"]:
|
20 |
content = message.get("content", "")
|
21 |
conversation_texts.append(content)
|
22 |
+
tokens = len(encoding.encode(content))
|
23 |
+
conversation_token_count += tokens
|
24 |
|
25 |
+
# Add conversation token count to the total
|
26 |
+
total_token_count += conversation_token_count
|
27 |
+
|
28 |
token_counts.append({
|
29 |
'conversation': ' '.join(conversation_texts),
|
30 |
'token_count': conversation_token_count
|
31 |
})
|
32 |
|
33 |
+
return {"conversations": token_counts, "total_token_count": total_token_count}
|
34 |
|
35 |
# Gradio interface function
|
36 |
def token_counter(json_file, encoding_name):
|
|
|
44 |
gr.File(label="Upload JSON/JSONL File"),
|
45 |
gr.Dropdown(["r50k_base", "p50k_base", "cl100k_base", "o200k_base"], label="Select Encoding", value="cl100k_base")
|
46 |
],
|
47 |
+
outputs=[
|
48 |
+
gr.JSON(label="Token Counts per Conversation and Total"),
|
49 |
+
]
|
50 |
).launch()
|