luminoussg committed on
Commit
1476a99
·
verified ·
1 Parent(s): 0878173

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -3
app.py CHANGED
@@ -10,6 +10,7 @@ def count_tokens(json_file, encoding_name):
10
  with open(json_file.name, 'r') as f:
11
  data = json.load(f) if json_file.name.endswith('.json') else [json.loads(line) for line in f.readlines()]
12
 
 
13
  token_counts = []
14
  for entry in data:
15
  conversation_token_count = 0
@@ -18,14 +19,18 @@ def count_tokens(json_file, encoding_name):
18
  for message in entry["messages"]:
19
  content = message.get("content", "")
20
  conversation_texts.append(content)
21
- conversation_token_count += len(encoding.encode(content))
 
22
 
 
 
 
23
  token_counts.append({
24
  'conversation': ' '.join(conversation_texts),
25
  'token_count': conversation_token_count
26
  })
27
 
28
- return token_counts
29
 
30
  # Gradio interface function
31
  def token_counter(json_file, encoding_name):
@@ -39,5 +44,7 @@ gr.Interface(
39
  gr.File(label="Upload JSON/JSONL File"),
40
  gr.Dropdown(["r50k_base", "p50k_base", "cl100k_base", "o200k_base"], label="Select Encoding", value="cl100k_base")
41
  ],
42
- outputs=gr.JSON(label="Token Counts")
 
 
43
  ).launch()
 
10
  with open(json_file.name, 'r') as f:
11
  data = json.load(f) if json_file.name.endswith('.json') else [json.loads(line) for line in f.readlines()]
12
 
13
+ total_token_count = 0
14
  token_counts = []
15
  for entry in data:
16
  conversation_token_count = 0
 
19
  for message in entry["messages"]:
20
  content = message.get("content", "")
21
  conversation_texts.append(content)
22
+ tokens = len(encoding.encode(content))
23
+ conversation_token_count += tokens
24
 
25
+ # Add conversation token count to the total
26
+ total_token_count += conversation_token_count
27
+
28
  token_counts.append({
29
  'conversation': ' '.join(conversation_texts),
30
  'token_count': conversation_token_count
31
  })
32
 
33
+ return {"conversations": token_counts, "total_token_count": total_token_count}
34
 
35
  # Gradio interface function
36
  def token_counter(json_file, encoding_name):
 
44
  gr.File(label="Upload JSON/JSONL File"),
45
  gr.Dropdown(["r50k_base", "p50k_base", "cl100k_base", "o200k_base"], label="Select Encoding", value="cl100k_base")
46
  ],
47
+ outputs=[
48
+ gr.JSON(label="Token Counts per Conversation and Total"),
49
+ ]
50
  ).launch()