jiangjiechen committed on
Commit
b6d36be
·
1 Parent(s): 69b0a31

Support LaTeX formulas

Browse files
Files changed (1) hide show
  1. app.py +26 -19
app.py CHANGED
@@ -4,19 +4,16 @@ import json
4
 
5
  def count_tokens(text):
6
  """
7
- Calculate the number of tokens in the input text using tiktoken.
8
-
9
  Args:
10
- text (str): The input text to be tokenized.
11
-
 
12
  Returns:
13
- int: The number of tokens in the input text.
14
  """
15
- # Choose the encoding based on the model you are targeting.
16
- # Here, we use 'gpt-3.5-turbo' as an example.
17
  encoding = tiktoken.encoding_for_model("gpt-4")
18
-
19
- # Encode the input text to get the list of token IDs
20
  tokens = encoding.encode(text)
21
 
22
  try:
@@ -27,26 +24,36 @@ def count_tokens(text):
27
 
28
  text = text.replace("\\n", "\n")
29
 
30
- return len(tokens), text
 
 
31
 
32
- # Define the Gradio interface
33
  iface = gr.Interface(
34
- fn=count_tokens, # The function to call
35
- inputs=gr.Textbox(lines=1, max_lines=1000000, placeholder="Enter your text here..."), # Input component
 
 
 
 
 
 
 
36
  outputs=[
37
  "number",
38
- gr.Textbox(label="Beautified Text", lines=30)
39
  ],
40
  title="Token Counter with tiktoken",
41
- description="Enter text below to calculate the number of tokens using the tiktoken library.",
42
  examples=[
43
- ["Hello, how are you doing today?"],
44
- ["Gradio makes it easy to create web apps for machine learning models."],
45
- ["OpenAI's GPT models are powerful tools for natural language processing tasks."]
 
46
  ],
47
  theme="default"
48
  )
49
 
50
- # Launch the app
51
  if __name__ == "__main__":
52
  iface.launch()
 
4
 
5
  def count_tokens(text):
6
  """
7
+ 计算输入文本中的 token 数量,并根据用户选择格式化文本。
8
+
9
  Args:
10
+ text (str): 输入文本。
11
+ use_markdown (bool): 是否使用 Markdown/LaTeX 格式输出。
12
+
13
  Returns:
14
+ tuple: 返回 token 数量和格式化后的文本。
15
  """
 
 
16
  encoding = tiktoken.encoding_for_model("gpt-4")
 
 
17
  tokens = encoding.encode(text)
18
 
19
  try:
 
24
 
25
  text = text.replace("\\n", "\n")
26
 
27
+ formatted_text = text
28
+
29
+ return len(tokens), gr.update(value=formatted_text)
30
 
31
+ # 定义 Gradio 接口
32
  iface = gr.Interface(
33
+ fn=count_tokens,
34
+ inputs=[
35
+ gr.Textbox(
36
+ lines=10,
37
+ max_lines=1000000,
38
+ placeholder="Enter your text here..."
39
+ ),
40
+ # gr.Checkbox(label="使用 Markdown/LaTeX 格式输出", value=True) # 格式选择开关
41
+ ],
42
  outputs=[
43
  "number",
44
+ gr.Markdown(label="Beautified Text")
45
  ],
46
  title="Token Counter with tiktoken",
47
+ description="Enter text below to calculate the number of tokens using the tiktoken library. Supports LaTeX formulas using $ for inline and $$ for block formulas.",
48
  examples=[
49
+ ["这是一个行内公式示例:$E=mc^2$"],
50
+ ["这是一个块级公式示例:$$\\sum_{i=1}^n i = \\frac{n(n+1)}{2}$$"],
51
+ ["这是混合示例:\n行内公式:$\\alpha + \\beta$\n块级公式:$$\\int_0^\\infty e^{-x^2} dx = \\frac{\\sqrt{\\pi}}{2}$$"],
52
+ ["普通文本示例:Hello, how are you doing today?"],
53
  ],
54
  theme="default"
55
  )
56
 
57
+ # 启动应用
58
  if __name__ == "__main__":
59
  iface.launch()