Spaces:

jiangjiechen
/

tiktoken_count

Running

App Files Files Community

tiktoken_count / app.py

jiangjiechen

support latex formula

b6d36be 5 months ago

raw

history blame contribute delete

1.86 kB

	import gradio as gr
	import tiktoken
	import json

	def count_tokens(text):
	    """Count the tokens in ``text`` and build a display-friendly copy of it.

	    The token count is computed from the text exactly as received, using
	    the tiktoken encoding registered for the "gpt-4" model. The formatting
	    steps applied afterwards only affect the displayed text, not the count.

	    Args:
	        text (str): Input text. ``None`` is treated as an empty string.

	    Returns:
	        tuple: ``(token_count, update)`` where ``token_count`` is the number
	        of tokens in the raw input and ``update`` is the ``gr.update(...)``
	        value carrying the formatted text for the output component.
	    """
	    # A missing value would otherwise crash both the tokenizer and json.loads.
	    if text is None:
	        text = ""

	    encoding = tiktoken.encoding_for_model("gpt-4")
	    token_count = len(encoding.encode(text))

	    # Best effort: pretty-print input that is valid JSON and keep anything
	    # else untouched. Very deeply nested input can hit the interpreter's
	    # recursion limit, which is treated the same as "not prettifiable".
	    try:
	        text = json.dumps(json.loads(text), indent=4, ensure_ascii=False)
	    except (json.JSONDecodeError, RecursionError):
	        pass

	    # Turn the two-character sequence backslash + "n" (e.g. the escapes
	    # json.dumps emits for newlines inside strings) into real line breaks.
	    # NOTE(review): this also rewrites LaTeX commands that start with that
	    # sequence, such as "\nabla" or "\neq" -- confirm whether that matters.
	    text = text.replace("\\n", "\n")

	    return token_count, gr.update(value=text)

	# Build the Gradio interface: one text input, a token count and the
	# formatted text rendered as Markdown.
	iface = gr.Interface(
	    fn=count_tokens,
	    inputs=[
	        gr.Textbox(
	            lines=10,
	            max_lines=1000000,
	            placeholder="Enter your text here...",
	        ),
	    ],
	    outputs=[
	        "number",
	        gr.Markdown(
	            label="Beautified Text",
	            # Per the Gradio docs the default delimiter set only covers
	            # $$...$$, so inline $...$ formulas promised in the description
	            # would not be typeset. "$$" is listed first so block formulas
	            # are matched before the single-dollar inline form.
	            latex_delimiters=[
	                {"left": "$$", "right": "$$", "display": True},
	                {"left": "$", "right": "$", "display": False},
	            ],
	        ),
	    ],
	    title="Token Counter with tiktoken",
	    description="Enter text below to calculate the number of tokens using the tiktoken library. Supports LaTeX formulas using $ for inline and $$ for block formulas.",
	    examples=[
	        ["这是一个行内公式示例：$E=mc^2$"],
	        ["这是一个块级公式示例：$$\\sum_{i=1}^n i = \\frac{n(n+1)}{2}$$"],
	        ["这是混合示例：\n行内公式：$\\alpha + \\beta$\n块级公式：$$\\int_0^\\infty e^{-x^2} dx = \\frac{\\sqrt{\\pi}}{2}$$"],
	        ["普通文本示例：Hello, how are you doing today?"],
	    ],
	    theme="default",
	)

	# Launch the app only when this file is executed as a script.
	if __name__ == "__main__":
	    iface.launch()