Spaces:

LinkSoul
/

AutoAgents

Runtime error

App Files Files Community

AutoAgents / autoagents /system /utils /token_counter.py

Orami01

Upload 274 files

9c48ae2 over 1 year ago

raw

history blame

3.48 kB

	#!/usr/bin/env python
	# -- coding: utf-8 --
	"""
	@Time : 2023/5/18 00:40
	@Author : alexanderwu
	@File : token_counter.py
	@From : https://github.com/geekan/MetaGPT/blob/main/metagpt/utils/token_counter.py
	ref1: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
	ref2: https://github.com/Significant-Gravitas/Auto-GPT/blob/master/autogpt/llm/token_counter.py
	ref3: https://github.com/hwchase17/langchain/blob/master/langchain/chat_models/openai.py
	"""
	import tiktoken

	TOKEN_COSTS = {
	"gpt-3.5-turbo": {"prompt": 0.0015, "completion": 0.002},
	"gpt-3.5-turbo-0301": {"prompt": 0.0015, "completion": 0.002},
	"gpt-3.5-turbo-0613": {"prompt": 0.0015, "completion": 0.002},
	"gpt-3.5-turbo-16k": {"prompt": 0.003, "completion": 0.004},
	"gpt-3.5-turbo-16k-0613": {"prompt": 0.003, "completion": 0.004},
	"gpt-4-0314": {"prompt": 0.03, "completion": 0.06},
	"gpt-4": {"prompt": 0.03, "completion": 0.06},
	"gpt-4-32k": {"prompt": 0.06, "completion": 0.12},
	"gpt-4-32k-0314": {"prompt": 0.06, "completion": 0.12},
	"gpt-4-0613": {"prompt": 0.06, "completion": 0.12},
	"text-embedding-ada-002": {"prompt": 0.0004, "completion": 0.0},
	}


	def count_message_tokens(messages, model="gpt-3.5-turbo-0613"):
	"""Return the number of tokens used by a list of messages."""
	try:
	encoding = tiktoken.encoding_for_model(model)
	except KeyError:
	print("Warning: model not found. Using cl100k_base encoding.")
	encoding = tiktoken.get_encoding("cl100k_base")
	if model in {
	"gpt-3.5-turbo-0613",
	"gpt-3.5-turbo-16k-0613",
	"gpt-4-0314",
	"gpt-4-32k-0314",
	"gpt-4-0613",
	"gpt-4-32k-0613",
	}:
	tokens_per_message = 3
	tokens_per_name = 1
	elif model == "gpt-3.5-turbo-0301":
	tokens_per_message = 4 # every message follows <\|start\|>{role/name}\n{content}<\|end\|>\n
	tokens_per_name = -1 # if there's a name, the role is omitted
	elif "gpt-3.5-turbo" in model:
	print("Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.")
	return count_message_tokens(messages, model="gpt-3.5-turbo-0613")
	elif "gpt-4" in model:
	print("Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.")
	return count_message_tokens(messages, model="gpt-4-0613")
	else:
	raise NotImplementedError(
	f"""num_tokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens."""
	)
	num_tokens = 0
	for message in messages:
	num_tokens += tokens_per_message
	for key, value in message.items():
	num_tokens += len(encoding.encode(value))
	if key == "name":
	num_tokens += tokens_per_name
	num_tokens += 3 # every reply is primed with <\|start\|>assistant<\|message\|>
	return num_tokens


	def count_string_tokens(string: str, model_name: str) -> int:
	"""
	Returns the number of tokens in a text string.

	Args:
	string (str): The text string.
	model_name (str): The name of the encoding to use. (e.g., "gpt-3.5-turbo")

	Returns:
	int: The number of tokens in the text string.
	"""
	encoding = tiktoken.encoding_for_model(model_name)
	return len(encoding.encode(string))