from typing import Literal, Optional, Tuple, Union

import litellm
from litellm import verbose_logger
from litellm.litellm_core_utils.llm_cost_calc.utils import _is_above_128k

"""
Gemini pricing covers:
- token
- image
- audio
- video

Vertex AI -> character based pricing
Google AI Studio -> token based pricing
"""

models_without_dynamic_pricing = ["gemini-1.0-pro", "gemini-pro"]
|
|
def cost_router(
    model: str,
    custom_llm_provider: str,
    call_type: Union[Literal["embedding", "aembedding"], str],
) -> Literal["cost_per_character", "cost_per_token"]:
    """
    Route the cost calculation to the right function, based on model/call_type/etc.

    Returns
    - str, the name of the google cost-calc function to route to.
    """
    if custom_llm_provider == "vertex_ai" and (
        "claude" in model
        or "llama" in model
        or "mistral" in model
        or "jamba" in model
        or "codestral" in model
    ):
        return "cost_per_token"
    elif custom_llm_provider == "vertex_ai" and (
        call_type == "embedding" or call_type == "aembedding"
    ):
        return "cost_per_token"
    return "cost_per_character"
|
|
def cost_per_character(
    model: str,
    custom_llm_provider: str,
    prompt_tokens: float,
    completion_tokens: float,
    prompt_characters: Optional[float] = None,
    completion_characters: Optional[float] = None,
) -> Tuple[float, float]:
    """
    Calculates the cost per character for a given VertexAI model, input messages, and response object.

    Input:
        - model: str, the model name without provider prefix
        - custom_llm_provider: str, "vertex_ai-*"
        - prompt_tokens: float, the number of input tokens (used as a fallback)
        - completion_tokens: float, the number of output tokens (used as a fallback)
        - prompt_characters: float, the number of input characters
        - completion_characters: float, the number of output characters

    Returns:
        Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd

    Raises:
        Exception if the model requires >128k pricing, but the model cost is not mapped
    """
    model_info = litellm.get_model_info(
        model=model, custom_llm_provider=custom_llm_provider
    )

    if prompt_characters is None:
        prompt_cost, _ = cost_per_token(
            model=model,
            custom_llm_provider=custom_llm_provider,
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
        )
    else:
        try:
            if (
                _is_above_128k(tokens=prompt_characters / 4)  # ~4 characters per token
                and model not in models_without_dynamic_pricing
            ):
                assert (
                    "input_cost_per_character_above_128k_tokens" in model_info
                    and model_info["input_cost_per_character_above_128k_tokens"]
                    is not None
                ), "model info for model={} does not have 'input_cost_per_character_above_128k_tokens'-pricing for > 128k tokens\nmodel_info={}".format(
                    model, model_info
                )
                prompt_cost = (
                    prompt_characters
                    * model_info["input_cost_per_character_above_128k_tokens"]
                )
            else:
                assert (
                    "input_cost_per_character" in model_info
                    and model_info["input_cost_per_character"] is not None
                ), "model info for model={} does not have 'input_cost_per_character'-pricing\nmodel_info={}".format(
                    model, model_info
                )
                prompt_cost = prompt_characters * model_info["input_cost_per_character"]
        except Exception as e:
            verbose_logger.debug(
                "litellm.litellm_core_utils.llm_cost_calc.google.py::cost_per_character(): Exception occurred - {}\nFalling back to token-based pricing".format(
                    str(e)
                )
            )
            prompt_cost, _ = cost_per_token(
                model=model,
                custom_llm_provider=custom_llm_provider,
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
            )

    if completion_characters is None:
        _, completion_cost = cost_per_token(
            model=model,
            custom_llm_provider=custom_llm_provider,
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
        )
    else:
        try:
            if (
                _is_above_128k(tokens=completion_characters / 4)  # ~4 characters per token
                and model not in models_without_dynamic_pricing
            ):
                assert (
                    "output_cost_per_character_above_128k_tokens" in model_info
                    and model_info["output_cost_per_character_above_128k_tokens"]
                    is not None
                ), "model info for model={} does not have 'output_cost_per_character_above_128k_tokens' pricing\nmodel_info={}".format(
                    model, model_info
                )
                completion_cost = (
                    completion_characters
                    * model_info["output_cost_per_character_above_128k_tokens"]
                )
            else:
                assert (
                    "output_cost_per_character" in model_info
                    and model_info["output_cost_per_character"] is not None
                ), "model info for model={} does not have 'output_cost_per_character'-pricing\nmodel_info={}".format(
                    model, model_info
                )
                completion_cost = (
                    completion_characters * model_info["output_cost_per_character"]
                )
        except Exception as e:
            verbose_logger.debug(
                "litellm.litellm_core_utils.llm_cost_calc.google.py::cost_per_character(): Exception occurred - {}\nFalling back to token-based pricing".format(
                    str(e)
                )
            )
            _, completion_cost = cost_per_token(
                model=model,
                custom_llm_provider=custom_llm_provider,
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
            )

    return prompt_cost, completion_cost
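
# A minimal sketch of the character-based arithmetic above, with
# hypothetical per-character prices (not taken from litellm's model map):
#
#   model_info = {
#       "input_cost_per_character": 3.125e-07,
#       "output_cost_per_character": 6.25e-07,
#   }
#   prompt_cost = 1_000 * model_info["input_cost_per_character"]      # $0.0003125
#   completion_cost = 400 * model_info["output_cost_per_character"]   # $0.00025
#
# When character counts are unavailable (None), or character pricing is not
# mapped for the model, both branches fall back to cost_per_token() with the
# supplied token counts.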
|
|
def cost_per_token(
    model: str,
    custom_llm_provider: str,
    prompt_tokens: float,
    completion_tokens: float,
) -> Tuple[float, float]:
    """
    Calculates the cost per token for a given model, prompt tokens, and completion tokens.

    Input:
        - model: str, the model name without provider prefix
        - custom_llm_provider: str, either "vertex_ai-*" or "gemini"
        - prompt_tokens: float, the number of input tokens
        - completion_tokens: float, the number of output tokens

    Returns:
        Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd

    Raises:
        Exception if the model requires >128k pricing, but the model cost is not mapped
    """
    model_info = litellm.get_model_info(
        model=model, custom_llm_provider=custom_llm_provider
    )

    if (
        _is_above_128k(tokens=prompt_tokens)
        and model not in models_without_dynamic_pricing
    ):
        assert (
            "input_cost_per_token_above_128k_tokens" in model_info
            and model_info["input_cost_per_token_above_128k_tokens"] is not None
        ), "model info for model={} does not have pricing for > 128k tokens\nmodel_info={}".format(
            model, model_info
        )
        prompt_cost = (
            prompt_tokens * model_info["input_cost_per_token_above_128k_tokens"]
        )
    else:
        prompt_cost = prompt_tokens * model_info["input_cost_per_token"]

    if (
        _is_above_128k(tokens=completion_tokens)
        and model not in models_without_dynamic_pricing
    ):
        assert (
            "output_cost_per_token_above_128k_tokens" in model_info
            and model_info["output_cost_per_token_above_128k_tokens"] is not None
        ), "model info for model={} does not have pricing for > 128k tokens\nmodel_info={}".format(
            model, model_info
        )
        completion_cost = (
            completion_tokens * model_info["output_cost_per_token_above_128k_tokens"]
        )
    else:
        completion_cost = completion_tokens * model_info["output_cost_per_token"]

    return prompt_cost, completion_cost
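
# A minimal sketch of the tiered token pricing above, with hypothetical
# per-token prices (not taken from litellm's model map):
#
#   model_info = {
#       "input_cost_per_token": 1.25e-06,
#       "input_cost_per_token_above_128k_tokens": 2.5e-06,
#   }
#   # A 200k-token prompt crosses the 128k threshold, so *every* prompt
#   # token is billed at the higher rate:
#   prompt_cost = 200_000 * 2.5e-06   # $0.50 (vs. $0.25 at the base rate)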