piotr-szleg-bards-ai's picture
2024-01-26 15:19:42 Publish script update
1a7567e
raw
history blame
4.62 kB
import os
from dataclasses import dataclass
from typing import Optional

import pandas as pd
@dataclass
class Model:
    """Metadata and pricing for a single model entry.

    Pricing may be supplied as an hourly rate, as one per-million-token rate,
    or as separate input/output per-million-token rates. Unless an explicit
    ``cost`` string is passed in, ``__post_init__`` derives a human-readable
    one from whichever pricing fields are set.
    """

    model_display_name: str
    model_name: str
    api_url: Optional[str]
    provider: str
    hourly_cost: Optional[float] = None
    cost: Optional[str] = None  # human-readable price; derived when left empty
    supports_functions: bool = False
    size_billion_parameters: Optional[float] = None  # in billion parameters
    cost_per_million_tokens: Optional[float] = None
    cost_per_million_input_tokens: Optional[float] = None
    cost_per_million_output_tokens: Optional[float] = None

    def __post_init__(self):
        """Fill in missing per-direction token prices and the ``cost`` string."""
        # Fall back to the flat per-token price only when a direction-specific
        # price was not given at all; an explicit 0 (free) must be preserved,
        # so compare against None instead of relying on truthiness.
        if self.cost_per_million_input_tokens is None:
            self.cost_per_million_input_tokens = self.cost_per_million_tokens
        if self.cost_per_million_output_tokens is None:
            self.cost_per_million_output_tokens = self.cost_per_million_tokens

        # An explicitly provided, non-empty cost string always wins.
        if self.cost:
            return

        # Priority: hourly rate, then flat token rate, then input/output rates.
        if self.hourly_cost is not None:
            self.cost = f"${self.hourly_cost} / hour"
        elif self.cost_per_million_tokens is not None:
            self.cost = f"${self.cost_per_million_tokens} / 1M tokens"
        elif (
            self.cost_per_million_input_tokens is not None
            and self.cost_per_million_output_tokens is not None
        ):
            self.cost = (
                f"${self.cost_per_million_input_tokens} / 1M input tokens, "
                f"${self.cost_per_million_output_tokens} / 1M output tokens"
            )
# Shorthand for reading endpoint URLs from the environment; yields None when
# the variable is not set.
env = os.environ.get

# Registry of models and their pricing. Each display name should appear once,
# since models_costs() emits one table row per entry.
MODELS = [
    # source: https://openai.com/pricing
    # converted costs from dollar/1K tokens to dollar/1M for readability and together_ai comparability
    Model(
        "gpt-3.5-turbo",
        "gpt-3.5-turbo",
        None,
        "OpenAI",
        supports_functions=True,
        cost_per_million_input_tokens=1,
        cost_per_million_output_tokens=2,
    ),
    Model(
        "gpt-4-turbo",
        "gpt-4-1106-preview",
        None,
        "OpenAI",
        supports_functions=True,
        cost_per_million_input_tokens=10,
        cost_per_million_output_tokens=30,
    ),
    Model(
        "gpt-4",
        "gpt-4",
        None,
        "OpenAI",
        supports_functions=True,
        cost_per_million_input_tokens=30,
        cost_per_million_output_tokens=60,
    ),
    # we don't test gpt-4-32k because the tasks don't reach gpt-4 limitations
    # source: https://www.together.ai/pricing
    Model(
        "llama-2-70b-chat",
        "together_ai/togethercomputer/llama-2-70b-chat",
        None,
        "Together AI",
        cost_per_million_tokens=0.2,
    ),
    Model(
        "Mixtral-8x7B-Instruct-v0.1",
        "together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1",
        None,
        "Together AI",
        size_billion_parameters=8 * 7,
        cost_per_million_tokens=0.9,
    ),
    # taken from endpoint pages
    Model(
        "zephyr-7b-beta",
        "huggingface/HuggingFaceH4/zephyr-7b-beta",
        env("ZEPHYR_7B_BETA_URL"),
        "Hugging Face Inference Endpoint",
        hourly_cost=1.30,
        size_billion_parameters=7,
    ),
    Model(
        "Mistral-7B-Instruct-v0.2",
        "huggingface/mistralai/Mistral-7B-Instruct-v0.2",
        env("MISTRAL_7B_BETA_URL"),
        "Hugging Face Inference Endpoint",
        hourly_cost=1.30,
        size_billion_parameters=7,
    ),
    Model(
        "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        "huggingface/TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        env("TINY_LLAMA_URL"),
        "Hugging Face Inference Endpoint",
        hourly_cost=0.60,
        size_billion_parameters=1.1,
    ),
    Model(
        "gemini-pro",
        "gemini-pro",
        None,
        "Google VertexAI",
        # https://ai.google.dev/pricing
        # The display string is given explicitly because it quotes prices per
        # character rather than per token; both rates are per 1M characters,
        # matching the per-million numeric fields below.
        cost="$0.25 / 1M input characters, $0.5 / 1M output characters (60 queries per minute are free)",
        cost_per_million_input_tokens=0.25,
        cost_per_million_output_tokens=0.5,
    ),
    Model(
        "chat-bison (PaLM 2)",
        "chat-bison",
        None,
        "Google VertexAI",
        # https://cloud.google.com/vertex-ai/docs/generative-ai/pricing
        cost_per_million_input_tokens=0.25,
        cost_per_million_output_tokens=0.5,
    ),
    Model(
        "chat-bison-32k (PaLM 2 32K)",
        "chat-bison-32k",
        None,
        "Google VertexAI",
        # https://cloud.google.com/vertex-ai/docs/generative-ai/pricing
        cost_per_million_input_tokens=0.25,
        cost_per_million_output_tokens=0.5,
    ),
]
def models_costs():
    """Return a DataFrame with one row per entry in ``MODELS``.

    Columns are ``Model`` (display name), ``Provider`` and ``Cost`` (the
    entry's human-readable cost string).
    """
    header = ["Model", "Provider", "Cost"]
    rows = []
    for entry in MODELS:
        rows.append((entry.model_display_name, entry.provider, entry.cost))
    return pd.DataFrame(rows, columns=header)