# Published from a Hugging Face Space by piotr-szleg-bards-ai
# ("Publish script update", 2024-02-05, commit 4cb217b).
import os
from dataclasses import dataclass
from typing import Optional
@dataclass(unsafe_hash=True)
class Model:
    """Metadata and pricing for a single LLM endpoint.

    Pricing may be supplied three ways: a free-form ``cost_description``,
    an ``hourly_cost`` (dedicated endpoints), or per-million-token rates.
    Token rates can be one blended ``cost_per_million_tokens`` or separate
    input/output rates; ``__post_init__`` copies the blended rate into any
    per-direction field left unset.
    """

    model_display_name: str  # human-readable name used in published output
    model_name: str  # identifier passed to the inference client
    api_url: Optional[str]  # endpoint URL; None for hosted/SaaS providers
    provider: str  # e.g. "OpenAI", "Together AI", "Google VertexAI"
    hourly_cost: Optional[float] = None  # $/hour for dedicated endpoints
    cost_description: str = None  # free-form cost text; overrides the numeric fields
    supports_functions: bool = False  # whether the API supports function calling
    size_billion_parameters: Optional[float] = None  # model size, in billions of parameters
    cost_per_million_tokens: Optional[float] = None  # blended $/1M tokens
    cost_per_million_input_tokens: Optional[float] = None  # $/1M input tokens
    cost_per_million_output_tokens: Optional[float] = None  # $/1M output tokens
    input_size: Optional[int] = None  # context window, in tokens

    def __post_init__(self):
        # Fall back to the blended rate for any per-direction rate not given.
        self.cost_per_million_input_tokens = self.cost_per_million_input_tokens or self.cost_per_million_tokens
        self.cost_per_million_output_tokens = self.cost_per_million_output_tokens or self.cost_per_million_tokens

    @property
    def cost(self):
        """Human-readable cost string, or None when no pricing is set.

        Precedence: explicit description > hourly rate > blended token
        rate > separate input/output token rates.
        """
        if self.cost_description:
            return self.cost_description
        if self.hourly_cost:
            return f"${self.hourly_cost:.2g} / hour"
        if self.cost_per_million_tokens:
            return f"${self.cost_per_million_tokens:.2g} / 1M tokens"
        elif self.cost_per_million_input_tokens and self.cost_per_million_output_tokens:
            return f"${self.cost_per_million_input_tokens:.2g} / 1M input tokens, ${self.cost_per_million_output_tokens:.2g} / 1M output tokens"
        return None  # no pricing information available (was an implicit fall-through)
env = os.environ.get
# Catalog of every endpoint the benchmark knows about, with pricing metadata.
MODELS = [
# source: https://openai.com/pricing
# converted costs from dollar/1K tokens to dollar/1M for readability and together_ai comparability
Model(
"gpt-3.5-turbo",
"gpt-3.5-turbo",
None,
"OpenAI",
supports_functions=True,
cost_per_million_input_tokens=1,
cost_per_million_output_tokens=2,
# https://learn.microsoft.com/en-us/answers/questions/1356487/what-is-the-exact-maximum-input-tokens-of-azure-gp
input_size=4096
),
Model(
"gpt-4-turbo",
"gpt-4-1106-preview",
None,
"OpenAI",
supports_functions=True,
cost_per_million_input_tokens=10,
cost_per_million_output_tokens=30,
# https://writesonic.com/blog/gpt-4-turbo-vs-gpt-4
input_size=128_000,
),
Model(
"gpt-4",
"gpt-4",
None,
"OpenAI",
supports_functions=True,
cost_per_million_input_tokens=30,
cost_per_million_output_tokens=60,
input_size=32_000,
),
# source: https://www.together.ai/pricing
Model(
"llama-2-70b-chat",
"together_ai/togethercomputer/llama-2-70b-chat",
None,
"Together AI",
cost_per_million_tokens=0.9,
size_billion_parameters=70,
# https://github.com/facebookresearch/llama/issues/148
input_size=2048,
),
Model(
"Mixtral-8x7B-Instruct-v0.1",
"together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1",
None,
"Together AI",
# 8 experts of 7B each (mixture-of-experts), hence 8 * 7
size_billion_parameters=8 * 7,
cost_per_million_tokens=0.9,
),
# taken from endpoint pages
# The three Hugging Face entries read their endpoint URLs from the
# environment at import time; env() returns None when the variable is unset.
Model(
"zephyr-7b-beta",
"huggingface/HuggingFaceH4/zephyr-7b-beta",
env("ZEPHYR_7B_BETA_URL"),
"Hugging Face Inference Endpoint",
hourly_cost=1.30,
size_billion_parameters=7,
),
Model(
"Mistral-7B-Instruct-v0.2",
"huggingface/mistralai/Mistral-7B-Instruct-v0.2",
env("MISTRAL_7B_BETA_URL"),
"Hugging Face Inference Endpoint",
hourly_cost=1.30,
size_billion_parameters=7,
),
Model(
"TinyLlama/TinyLlama-1.1B-Chat-v1.0",
"huggingface/TinyLlama/TinyLlama-1.1B-Chat-v1.0",
env("TINY_LLAMA_URL"),
"Hugging Face Inference Endpoint",
hourly_cost=0.60,
size_billion_parameters=1.1,
),
Model(
"gemini-pro",
"gemini-pro",
None,
"Google VertexAI",
# https://ai.google.dev/pricing
# Gemini is priced per character, so a free-form description is used in
# addition to the approximate per-token figures.
cost_description="$0.25 / 1M input characters, $0.5 / 1M output characters (60 queries per minute are free)",
cost_per_million_input_tokens=0.25,
cost_per_million_output_tokens=0.5,
),
Model(
"chat-bison (PaLM 2)",
"chat-bison",
None,
"Google VertexAI",
# https://cloud.google.com/vertex-ai/docs/generative-ai/pricing
cost_per_million_input_tokens=0.25,
cost_per_million_output_tokens=0.5,
# https://ai.google.dev/models/palm
# NOTE(review): 8196 looks like a typo for 8192 — confirm against the PaLM docs.
input_size=8196,
),
Model(
"chat-bison-32k (PaLM 2 32K)",
"chat-bison-32k",
None,
"Google VertexAI",
# https://cloud.google.com/vertex-ai/docs/generative-ai/pricing
cost_per_million_input_tokens=0.25,
cost_per_million_output_tokens=0.5,
),
]
# Restrict the published set to the endpoints currently under evaluation.
_PUBLISHED_MODEL_NAMES = {
    "together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1",
    "huggingface/HuggingFaceH4/zephyr-7b-beta",
}
MODELS = [model for model in MODELS if model.model_name in _PUBLISHED_MODEL_NAMES]