Spaces:
Running
Running
File size: 4,895 Bytes
0b07a42 f48b842 0b07a42 40be773 0b07a42 f48b842 0b07a42 79d529c 40be773 0b07a42 362448c 0b07a42 f48b842 0b07a42 f48b842 0b07a42 f48b842 0b07a42 4cb217b f48b842 0b07a42 362448c 0b07a42 362448c 0b07a42 362448c 0b07a42 40be773 0b07a42 1a7567e 0b07a42 f48b842 0b07a42 1a7567e 0b07a42 4cb217b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
import os
from dataclasses import dataclass
from typing import Optional
@dataclass(unsafe_hash=True)
class Model:
    """Metadata record for one LLM endpoint: identity, provider, pricing, and size.

    Cost can be expressed three ways, checked in priority order by `cost`:
    a free-form `cost_description`, an `hourly_cost`, or per-million-token
    rates (either a flat rate or separate input/output rates).
    """

    model_display_name: str          # human-facing name shown in the UI
    model_name: str                  # identifier passed to the inference client
    api_url: Optional[str]           # endpoint URL; None for hosted/provider-routed models
    provider: str
    hourly_cost: Optional[float] = None            # USD per hour (dedicated endpoints)
    cost_description: Optional[str] = None         # free-form override for `cost`
    supports_functions: bool = False               # was mis-annotated as `str`
    size_billion_parameters: Optional[float] = None  # in billion parameters
    cost_per_million_tokens: Optional[float] = None  # flat USD rate per 1M tokens
    cost_per_million_input_tokens: Optional[float] = None
    cost_per_million_output_tokens: Optional[float] = None
    input_size: Optional[int] = None               # max input context, in tokens

    def __post_init__(self):
        # Fall back to the flat per-token rate when directional rates are absent.
        self.cost_per_million_input_tokens = (
            self.cost_per_million_input_tokens or self.cost_per_million_tokens
        )
        self.cost_per_million_output_tokens = (
            self.cost_per_million_output_tokens or self.cost_per_million_tokens
        )

    @property
    def cost(self) -> Optional[str]:
        """Human-readable cost summary, or None when no pricing info is set."""
        if self.cost_description:
            return self.cost_description
        if self.hourly_cost:
            return f"${self.hourly_cost:.2g} / hour"
        if self.cost_per_million_tokens:
            return f"${self.cost_per_million_tokens:.2g} / 1M tokens"
        if self.cost_per_million_input_tokens and self.cost_per_million_output_tokens:
            return f"${self.cost_per_million_input_tokens:.2g} / 1M input tokens, ${self.cost_per_million_output_tokens:.2g} / 1M output tokens"
        # Previously fell off the end implicitly; made explicit.
        return None
env = os.environ.get
# Registry of every model endpoint the app knows about. Prices are USD and
# are transcribed from the per-provider pages cited inline — keep in sync
# with those sources when they change.
MODELS = [
    # source: https://openai.com/pricing
    # converted costs from dollar/1K tokens to dollar/1M for readability and together_ai comparability
    Model(
        "gpt-3.5-turbo",
        "gpt-3.5-turbo",
        None,  # routed through the provider SDK; no explicit endpoint URL
        "OpenAI",
        supports_functions=True,
        cost_per_million_input_tokens=1,
        cost_per_million_output_tokens=2,
        # https://learn.microsoft.com/en-us/answers/questions/1356487/what-is-the-exact-maximum-input-tokens-of-azure-gp
        input_size=4096
    ),
    Model(
        "gpt-4-turbo",
        "gpt-4-1106-preview",
        None,
        "OpenAI",
        supports_functions=True,
        cost_per_million_input_tokens=10,
        cost_per_million_output_tokens=30,
        # https://writesonic.com/blog/gpt-4-turbo-vs-gpt-4
        input_size=128_000,
    ),
    Model(
        "gpt-4",
        "gpt-4",
        None,
        "OpenAI",
        supports_functions=True,
        cost_per_million_input_tokens=30,
        cost_per_million_output_tokens=60,
        input_size=32_000,
    ),
    # source: https://www.together.ai/pricing
    Model(
        "llama-2-70b-chat",
        "together_ai/togethercomputer/llama-2-70b-chat",
        None,
        "Together AI",
        cost_per_million_tokens=0.9,  # flat rate: applied to both input and output
        size_billion_parameters=70,
        # https://github.com/facebookresearch/llama/issues/148
        input_size=2048,
    ),
    Model(
        "Mixtral-8x7B-Instruct-v0.1",
        "together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1",
        None,
        "Together AI",
        size_billion_parameters=8 * 7,  # mixture-of-experts: 8 experts x 7B each
        cost_per_million_tokens=0.9,
    ),
    # taken from endpoint pages
    # The next three run on dedicated Hugging Face Inference Endpoints whose
    # URLs come from environment variables (None if the variable is unset).
    Model(
        "zephyr-7b-beta",
        "huggingface/HuggingFaceH4/zephyr-7b-beta",
        env("ZEPHYR_7B_BETA_URL"),
        "Hugging Face Inference Endpoint",
        hourly_cost=1.30,
        size_billion_parameters=7,
    ),
    Model(
        "Mistral-7B-Instruct-v0.2",
        "huggingface/mistralai/Mistral-7B-Instruct-v0.2",
        env("MISTRAL_7B_BETA_URL"),
        "Hugging Face Inference Endpoint",
        hourly_cost=1.30,
        size_billion_parameters=7,
    ),
    Model(
        "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        "huggingface/TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        env("TINY_LLAMA_URL"),
        "Hugging Face Inference Endpoint",
        hourly_cost=0.60,
        size_billion_parameters=1.1,
    ),
    Model(
        "gemini-pro",
        "gemini-pro",
        None,
        "Google VertexAI",
        # https://ai.google.dev/pricing
        # NOTE(review): the description prices per *character* while the rates
        # below are per *token* — cost_description takes display precedence,
        # so the token rates may be approximations; confirm against source.
        cost_description="$0.25 / 1M input characters, $0.5 / 1M output characters (60 queries per minute are free)",
        cost_per_million_input_tokens=0.25,
        cost_per_million_output_tokens=0.5,
    ),
    Model(
        "chat-bison (PaLM 2)",
        "chat-bison",
        None,
        "Google VertexAI",
        # https://cloud.google.com/vertex-ai/docs/generative-ai/pricing
        cost_per_million_input_tokens=0.25,
        cost_per_million_output_tokens=0.5,
        # https://ai.google.dev/models/palm
        # NOTE(review): 8196 looks like a typo for 8192 — verify against the
        # PaLM model card before changing.
        input_size=8196,
    ),
    Model(
        "chat-bison-32k (PaLM 2 32K)",
        "chat-bison-32k",
        None,
        "Google VertexAI",
        # https://cloud.google.com/vertex-ai/docs/generative-ai/pricing
        cost_per_million_input_tokens=0.25,
        cost_per_million_output_tokens=0.5,
    ),
]
# Restrict the active roster to the models currently under evaluation.
_ACTIVE_MODEL_NAMES = {
    "together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1",
    "huggingface/HuggingFaceH4/zephyr-7b-beta",
}
MODELS = [model for model in MODELS if model.model_name in _ACTIVE_MODEL_NAMES]
|