import os
from dataclasses import dataclass
from typing import Optional


@dataclass(unsafe_hash=True)
class Model:
    model_display_name: str
    model_name: str
    api_url: Optional[str]
    provider: str
    hourly_cost: Optional[float] = None
    cost_description: Optional[str] = None
    supports_functions: bool = False
    size_billion_parameters: Optional[float] = None  # in billions of parameters
    cost_per_million_tokens: Optional[float] = None
    cost_per_million_input_tokens: Optional[float] = None
    cost_per_million_output_tokens: Optional[float] = None
    input_size: Optional[int] = None  # maximum input length, in tokens

    def __post_init__(self):
        # Fall back to the combined per-token rate when separate
        # input/output rates are not given.
        self.cost_per_million_input_tokens = self.cost_per_million_input_tokens or self.cost_per_million_tokens
        self.cost_per_million_output_tokens = self.cost_per_million_output_tokens or self.cost_per_million_tokens

    @property
    def cost(self) -> Optional[str]:
        """Human-readable cost summary, preferring the most specific figure available."""
        if self.cost_description:
            return self.cost_description
        if self.hourly_cost:
            return f"${self.hourly_cost:.2g} / hour"
        if self.cost_per_million_tokens:
            return f"${self.cost_per_million_tokens:.2g} / 1M tokens"
        if self.cost_per_million_input_tokens and self.cost_per_million_output_tokens:
            return f"${self.cost_per_million_input_tokens:.2g} / 1M input tokens, ${self.cost_per_million_output_tokens:.2g} / 1M output tokens"
        return None


env = os.environ.get  # shorthand for reading optional endpoint URLs from the environment

MODELS = [
    # source: https://openai.com/pricing
    # costs converted from dollars per 1K tokens to dollars per 1M tokens,
    # for readability and for comparability with Together AI pricing
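    # Worked example of that conversion (rates inferred from the figures
    # below, so treat as illustrative): $0.001 / 1K input tokens * 1000
    # = $1 / 1M input tokens; $0.002 / 1K output tokens * 1000 = $2 / 1M.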
    Model(
        "gpt-3.5-turbo",
        "gpt-3.5-turbo",
        None,
        "OpenAI",
        supports_functions=True,
        cost_per_million_input_tokens=1,
        cost_per_million_output_tokens=2,
        # https://learn.microsoft.com/en-us/answers/questions/1356487/what-is-the-exact-maximum-input-tokens-of-azure-gp
        input_size=4096,
    ),
    Model(
        "gpt-4-turbo",
        "gpt-4-1106-preview",
        None,
        "OpenAI",
        supports_functions=True,
        cost_per_million_input_tokens=10,
        cost_per_million_output_tokens=30,
        # https://writesonic.com/blog/gpt-4-turbo-vs-gpt-4
        input_size=128_000,
    ),
    Model(
        "gpt-4",
        "gpt-4",
        None,
        "OpenAI",
        supports_functions=True,
        cost_per_million_input_tokens=30,
        cost_per_million_output_tokens=60,
        input_size=32_000,
    ),
    # source: https://www.together.ai/pricing
    Model(
        "llama-2-70b-chat",
        "together_ai/togethercomputer/llama-2-70b-chat",
        None,
        "Together AI",
        cost_per_million_tokens=0.9,
        size_billion_parameters=70,
        # https://github.com/facebookresearch/llama/issues/148
        input_size=2048,
    ),
    Model(
        "Mixtral-8x7B-Instruct-v0.1",
        "together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1",
        None,
        "Together AI",
        size_billion_parameters=8 * 7,  # approximated as 8 experts of ~7B each
        cost_per_million_tokens=0.9,
    ),
    # taken from endpoint pages
    Model(
        "zephyr-7b-beta",
        "huggingface/HuggingFaceH4/zephyr-7b-beta",
        env("ZEPHYR_7B_BETA_URL"),
        "Hugging Face Inference Endpoint",
        hourly_cost=1.30,
        size_billion_parameters=7,
    ),
    Model(
        "Mistral-7B-Instruct-v0.2",
        "huggingface/mistralai/Mistral-7B-Instruct-v0.2",
        env("MISTRAL_7B_BETA_URL"),
        "Hugging Face Inference Endpoint",
        hourly_cost=1.30,
        size_billion_parameters=7,
    ),
    Model(
        "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        "huggingface/TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        env("TINY_LLAMA_URL"),
        "Hugging Face Inference Endpoint",
        hourly_cost=0.60,
        size_billion_parameters=1.1,
    ),
    Model(
        "gemini-pro",
        "gemini-pro",
        None,
        "Google VertexAI",
        # https://ai.google.dev/pricing
        cost_description="$0.25 / 1M input characters, $0.5 / 1M output characters (60 queries per minute are free)",
        cost_per_million_input_tokens=0.25,
        cost_per_million_output_tokens=0.5,
    ),
    Model(
        "chat-bison (PaLM 2)",
        "chat-bison",
        None,
        "Google VertexAI",
        # https://cloud.google.com/vertex-ai/docs/generative-ai/pricing
        cost_per_million_input_tokens=0.25,
        cost_per_million_output_tokens=0.5,
        # https://ai.google.dev/models/palm
        input_size=8196,
    ),
    Model(
        "chat-bison-32k (PaLM 2 32K)",
        "chat-bison-32k",
        None,
        "Google VertexAI",
        # https://cloud.google.com/vertex-ai/docs/generative-ai/pricing
        cost_per_million_input_tokens=0.25,
        cost_per_million_output_tokens=0.5,
    ),
]

# Restrict the benchmark run to the two models currently under evaluation.
MODELS = [
    model
    for model in MODELS
    if model.model_name in (
        "together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1",
        "huggingface/HuggingFaceH4/zephyr-7b-beta",
    )
]
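
if __name__ == "__main__":
    # Minimal usage sketch (an illustration added here, not part of the
    # benchmark itself): print a cost/size summary for the selected models.
    for model in MODELS:
        size = (
            f"{model.size_billion_parameters:g}B parameters"
            if model.size_billion_parameters
            else "size unknown"
        )
        print(f"{model.model_display_name} ({model.provider}, {size}): {model.cost}")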