jadehardouin committed on
Commit
0e893b5
·
1 Parent(s): eef299f

Update models.py

Browse files
Files changed (1) hide show
  1. models.py +6 -7
models.py CHANGED
@@ -131,10 +131,10 @@ class OpenAIModelGPT3_5(BaseTCOModel):
131
 
132
  return cost_per_input_token, cost_per_output_token, labor
133
 
134
- class OpenSourceLlama2Model(BaseTCOModel):
135
 
136
  def __init__(self):
137
- self.set_name("(Open source) Llama 2 70B")
138
  self.set_latency("27s")
139
  super().__init__()
140
 
@@ -144,7 +144,7 @@ class OpenSourceLlama2Model(BaseTCOModel):
144
  input_tokens_cost_per_token = 0.00052
145
  r = maxed_out / 100
146
  return input_tokens_cost_per_token * 0.65 / r, output_tokens_cost_per_token * 0.65/ r
147
-
148
  self.source = gr.Markdown("""<span style="font-size: 16px; font-weight: 600; color: #212529;">Source</span>""")
149
  self.info = gr.Markdown("The cost per input and output tokens values below are from [these benchmark results](https://www.cursor.so/blog/llama-inference#user-content-fn-llama-paper) that were obtained using the following initial configurations.",
150
  interactive=False,
@@ -176,7 +176,7 @@ class OpenSourceLlama2Model(BaseTCOModel):
176
  )
177
  self.maxed_out.change(on_maxed_out_change, inputs=[self.maxed_out, self.input_tokens_cost_per_token, self.output_tokens_cost_per_token], outputs=[self.input_tokens_cost_per_token, self.output_tokens_cost_per_token])
178
 
179
- self.labor = gr.Number(10000, visible=False,
180
  label="($) Labor cost per month",
181
  info="This is an estimate of the labor cost of the AI engineer in charge of deploying the model",
182
  interactive=True
@@ -266,16 +266,15 @@ class ModelPage:
266
 
267
  def compute_cost_per_token(self, *args):
268
  begin=0
269
- current_model = args[-3]
270
  current_input_tokens = args[-2]
271
  current_output_tokens = args[-1]
272
  for model in self.models:
273
  model_n_args = len(model.get_components_for_cost_computing())
274
  if current_model == model.get_name():
275
-
276
  model_args = args[begin:begin+model_n_args]
277
  cost_per_input_token, cost_per_output_token, labor_cost = model.compute_cost_per_token(*model_args)
278
- model_tco = cost_per_input_token * current_input_tokens + cost_per_output_token * current_output_tokens
279
  latency = model.get_latency()
280
 
281
  return model_tco, latency, labor_cost
 
131
 
132
  return cost_per_input_token, cost_per_output_token, labor
133
 
134
+ class DIYLlama2Model(BaseTCOModel):
135
 
136
  def __init__(self):
137
+ self.set_name("(Deploy yourself) Llama 2 70B")
138
  self.set_latency("27s")
139
  super().__init__()
140
 
 
144
  input_tokens_cost_per_token = 0.00052
145
  r = maxed_out / 100
146
  return input_tokens_cost_per_token * 0.65 / r, output_tokens_cost_per_token * 0.65/ r
147
+
148
  self.source = gr.Markdown("""<span style="font-size: 16px; font-weight: 600; color: #212529;">Source</span>""")
149
  self.info = gr.Markdown("The cost per input and output tokens values below are from [these benchmark results](https://www.cursor.so/blog/llama-inference#user-content-fn-llama-paper) that were obtained using the following initial configurations.",
150
  interactive=False,
 
176
  )
177
  self.maxed_out.change(on_maxed_out_change, inputs=[self.maxed_out, self.input_tokens_cost_per_token, self.output_tokens_cost_per_token], outputs=[self.input_tokens_cost_per_token, self.output_tokens_cost_per_token])
178
 
179
+ self.labor = gr.Number(5000, visible=False,
180
  label="($) Labor cost per month",
181
  info="This is an estimate of the labor cost of the AI engineer in charge of deploying the model",
182
  interactive=True
 
266
 
267
  def compute_cost_per_token(self, *args):
268
  begin=0
269
+ current_model = args[-3]
270
  current_input_tokens = args[-2]
271
  current_output_tokens = args[-1]
272
  for model in self.models:
273
  model_n_args = len(model.get_components_for_cost_computing())
274
  if current_model == model.get_name():
 
275
  model_args = args[begin:begin+model_n_args]
276
  cost_per_input_token, cost_per_output_token, labor_cost = model.compute_cost_per_token(*model_args)
277
+ model_tco = cost_per_input_token * current_input_tokens.value + cost_per_output_token * current_output_tokens.value
278
  latency = model.get_latency()
279
 
280
  return model_tco, latency, labor_cost