davidpomerenke committed on
Commit
80d21cb
·
verified ·
1 Parent(s): 2cf2580

Upload from GitHub Actions: Merge pull request #13 from datenlabor-bmz/jn-dev

Browse files
Files changed (2) hide show
  1. .DS_Store +0 -0
  2. evals/models.py +4 -2
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
evals/models.py CHANGED
@@ -23,6 +23,7 @@ important_models = [
23
  "meta-llama/llama-3.1-70b-instruct", # 0.3$
24
  "meta-llama/llama-3-70b-instruct", # 0.4$
25
  # "meta-llama/llama-2-70b-chat", # 0.9$; not properly supported by OpenRouter
 
26
  "openai/gpt-4.1", # 8$
27
  "openai/gpt-4.1-mini", # 1.6$
28
  "openai/gpt-4.1-nano", # 0.4$
@@ -54,6 +55,7 @@ important_models = [
54
 
55
  blocklist = [
56
  "google/gemini-2.5-pro-preview",
 
57
  "google/gemini-2.5-flash-preview",
58
  "google/gemini-2.5-flash-lite-preview",
59
  "google/gemini-2.5-flash-preview-04-17",
@@ -236,7 +238,7 @@ async def complete(**kwargs) -> str | None:
236
  return None
237
  raise e
238
  except asyncio.TimeoutError:
239
- print(f"⏰ Timeout after {timeout}s for model {model}")
240
  return None
241
  if not response.choices:
242
  raise Exception(response)
@@ -407,7 +409,7 @@ def load_models(date: date):
407
  creation_date=creation_date_hf.combine_first(creation_date_or),
408
  )
409
  # Filter out expensive models to keep costs reasonable
410
- models = models[models["cost"] <= 20.0].reset_index(drop=True)
411
  models["tasks"] = [
412
  ["translation_from", "translation_to", "classification", "mmlu", "arc", "truthfulqa", "mgsm"]
413
  ] * len(models)
 
23
  "meta-llama/llama-3.1-70b-instruct", # 0.3$
24
  "meta-llama/llama-3-70b-instruct", # 0.4$
25
  # "meta-llama/llama-2-70b-chat", # 0.9$; not properly supported by OpenRouter
26
+ "openai/gpt-5", # include if/when available
27
  "openai/gpt-4.1", # 8$
28
  "openai/gpt-4.1-mini", # 1.6$
29
  "openai/gpt-4.1-nano", # 0.4$
 
55
 
56
  blocklist = [
57
  "google/gemini-2.5-pro-preview",
58
+ "google/gemini-2.5-pro",
59
  "google/gemini-2.5-flash-preview",
60
  "google/gemini-2.5-flash-lite-preview",
61
  "google/gemini-2.5-flash-preview-04-17",
 
238
  return None
239
  raise e
240
  except asyncio.TimeoutError:
241
+ print(f"⏰ Timeout after {timeout}s for model {model_id}")
242
  return None
243
  if not response.choices:
244
  raise Exception(response)
 
409
  creation_date=creation_date_hf.combine_first(creation_date_or),
410
  )
411
  # Filter out expensive models to keep costs reasonable
412
+ models = models[models["cost"] <= 15.0].reset_index(drop=True)
413
  models["tasks"] = [
414
  ["translation_from", "translation_to", "classification", "mmlu", "arc", "truthfulqa", "mgsm"]
415
  ] * len(models)