David Pomerenke committed on
Commit
9983b5f
·
1 Parent(s): 2f9dee1

Use most popular current + historical models

Browse files
Files changed (2) hide show
  1. evals/main.py +0 -2
  2. evals/models.py +41 -8
evals/main.py CHANGED
@@ -1,7 +1,5 @@
1
  import asyncio
2
- import json
3
 
4
- import numpy as np
5
  import pandas as pd
6
  from tqdm.asyncio import tqdm_asyncio
7
 
 
1
  import asyncio
 
2
 
 
3
  import pandas as pd
4
  from tqdm.asyncio import tqdm_asyncio
5
 
evals/models.py CHANGED
@@ -28,8 +28,8 @@ models = [
28
  "openai/gpt-3.5-turbo", # 1.5$
29
  # "anthropic/claude-3.5-haiku", # 4$ -> too expensive for dev
30
  "mistralai/mistral-small-3.1-24b-instruct", # 0.3$
31
- "mistralai/mistral-saba", # 0.6$
32
- "mistralai/mistral-nemo", # 0.08$
33
  "google/gemini-2.5-flash-preview", # 0.6$
34
  "google/gemini-2.0-flash-lite-001", # 0.3$
35
  "google/gemma-3-27b-it", # 0.2$
@@ -38,7 +38,7 @@ models = [
38
  # "qwen/qwen-2.5-72b-instruct", # 0.39$
39
  # "qwen/qwen-2-72b-instruct", # 0.9$
40
  "deepseek/deepseek-chat-v3-0324", # 1.1$
41
- "deepseek/deepseek-chat", # 0.89$
42
  "microsoft/phi-4", # 0.07$
43
  "microsoft/phi-4-multimodal-instruct", # 0.1$
44
  "amazon/nova-micro-v1", # 0.09$
@@ -55,7 +55,18 @@ cache = Memory(location=".cache", verbose=0).cache
55
 
56
 
57
  @cache
58
- def get_popular_models(date: date):
 
 
 
 
 
 
 
 
 
 
 
59
  raw = get("https://openrouter.ai/rankings").text
60
  data = re.search(r'{\\"data\\":(.*),\\"isPercentage\\"', raw).group(1)
61
  data = json.loads(data.replace("\\", ""))
@@ -66,11 +77,25 @@ def get_popular_models(date: date):
66
  continue
67
  counts[model.split(":")[0]] += count
68
  counts = sorted(counts.items(), key=lambda x: x[1], reverse=True)
69
- return [model for model, _ in counts]
70
 
71
 
72
- pop_models = get_popular_models(date.today())
73
- # models += [m for m in pop_models if m not in models][:1]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
  load_dotenv()
76
  client = AsyncOpenAI(
@@ -125,10 +150,12 @@ async def transcribe(path, model="elevenlabs/scribe_v1"):
125
 
126
  models = pd.DataFrame(models, columns=["id"])
127
 
 
128
  @cache
129
  def get_models(date):
130
  return get("https://openrouter.ai/api/frontend/models/").json()["data"]
131
 
 
132
  def get_or_metadata(id):
133
  # get metadata from OpenRouter
134
  models = get_models(date.today())
@@ -156,7 +183,12 @@ def get_hf_metadata(row):
156
  return empty
157
  try:
158
  info = api.model_info(id)
159
- license = (info.card_data.license or "").replace("-", " ").replace("mit", "MIT").title()
 
 
 
 
 
160
  return {
161
  "hf_id": info.id,
162
  "creation_date": info.created_at,
@@ -190,3 +222,4 @@ models = models.assign(
190
  license=hf_metadata.str["license"],
191
  creation_date=creation_date_hf.combine_first(creation_date_or),
192
  )
 
 
28
  "openai/gpt-3.5-turbo", # 1.5$
29
  # "anthropic/claude-3.5-haiku", # 4$ -> too expensive for dev
30
  "mistralai/mistral-small-3.1-24b-instruct", # 0.3$
31
+ "mistralai/mistral-saba", # 0.6$
32
+ "mistralai/mistral-nemo", # 0.08$
33
  "google/gemini-2.5-flash-preview", # 0.6$
34
  "google/gemini-2.0-flash-lite-001", # 0.3$
35
  "google/gemma-3-27b-it", # 0.2$
 
38
  # "qwen/qwen-2.5-72b-instruct", # 0.39$
39
  # "qwen/qwen-2-72b-instruct", # 0.9$
40
  "deepseek/deepseek-chat-v3-0324", # 1.1$
41
+ "deepseek/deepseek-chat", # 0.89$
42
  "microsoft/phi-4", # 0.07$
43
  "microsoft/phi-4-multimodal-instruct", # 0.1$
44
  "amazon/nova-micro-v1", # 0.09$
 
55
 
56
 
57
  @cache
58
+ def get_models(date: date):
59
+ return get("https://openrouter.ai/api/frontend/models").json()["data"]
60
+
61
+
62
+ def get_slug(permaslug):
63
+ models = get_models(date.today())
64
+ slugs = [m["slug"] for m in models if m["permaslug"] == permaslug]
65
+ return slugs[0] if len(slugs) == 1 else None
66
+
67
+
68
+ @cache
69
+ def get_historical_popular_models(date: date):
70
  raw = get("https://openrouter.ai/rankings").text
71
  data = re.search(r'{\\"data\\":(.*),\\"isPercentage\\"', raw).group(1)
72
  data = json.loads(data.replace("\\", ""))
 
77
  continue
78
  counts[model.split(":")[0]] += count
79
  counts = sorted(counts.items(), key=lambda x: x[1], reverse=True)
80
+ return [get_slug(model) for model, _ in counts]
81
 
82
 
83
+ @cache
84
+ def get_current_popular_models(date: date):
85
+ raw = get("https://openrouter.ai/rankings").text
86
+ data = re.search(r'{\\"rankMap\\":(.*)\}\]\\n"\]\)</script>', raw).group(1)
87
+ data = json.loads(data.replace("\\", ""))["day"]
88
+ data = sorted(data, key=lambda x: x["total_prompt_tokens"], reverse=True)
89
+ return [get_slug(model["model_permaslug"]) for model in data]
90
+
91
+
92
+ models += [
93
+ m for m in get_historical_popular_models(date.today()) if m and m not in models
94
+ ][:5]
95
+ models += [
96
+ m for m in get_current_popular_models(date.today()) if m and m not in models
97
+ ][:5]
98
+
99
 
100
  load_dotenv()
101
  client = AsyncOpenAI(
 
150
 
151
  models = pd.DataFrame(models, columns=["id"])
152
 
153
+
154
  @cache
155
  def get_models(date):
156
  return get("https://openrouter.ai/api/frontend/models/").json()["data"]
157
 
158
+
159
  def get_or_metadata(id):
160
  # get metadata from OpenRouter
161
  models = get_models(date.today())
 
183
  return empty
184
  try:
185
  info = api.model_info(id)
186
+ license = (
187
+ (info.card_data.license or "")
188
+ .replace("-", " ")
189
+ .replace("mit", "MIT")
190
+ .title()
191
+ )
192
  return {
193
  "hf_id": info.id,
194
  "creation_date": info.created_at,
 
222
  license=hf_metadata.str["license"],
223
  creation_date=creation_date_hf.combine_first(creation_date_or),
224
  )
225
+ models = models[models["cost"] <= 2.0].reset_index(drop=True)