davidpomerenke committed on
Commit
b311dd5
·
verified ·
1 Parent(s): 7c6a118

Upload from GitHub Actions: New results

Browse files
Files changed (7) hide show
  1. evals/download_data.py +1 -1
  2. evals/languages.py +3 -3
  3. evals/main.py +31 -20
  4. evals/models.py +42 -42
  5. languages.json +217 -217
  6. models.json +171 -127
  7. results.json +0 -0
evals/download_data.py CHANGED
@@ -15,7 +15,7 @@ from datasets_.fleurs import fleurs
15
 
16
 
17
  # Add project root to sys.path (still useful for potential future imports if needed)
18
- project_root = Path(__file__).resolve().parent
19
  if str(project_root) not in sys.path:
20
  sys.path.append(str(project_root))
21
 
 
15
 
16
 
17
  # Add project root to sys.path (still useful for potential future imports if needed)
18
+ project_root = Path(__file__).resolve().parent.parent
19
  if str(project_root) not in sys.path:
20
  sys.path.append(str(project_root))
21
 
evals/languages.py CHANGED
@@ -1,9 +1,9 @@
1
  import re
2
 
3
  import pandas as pd
4
- from .datasets_.commonvoice import commonvoice
5
- from .datasets_.fleurs import fleurs
6
- from .datasets_.flores import flores
7
  from joblib.memory import Memory
8
  from langcodes import Language, standardize_tag
9
  from language_data.population_data import LANGUAGE_SPEAKING_POPULATION
 
1
  import re
2
 
3
  import pandas as pd
4
+ from datasets_.commonvoice import commonvoice
5
+ from datasets_.fleurs import fleurs
6
+ from datasets_.flores import flores
7
  from joblib.memory import Memory
8
  from langcodes import Language, standardize_tag
9
  from language_data.population_data import LANGUAGE_SPEAKING_POPULATION
evals/main.py CHANGED
@@ -1,11 +1,10 @@
1
  import asyncio
2
 
3
  import pandas as pd
4
- from tqdm.asyncio import tqdm_asyncio
5
-
6
  from languages import languages
7
  from models import models
8
  from tasks import tasks
 
9
 
10
  # ===== config =====
11
 
@@ -17,32 +16,44 @@ n_models = 25
17
 
18
 
19
  async def evaluate():
 
 
 
 
20
  print("running evaluations")
21
  old_results = pd.read_json("results.json")
22
- results = [
23
- task(model, lang.bcp_47, i)
 
24
  for task_name, task in tasks.items()
25
- for i in range(n_sentences)
26
  for lang in languages.iloc[:n_languages].itertuples()
27
  for model in models["id"].iloc[:n_models]
28
- if len(
29
- old_results[
30
- (old_results["model"] == model)
31
- & (old_results["bcp_47"] == lang.bcp_47)
32
- & (old_results["task"] == task_name)
33
- & (old_results["sentence_nr"] == i)
34
- ]
35
- )
36
- == 0
 
 
37
  ]
38
  results = await tqdm_asyncio.gather(*results, miniters=1)
39
  results = [r for group in results for r in group]
40
- results = pd.DataFrame(results)
41
- results = pd.concat([old_results, results])
42
- args = dict(orient="records", indent=2, force_ascii=False)
43
- results.to_json("results.json", **args)
44
- pd.DataFrame(models).to_json("models.json", **args)
45
- pd.DataFrame(languages).to_json("languages.json", **args)
 
 
 
 
 
 
46
 
47
 
48
  if __name__ == "__main__":
 
1
  import asyncio
2
 
3
  import pandas as pd
 
 
4
  from languages import languages
5
  from models import models
6
  from tasks import tasks
7
+ from tqdm.asyncio import tqdm_asyncio
8
 
9
  # ===== config =====
10
 
 
16
 
17
 
18
  async def evaluate():
19
+ # save up-to-date info on models and languages
20
+ args = dict(orient="records", indent=2, force_ascii=False)
21
+ pd.DataFrame(models).to_json("models.json", **args)
22
+ pd.DataFrame(languages).to_json("languages.json", **args)
23
  print("running evaluations")
24
  old_results = pd.read_json("results.json")
25
+ # get all combinations of model, language and task
26
+ combis = [
27
+ (model, lang.bcp_47, task_name)
28
  for task_name, task in tasks.items()
 
29
  for lang in languages.iloc[:n_languages].itertuples()
30
  for model in models["id"].iloc[:n_models]
31
+ ]
32
+ # filter out combinations that have already been evaluated
33
+ combis = pd.DataFrame(combis, columns=["model", "bcp_47", "task"])
34
+ combis = combis.merge(old_results, on=["model", "bcp_47", "task"], how="left")
35
+ combis = combis[combis["metric"].isna()][["model", "bcp_47", "task"]]
36
+ print(combis["model"].unique())
37
+ # run evaluations
38
+ results = [
39
+ tasks[task_name](model, bcp_47, i)
40
+ for i in range(n_sentences)
41
+ for model, bcp_47, task_name in combis.itertuples(index=False)
42
  ]
43
  results = await tqdm_asyncio.gather(*results, miniters=1)
44
  results = [r for group in results for r in group]
45
+ if results:
46
+ # aggregate results
47
+ results = pd.DataFrame(results)
48
+ results = (
49
+ results.groupby(["model", "bcp_47", "task", "metric"])
50
+ .agg({"score": "mean"})
51
+ .reset_index()
52
+ )
53
+ # save results
54
+ results = pd.concat([old_results, results])
55
+ results = results.sort_values(by=["model", "bcp_47", "task", "metric"])
56
+ results.to_json("results.json", **args)
57
 
58
 
59
  if __name__ == "__main__":
evals/models.py CHANGED
@@ -15,7 +15,7 @@ from requests import HTTPError, get
15
 
16
  # for development purposes, all languages will be evaluated on the fast models
17
  # and only a sample of languages will be evaluated on all models
18
- models = [
19
  "meta-llama/llama-4-maverick", # 0.6$
20
  "meta-llama/llama-3.3-70b-instruct", # 0.3$
21
  "meta-llama/llama-3.1-70b-instruct", # 0.3$
@@ -24,8 +24,8 @@ models = [
24
  "openai/gpt-4.1-mini", # 1.6$
25
  "openai/gpt-4.1-nano", # 0.4$
26
  "openai/gpt-4o-mini", # 0.6$
27
- "openai/gpt-3.5-turbo-0613", # 2$
28
- "openai/gpt-3.5-turbo", # 1.5$
29
  # "anthropic/claude-3.5-haiku", # 4$ -> too expensive for dev
30
  "mistralai/mistral-small-3.1-24b-instruct", # 0.3$
31
  "mistralai/mistral-saba", # 0.6$
@@ -65,8 +65,10 @@ def get_models(date: date):
65
 
66
  def get_model(permaslug):
67
  models = get_models(date.today())
68
- slugs = [m for m in models if m["permaslug"] == permaslug]
69
- return slugs[0] if len(slugs) == 1 else None
 
 
70
 
71
 
72
  @cache
@@ -81,7 +83,8 @@ def get_historical_popular_models(date: date):
81
  continue
82
  counts[model.split(":")[0]] += count
83
  counts = sorted(counts.items(), key=lambda x: x[1], reverse=True)
84
- return [get_model(model) for model, _ in counts]
 
85
 
86
 
87
  @cache
@@ -90,23 +93,10 @@ def get_current_popular_models(date: date):
90
  data = re.search(r'{\\"rankMap\\":(.*)\}\]\\n"\]\)</script>', raw).group(1)
91
  data = json.loads(data.replace("\\", ""))["day"]
92
  data = sorted(data, key=lambda x: x["total_prompt_tokens"], reverse=True)
93
- return [get_model(model["model_permaslug"]) for model in data]
 
94
 
95
 
96
- popular_models = (
97
- get_historical_popular_models(date.today())[:5]
98
- + get_current_popular_models(date.today())[:5]
99
- )
100
- popular_models = [get_model(m) for m in popular_models if get_model(m)]
101
- popular_models = [
102
- m for m in popular_models if m["endpoint"] and not m["endpoint"]["is_free"]
103
- ]
104
- popular_models = [m["slug"] for m in popular_models]
105
- popular_models = [
106
- m for m in popular_models if m and m not in models and m not in blocklist
107
- ]
108
- models += popular_models
109
-
110
  load_dotenv()
111
  client = AsyncOpenAI(
112
  base_url="https://openrouter.ai/api/v1",
@@ -158,9 +148,6 @@ async def transcribe(path, model="elevenlabs/scribe_v1"):
158
  raise ValueError(f"Model {model} not supported")
159
 
160
 
161
- models = pd.DataFrame(models, columns=["id"])
162
-
163
-
164
  def get_or_metadata(id):
165
  # get metadata from OpenRouter
166
  models = get_models(date.today())
@@ -210,21 +197,34 @@ def get_cost(row):
210
  return round(cost * 1_000_000, 2)
211
 
212
 
213
- or_metadata = models["id"].apply(get_or_metadata)
214
- hf_metadata = or_metadata.apply(get_hf_metadata)
215
- creation_date_hf = pd.to_datetime(hf_metadata.str["creation_date"]).dt.date
216
- creation_date_or = pd.to_datetime(
217
- or_metadata.str["created_at"].str.split("T").str[0]
218
- ).dt.date
219
-
220
- models = models.assign(
221
- name=or_metadata.str["short_name"],
222
- provider_name=or_metadata.str["name"].str.split(": ").str[0],
223
- cost=or_metadata.apply(get_cost),
224
- hf_id=hf_metadata.str["hf_id"],
225
- size=hf_metadata.str["size"],
226
- type=hf_metadata.str["type"],
227
- license=hf_metadata.str["license"],
228
- creation_date=creation_date_hf.combine_first(creation_date_or),
229
- )
230
- models = models[models["cost"] <= 2.0].reset_index(drop=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  # for development purposes, all languages will be evaluated on the fast models
17
  # and only a sample of languages will be evaluated on all models
18
+ important_models = [
19
  "meta-llama/llama-4-maverick", # 0.6$
20
  "meta-llama/llama-3.3-70b-instruct", # 0.3$
21
  "meta-llama/llama-3.1-70b-instruct", # 0.3$
 
24
  "openai/gpt-4.1-mini", # 1.6$
25
  "openai/gpt-4.1-nano", # 0.4$
26
  "openai/gpt-4o-mini", # 0.6$
27
+ # "openai/gpt-3.5-turbo-0613", # 2$
28
+ # "openai/gpt-3.5-turbo", # 1.5$
29
  # "anthropic/claude-3.5-haiku", # 4$ -> too expensive for dev
30
  "mistralai/mistral-small-3.1-24b-instruct", # 0.3$
31
  "mistralai/mistral-saba", # 0.6$
 
65
 
66
  def get_model(permaslug):
67
  models = get_models(date.today())
68
+ slugs = [m for m in models if m["permaslug"] == permaslug and m["endpoint"] and not m["endpoint"]["is_free"]]
69
+ if len(slugs) == 0:
70
+ print(f"no model found for {permaslug}")
71
+ return slugs[0] if len(slugs) >= 1 else None
72
 
73
 
74
  @cache
 
83
  continue
84
  counts[model.split(":")[0]] += count
85
  counts = sorted(counts.items(), key=lambda x: x[1], reverse=True)
86
+ models = [get_model(model) for model, _ in counts]
87
+ return [m for m in models if m]
88
 
89
 
90
  @cache
 
93
  data = re.search(r'{\\"rankMap\\":(.*)\}\]\\n"\]\)</script>', raw).group(1)
94
  data = json.loads(data.replace("\\", ""))["day"]
95
  data = sorted(data, key=lambda x: x["total_prompt_tokens"], reverse=True)
96
+ models = [get_model(model["model_permaslug"]) for model in data]
97
+ return [m for m in models if m]
98
 
99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  load_dotenv()
101
  client = AsyncOpenAI(
102
  base_url="https://openrouter.ai/api/v1",
 
148
  raise ValueError(f"Model {model} not supported")
149
 
150
 
 
 
 
151
  def get_or_metadata(id):
152
  # get metadata from OpenRouter
153
  models = get_models(date.today())
 
197
  return round(cost * 1_000_000, 2)
198
 
199
 
200
+ @cache
201
+ def load_models(date: date):
202
+ popular_models = (
203
+ get_historical_popular_models(date.today())[:10]
204
+ + get_current_popular_models(date.today())[:10]
205
+ )
206
+ popular_models = [m["slug"] for m in popular_models]
207
+ models = set(important_models + popular_models) - set(blocklist)
208
+ models = pd.DataFrame(sorted(list(models)), columns=["id"])
209
+ or_metadata = models["id"].apply(get_or_metadata)
210
+ hf_metadata = or_metadata.apply(get_hf_metadata)
211
+ creation_date_hf = pd.to_datetime(hf_metadata.str["creation_date"]).dt.date
212
+ creation_date_or = pd.to_datetime(
213
+ or_metadata.str["created_at"].str.split("T").str[0]
214
+ ).dt.date
215
+
216
+ models = models.assign(
217
+ name=or_metadata.str["short_name"],
218
+ provider_name=or_metadata.str["name"].str.split(": ").str[0],
219
+ cost=or_metadata.apply(get_cost),
220
+ hf_id=hf_metadata.str["hf_id"],
221
+ size=hf_metadata.str["size"],
222
+ type=hf_metadata.str["type"],
223
+ license=hf_metadata.str["license"],
224
+ creation_date=creation_date_hf.combine_first(creation_date_or),
225
+ )
226
+ models = models[models["cost"] <= 2.0].reset_index(drop=True)
227
+ return models
228
+
229
+
230
+ models = load_models(date.today())
languages.json CHANGED
@@ -163,7 +163,7 @@
163
  "family":"Indo-European",
164
  "flores_path":"deu_Latn",
165
  "fleurs_tag":"de_de",
166
- "commonvoice_hours":1362.0,
167
  "commonvoice_locale":"de",
168
  "in_benchmark":true
169
  },
@@ -269,11 +269,11 @@
269
  "language_name":"Wu Chinese",
270
  "autonym":"Wu Chinese",
271
  "family":"Sino-Tibetan",
272
- "flores_path":"wuu_Hans",
273
  "fleurs_tag":null,
274
  "commonvoice_hours":null,
275
  "commonvoice_locale":null,
276
- "in_benchmark":true
277
  },
278
  {
279
  "bcp_47":"tr",
@@ -485,11 +485,11 @@
485
  "language_name":"North Levantine Arabic",
486
  "autonym":"العامية",
487
  "family":"Afro-Asiatic",
488
- "flores_path":"apc_Arab",
489
  "fleurs_tag":null,
490
  "commonvoice_hours":null,
491
  "commonvoice_locale":null,
492
- "in_benchmark":true
493
  },
494
  {
495
  "bcp_47":"ms",
@@ -619,7 +619,7 @@
619
  "family":"Indo-European",
620
  "flores_path":"nld_Latn",
621
  "fleurs_tag":"nl_nl",
622
- "commonvoice_hours":119.0,
623
  "commonvoice_locale":"nl",
624
  "in_benchmark":true
625
  },
@@ -876,10 +876,10 @@
876
  "in_benchmark":true
877
  },
878
  {
879
- "bcp_47":"mwr",
880
  "speakers":15913080,
881
- "language_name":"Marwari",
882
- "autonym":"Marwari",
883
  "family":"Indo-European",
884
  "flores_path":null,
885
  "fleurs_tag":null,
@@ -888,10 +888,10 @@
888
  "in_benchmark":false
889
  },
890
  {
891
- "bcp_47":"bgc",
892
  "speakers":15913080,
893
- "language_name":"Haryanvi",
894
- "autonym":"हरियाणवी",
895
  "family":"Indo-European",
896
  "flores_path":null,
897
  "fleurs_tag":null,
@@ -1073,11 +1073,11 @@
1073
  "language_name":"Akan",
1074
  "autonym":"Akan",
1075
  "family":"Atlantic-Congo",
1076
- "flores_path":"twi_Latn",
1077
  "fleurs_tag":null,
1078
  "commonvoice_hours":0.2,
1079
  "commonvoice_locale":"tw",
1080
- "in_benchmark":true
1081
  },
1082
  {
1083
  "bcp_47":"qu",
@@ -1195,7 +1195,7 @@
1195
  "family":"Atlantic-Congo",
1196
  "flores_path":"lua_Latn",
1197
  "fleurs_tag":null,
1198
- "commonvoice_hours":0.7,
1199
  "commonvoice_locale":"lua",
1200
  "in_benchmark":true
1201
  },
@@ -1303,7 +1303,7 @@
1303
  "family":"Afro-Asiatic",
1304
  "flores_path":"heb_Hebr",
1305
  "fleurs_tag":"he_il",
1306
- "commonvoice_hours":1.2,
1307
  "commonvoice_locale":"he",
1308
  "in_benchmark":true
1309
  },
@@ -1375,7 +1375,7 @@
1375
  "family":"Turkic",
1376
  "flores_path":"uig_Arab",
1377
  "fleurs_tag":null,
1378
- "commonvoice_hours":371.0,
1379
  "commonvoice_locale":"ug",
1380
  "in_benchmark":true
1381
  },
@@ -1555,7 +1555,7 @@
1555
  "family":"Indo-European",
1556
  "flores_path":"slk_Latn",
1557
  "fleurs_tag":"sk_sk",
1558
- "commonvoice_hours":48.0,
1559
  "commonvoice_locale":"sk",
1560
  "in_benchmark":true
1561
  },
@@ -1955,18 +1955,6 @@
1955
  "commonvoice_locale":"gom",
1956
  "in_benchmark":true
1957
  },
1958
- {
1959
- "bcp_47":"kln",
1960
- "speakers":4068120,
1961
- "language_name":"Kalenjin",
1962
- "autonym":"Kalenjin",
1963
- "family":"Nilotic",
1964
- "flores_path":null,
1965
- "fleurs_tag":null,
1966
- "commonvoice_hours":43.0,
1967
- "commonvoice_locale":"kln",
1968
- "in_benchmark":false
1969
- },
1970
  {
1971
  "bcp_47":"kam",
1972
  "speakers":4068120,
@@ -1979,6 +1967,18 @@
1979
  "commonvoice_locale":"kam",
1980
  "in_benchmark":true
1981
  },
 
 
 
 
 
 
 
 
 
 
 
 
1982
  {
1983
  "bcp_47":"bjn",
1984
  "speakers":4010288,
@@ -2009,11 +2009,11 @@
2009
  "language_name":"Makhuwa",
2010
  "autonym":"Emakhuwa",
2011
  "family":"Atlantic-Congo",
2012
- "flores_path":"vmw_Latn",
2013
  "fleurs_tag":null,
2014
  "commonvoice_hours":0.0,
2015
  "commonvoice_locale":"vmw",
2016
- "in_benchmark":true
2017
  },
2018
  {
2019
  "bcp_47":"glk",
@@ -2124,10 +2124,10 @@
2124
  "in_benchmark":true
2125
  },
2126
  {
2127
- "bcp_47":"gbm",
2128
  "speakers":3580443,
2129
- "language_name":"Garhwali",
2130
- "autonym":"Garhwali",
2131
  "family":"Indo-European",
2132
  "flores_path":null,
2133
  "fleurs_tag":null,
@@ -2136,10 +2136,10 @@
2136
  "in_benchmark":false
2137
  },
2138
  {
2139
- "bcp_47":"lmn",
2140
  "speakers":3580443,
2141
- "language_name":"Lambadi",
2142
- "autonym":"Lambadi",
2143
  "family":"Indo-European",
2144
  "flores_path":null,
2145
  "fleurs_tag":null,
@@ -2335,7 +2335,7 @@
2335
  "family":"Atlantic-Congo",
2336
  "flores_path":null,
2337
  "fleurs_tag":null,
2338
- "commonvoice_hours":2.5,
2339
  "commonvoice_locale":"bci",
2340
  "in_benchmark":false
2341
  },
@@ -2352,27 +2352,27 @@
2352
  "in_benchmark":true
2353
  },
2354
  {
2355
- "bcp_47":"efi",
2356
  "speakers":2996392,
2357
- "language_name":"Efik",
2358
- "autonym":"Efik",
2359
  "family":"Atlantic-Congo",
2360
  "flores_path":null,
2361
  "fleurs_tag":null,
2362
- "commonvoice_hours":null,
2363
- "commonvoice_locale":null,
2364
  "in_benchmark":false
2365
  },
2366
  {
2367
- "bcp_47":"ibb",
2368
  "speakers":2996392,
2369
- "language_name":"Ibibio",
2370
- "autonym":"Ibibio",
2371
  "family":"Atlantic-Congo",
2372
  "flores_path":null,
2373
  "fleurs_tag":null,
2374
- "commonvoice_hours":5.3,
2375
- "commonvoice_locale":"ibb",
2376
  "in_benchmark":false
2377
  },
2378
  {
@@ -2544,11 +2544,11 @@
2544
  "in_benchmark":false
2545
  },
2546
  {
2547
- "bcp_47":"wbq",
2548
  "speakers":2386962,
2549
- "language_name":"Waddar",
2550
- "autonym":"Waddar",
2551
- "family":"Dravidian",
2552
  "flores_path":null,
2553
  "fleurs_tag":null,
2554
  "commonvoice_hours":null,
@@ -2556,11 +2556,11 @@
2556
  "in_benchmark":false
2557
  },
2558
  {
2559
- "bcp_47":"sck",
2560
  "speakers":2386962,
2561
- "language_name":"Sadri",
2562
- "autonym":"Sadri",
2563
- "family":"Indo-European",
2564
  "flores_path":null,
2565
  "fleurs_tag":null,
2566
  "commonvoice_hours":null,
@@ -2724,10 +2724,10 @@
2724
  "in_benchmark":false
2725
  },
2726
  {
2727
- "bcp_47":"khn",
2728
  "speakers":1989135,
2729
- "language_name":"Khandesi",
2730
- "autonym":"Khandesi",
2731
  "family":"Indo-European",
2732
  "flores_path":null,
2733
  "fleurs_tag":null,
@@ -2748,10 +2748,10 @@
2748
  "in_benchmark":false
2749
  },
2750
  {
2751
- "bcp_47":"wbr",
2752
  "speakers":1989135,
2753
- "language_name":"Wagdi",
2754
- "autonym":"Wagdi",
2755
  "family":"Indo-European",
2756
  "flores_path":null,
2757
  "fleurs_tag":null,
@@ -3283,7 +3283,7 @@
3283
  "family":"Atlantic-Congo",
3284
  "flores_path":null,
3285
  "fleurs_tag":null,
3286
- "commonvoice_hours":10.0,
3287
  "commonvoice_locale":"bum",
3288
  "in_benchmark":false
3289
  },
@@ -3535,7 +3535,7 @@
3535
  "family":null,
3536
  "flores_path":"eus_Latn",
3537
  "fleurs_tag":null,
3538
- "commonvoice_hours":358.0,
3539
  "commonvoice_locale":"eu",
3540
  "in_benchmark":true
3541
  },
@@ -3559,7 +3559,7 @@
3559
  "family":"Abkhaz-Adyge",
3560
  "flores_path":null,
3561
  "fleurs_tag":null,
3562
- "commonvoice_hours":56.0,
3563
  "commonvoice_locale":"kbd",
3564
  "in_benchmark":false
3565
  },
@@ -3679,15 +3679,15 @@
3679
  "family":"Indo-European",
3680
  "flores_path":"ydd_Hebr",
3681
  "fleurs_tag":null,
3682
- "commonvoice_hours":0.5,
3683
  "commonvoice_locale":"yi",
3684
  "in_benchmark":true
3685
  },
3686
  {
3687
- "bcp_47":"ksb",
3688
  "speakers":995398,
3689
- "language_name":"Shambala",
3690
- "autonym":"Kishambaa",
3691
  "family":"Atlantic-Congo",
3692
  "flores_path":null,
3693
  "fleurs_tag":null,
@@ -3696,10 +3696,10 @@
3696
  "in_benchmark":false
3697
  },
3698
  {
3699
- "bcp_47":"bez",
3700
  "speakers":995398,
3701
- "language_name":"Bena",
3702
- "autonym":"Hibena",
3703
  "family":"Atlantic-Congo",
3704
  "flores_path":null,
3705
  "fleurs_tag":null,
@@ -4512,27 +4512,27 @@
4512
  "in_benchmark":false
4513
  },
4514
  {
4515
- "bcp_47":"nhw",
4516
  "speakers":501735,
4517
- "language_name":"Western Huasteca Nahuatl",
4518
- "autonym":"Western Huasteca Nahuatl",
4519
  "family":"Uto-Aztecan",
4520
  "flores_path":null,
4521
  "fleurs_tag":null,
4522
- "commonvoice_hours":null,
4523
- "commonvoice_locale":null,
4524
  "in_benchmark":false
4525
  },
4526
  {
4527
- "bcp_47":"nhe",
4528
  "speakers":501735,
4529
- "language_name":"Eastern Huasteca Nahuatl",
4530
- "autonym":"Eastern Huasteca Nahuatl",
4531
  "family":"Uto-Aztecan",
4532
  "flores_path":null,
4533
  "fleurs_tag":null,
4534
- "commonvoice_hours":0.0,
4535
- "commonvoice_locale":"nhe",
4536
  "in_benchmark":false
4537
  },
4538
  {
@@ -4651,7 +4651,7 @@
4651
  "family":"Abkhaz-Adyge",
4652
  "flores_path":null,
4653
  "fleurs_tag":null,
4654
- "commonvoice_hours":15.0,
4655
  "commonvoice_locale":"ady",
4656
  "in_benchmark":false
4657
  },
@@ -4675,7 +4675,7 @@
4675
  "family":"Kartvelian",
4676
  "flores_path":null,
4677
  "fleurs_tag":null,
4678
- "commonvoice_hours":11.0,
4679
  "commonvoice_locale":"xmf",
4680
  "in_benchmark":false
4681
  },
@@ -4685,11 +4685,11 @@
4685
  "language_name":"Erzya",
4686
  "autonym":"Эрзянь Кель",
4687
  "family":"Uralic",
4688
- "flores_path":"myv_Cyrl",
4689
  "fleurs_tag":null,
4690
  "commonvoice_hours":3.8,
4691
  "commonvoice_locale":"myv",
4692
- "in_benchmark":true
4693
  },
4694
  {
4695
  "bcp_47":"dav",
@@ -4715,18 +4715,6 @@
4715
  "commonvoice_locale":null,
4716
  "in_benchmark":false
4717
  },
4718
- {
4719
- "bcp_47":"jmc",
4720
- "speakers":433291,
4721
- "language_name":"Machame",
4722
- "autonym":"Kimachame",
4723
- "family":"Atlantic-Congo",
4724
- "flores_path":null,
4725
- "fleurs_tag":null,
4726
- "commonvoice_hours":null,
4727
- "commonvoice_locale":null,
4728
- "in_benchmark":false
4729
- },
4730
  {
4731
  "bcp_47":"vun",
4732
  "speakers":433291,
@@ -4751,6 +4739,18 @@
4751
  "commonvoice_locale":"rof",
4752
  "in_benchmark":false
4753
  },
 
 
 
 
 
 
 
 
 
 
 
 
4754
  {
4755
  "bcp_47":"kjg",
4756
  "speakers":431949,
@@ -5009,11 +5009,11 @@
5009
  "language_name":"Dargwa",
5010
  "autonym":"Dargwa",
5011
  "family":"Nakh-Daghestanian",
5012
- "flores_path":"dar_Cyrl",
5013
  "fleurs_tag":null,
5014
  "commonvoice_hours":0.0,
5015
  "commonvoice_locale":"dar",
5016
- "in_benchmark":true
5017
  },
5018
  {
5019
  "bcp_47":"lif",
@@ -5124,27 +5124,27 @@
5124
  "in_benchmark":false
5125
  },
5126
  {
5127
- "bcp_47":"bas",
5128
  "speakers":332940,
5129
- "language_name":"Basaa",
5130
- "autonym":"Ɓàsàa",
5131
  "family":"Atlantic-Congo",
5132
  "flores_path":null,
5133
  "fleurs_tag":null,
5134
- "commonvoice_hours":12.0,
5135
- "commonvoice_locale":"bas",
5136
  "in_benchmark":false
5137
  },
5138
  {
5139
- "bcp_47":"bax",
5140
  "speakers":332940,
5141
- "language_name":"Bamun",
5142
- "autonym":"Bamun",
5143
  "family":"Atlantic-Congo",
5144
  "flores_path":null,
5145
  "fleurs_tag":null,
5146
- "commonvoice_hours":11.0,
5147
- "commonvoice_locale":"bax",
5148
  "in_benchmark":false
5149
  },
5150
  {
@@ -5232,11 +5232,11 @@
5232
  "in_benchmark":false
5233
  },
5234
  {
5235
- "bcp_47":"bfq",
5236
  "speakers":305001,
5237
- "language_name":"Badaga",
5238
- "autonym":"Badaga",
5239
- "family":"Dravidian",
5240
  "flores_path":null,
5241
  "fleurs_tag":null,
5242
  "commonvoice_hours":null,
@@ -5244,11 +5244,11 @@
5244
  "in_benchmark":false
5245
  },
5246
  {
5247
- "bcp_47":"njo",
5248
  "speakers":305001,
5249
- "language_name":"Ao Naga",
5250
- "autonym":"Ao Naga",
5251
- "family":"Sino-Tibetan",
5252
  "flores_path":null,
5253
  "fleurs_tag":null,
5254
  "commonvoice_hours":null,
@@ -5388,10 +5388,10 @@
5388
  "in_benchmark":false
5389
  },
5390
  {
5391
- "bcp_47":"tdd",
5392
  "speakers":264864,
5393
- "language_name":"Tai Nüa",
5394
- "autonym":"Tai Nüa",
5395
  "family":"Tai-Kadai",
5396
  "flores_path":null,
5397
  "fleurs_tag":null,
@@ -5400,10 +5400,10 @@
5400
  "in_benchmark":false
5401
  },
5402
  {
5403
- "bcp_47":"khb",
5404
  "speakers":264864,
5405
- "language_name":"",
5406
- "autonym":"",
5407
  "family":"Tai-Kadai",
5408
  "flores_path":null,
5409
  "fleurs_tag":null,
@@ -5508,10 +5508,10 @@
5508
  "in_benchmark":true
5509
  },
5510
  {
5511
- "bcp_47":"sxn",
5512
  "speakers":245664,
5513
- "language_name":"Sangir",
5514
- "autonym":"Sangir",
5515
  "family":"Austronesian",
5516
  "flores_path":null,
5517
  "fleurs_tag":null,
@@ -5520,10 +5520,10 @@
5520
  "in_benchmark":false
5521
  },
5522
  {
5523
- "bcp_47":"mdr",
5524
  "speakers":245664,
5525
- "language_name":"Mandar",
5526
- "autonym":"Mandar",
5527
  "family":"Austronesian",
5528
  "flores_path":null,
5529
  "fleurs_tag":null,
@@ -5753,11 +5753,11 @@
5753
  "language_name":"Tuvinian",
5754
  "autonym":"Tuvinian",
5755
  "family":"Turkic",
5756
- "flores_path":"tyv_Cyrl",
5757
  "fleurs_tag":null,
5758
  "commonvoice_hours":0.0,
5759
  "commonvoice_locale":"tyv",
5760
- "in_benchmark":true
5761
  },
5762
  {
5763
  "bcp_47":"dtp",
@@ -5904,10 +5904,10 @@
5904
  "in_benchmark":false
5905
  },
5906
  {
5907
- "bcp_47":"bss",
5908
  "speakers":149823,
5909
- "language_name":"Akoose",
5910
- "autonym":"Akoose",
5911
  "family":"Atlantic-Congo",
5912
  "flores_path":null,
5913
  "fleurs_tag":null,
@@ -5916,10 +5916,10 @@
5916
  "in_benchmark":false
5917
  },
5918
  {
5919
- "bcp_47":"kkj",
5920
  "speakers":149823,
5921
- "language_name":"Kako",
5922
- "autonym":"Kakɔ",
5923
  "family":"Atlantic-Congo",
5924
  "flores_path":null,
5925
  "fleurs_tag":null,
@@ -6223,7 +6223,7 @@
6223
  "family":"Abkhaz-Adyge",
6224
  "flores_path":null,
6225
  "fleurs_tag":null,
6226
- "commonvoice_hours":68.0,
6227
  "commonvoice_locale":"ab",
6228
  "in_benchmark":false
6229
  },
@@ -6821,11 +6821,11 @@
6821
  "language_name":"Aragonese",
6822
  "autonym":"Aragonés",
6823
  "family":"Indo-European",
6824
- "flores_path":"arg_Latn",
6825
  "fleurs_tag":null,
6826
  "commonvoice_hours":17.0,
6827
  "commonvoice_locale":"an",
6828
- "in_benchmark":true
6829
  },
6830
  {
6831
  "bcp_47":"chr",
@@ -7272,11 +7272,11 @@
7272
  "in_benchmark":false
7273
  },
7274
  {
7275
- "bcp_47":"bku",
7276
  "speakers":7970,
7277
- "language_name":"Buhid",
7278
- "autonym":"Buhid",
7279
- "family":"Austronesian",
7280
  "flores_path":null,
7281
  "fleurs_tag":null,
7282
  "commonvoice_hours":null,
@@ -7284,11 +7284,11 @@
7284
  "in_benchmark":false
7285
  },
7286
  {
7287
- "bcp_47":"twq",
7288
  "speakers":7970,
7289
- "language_name":"Tasawaq",
7290
- "autonym":"Tasawaq Senni",
7291
- "family":"Songhay",
7292
  "flores_path":null,
7293
  "fleurs_tag":null,
7294
  "commonvoice_hours":null,
@@ -7567,7 +7567,7 @@
7567
  "family":"Atlantic-Congo",
7568
  "flores_path":null,
7569
  "fleurs_tag":null,
7570
- "commonvoice_hours":1.7,
7571
  "commonvoice_locale":"yav",
7572
  "in_benchmark":false
7573
  },
@@ -7836,11 +7836,11 @@
7836
  "in_benchmark":false
7837
  },
7838
  {
7839
- "bcp_47":"crl",
7840
  "speakers":377,
7841
- "language_name":"Northern East Cree",
7842
- "autonym":"Northern East Cree",
7843
- "family":"Algic",
7844
  "flores_path":null,
7845
  "fleurs_tag":null,
7846
  "commonvoice_hours":null,
@@ -7848,11 +7848,11 @@
7848
  "in_benchmark":false
7849
  },
7850
  {
7851
- "bcp_47":"kwk",
7852
  "speakers":377,
7853
- "language_name":"Kwakʼwala",
7854
- "autonym":"KwakʼWala",
7855
- "family":"Wakashan",
7856
  "flores_path":null,
7857
  "fleurs_tag":null,
7858
  "commonvoice_hours":null,
@@ -7968,11 +7968,11 @@
7968
  "in_benchmark":false
7969
  },
7970
  {
7971
- "bcp_47":"sgs",
7972
  "speakers":0,
7973
- "language_name":"Samogitian",
7974
- "autonym":"Samogitian",
7975
- "family":"Indo-European",
7976
  "flores_path":null,
7977
  "fleurs_tag":null,
7978
  "commonvoice_hours":null,
@@ -7980,11 +7980,11 @@
7980
  "in_benchmark":false
7981
  },
7982
  {
7983
- "bcp_47":"rgn",
7984
  "speakers":0,
7985
- "language_name":"Romagnol",
7986
- "autonym":"Romagnol",
7987
- "family":"Indo-European",
7988
  "flores_path":null,
7989
  "fleurs_tag":null,
7990
  "commonvoice_hours":null,
@@ -7992,22 +7992,22 @@
7992
  "in_benchmark":false
7993
  },
7994
  {
7995
- "bcp_47":"ann",
7996
  "speakers":0,
7997
- "language_name":"Obolo",
7998
- "autonym":"Obolo",
7999
- "family":"Atlantic-Congo",
8000
  "flores_path":null,
8001
  "fleurs_tag":null,
8002
- "commonvoice_hours":null,
8003
- "commonvoice_locale":null,
8004
  "in_benchmark":false
8005
  },
8006
  {
8007
- "bcp_47":"pfl",
8008
  "speakers":0,
8009
- "language_name":"Palatine German",
8010
- "autonym":"Palatine German",
8011
  "family":"Indo-European",
8012
  "flores_path":null,
8013
  "fleurs_tag":null,
@@ -8016,23 +8016,23 @@
8016
  "in_benchmark":false
8017
  },
8018
  {
8019
- "bcp_47":"osa",
8020
  "speakers":0,
8021
- "language_name":"Osage",
8022
- "autonym":"𐓏𐓘𐓻𐓘𐓻𐓟",
8023
- "family":"Siouan",
8024
  "flores_path":null,
8025
  "fleurs_tag":null,
8026
- "commonvoice_hours":null,
8027
- "commonvoice_locale":null,
8028
  "in_benchmark":false
8029
  },
8030
  {
8031
- "bcp_47":"lzh",
8032
  "speakers":0,
8033
- "language_name":"Literary Chinese",
8034
- "autonym":"Literary Chinese",
8035
- "family":"Sino-Tibetan",
8036
  "flores_path":null,
8037
  "fleurs_tag":null,
8038
  "commonvoice_hours":null,
@@ -8040,23 +8040,23 @@
8040
  "in_benchmark":false
8041
  },
8042
  {
8043
- "bcp_47":"jbo",
8044
  "speakers":0,
8045
- "language_name":"Lojban",
8046
- "autonym":"La .Lojban.",
8047
- "family":"Artificial Language",
8048
  "flores_path":null,
8049
  "fleurs_tag":null,
8050
- "commonvoice_hours":0.0,
8051
- "commonvoice_locale":"jbo",
8052
  "in_benchmark":false
8053
  },
8054
  {
8055
- "bcp_47":"io",
8056
  "speakers":0,
8057
- "language_name":"Ido",
8058
- "autonym":"Ido",
8059
- "family":"Artificial Language",
8060
  "flores_path":null,
8061
  "fleurs_tag":null,
8062
  "commonvoice_hours":null,
@@ -8064,10 +8064,10 @@
8064
  "in_benchmark":false
8065
  },
8066
  {
8067
- "bcp_47":"jut",
8068
  "speakers":0,
8069
- "language_name":"Jutish",
8070
- "autonym":"Jutish",
8071
  "family":"Indo-European",
8072
  "flores_path":null,
8073
  "fleurs_tag":null,
@@ -8076,11 +8076,11 @@
8076
  "in_benchmark":false
8077
  },
8078
  {
8079
- "bcp_47":"gez",
8080
  "speakers":0,
8081
- "language_name":"Geez",
8082
- "autonym":"Geez",
8083
- "family":"Afro-Asiatic",
8084
  "flores_path":null,
8085
  "fleurs_tag":null,
8086
  "commonvoice_hours":null,
@@ -8088,11 +8088,11 @@
8088
  "in_benchmark":false
8089
  },
8090
  {
8091
- "bcp_47":"cu",
8092
  "speakers":0,
8093
- "language_name":"Church Slavic",
8094
- "autonym":"Church Slavic",
8095
- "family":"Indo-European",
8096
  "flores_path":null,
8097
  "fleurs_tag":null,
8098
  "commonvoice_hours":null,
@@ -8112,23 +8112,23 @@
8112
  "in_benchmark":false
8113
  },
8114
  {
8115
- "bcp_47":"vot",
8116
  "speakers":0,
8117
- "language_name":"Votic",
8118
- "autonym":"Votic",
8119
- "family":"Uralic",
8120
  "flores_path":null,
8121
  "fleurs_tag":null,
8122
- "commonvoice_hours":0.1,
8123
- "commonvoice_locale":"vot",
8124
  "in_benchmark":false
8125
  },
8126
  {
8127
- "bcp_47":"cad",
8128
  "speakers":0,
8129
- "language_name":"Caddo",
8130
- "autonym":"Caddo",
8131
- "family":"Caddoan",
8132
  "flores_path":null,
8133
  "fleurs_tag":null,
8134
  "commonvoice_hours":null,
 
163
  "family":"Indo-European",
164
  "flores_path":"deu_Latn",
165
  "fleurs_tag":"de_de",
166
+ "commonvoice_hours":1363.0,
167
  "commonvoice_locale":"de",
168
  "in_benchmark":true
169
  },
 
269
  "language_name":"Wu Chinese",
270
  "autonym":"Wu Chinese",
271
  "family":"Sino-Tibetan",
272
+ "flores_path":null,
273
  "fleurs_tag":null,
274
  "commonvoice_hours":null,
275
  "commonvoice_locale":null,
276
+ "in_benchmark":false
277
  },
278
  {
279
  "bcp_47":"tr",
 
485
  "language_name":"North Levantine Arabic",
486
  "autonym":"العامية",
487
  "family":"Afro-Asiatic",
488
+ "flores_path":null,
489
  "fleurs_tag":null,
490
  "commonvoice_hours":null,
491
  "commonvoice_locale":null,
492
+ "in_benchmark":false
493
  },
494
  {
495
  "bcp_47":"ms",
 
619
  "family":"Indo-European",
620
  "flores_path":"nld_Latn",
621
  "fleurs_tag":"nl_nl",
622
+ "commonvoice_hours":120.0,
623
  "commonvoice_locale":"nl",
624
  "in_benchmark":true
625
  },
 
876
  "in_benchmark":true
877
  },
878
  {
879
+ "bcp_47":"bgc",
880
  "speakers":15913080,
881
+ "language_name":"Haryanvi",
882
+ "autonym":"हरियाणवी",
883
  "family":"Indo-European",
884
  "flores_path":null,
885
  "fleurs_tag":null,
 
888
  "in_benchmark":false
889
  },
890
  {
891
+ "bcp_47":"mwr",
892
  "speakers":15913080,
893
+ "language_name":"Marwari",
894
+ "autonym":"Marwari",
895
  "family":"Indo-European",
896
  "flores_path":null,
897
  "fleurs_tag":null,
 
1073
  "language_name":"Akan",
1074
  "autonym":"Akan",
1075
  "family":"Atlantic-Congo",
1076
+ "flores_path":null,
1077
  "fleurs_tag":null,
1078
  "commonvoice_hours":0.2,
1079
  "commonvoice_locale":"tw",
1080
+ "in_benchmark":false
1081
  },
1082
  {
1083
  "bcp_47":"qu",
 
1195
  "family":"Atlantic-Congo",
1196
  "flores_path":"lua_Latn",
1197
  "fleurs_tag":null,
1198
+ "commonvoice_hours":0.8,
1199
  "commonvoice_locale":"lua",
1200
  "in_benchmark":true
1201
  },
 
1303
  "family":"Afro-Asiatic",
1304
  "flores_path":"heb_Hebr",
1305
  "fleurs_tag":"he_il",
1306
+ "commonvoice_hours":1.3,
1307
  "commonvoice_locale":"he",
1308
  "in_benchmark":true
1309
  },
 
1375
  "family":"Turkic",
1376
  "flores_path":"uig_Arab",
1377
  "fleurs_tag":null,
1378
+ "commonvoice_hours":372.0,
1379
  "commonvoice_locale":"ug",
1380
  "in_benchmark":true
1381
  },
 
1555
  "family":"Indo-European",
1556
  "flores_path":"slk_Latn",
1557
  "fleurs_tag":"sk_sk",
1558
+ "commonvoice_hours":49.0,
1559
  "commonvoice_locale":"sk",
1560
  "in_benchmark":true
1561
  },
 
1955
  "commonvoice_locale":"gom",
1956
  "in_benchmark":true
1957
  },
 
 
 
 
 
 
 
 
 
 
 
 
1958
  {
1959
  "bcp_47":"kam",
1960
  "speakers":4068120,
 
1967
  "commonvoice_locale":"kam",
1968
  "in_benchmark":true
1969
  },
1970
+ {
1971
+ "bcp_47":"kln",
1972
+ "speakers":4068120,
1973
+ "language_name":"Kalenjin",
1974
+ "autonym":"Kalenjin",
1975
+ "family":"Nilotic",
1976
+ "flores_path":null,
1977
+ "fleurs_tag":null,
1978
+ "commonvoice_hours":43.0,
1979
+ "commonvoice_locale":"kln",
1980
+ "in_benchmark":false
1981
+ },
1982
  {
1983
  "bcp_47":"bjn",
1984
  "speakers":4010288,
 
2009
  "language_name":"Makhuwa",
2010
  "autonym":"Emakhuwa",
2011
  "family":"Atlantic-Congo",
2012
+ "flores_path":null,
2013
  "fleurs_tag":null,
2014
  "commonvoice_hours":0.0,
2015
  "commonvoice_locale":"vmw",
2016
+ "in_benchmark":false
2017
  },
2018
  {
2019
  "bcp_47":"glk",
 
2124
  "in_benchmark":true
2125
  },
2126
  {
2127
+ "bcp_47":"lmn",
2128
  "speakers":3580443,
2129
+ "language_name":"Lambadi",
2130
+ "autonym":"Lambadi",
2131
  "family":"Indo-European",
2132
  "flores_path":null,
2133
  "fleurs_tag":null,
 
2136
  "in_benchmark":false
2137
  },
2138
  {
2139
+ "bcp_47":"gbm",
2140
  "speakers":3580443,
2141
+ "language_name":"Garhwali",
2142
+ "autonym":"Garhwali",
2143
  "family":"Indo-European",
2144
  "flores_path":null,
2145
  "fleurs_tag":null,
 
2335
  "family":"Atlantic-Congo",
2336
  "flores_path":null,
2337
  "fleurs_tag":null,
2338
+ "commonvoice_hours":4.5,
2339
  "commonvoice_locale":"bci",
2340
  "in_benchmark":false
2341
  },
 
2352
  "in_benchmark":true
2353
  },
2354
  {
2355
+ "bcp_47":"ibb",
2356
  "speakers":2996392,
2357
+ "language_name":"Ibibio",
2358
+ "autonym":"Ibibio",
2359
  "family":"Atlantic-Congo",
2360
  "flores_path":null,
2361
  "fleurs_tag":null,
2362
+ "commonvoice_hours":5.5,
2363
+ "commonvoice_locale":"ibb",
2364
  "in_benchmark":false
2365
  },
2366
  {
2367
+ "bcp_47":"efi",
2368
  "speakers":2996392,
2369
+ "language_name":"Efik",
2370
+ "autonym":"Efik",
2371
  "family":"Atlantic-Congo",
2372
  "flores_path":null,
2373
  "fleurs_tag":null,
2374
+ "commonvoice_hours":null,
2375
+ "commonvoice_locale":null,
2376
  "in_benchmark":false
2377
  },
2378
  {
 
2544
  "in_benchmark":false
2545
  },
2546
  {
2547
+ "bcp_47":"sck",
2548
  "speakers":2386962,
2549
+ "language_name":"Sadri",
2550
+ "autonym":"Sadri",
2551
+ "family":"Indo-European",
2552
  "flores_path":null,
2553
  "fleurs_tag":null,
2554
  "commonvoice_hours":null,
 
2556
  "in_benchmark":false
2557
  },
2558
  {
2559
+ "bcp_47":"wbq",
2560
  "speakers":2386962,
2561
+ "language_name":"Waddar",
2562
+ "autonym":"Waddar",
2563
+ "family":"Dravidian",
2564
  "flores_path":null,
2565
  "fleurs_tag":null,
2566
  "commonvoice_hours":null,
 
2724
  "in_benchmark":false
2725
  },
2726
  {
2727
+ "bcp_47":"wbr",
2728
  "speakers":1989135,
2729
+ "language_name":"Wagdi",
2730
+ "autonym":"Wagdi",
2731
  "family":"Indo-European",
2732
  "flores_path":null,
2733
  "fleurs_tag":null,
 
2748
  "in_benchmark":false
2749
  },
2750
  {
2751
+ "bcp_47":"khn",
2752
  "speakers":1989135,
2753
+ "language_name":"Khandesi",
2754
+ "autonym":"Khandesi",
2755
  "family":"Indo-European",
2756
  "flores_path":null,
2757
  "fleurs_tag":null,
 
3283
  "family":"Atlantic-Congo",
3284
  "flores_path":null,
3285
  "fleurs_tag":null,
3286
+ "commonvoice_hours":11.0,
3287
  "commonvoice_locale":"bum",
3288
  "in_benchmark":false
3289
  },
 
3535
  "family":null,
3536
  "flores_path":"eus_Latn",
3537
  "fleurs_tag":null,
3538
+ "commonvoice_hours":365.0,
3539
  "commonvoice_locale":"eu",
3540
  "in_benchmark":true
3541
  },
 
3559
  "family":"Abkhaz-Adyge",
3560
  "flores_path":null,
3561
  "fleurs_tag":null,
3562
+ "commonvoice_hours":58.0,
3563
  "commonvoice_locale":"kbd",
3564
  "in_benchmark":false
3565
  },
 
3679
  "family":"Indo-European",
3680
  "flores_path":"ydd_Hebr",
3681
  "fleurs_tag":null,
3682
+ "commonvoice_hours":0.6,
3683
  "commonvoice_locale":"yi",
3684
  "in_benchmark":true
3685
  },
3686
  {
3687
+ "bcp_47":"bez",
3688
  "speakers":995398,
3689
+ "language_name":"Bena",
3690
+ "autonym":"Hibena",
3691
  "family":"Atlantic-Congo",
3692
  "flores_path":null,
3693
  "fleurs_tag":null,
 
3696
  "in_benchmark":false
3697
  },
3698
  {
3699
+ "bcp_47":"ksb",
3700
  "speakers":995398,
3701
+ "language_name":"Shambala",
3702
+ "autonym":"Kishambaa",
3703
  "family":"Atlantic-Congo",
3704
  "flores_path":null,
3705
  "fleurs_tag":null,
 
4512
  "in_benchmark":false
4513
  },
4514
  {
4515
+ "bcp_47":"nhe",
4516
  "speakers":501735,
4517
+ "language_name":"Eastern Huasteca Nahuatl",
4518
+ "autonym":"Eastern Huasteca Nahuatl",
4519
  "family":"Uto-Aztecan",
4520
  "flores_path":null,
4521
  "fleurs_tag":null,
4522
+ "commonvoice_hours":0.0,
4523
+ "commonvoice_locale":"nhe",
4524
  "in_benchmark":false
4525
  },
4526
  {
4527
+ "bcp_47":"nhw",
4528
  "speakers":501735,
4529
+ "language_name":"Western Huasteca Nahuatl",
4530
+ "autonym":"Western Huasteca Nahuatl",
4531
  "family":"Uto-Aztecan",
4532
  "flores_path":null,
4533
  "fleurs_tag":null,
4534
+ "commonvoice_hours":null,
4535
+ "commonvoice_locale":null,
4536
  "in_benchmark":false
4537
  },
4538
  {
 
4651
  "family":"Abkhaz-Adyge",
4652
  "flores_path":null,
4653
  "fleurs_tag":null,
4654
+ "commonvoice_hours":16.0,
4655
  "commonvoice_locale":"ady",
4656
  "in_benchmark":false
4657
  },
 
4675
  "family":"Kartvelian",
4676
  "flores_path":null,
4677
  "fleurs_tag":null,
4678
+ "commonvoice_hours":12.0,
4679
  "commonvoice_locale":"xmf",
4680
  "in_benchmark":false
4681
  },
 
4685
  "language_name":"Erzya",
4686
  "autonym":"Эрзянь Кель",
4687
  "family":"Uralic",
4688
+ "flores_path":null,
4689
  "fleurs_tag":null,
4690
  "commonvoice_hours":3.8,
4691
  "commonvoice_locale":"myv",
4692
+ "in_benchmark":false
4693
  },
4694
  {
4695
  "bcp_47":"dav",
 
4715
  "commonvoice_locale":null,
4716
  "in_benchmark":false
4717
  },
 
 
 
 
 
 
 
 
 
 
 
 
4718
  {
4719
  "bcp_47":"vun",
4720
  "speakers":433291,
 
4739
  "commonvoice_locale":"rof",
4740
  "in_benchmark":false
4741
  },
4742
+ {
4743
+ "bcp_47":"jmc",
4744
+ "speakers":433291,
4745
+ "language_name":"Machame",
4746
+ "autonym":"Kimachame",
4747
+ "family":"Atlantic-Congo",
4748
+ "flores_path":null,
4749
+ "fleurs_tag":null,
4750
+ "commonvoice_hours":null,
4751
+ "commonvoice_locale":null,
4752
+ "in_benchmark":false
4753
+ },
4754
  {
4755
  "bcp_47":"kjg",
4756
  "speakers":431949,
 
5009
  "language_name":"Dargwa",
5010
  "autonym":"Dargwa",
5011
  "family":"Nakh-Daghestanian",
5012
+ "flores_path":null,
5013
  "fleurs_tag":null,
5014
  "commonvoice_hours":0.0,
5015
  "commonvoice_locale":"dar",
5016
+ "in_benchmark":false
5017
  },
5018
  {
5019
  "bcp_47":"lif",
 
5124
  "in_benchmark":false
5125
  },
5126
  {
5127
+ "bcp_47":"bax",
5128
  "speakers":332940,
5129
+ "language_name":"Bamun",
5130
+ "autonym":"Bamun",
5131
  "family":"Atlantic-Congo",
5132
  "flores_path":null,
5133
  "fleurs_tag":null,
5134
+ "commonvoice_hours":11.0,
5135
+ "commonvoice_locale":"bax",
5136
  "in_benchmark":false
5137
  },
5138
  {
5139
+ "bcp_47":"bas",
5140
  "speakers":332940,
5141
+ "language_name":"Basaa",
5142
+ "autonym":"Ɓàsàa",
5143
  "family":"Atlantic-Congo",
5144
  "flores_path":null,
5145
  "fleurs_tag":null,
5146
+ "commonvoice_hours":12.0,
5147
+ "commonvoice_locale":"bas",
5148
  "in_benchmark":false
5149
  },
5150
  {
 
5232
  "in_benchmark":false
5233
  },
5234
  {
5235
+ "bcp_47":"njo",
5236
  "speakers":305001,
5237
+ "language_name":"Ao Naga",
5238
+ "autonym":"Ao Naga",
5239
+ "family":"Sino-Tibetan",
5240
  "flores_path":null,
5241
  "fleurs_tag":null,
5242
  "commonvoice_hours":null,
 
5244
  "in_benchmark":false
5245
  },
5246
  {
5247
+ "bcp_47":"bfq",
5248
  "speakers":305001,
5249
+ "language_name":"Badaga",
5250
+ "autonym":"Badaga",
5251
+ "family":"Dravidian",
5252
  "flores_path":null,
5253
  "fleurs_tag":null,
5254
  "commonvoice_hours":null,
 
5388
  "in_benchmark":false
5389
  },
5390
  {
5391
+ "bcp_47":"khb",
5392
  "speakers":264864,
5393
+ "language_name":"",
5394
+ "autonym":"",
5395
  "family":"Tai-Kadai",
5396
  "flores_path":null,
5397
  "fleurs_tag":null,
 
5400
  "in_benchmark":false
5401
  },
5402
  {
5403
+ "bcp_47":"tdd",
5404
  "speakers":264864,
5405
+ "language_name":"Tai Nüa",
5406
+ "autonym":"Tai Nüa",
5407
  "family":"Tai-Kadai",
5408
  "flores_path":null,
5409
  "fleurs_tag":null,
 
5508
  "in_benchmark":true
5509
  },
5510
  {
5511
+ "bcp_47":"mdr",
5512
  "speakers":245664,
5513
+ "language_name":"Mandar",
5514
+ "autonym":"Mandar",
5515
  "family":"Austronesian",
5516
  "flores_path":null,
5517
  "fleurs_tag":null,
 
5520
  "in_benchmark":false
5521
  },
5522
  {
5523
+ "bcp_47":"sxn",
5524
  "speakers":245664,
5525
+ "language_name":"Sangir",
5526
+ "autonym":"Sangir",
5527
  "family":"Austronesian",
5528
  "flores_path":null,
5529
  "fleurs_tag":null,
 
5753
  "language_name":"Tuvinian",
5754
  "autonym":"Tuvinian",
5755
  "family":"Turkic",
5756
+ "flores_path":null,
5757
  "fleurs_tag":null,
5758
  "commonvoice_hours":0.0,
5759
  "commonvoice_locale":"tyv",
5760
+ "in_benchmark":false
5761
  },
5762
  {
5763
  "bcp_47":"dtp",
 
5904
  "in_benchmark":false
5905
  },
5906
  {
5907
+ "bcp_47":"kkj",
5908
  "speakers":149823,
5909
+ "language_name":"Kako",
5910
+ "autonym":"Kakɔ",
5911
  "family":"Atlantic-Congo",
5912
  "flores_path":null,
5913
  "fleurs_tag":null,
 
5916
  "in_benchmark":false
5917
  },
5918
  {
5919
+ "bcp_47":"bss",
5920
  "speakers":149823,
5921
+ "language_name":"Akoose",
5922
+ "autonym":"Akoose",
5923
  "family":"Atlantic-Congo",
5924
  "flores_path":null,
5925
  "fleurs_tag":null,
 
6223
  "family":"Abkhaz-Adyge",
6224
  "flores_path":null,
6225
  "fleurs_tag":null,
6226
+ "commonvoice_hours":67.0,
6227
  "commonvoice_locale":"ab",
6228
  "in_benchmark":false
6229
  },
 
6821
  "language_name":"Aragonese",
6822
  "autonym":"Aragonés",
6823
  "family":"Indo-European",
6824
+ "flores_path":null,
6825
  "fleurs_tag":null,
6826
  "commonvoice_hours":17.0,
6827
  "commonvoice_locale":"an",
6828
+ "in_benchmark":false
6829
  },
6830
  {
6831
  "bcp_47":"chr",
 
7272
  "in_benchmark":false
7273
  },
7274
  {
7275
+ "bcp_47":"twq",
7276
  "speakers":7970,
7277
+ "language_name":"Tasawaq",
7278
+ "autonym":"Tasawaq Senni",
7279
+ "family":"Songhay",
7280
  "flores_path":null,
7281
  "fleurs_tag":null,
7282
  "commonvoice_hours":null,
 
7284
  "in_benchmark":false
7285
  },
7286
  {
7287
+ "bcp_47":"bku",
7288
  "speakers":7970,
7289
+ "language_name":"Buhid",
7290
+ "autonym":"Buhid",
7291
+ "family":"Austronesian",
7292
  "flores_path":null,
7293
  "fleurs_tag":null,
7294
  "commonvoice_hours":null,
 
7567
  "family":"Atlantic-Congo",
7568
  "flores_path":null,
7569
  "fleurs_tag":null,
7570
+ "commonvoice_hours":2.4,
7571
  "commonvoice_locale":"yav",
7572
  "in_benchmark":false
7573
  },
 
7836
  "in_benchmark":false
7837
  },
7838
  {
7839
+ "bcp_47":"kwk",
7840
  "speakers":377,
7841
+ "language_name":"Kwakʼwala",
7842
+ "autonym":"KwakʼWala",
7843
+ "family":"Wakashan",
7844
  "flores_path":null,
7845
  "fleurs_tag":null,
7846
  "commonvoice_hours":null,
 
7848
  "in_benchmark":false
7849
  },
7850
  {
7851
+ "bcp_47":"crl",
7852
  "speakers":377,
7853
+ "language_name":"Northern East Cree",
7854
+ "autonym":"Northern East Cree",
7855
+ "family":"Algic",
7856
  "flores_path":null,
7857
  "fleurs_tag":null,
7858
  "commonvoice_hours":null,
 
7968
  "in_benchmark":false
7969
  },
7970
  {
7971
+ "bcp_47":"lzh",
7972
  "speakers":0,
7973
+ "language_name":"Literary Chinese",
7974
+ "autonym":"Literary Chinese",
7975
+ "family":"Sino-Tibetan",
7976
  "flores_path":null,
7977
  "fleurs_tag":null,
7978
  "commonvoice_hours":null,
 
7980
  "in_benchmark":false
7981
  },
7982
  {
7983
+ "bcp_47":"io",
7984
  "speakers":0,
7985
+ "language_name":"Ido",
7986
+ "autonym":"Ido",
7987
+ "family":"Artificial Language",
7988
  "flores_path":null,
7989
  "fleurs_tag":null,
7990
  "commonvoice_hours":null,
 
7992
  "in_benchmark":false
7993
  },
7994
  {
7995
+ "bcp_47":"jbo",
7996
  "speakers":0,
7997
+ "language_name":"Lojban",
7998
+ "autonym":"La .Lojban.",
7999
+ "family":"Artificial Language",
8000
  "flores_path":null,
8001
  "fleurs_tag":null,
8002
+ "commonvoice_hours":0.0,
8003
+ "commonvoice_locale":"jbo",
8004
  "in_benchmark":false
8005
  },
8006
  {
8007
+ "bcp_47":"jut",
8008
  "speakers":0,
8009
+ "language_name":"Jutish",
8010
+ "autonym":"Jutish",
8011
  "family":"Indo-European",
8012
  "flores_path":null,
8013
  "fleurs_tag":null,
 
8016
  "in_benchmark":false
8017
  },
8018
  {
8019
+ "bcp_47":"vot",
8020
  "speakers":0,
8021
+ "language_name":"Votic",
8022
+ "autonym":"Votic",
8023
+ "family":"Uralic",
8024
  "flores_path":null,
8025
  "fleurs_tag":null,
8026
+ "commonvoice_hours":0.1,
8027
+ "commonvoice_locale":"vot",
8028
  "in_benchmark":false
8029
  },
8030
  {
8031
+ "bcp_47":"gez",
8032
  "speakers":0,
8033
+ "language_name":"Geez",
8034
+ "autonym":"Geez",
8035
+ "family":"Afro-Asiatic",
8036
  "flores_path":null,
8037
  "fleurs_tag":null,
8038
  "commonvoice_hours":null,
 
8040
  "in_benchmark":false
8041
  },
8042
  {
8043
+ "bcp_47":"osa",
8044
  "speakers":0,
8045
+ "language_name":"Osage",
8046
+ "autonym":"𐓏𐓘𐓻𐓘𐓻𐓟",
8047
+ "family":"Siouan",
8048
  "flores_path":null,
8049
  "fleurs_tag":null,
8050
+ "commonvoice_hours":null,
8051
+ "commonvoice_locale":null,
8052
  "in_benchmark":false
8053
  },
8054
  {
8055
+ "bcp_47":"rgn",
8056
  "speakers":0,
8057
+ "language_name":"Romagnol",
8058
+ "autonym":"Romagnol",
8059
+ "family":"Indo-European",
8060
  "flores_path":null,
8061
  "fleurs_tag":null,
8062
  "commonvoice_hours":null,
 
8064
  "in_benchmark":false
8065
  },
8066
  {
8067
+ "bcp_47":"cu",
8068
  "speakers":0,
8069
+ "language_name":"Church Slavic",
8070
+ "autonym":"Church Slavic",
8071
  "family":"Indo-European",
8072
  "flores_path":null,
8073
  "fleurs_tag":null,
 
8076
  "in_benchmark":false
8077
  },
8078
  {
8079
+ "bcp_47":"sgs",
8080
  "speakers":0,
8081
+ "language_name":"Samogitian",
8082
+ "autonym":"Samogitian",
8083
+ "family":"Indo-European",
8084
  "flores_path":null,
8085
  "fleurs_tag":null,
8086
  "commonvoice_hours":null,
 
8088
  "in_benchmark":false
8089
  },
8090
  {
8091
+ "bcp_47":"ann",
8092
  "speakers":0,
8093
+ "language_name":"Obolo",
8094
+ "autonym":"Obolo",
8095
+ "family":"Atlantic-Congo",
8096
  "flores_path":null,
8097
  "fleurs_tag":null,
8098
  "commonvoice_hours":null,
 
8112
  "in_benchmark":false
8113
  },
8114
  {
8115
+ "bcp_47":"cad",
8116
  "speakers":0,
8117
+ "language_name":"Caddo",
8118
+ "autonym":"Caddo",
8119
+ "family":"Caddoan",
8120
  "flores_path":null,
8121
  "fleurs_tag":null,
8122
+ "commonvoice_hours":null,
8123
+ "commonvoice_locale":null,
8124
  "in_benchmark":false
8125
  },
8126
  {
8127
+ "bcp_47":"pfl",
8128
  "speakers":0,
8129
+ "language_name":"Palatine German",
8130
+ "autonym":"Palatine German",
8131
+ "family":"Indo-European",
8132
  "flores_path":null,
8133
  "fleurs_tag":null,
8134
  "commonvoice_hours":null,
models.json CHANGED
@@ -1,157 +1,102 @@
1
  [
2
  {
3
- "id":"meta-llama\/llama-4-maverick",
4
- "name":"Llama 4 Maverick (free)",
5
- "provider_name":"Meta",
6
- "cost":0.0,
7
- "hf_id":"meta-llama\/Llama-4-Maverick-17B-128E-Instruct",
8
- "size":401583781376.0,
9
- "type":"Open",
10
- "license":"Other",
11
- "creation_date":1743465600000
12
  },
13
  {
14
- "id":"meta-llama\/llama-3.3-70b-instruct",
15
- "name":"Llama 3.3 70B Instruct (free)",
16
- "provider_name":"Meta",
17
  "cost":0.0,
18
- "hf_id":"meta-llama\/Llama-3.3-70B-Instruct",
19
- "size":70553706496.0,
20
  "type":"Open",
21
- "license":"Llama3.3",
22
- "creation_date":1732579200000
23
  },
24
  {
25
- "id":"meta-llama\/llama-3.1-70b-instruct",
26
- "name":"Llama 3.1 70B Instruct",
27
- "provider_name":"Meta",
28
- "cost":0.28,
29
- "hf_id":"meta-llama\/Llama-3.1-70B-Instruct",
30
- "size":70553706496.0,
31
  "type":"Open",
32
- "license":"Llama3.1",
33
- "creation_date":1721088000000
34
  },
35
  {
36
- "id":"meta-llama\/llama-3-70b-instruct",
37
- "name":"Llama 3 70B Instruct",
38
- "provider_name":"Meta",
39
- "cost":0.4,
40
- "hf_id":"meta-llama\/Meta-Llama-3-70B-Instruct",
41
- "size":70553706496.0,
42
  "type":"Open",
43
- "license":"Llama3",
44
- "creation_date":1713312000000
45
- },
46
- {
47
- "id":"openai\/gpt-4.1-mini",
48
- "name":"GPT-4.1 Mini",
49
- "provider_name":"OpenAI",
50
- "cost":1.6,
51
- "hf_id":null,
52
- "size":null,
53
- "type":"Commercial",
54
- "license":null,
55
- "creation_date":1744588800000
56
  },
57
  {
58
- "id":"openai\/gpt-4.1-nano",
59
- "name":"GPT-4.1 Nano",
60
- "provider_name":"OpenAI",
61
  "cost":0.4,
62
  "hf_id":null,
63
  "size":null,
64
  "type":"Commercial",
65
  "license":null,
66
- "creation_date":1744588800000
67
- },
68
- {
69
- "id":"openai\/gpt-4o-mini",
70
- "name":"GPT-4o-mini",
71
- "provider_name":"OpenAI",
72
- "cost":0.6,
73
- "hf_id":null,
74
- "size":null,
75
- "type":"Commercial",
76
- "license":null,
77
- "creation_date":1721260800000
78
- },
79
- {
80
- "id":"openai\/gpt-3.5-turbo-0613",
81
- "name":"GPT-3.5 Turbo (older v0613)",
82
- "provider_name":"OpenAI",
83
- "cost":2.0,
84
- "hf_id":null,
85
- "size":null,
86
- "type":"Commercial",
87
- "license":null,
88
- "creation_date":1706140800000
89
  },
90
  {
91
- "id":"openai\/gpt-3.5-turbo",
92
- "name":"GPT-3.5 Turbo",
93
- "provider_name":"OpenAI",
94
- "cost":1.5,
95
  "hf_id":null,
96
  "size":null,
97
  "type":"Commercial",
98
  "license":null,
99
- "creation_date":1685232000000
100
- },
101
- {
102
- "id":"mistralai\/mistral-small-3.1-24b-instruct",
103
- "name":"Mistral Small 3.1 24B (free)",
104
- "provider_name":"Mistral",
105
- "cost":0.0,
106
- "hf_id":"mistralai\/Mistral-Small-3.1-24B-Instruct-2503",
107
- "size":24011361280.0,
108
- "type":"Open",
109
- "license":"Apache 2.0",
110
- "creation_date":1741651200000
111
  },
112
  {
113
- "id":"mistralai\/mistral-saba",
114
- "name":"Saba",
115
- "provider_name":"Mistral",
116
  "cost":0.6,
117
  "hf_id":null,
118
  "size":null,
119
  "type":"Commercial",
120
  "license":null,
121
- "creation_date":1739750400000
122
- },
123
- {
124
- "id":"mistralai\/mistral-nemo",
125
- "name":"Mistral Nemo (free)",
126
- "provider_name":"Mistral",
127
- "cost":0.0,
128
- "hf_id":"mistralai\/Mistral-Nemo-Instruct-2407",
129
- "size":12247782400.0,
130
- "type":"Open",
131
- "license":"Apache 2.0",
132
- "creation_date":1721174400000
133
  },
134
  {
135
- "id":"google\/gemini-2.5-flash-preview",
136
- "name":"Gemini 2.5 Flash Preview",
137
  "provider_name":"Google",
138
- "cost":0.6,
139
  "hf_id":null,
140
  "size":null,
141
  "type":"Commercial",
142
  "license":null,
143
- "creation_date":1744848000000
144
  },
145
  {
146
- "id":"google\/gemini-2.0-flash-lite-001",
147
- "name":"Gemini 2.0 Flash Lite",
148
  "provider_name":"Google",
149
- "cost":0.3,
150
  "hf_id":null,
151
  "size":null,
152
  "type":"Commercial",
153
  "license":null,
154
- "creation_date":1740441600000
155
  },
156
  {
157
  "id":"google\/gemma-3-27b-it",
@@ -165,26 +110,48 @@
165
  "creation_date":1740787200000
166
  },
167
  {
168
- "id":"deepseek\/deepseek-chat-v3-0324",
169
- "name":"DeepSeek V3 0324 (free)",
170
- "provider_name":"DeepSeek",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
  "cost":0.0,
172
- "hf_id":"deepseek-ai\/DeepSeek-V3-0324",
173
- "size":684531386000.0,
174
  "type":"Open",
175
- "license":"Mit",
176
- "creation_date":1742774400000
177
  },
178
  {
179
- "id":"deepseek\/deepseek-chat",
180
- "name":"DeepSeek V3 (free)",
181
- "provider_name":"DeepSeek",
182
  "cost":0.0,
183
- "hf_id":"deepseek-ai\/DeepSeek-V3",
184
- "size":684531386000.0,
185
  "type":"Open",
186
- "license":"",
187
- "creation_date":1735084800000
188
  },
189
  {
190
  "id":"microsoft\/phi-4",
@@ -209,14 +176,91 @@
209
  "creation_date":1740355200000
210
  },
211
  {
212
- "id":"amazon\/nova-micro-v1",
213
- "name":"Nova Micro 1.0",
214
- "provider_name":"Amazon",
215
- "cost":0.14,
 
 
 
 
 
 
 
 
 
 
 
216
  "hf_id":null,
217
  "size":null,
218
  "type":"Commercial",
219
  "license":null,
220
- "creation_date":1733356800000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
  }
222
  ]
 
1
  [
2
  {
3
+ "id":"amazon\/nova-micro-v1",
4
+ "name":"Nova Micro 1.0",
5
+ "provider_name":"Amazon",
6
+ "cost":0.14,
7
+ "hf_id":null,
8
+ "size":null,
9
+ "type":"Commercial",
10
+ "license":null,
11
+ "creation_date":1733356800000
12
  },
13
  {
14
+ "id":"deepseek\/deepseek-chat",
15
+ "name":"DeepSeek V3 (free)",
16
+ "provider_name":"DeepSeek",
17
  "cost":0.0,
18
+ "hf_id":"deepseek-ai\/DeepSeek-V3",
19
+ "size":684531386000.0,
20
  "type":"Open",
21
+ "license":"",
22
+ "creation_date":1735084800000
23
  },
24
  {
25
+ "id":"deepseek\/deepseek-chat-v3-0324",
26
+ "name":"DeepSeek V3 0324 (free)",
27
+ "provider_name":"DeepSeek",
28
+ "cost":0.0,
29
+ "hf_id":"deepseek-ai\/DeepSeek-V3-0324",
30
+ "size":684531386000.0,
31
  "type":"Open",
32
+ "license":"Mit",
33
+ "creation_date":1742774400000
34
  },
35
  {
36
+ "id":"deepseek\/deepseek-r1",
37
+ "name":"R1 (free)",
38
+ "provider_name":"DeepSeek",
39
+ "cost":0.0,
40
+ "hf_id":"deepseek-ai\/DeepSeek-R1",
41
+ "size":684531386000.0,
42
  "type":"Open",
43
+ "license":"Mit",
44
+ "creation_date":1737331200000
 
 
 
 
 
 
 
 
 
 
 
45
  },
46
  {
47
+ "id":"google\/gemini-2.0-flash-001",
48
+ "name":"Gemini 2.0 Flash",
49
+ "provider_name":"Google",
50
  "cost":0.4,
51
  "hf_id":null,
52
  "size":null,
53
  "type":"Commercial",
54
  "license":null,
55
+ "creation_date":1738713600000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  },
57
  {
58
+ "id":"google\/gemini-2.0-flash-lite-001",
59
+ "name":"Gemini 2.0 Flash Lite",
60
+ "provider_name":"Google",
61
+ "cost":0.3,
62
  "hf_id":null,
63
  "size":null,
64
  "type":"Commercial",
65
  "license":null,
66
+ "creation_date":1740441600000
 
 
 
 
 
 
 
 
 
 
 
67
  },
68
  {
69
+ "id":"google\/gemini-2.5-flash-preview",
70
+ "name":"Gemini 2.5 Flash Preview",
71
+ "provider_name":"Google",
72
  "cost":0.6,
73
  "hf_id":null,
74
  "size":null,
75
  "type":"Commercial",
76
  "license":null,
77
+ "creation_date":1744848000000
 
 
 
 
 
 
 
 
 
 
 
78
  },
79
  {
80
+ "id":"google\/gemini-flash-1.5",
81
+ "name":"Gemini 1.5 Flash ",
82
  "provider_name":"Google",
83
+ "cost":0.3,
84
  "hf_id":null,
85
  "size":null,
86
  "type":"Commercial",
87
  "license":null,
88
+ "creation_date":1715644800000
89
  },
90
  {
91
+ "id":"google\/gemini-flash-1.5-8b",
92
+ "name":"Gemini 1.5 Flash 8B",
93
  "provider_name":"Google",
94
+ "cost":0.15,
95
  "hf_id":null,
96
  "size":null,
97
  "type":"Commercial",
98
  "license":null,
99
+ "creation_date":1727913600000
100
  },
101
  {
102
  "id":"google\/gemma-3-27b-it",
 
110
  "creation_date":1740787200000
111
  },
112
  {
113
+ "id":"meta-llama\/llama-3-70b-instruct",
114
+ "name":"Llama 3 70B Instruct",
115
+ "provider_name":"Meta",
116
+ "cost":0.4,
117
+ "hf_id":"meta-llama\/Meta-Llama-3-70B-Instruct",
118
+ "size":70553706496.0,
119
+ "type":"Open",
120
+ "license":"Llama3",
121
+ "creation_date":1713312000000
122
+ },
123
+ {
124
+ "id":"meta-llama\/llama-3.1-70b-instruct",
125
+ "name":"Llama 3.1 70B Instruct",
126
+ "provider_name":"Meta",
127
+ "cost":0.28,
128
+ "hf_id":"meta-llama\/Llama-3.1-70B-Instruct",
129
+ "size":70553706496.0,
130
+ "type":"Open",
131
+ "license":"Llama3.1",
132
+ "creation_date":1721088000000
133
+ },
134
+ {
135
+ "id":"meta-llama\/llama-3.3-70b-instruct",
136
+ "name":"Llama 3.3 70B Instruct (free)",
137
+ "provider_name":"Meta",
138
  "cost":0.0,
139
+ "hf_id":"meta-llama\/Llama-3.3-70B-Instruct",
140
+ "size":70553706496.0,
141
  "type":"Open",
142
+ "license":"Llama3.3",
143
+ "creation_date":1732579200000
144
  },
145
  {
146
+ "id":"meta-llama\/llama-4-maverick",
147
+ "name":"Llama 4 Maverick (free)",
148
+ "provider_name":"Meta",
149
  "cost":0.0,
150
+ "hf_id":"meta-llama\/Llama-4-Maverick-17B-128E-Instruct",
151
+ "size":401583781376.0,
152
  "type":"Open",
153
+ "license":"Other",
154
+ "creation_date":1743465600000
155
  },
156
  {
157
  "id":"microsoft\/phi-4",
 
176
  "creation_date":1740355200000
177
  },
178
  {
179
+ "id":"mistralai\/mistral-nemo",
180
+ "name":"Mistral Nemo (free)",
181
+ "provider_name":"Mistral",
182
+ "cost":0.0,
183
+ "hf_id":"mistralai\/Mistral-Nemo-Instruct-2407",
184
+ "size":12247782400.0,
185
+ "type":"Open",
186
+ "license":"Apache 2.0",
187
+ "creation_date":1721174400000
188
+ },
189
+ {
190
+ "id":"mistralai\/mistral-saba",
191
+ "name":"Saba",
192
+ "provider_name":"Mistral",
193
+ "cost":0.6,
194
  "hf_id":null,
195
  "size":null,
196
  "type":"Commercial",
197
  "license":null,
198
+ "creation_date":1739750400000
199
+ },
200
+ {
201
+ "id":"mistralai\/mistral-small-3.1-24b-instruct",
202
+ "name":"Mistral Small 3.1 24B (free)",
203
+ "provider_name":"Mistral",
204
+ "cost":0.0,
205
+ "hf_id":"mistralai\/Mistral-Small-3.1-24B-Instruct-2503",
206
+ "size":24011361280.0,
207
+ "type":"Open",
208
+ "license":"Apache 2.0",
209
+ "creation_date":1741651200000
210
+ },
211
+ {
212
+ "id":"openai\/gpt-3.5-turbo",
213
+ "name":"GPT-3.5 Turbo",
214
+ "provider_name":"OpenAI",
215
+ "cost":1.5,
216
+ "hf_id":null,
217
+ "size":null,
218
+ "type":"Commercial",
219
+ "license":null,
220
+ "creation_date":1685232000000
221
+ },
222
+ {
223
+ "id":"openai\/gpt-3.5-turbo-0613",
224
+ "name":"GPT-3.5 Turbo (older v0613)",
225
+ "provider_name":"OpenAI",
226
+ "cost":2.0,
227
+ "hf_id":null,
228
+ "size":null,
229
+ "type":"Commercial",
230
+ "license":null,
231
+ "creation_date":1706140800000
232
+ },
233
+ {
234
+ "id":"openai\/gpt-4.1-mini",
235
+ "name":"GPT-4.1 Mini",
236
+ "provider_name":"OpenAI",
237
+ "cost":1.6,
238
+ "hf_id":null,
239
+ "size":null,
240
+ "type":"Commercial",
241
+ "license":null,
242
+ "creation_date":1744588800000
243
+ },
244
+ {
245
+ "id":"openai\/gpt-4.1-nano",
246
+ "name":"GPT-4.1 Nano",
247
+ "provider_name":"OpenAI",
248
+ "cost":0.4,
249
+ "hf_id":null,
250
+ "size":null,
251
+ "type":"Commercial",
252
+ "license":null,
253
+ "creation_date":1744588800000
254
+ },
255
+ {
256
+ "id":"openai\/gpt-4o-mini",
257
+ "name":"GPT-4o-mini",
258
+ "provider_name":"OpenAI",
259
+ "cost":0.6,
260
+ "hf_id":null,
261
+ "size":null,
262
+ "type":"Commercial",
263
+ "license":null,
264
+ "creation_date":1721260800000
265
  }
266
  ]
results.json CHANGED
The diff for this file is too large to render. See raw diff