davidpomerenke commited on
Commit
52abc5b
·
verified ·
1 Parent(s): 4a34e67

Upload from GitHub Actions: More results

Browse files
Files changed (6) hide show
  1. evals/languages.py +1 -1
  2. evals/main.py +2 -2
  3. evals/models.py +6 -2
  4. languages.json +133 -133
  5. models.json +0 -99
  6. results.json +0 -0
evals/languages.py CHANGED
@@ -56,4 +56,4 @@ languages = pd.merge(languages, flores, on="bcp_47", how="left")
56
  languages = pd.merge(languages, fleurs, on="bcp_47", how="left")
57
  languages = pd.merge(languages, commonvoice, on="bcp_47", how="left")
58
  languages["in_benchmark"] = languages["bcp_47"].isin(flores["bcp_47"])
59
- languages = languages.sort_values(by="speakers", ascending=False)
 
56
  languages = pd.merge(languages, fleurs, on="bcp_47", how="left")
57
  languages = pd.merge(languages, commonvoice, on="bcp_47", how="left")
58
  languages["in_benchmark"] = languages["bcp_47"].isin(flores["bcp_47"])
59
+ languages = languages.sort_values(by=["speakers", "language_name"], ascending=False)
evals/main.py CHANGED
@@ -9,8 +9,8 @@ from tqdm.asyncio import tqdm_asyncio
9
  # ===== config =====
10
 
11
  n_sentences = 10
12
- n_languages = 18
13
- n_models = 22
14
 
15
  # ===== run evaluation and aggregate results =====
16
 
 
9
  # ===== config =====
10
 
11
  n_sentences = 10
12
+ n_languages = 20
13
+ n_models = 35
14
 
15
  # ===== run evaluation and aggregate results =====
16
 
evals/models.py CHANGED
@@ -45,6 +45,10 @@ important_models = [
45
  "amazon/nova-micro-v1", # 0.09$
46
  ]
47
 
 
 
 
 
48
  transcription_models = [
49
  "elevenlabs/scribe_v1",
50
  "openai/whisper-large-v3",
@@ -200,11 +204,11 @@ def get_cost(row):
200
  @cache
201
  def load_models(date: date):
202
  popular_models = (
203
- get_historical_popular_models(date.today())[:30]
204
  + get_current_popular_models(date.today())[:10]
205
  )
206
  popular_models = [m["slug"] for m in popular_models]
207
- models = set(important_models + popular_models)
208
  models = pd.DataFrame(sorted(list(models)), columns=["id"])
209
  or_metadata = models["id"].apply(get_or_metadata)
210
  hf_metadata = or_metadata.apply(get_hf_metadata)
 
45
  "amazon/nova-micro-v1", # 0.09$
46
  ]
47
 
48
+ blocklist = [
49
+ "microsoft/wizardlm-2-8x22b" # temporarily rate-limited
50
+ ]
51
+
52
  transcription_models = [
53
  "elevenlabs/scribe_v1",
54
  "openai/whisper-large-v3",
 
204
  @cache
205
  def load_models(date: date):
206
  popular_models = (
207
+ get_historical_popular_models(date.today())[:20]
208
  + get_current_popular_models(date.today())[:10]
209
  )
210
  popular_models = [m["slug"] for m in popular_models]
211
+ models = set(important_models + popular_models) - set(blocklist)
212
  models = pd.DataFrame(sorted(list(models)), columns=["id"])
213
  or_metadata = models["id"].apply(get_or_metadata)
214
  hf_metadata = or_metadata.apply(get_hf_metadata)
languages.json CHANGED
@@ -319,7 +319,7 @@
319
  "family":"Indo-European",
320
  "flores_path":"ita_Latn",
321
  "fleurs_tag":"it_it",
322
- "commonvoice_hours":363.0,
323
  "commonvoice_locale":"it",
324
  "in_benchmark":true
325
  },
@@ -864,28 +864,28 @@
864
  "in_benchmark":false
865
  },
866
  {
867
- "bcp_47":"mag",
868
  "speakers":15913080,
869
- "language_name":"Magahi",
870
- "autonym":"Magahi",
871
  "family":"Indo-European",
872
- "flores_path":"mag_Deva",
873
  "fleurs_tag":null,
874
  "commonvoice_hours":null,
875
  "commonvoice_locale":null,
876
- "in_benchmark":true
877
  },
878
  {
879
- "bcp_47":"mwr",
880
  "speakers":15913080,
881
- "language_name":"Marwari",
882
- "autonym":"Marwari",
883
  "family":"Indo-European",
884
- "flores_path":null,
885
  "fleurs_tag":null,
886
  "commonvoice_hours":null,
887
  "commonvoice_locale":null,
888
- "in_benchmark":false
889
  },
890
  {
891
  "bcp_47":"bgc",
@@ -1183,7 +1183,7 @@
1183
  "family":"Indo-European",
1184
  "flores_path":"bel_Cyrl",
1185
  "fleurs_tag":"be_by",
1186
- "commonvoice_hours":1808.0,
1187
  "commonvoice_locale":"be",
1188
  "in_benchmark":true
1189
  },
@@ -1195,7 +1195,7 @@
1195
  "family":"Atlantic-Congo",
1196
  "flores_path":"lua_Latn",
1197
  "fleurs_tag":null,
1198
- "commonvoice_hours":2.8,
1199
  "commonvoice_locale":"lua",
1200
  "in_benchmark":true
1201
  },
@@ -1955,18 +1955,6 @@
1955
  "commonvoice_locale":"gom",
1956
  "in_benchmark":true
1957
  },
1958
- {
1959
- "bcp_47":"kln",
1960
- "speakers":4068120,
1961
- "language_name":"Kalenjin",
1962
- "autonym":"Kalenjin",
1963
- "family":"Nilotic",
1964
- "flores_path":null,
1965
- "fleurs_tag":null,
1966
- "commonvoice_hours":43.0,
1967
- "commonvoice_locale":"kln",
1968
- "in_benchmark":false
1969
- },
1970
  {
1971
  "bcp_47":"kam",
1972
  "speakers":4068120,
@@ -1979,6 +1967,18 @@
1979
  "commonvoice_locale":"kam",
1980
  "in_benchmark":true
1981
  },
 
 
 
 
 
 
 
 
 
 
 
 
1982
  {
1983
  "bcp_47":"bjn",
1984
  "speakers":4010288,
@@ -2124,10 +2124,10 @@
2124
  "in_benchmark":true
2125
  },
2126
  {
2127
- "bcp_47":"gbm",
2128
  "speakers":3580443,
2129
- "language_name":"Garhwali",
2130
- "autonym":"Garhwali",
2131
  "family":"Indo-European",
2132
  "flores_path":null,
2133
  "fleurs_tag":null,
@@ -2136,10 +2136,10 @@
2136
  "in_benchmark":false
2137
  },
2138
  {
2139
- "bcp_47":"lmn",
2140
  "speakers":3580443,
2141
- "language_name":"Lambadi",
2142
- "autonym":"Lambadi",
2143
  "family":"Indo-European",
2144
  "flores_path":null,
2145
  "fleurs_tag":null,
@@ -2352,27 +2352,27 @@
2352
  "in_benchmark":true
2353
  },
2354
  {
2355
- "bcp_47":"efi",
2356
  "speakers":2996392,
2357
- "language_name":"Efik",
2358
- "autonym":"Efik",
2359
  "family":"Atlantic-Congo",
2360
  "flores_path":null,
2361
  "fleurs_tag":null,
2362
- "commonvoice_hours":null,
2363
- "commonvoice_locale":null,
2364
  "in_benchmark":false
2365
  },
2366
  {
2367
- "bcp_47":"ibb",
2368
  "speakers":2996392,
2369
- "language_name":"Ibibio",
2370
- "autonym":"Ibibio",
2371
  "family":"Atlantic-Congo",
2372
  "flores_path":null,
2373
  "fleurs_tag":null,
2374
- "commonvoice_hours":11.0,
2375
- "commonvoice_locale":"ibb",
2376
  "in_benchmark":false
2377
  },
2378
  {
@@ -2484,10 +2484,10 @@
2484
  "in_benchmark":false
2485
  },
2486
  {
2487
- "bcp_47":"pam",
2488
  "speakers":2511163,
2489
- "language_name":"Pampanga",
2490
- "autonym":"Pampanga",
2491
  "family":"Austronesian",
2492
  "flores_path":null,
2493
  "fleurs_tag":null,
@@ -2496,10 +2496,10 @@
2496
  "in_benchmark":false
2497
  },
2498
  {
2499
- "bcp_47":"fbl",
2500
  "speakers":2511163,
2501
- "language_name":"West Albay Bikol",
2502
- "autonym":"West Albay Bikol",
2503
  "family":"Austronesian",
2504
  "flores_path":null,
2505
  "fleurs_tag":null,
@@ -2724,10 +2724,10 @@
2724
  "in_benchmark":false
2725
  },
2726
  {
2727
- "bcp_47":"khn",
2728
  "speakers":1989135,
2729
- "language_name":"Khandesi",
2730
- "autonym":"Khandesi",
2731
  "family":"Indo-European",
2732
  "flores_path":null,
2733
  "fleurs_tag":null,
@@ -2748,10 +2748,10 @@
2748
  "in_benchmark":false
2749
  },
2750
  {
2751
- "bcp_47":"wbr",
2752
  "speakers":1989135,
2753
- "language_name":"Wagdi",
2754
- "autonym":"Wagdi",
2755
  "family":"Indo-European",
2756
  "flores_path":null,
2757
  "fleurs_tag":null,
@@ -3535,7 +3535,7 @@
3535
  "family":null,
3536
  "flores_path":"eus_Latn",
3537
  "fleurs_tag":null,
3538
- "commonvoice_hours":380.0,
3539
  "commonvoice_locale":"eu",
3540
  "in_benchmark":true
3541
  },
@@ -3559,7 +3559,7 @@
3559
  "family":"Abkhaz-Adyge",
3560
  "flores_path":null,
3561
  "fleurs_tag":null,
3562
- "commonvoice_hours":63.0,
3563
  "commonvoice_locale":"kbd",
3564
  "in_benchmark":false
3565
  },
@@ -4715,18 +4715,6 @@
4715
  "commonvoice_locale":null,
4716
  "in_benchmark":false
4717
  },
4718
- {
4719
- "bcp_47":"jmc",
4720
- "speakers":433291,
4721
- "language_name":"Machame",
4722
- "autonym":"Kimachame",
4723
- "family":"Atlantic-Congo",
4724
- "flores_path":null,
4725
- "fleurs_tag":null,
4726
- "commonvoice_hours":null,
4727
- "commonvoice_locale":null,
4728
- "in_benchmark":false
4729
- },
4730
  {
4731
  "bcp_47":"vun",
4732
  "speakers":433291,
@@ -4747,10 +4735,22 @@
4747
  "family":"Atlantic-Congo",
4748
  "flores_path":null,
4749
  "fleurs_tag":null,
4750
- "commonvoice_hours":2.7,
4751
  "commonvoice_locale":"rof",
4752
  "in_benchmark":false
4753
  },
 
 
 
 
 
 
 
 
 
 
 
 
4754
  {
4755
  "bcp_47":"kjg",
4756
  "speakers":431949,
@@ -5904,10 +5904,10 @@
5904
  "in_benchmark":false
5905
  },
5906
  {
5907
- "bcp_47":"bss",
5908
  "speakers":149823,
5909
- "language_name":"Akoose",
5910
- "autonym":"Akoose",
5911
  "family":"Atlantic-Congo",
5912
  "flores_path":null,
5913
  "fleurs_tag":null,
@@ -5916,10 +5916,10 @@
5916
  "in_benchmark":false
5917
  },
5918
  {
5919
- "bcp_47":"kkj",
5920
  "speakers":149823,
5921
- "language_name":"Kako",
5922
- "autonym":"Kakɔ",
5923
  "family":"Atlantic-Congo",
5924
  "flores_path":null,
5925
  "fleurs_tag":null,
@@ -6240,11 +6240,11 @@
6240
  "in_benchmark":false
6241
  },
6242
  {
6243
- "bcp_47":"iu",
6244
  "speakers":90466,
6245
- "language_name":"Inuktitut",
6246
- "autonym":"Inuktitut",
6247
- "family":"Eskimo-Aleut",
6248
  "flores_path":null,
6249
  "fleurs_tag":null,
6250
  "commonvoice_hours":null,
@@ -6252,11 +6252,11 @@
6252
  "in_benchmark":false
6253
  },
6254
  {
6255
- "bcp_47":"pdt",
6256
  "speakers":90466,
6257
- "language_name":"Plautdietsch",
6258
- "autonym":"Plautdietsch",
6259
- "family":"Indo-European",
6260
  "flores_path":null,
6261
  "fleurs_tag":null,
6262
  "commonvoice_hours":null,
@@ -6367,7 +6367,7 @@
6367
  "family":"Indo-European",
6368
  "flores_path":null,
6369
  "fleurs_tag":null,
6370
- "commonvoice_hours":5.4,
6371
  "commonvoice_locale":"btv",
6372
  "in_benchmark":false
6373
  },
@@ -7247,6 +7247,18 @@
7247
  "commonvoice_locale":null,
7248
  "in_benchmark":false
7249
  },
 
 
 
 
 
 
 
 
 
 
 
 
7250
  {
7251
  "bcp_47":"ik",
7252
  "speakers":7983,
@@ -7260,11 +7272,11 @@
7260
  "in_benchmark":false
7261
  },
7262
  {
7263
- "bcp_47":"gbz",
7264
- "speakers":7983,
7265
- "language_name":"Zoroastrian Dari",
7266
- "autonym":"Zoroastrian Dari",
7267
- "family":"Indo-European",
7268
  "flores_path":null,
7269
  "fleurs_tag":null,
7270
  "commonvoice_hours":null,
@@ -7283,18 +7295,6 @@
7283
  "commonvoice_locale":null,
7284
  "in_benchmark":false
7285
  },
7286
- {
7287
- "bcp_47":"twq",
7288
- "speakers":7970,
7289
- "language_name":"Tasawaq",
7290
- "autonym":"Tasawaq Senni",
7291
- "family":"Songhay",
7292
- "flores_path":null,
7293
- "fleurs_tag":null,
7294
- "commonvoice_hours":null,
7295
- "commonvoice_locale":null,
7296
- "in_benchmark":false
7297
- },
7298
  {
7299
  "bcp_47":"mic",
7300
  "speakers":7916,
@@ -7567,7 +7567,7 @@
7567
  "family":"Atlantic-Congo",
7568
  "flores_path":null,
7569
  "fleurs_tag":null,
7570
- "commonvoice_hours":2.4,
7571
  "commonvoice_locale":"yav",
7572
  "in_benchmark":false
7573
  },
@@ -7967,6 +7967,18 @@
7967
  "commonvoice_locale":"ie",
7968
  "in_benchmark":false
7969
  },
 
 
 
 
 
 
 
 
 
 
 
 
7970
  {
7971
  "bcp_47":"sgs",
7972
  "speakers":0,
@@ -7991,18 +8003,6 @@
7991
  "commonvoice_locale":null,
7992
  "in_benchmark":false
7993
  },
7994
- {
7995
- "bcp_47":"ann",
7996
- "speakers":0,
7997
- "language_name":"Obolo",
7998
- "autonym":"Obolo",
7999
- "family":"Atlantic-Congo",
8000
- "flores_path":null,
8001
- "fleurs_tag":null,
8002
- "commonvoice_hours":null,
8003
- "commonvoice_locale":null,
8004
- "in_benchmark":false
8005
- },
8006
  {
8007
  "bcp_47":"pfl",
8008
  "speakers":0,
@@ -8028,11 +8028,11 @@
8028
  "in_benchmark":false
8029
  },
8030
  {
8031
- "bcp_47":"lzh",
8032
  "speakers":0,
8033
- "language_name":"Literary Chinese",
8034
- "autonym":"Literary Chinese",
8035
- "family":"Sino-Tibetan",
8036
  "flores_path":null,
8037
  "fleurs_tag":null,
8038
  "commonvoice_hours":null,
@@ -8052,11 +8052,11 @@
8052
  "in_benchmark":false
8053
  },
8054
  {
8055
- "bcp_47":"io",
8056
  "speakers":0,
8057
- "language_name":"Ido",
8058
- "autonym":"Ido",
8059
- "family":"Artificial Language",
8060
  "flores_path":null,
8061
  "fleurs_tag":null,
8062
  "commonvoice_hours":null,
@@ -8075,6 +8075,18 @@
8075
  "commonvoice_locale":null,
8076
  "in_benchmark":false
8077
  },
 
 
 
 
 
 
 
 
 
 
 
 
8078
  {
8079
  "bcp_47":"gez",
8080
  "speakers":0,
@@ -8111,18 +8123,6 @@
8111
  "commonvoice_locale":null,
8112
  "in_benchmark":false
8113
  },
8114
- {
8115
- "bcp_47":"vot",
8116
- "speakers":0,
8117
- "language_name":"Votic",
8118
- "autonym":"Votic",
8119
- "family":"Uralic",
8120
- "flores_path":null,
8121
- "fleurs_tag":null,
8122
- "commonvoice_hours":0.1,
8123
- "commonvoice_locale":"vot",
8124
- "in_benchmark":false
8125
- },
8126
  {
8127
  "bcp_47":"cad",
8128
  "speakers":0,
 
319
  "family":"Indo-European",
320
  "flores_path":"ita_Latn",
321
  "fleurs_tag":"it_it",
322
+ "commonvoice_hours":362.0,
323
  "commonvoice_locale":"it",
324
  "in_benchmark":true
325
  },
 
864
  "in_benchmark":false
865
  },
866
  {
867
+ "bcp_47":"mwr",
868
  "speakers":15913080,
869
+ "language_name":"Marwari",
870
+ "autonym":"Marwari",
871
  "family":"Indo-European",
872
+ "flores_path":null,
873
  "fleurs_tag":null,
874
  "commonvoice_hours":null,
875
  "commonvoice_locale":null,
876
+ "in_benchmark":false
877
  },
878
  {
879
+ "bcp_47":"mag",
880
  "speakers":15913080,
881
+ "language_name":"Magahi",
882
+ "autonym":"Magahi",
883
  "family":"Indo-European",
884
+ "flores_path":"mag_Deva",
885
  "fleurs_tag":null,
886
  "commonvoice_hours":null,
887
  "commonvoice_locale":null,
888
+ "in_benchmark":true
889
  },
890
  {
891
  "bcp_47":"bgc",
 
1183
  "family":"Indo-European",
1184
  "flores_path":"bel_Cyrl",
1185
  "fleurs_tag":"be_by",
1186
+ "commonvoice_hours":1807.0,
1187
  "commonvoice_locale":"be",
1188
  "in_benchmark":true
1189
  },
 
1195
  "family":"Atlantic-Congo",
1196
  "flores_path":"lua_Latn",
1197
  "fleurs_tag":null,
1198
+ "commonvoice_hours":2.2,
1199
  "commonvoice_locale":"lua",
1200
  "in_benchmark":true
1201
  },
 
1955
  "commonvoice_locale":"gom",
1956
  "in_benchmark":true
1957
  },
 
 
 
 
 
 
 
 
 
 
 
 
1958
  {
1959
  "bcp_47":"kam",
1960
  "speakers":4068120,
 
1967
  "commonvoice_locale":"kam",
1968
  "in_benchmark":true
1969
  },
1970
+ {
1971
+ "bcp_47":"kln",
1972
+ "speakers":4068120,
1973
+ "language_name":"Kalenjin",
1974
+ "autonym":"Kalenjin",
1975
+ "family":"Nilotic",
1976
+ "flores_path":null,
1977
+ "fleurs_tag":null,
1978
+ "commonvoice_hours":43.0,
1979
+ "commonvoice_locale":"kln",
1980
+ "in_benchmark":false
1981
+ },
1982
  {
1983
  "bcp_47":"bjn",
1984
  "speakers":4010288,
 
2124
  "in_benchmark":true
2125
  },
2126
  {
2127
+ "bcp_47":"lmn",
2128
  "speakers":3580443,
2129
+ "language_name":"Lambadi",
2130
+ "autonym":"Lambadi",
2131
  "family":"Indo-European",
2132
  "flores_path":null,
2133
  "fleurs_tag":null,
 
2136
  "in_benchmark":false
2137
  },
2138
  {
2139
+ "bcp_47":"gbm",
2140
  "speakers":3580443,
2141
+ "language_name":"Garhwali",
2142
+ "autonym":"Garhwali",
2143
  "family":"Indo-European",
2144
  "flores_path":null,
2145
  "fleurs_tag":null,
 
2352
  "in_benchmark":true
2353
  },
2354
  {
2355
+ "bcp_47":"ibb",
2356
  "speakers":2996392,
2357
+ "language_name":"Ibibio",
2358
+ "autonym":"Ibibio",
2359
  "family":"Atlantic-Congo",
2360
  "flores_path":null,
2361
  "fleurs_tag":null,
2362
+ "commonvoice_hours":11.0,
2363
+ "commonvoice_locale":"ibb",
2364
  "in_benchmark":false
2365
  },
2366
  {
2367
+ "bcp_47":"efi",
2368
  "speakers":2996392,
2369
+ "language_name":"Efik",
2370
+ "autonym":"Efik",
2371
  "family":"Atlantic-Congo",
2372
  "flores_path":null,
2373
  "fleurs_tag":null,
2374
+ "commonvoice_hours":null,
2375
+ "commonvoice_locale":null,
2376
  "in_benchmark":false
2377
  },
2378
  {
 
2484
  "in_benchmark":false
2485
  },
2486
  {
2487
+ "bcp_47":"fbl",
2488
  "speakers":2511163,
2489
+ "language_name":"West Albay Bikol",
2490
+ "autonym":"West Albay Bikol",
2491
  "family":"Austronesian",
2492
  "flores_path":null,
2493
  "fleurs_tag":null,
 
2496
  "in_benchmark":false
2497
  },
2498
  {
2499
+ "bcp_47":"pam",
2500
  "speakers":2511163,
2501
+ "language_name":"Pampanga",
2502
+ "autonym":"Pampanga",
2503
  "family":"Austronesian",
2504
  "flores_path":null,
2505
  "fleurs_tag":null,
 
2724
  "in_benchmark":false
2725
  },
2726
  {
2727
+ "bcp_47":"wbr",
2728
  "speakers":1989135,
2729
+ "language_name":"Wagdi",
2730
+ "autonym":"Wagdi",
2731
  "family":"Indo-European",
2732
  "flores_path":null,
2733
  "fleurs_tag":null,
 
2748
  "in_benchmark":false
2749
  },
2750
  {
2751
+ "bcp_47":"khn",
2752
  "speakers":1989135,
2753
+ "language_name":"Khandesi",
2754
+ "autonym":"Khandesi",
2755
  "family":"Indo-European",
2756
  "flores_path":null,
2757
  "fleurs_tag":null,
 
3535
  "family":null,
3536
  "flores_path":"eus_Latn",
3537
  "fleurs_tag":null,
3538
+ "commonvoice_hours":379.0,
3539
  "commonvoice_locale":"eu",
3540
  "in_benchmark":true
3541
  },
 
3559
  "family":"Abkhaz-Adyge",
3560
  "flores_path":null,
3561
  "fleurs_tag":null,
3562
+ "commonvoice_hours":62.0,
3563
  "commonvoice_locale":"kbd",
3564
  "in_benchmark":false
3565
  },
 
4715
  "commonvoice_locale":null,
4716
  "in_benchmark":false
4717
  },
 
 
 
 
 
 
 
 
 
 
 
 
4718
  {
4719
  "bcp_47":"vun",
4720
  "speakers":433291,
 
4735
  "family":"Atlantic-Congo",
4736
  "flores_path":null,
4737
  "fleurs_tag":null,
4738
+ "commonvoice_hours":2.5,
4739
  "commonvoice_locale":"rof",
4740
  "in_benchmark":false
4741
  },
4742
+ {
4743
+ "bcp_47":"jmc",
4744
+ "speakers":433291,
4745
+ "language_name":"Machame",
4746
+ "autonym":"Kimachame",
4747
+ "family":"Atlantic-Congo",
4748
+ "flores_path":null,
4749
+ "fleurs_tag":null,
4750
+ "commonvoice_hours":null,
4751
+ "commonvoice_locale":null,
4752
+ "in_benchmark":false
4753
+ },
4754
  {
4755
  "bcp_47":"kjg",
4756
  "speakers":431949,
 
5904
  "in_benchmark":false
5905
  },
5906
  {
5907
+ "bcp_47":"kkj",
5908
  "speakers":149823,
5909
+ "language_name":"Kako",
5910
+ "autonym":"Kakɔ",
5911
  "family":"Atlantic-Congo",
5912
  "flores_path":null,
5913
  "fleurs_tag":null,
 
5916
  "in_benchmark":false
5917
  },
5918
  {
5919
+ "bcp_47":"bss",
5920
  "speakers":149823,
5921
+ "language_name":"Akoose",
5922
+ "autonym":"Akoose",
5923
  "family":"Atlantic-Congo",
5924
  "flores_path":null,
5925
  "fleurs_tag":null,
 
6240
  "in_benchmark":false
6241
  },
6242
  {
6243
+ "bcp_47":"pdt",
6244
  "speakers":90466,
6245
+ "language_name":"Plautdietsch",
6246
+ "autonym":"Plautdietsch",
6247
+ "family":"Indo-European",
6248
  "flores_path":null,
6249
  "fleurs_tag":null,
6250
  "commonvoice_hours":null,
 
6252
  "in_benchmark":false
6253
  },
6254
  {
6255
+ "bcp_47":"iu",
6256
  "speakers":90466,
6257
+ "language_name":"Inuktitut",
6258
+ "autonym":"Inuktitut",
6259
+ "family":"Eskimo-Aleut",
6260
  "flores_path":null,
6261
  "fleurs_tag":null,
6262
  "commonvoice_hours":null,
 
6367
  "family":"Indo-European",
6368
  "flores_path":null,
6369
  "fleurs_tag":null,
6370
+ "commonvoice_hours":4.6,
6371
  "commonvoice_locale":"btv",
6372
  "in_benchmark":false
6373
  },
 
7247
  "commonvoice_locale":null,
7248
  "in_benchmark":false
7249
  },
7250
+ {
7251
+ "bcp_47":"gbz",
7252
+ "speakers":7983,
7253
+ "language_name":"Zoroastrian Dari",
7254
+ "autonym":"Zoroastrian Dari",
7255
+ "family":"Indo-European",
7256
+ "flores_path":null,
7257
+ "fleurs_tag":null,
7258
+ "commonvoice_hours":null,
7259
+ "commonvoice_locale":null,
7260
+ "in_benchmark":false
7261
+ },
7262
  {
7263
  "bcp_47":"ik",
7264
  "speakers":7983,
 
7272
  "in_benchmark":false
7273
  },
7274
  {
7275
+ "bcp_47":"twq",
7276
+ "speakers":7970,
7277
+ "language_name":"Tasawaq",
7278
+ "autonym":"Tasawaq Senni",
7279
+ "family":"Songhay",
7280
  "flores_path":null,
7281
  "fleurs_tag":null,
7282
  "commonvoice_hours":null,
 
7295
  "commonvoice_locale":null,
7296
  "in_benchmark":false
7297
  },
 
 
 
 
 
 
 
 
 
 
 
 
7298
  {
7299
  "bcp_47":"mic",
7300
  "speakers":7916,
 
7567
  "family":"Atlantic-Congo",
7568
  "flores_path":null,
7569
  "fleurs_tag":null,
7570
+ "commonvoice_hours":2.3,
7571
  "commonvoice_locale":"yav",
7572
  "in_benchmark":false
7573
  },
 
7967
  "commonvoice_locale":"ie",
7968
  "in_benchmark":false
7969
  },
7970
+ {
7971
+ "bcp_47":"vot",
7972
+ "speakers":0,
7973
+ "language_name":"Votic",
7974
+ "autonym":"Votic",
7975
+ "family":"Uralic",
7976
+ "flores_path":null,
7977
+ "fleurs_tag":null,
7978
+ "commonvoice_hours":0.1,
7979
+ "commonvoice_locale":"vot",
7980
+ "in_benchmark":false
7981
+ },
7982
  {
7983
  "bcp_47":"sgs",
7984
  "speakers":0,
 
8003
  "commonvoice_locale":null,
8004
  "in_benchmark":false
8005
  },
 
 
 
 
 
 
 
 
 
 
 
 
8006
  {
8007
  "bcp_47":"pfl",
8008
  "speakers":0,
 
8028
  "in_benchmark":false
8029
  },
8030
  {
8031
+ "bcp_47":"ann",
8032
  "speakers":0,
8033
+ "language_name":"Obolo",
8034
+ "autonym":"Obolo",
8035
+ "family":"Atlantic-Congo",
8036
  "flores_path":null,
8037
  "fleurs_tag":null,
8038
  "commonvoice_hours":null,
 
8052
  "in_benchmark":false
8053
  },
8054
  {
8055
+ "bcp_47":"lzh",
8056
  "speakers":0,
8057
+ "language_name":"Literary Chinese",
8058
+ "autonym":"Literary Chinese",
8059
+ "family":"Sino-Tibetan",
8060
  "flores_path":null,
8061
  "fleurs_tag":null,
8062
  "commonvoice_hours":null,
 
8075
  "commonvoice_locale":null,
8076
  "in_benchmark":false
8077
  },
8078
+ {
8079
+ "bcp_47":"io",
8080
+ "speakers":0,
8081
+ "language_name":"Ido",
8082
+ "autonym":"Ido",
8083
+ "family":"Artificial Language",
8084
+ "flores_path":null,
8085
+ "fleurs_tag":null,
8086
+ "commonvoice_hours":null,
8087
+ "commonvoice_locale":null,
8088
+ "in_benchmark":false
8089
+ },
8090
  {
8091
  "bcp_47":"gez",
8092
  "speakers":0,
 
8123
  "commonvoice_locale":null,
8124
  "in_benchmark":false
8125
  },
 
 
 
 
 
 
 
 
 
 
 
 
8126
  {
8127
  "bcp_47":"cad",
8128
  "speakers":0,
models.json CHANGED
@@ -10,28 +10,6 @@
10
  "license":null,
11
  "creation_date":1733356800000
12
  },
13
- {
14
- "id":"anthropic\/claude-3-haiku",
15
- "name":"Claude 3 Haiku (self-moderated)",
16
- "provider_name":"Anthropic",
17
- "cost":1.25,
18
- "hf_id":null,
19
- "size":null,
20
- "type":"Commercial",
21
- "license":null,
22
- "creation_date":1710288000000
23
- },
24
- {
25
- "id":"cohere\/command-r",
26
- "name":"Command R",
27
- "provider_name":"Cohere",
28
- "cost":1.5,
29
- "hf_id":null,
30
- "size":null,
31
- "type":"Commercial",
32
- "license":null,
33
- "creation_date":1710374400000
34
- },
35
  {
36
  "id":"deepseek\/deepseek-chat",
37
  "name":"DeepSeek V3",
@@ -131,17 +109,6 @@
131
  "license":null,
132
  "creation_date":1727913600000
133
  },
134
- {
135
- "id":"google\/gemma-2-9b-it",
136
- "name":"Gemma 2 9B",
137
- "provider_name":"Google",
138
- "cost":0.0,
139
- "hf_id":"google\/gemma-2-9b-it",
140
- "size":9241705984.0,
141
- "type":"Open",
142
- "license":"Gemma",
143
- "creation_date":1719187200000
144
- },
145
  {
146
  "id":"google\/gemma-3-27b-it",
147
  "name":"Gemma 3 27B",
@@ -175,17 +142,6 @@
175
  "license":"Llama3",
176
  "creation_date":1713312000000
177
  },
178
- {
179
- "id":"meta-llama\/llama-3-8b-instruct",
180
- "name":"Llama 3 8B Instruct",
181
- "provider_name":"Meta",
182
- "cost":0.06,
183
- "hf_id":"meta-llama\/Meta-Llama-3-8B-Instruct",
184
- "size":8030261248.0,
185
- "type":"Open",
186
- "license":"Llama3",
187
- "creation_date":1713312000000
188
- },
189
  {
190
  "id":"meta-llama\/llama-3.1-70b-instruct",
191
  "name":"Llama 3.1 70B Instruct",
@@ -263,28 +219,6 @@
263
  "license":"Mit",
264
  "creation_date":1740355200000
265
  },
266
- {
267
- "id":"microsoft\/wizardlm-2-8x22b",
268
- "name":"WizardLM-2 8x22B",
269
- "provider_name":"WizardLM-2 8x22B",
270
- "cost":0.5,
271
- "hf_id":null,
272
- "size":null,
273
- "type":"Commercial",
274
- "license":null,
275
- "creation_date":1713225600000
276
- },
277
- {
278
- "id":"mistralai\/mistral-7b-instruct",
279
- "name":"Mistral 7B Instruct",
280
- "provider_name":"Mistral",
281
- "cost":0.0,
282
- "hf_id":"mistralai\/Mistral-7B-Instruct-v0.3",
283
- "size":7248023552.0,
284
- "type":"Open",
285
- "license":"Apache 2.0",
286
- "creation_date":1716336000000
287
- },
288
  {
289
  "id":"mistralai\/mistral-nemo",
290
  "name":"Mistral Nemo",
@@ -318,28 +252,6 @@
318
  "license":"Apache 2.0",
319
  "creation_date":1741651200000
320
  },
321
- {
322
- "id":"mistralai\/mistral-tiny",
323
- "name":"Mistral Tiny",
324
- "provider_name":"Mistral Tiny",
325
- "cost":0.25,
326
- "hf_id":null,
327
- "size":null,
328
- "type":"Commercial",
329
- "license":null,
330
- "creation_date":1704844800000
331
- },
332
- {
333
- "id":"nousresearch\/hermes-3-llama-3.1-405b",
334
- "name":"Hermes 3 405B Instruct",
335
- "provider_name":"Nous",
336
- "cost":0.8,
337
- "hf_id":"NousResearch\/Hermes-3-Llama-3.1-405B",
338
- "size":405853388800.0,
339
- "type":"Open",
340
- "license":"Llama3",
341
- "creation_date":1723507200000
342
- },
343
  {
344
  "id":"openai\/gpt-4.1-mini",
345
  "name":"GPT-4.1 Mini",
@@ -372,16 +284,5 @@
372
  "type":"Commercial",
373
  "license":null,
374
  "creation_date":1721260800000
375
- },
376
- {
377
- "id":"openai\/gpt-4o-mini-2024-07-18",
378
- "name":"GPT-4o-mini (2024-07-18)",
379
- "provider_name":"OpenAI",
380
- "cost":0.6,
381
- "hf_id":null,
382
- "size":null,
383
- "type":"Commercial",
384
- "license":null,
385
- "creation_date":1721260800000
386
  }
387
  ]
 
10
  "license":null,
11
  "creation_date":1733356800000
12
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  {
14
  "id":"deepseek\/deepseek-chat",
15
  "name":"DeepSeek V3",
 
109
  "license":null,
110
  "creation_date":1727913600000
111
  },
 
 
 
 
 
 
 
 
 
 
 
112
  {
113
  "id":"google\/gemma-3-27b-it",
114
  "name":"Gemma 3 27B",
 
142
  "license":"Llama3",
143
  "creation_date":1713312000000
144
  },
 
 
 
 
 
 
 
 
 
 
 
145
  {
146
  "id":"meta-llama\/llama-3.1-70b-instruct",
147
  "name":"Llama 3.1 70B Instruct",
 
219
  "license":"Mit",
220
  "creation_date":1740355200000
221
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
  {
223
  "id":"mistralai\/mistral-nemo",
224
  "name":"Mistral Nemo",
 
252
  "license":"Apache 2.0",
253
  "creation_date":1741651200000
254
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
  {
256
  "id":"openai\/gpt-4.1-mini",
257
  "name":"GPT-4.1 Mini",
 
284
  "type":"Commercial",
285
  "license":null,
286
  "creation_date":1721260800000
 
 
 
 
 
 
 
 
 
 
 
287
  }
288
  ]
results.json CHANGED
The diff for this file is too large to render. See raw diff