Upload from GitHub Actions: More results
Browse files- evals/languages.py +1 -1
- evals/main.py +2 -2
- evals/models.py +6 -2
- languages.json +133 -133
- models.json +0 -99
- results.json +0 -0
evals/languages.py
CHANGED
@@ -56,4 +56,4 @@ languages = pd.merge(languages, flores, on="bcp_47", how="left")
|
|
56 |
languages = pd.merge(languages, fleurs, on="bcp_47", how="left")
|
57 |
languages = pd.merge(languages, commonvoice, on="bcp_47", how="left")
|
58 |
languages["in_benchmark"] = languages["bcp_47"].isin(flores["bcp_47"])
|
59 |
-
languages = languages.sort_values(by="speakers", ascending=False)
|
|
|
56 |
languages = pd.merge(languages, fleurs, on="bcp_47", how="left")
|
57 |
languages = pd.merge(languages, commonvoice, on="bcp_47", how="left")
|
58 |
languages["in_benchmark"] = languages["bcp_47"].isin(flores["bcp_47"])
|
59 |
+
languages = languages.sort_values(by=["speakers", "language_name"], ascending=False)
|
evals/main.py
CHANGED
@@ -9,8 +9,8 @@ from tqdm.asyncio import tqdm_asyncio
|
|
9 |
# ===== config =====
|
10 |
|
11 |
n_sentences = 10
|
12 |
-
n_languages =
|
13 |
-
n_models =
|
14 |
|
15 |
# ===== run evaluation and aggregate results =====
|
16 |
|
|
|
9 |
# ===== config =====
|
10 |
|
11 |
n_sentences = 10
|
12 |
+
n_languages = 20
|
13 |
+
n_models = 35
|
14 |
|
15 |
# ===== run evaluation and aggregate results =====
|
16 |
|
evals/models.py
CHANGED
@@ -45,6 +45,10 @@ important_models = [
|
|
45 |
"amazon/nova-micro-v1", # 0.09$
|
46 |
]
|
47 |
|
|
|
|
|
|
|
|
|
48 |
transcription_models = [
|
49 |
"elevenlabs/scribe_v1",
|
50 |
"openai/whisper-large-v3",
|
@@ -200,11 +204,11 @@ def get_cost(row):
|
|
200 |
@cache
|
201 |
def load_models(date: date):
|
202 |
popular_models = (
|
203 |
-
get_historical_popular_models(date.today())[:
|
204 |
+ get_current_popular_models(date.today())[:10]
|
205 |
)
|
206 |
popular_models = [m["slug"] for m in popular_models]
|
207 |
-
models = set(important_models + popular_models)
|
208 |
models = pd.DataFrame(sorted(list(models)), columns=["id"])
|
209 |
or_metadata = models["id"].apply(get_or_metadata)
|
210 |
hf_metadata = or_metadata.apply(get_hf_metadata)
|
|
|
45 |
"amazon/nova-micro-v1", # 0.09$
|
46 |
]
|
47 |
|
48 |
+
blocklist = [
|
49 |
+
"microsoft/wizardlm-2-8x22b" # temporarily rate-limited
|
50 |
+
]
|
51 |
+
|
52 |
transcription_models = [
|
53 |
"elevenlabs/scribe_v1",
|
54 |
"openai/whisper-large-v3",
|
|
|
204 |
@cache
|
205 |
def load_models(date: date):
|
206 |
popular_models = (
|
207 |
+
get_historical_popular_models(date.today())[:20]
|
208 |
+ get_current_popular_models(date.today())[:10]
|
209 |
)
|
210 |
popular_models = [m["slug"] for m in popular_models]
|
211 |
+
models = set(important_models + popular_models) - set(blocklist)
|
212 |
models = pd.DataFrame(sorted(list(models)), columns=["id"])
|
213 |
or_metadata = models["id"].apply(get_or_metadata)
|
214 |
hf_metadata = or_metadata.apply(get_hf_metadata)
|
languages.json
CHANGED
@@ -319,7 +319,7 @@
|
|
319 |
"family":"Indo-European",
|
320 |
"flores_path":"ita_Latn",
|
321 |
"fleurs_tag":"it_it",
|
322 |
-
"commonvoice_hours":
|
323 |
"commonvoice_locale":"it",
|
324 |
"in_benchmark":true
|
325 |
},
|
@@ -864,28 +864,28 @@
|
|
864 |
"in_benchmark":false
|
865 |
},
|
866 |
{
|
867 |
-
"bcp_47":"
|
868 |
"speakers":15913080,
|
869 |
-
"language_name":"
|
870 |
-
"autonym":"
|
871 |
"family":"Indo-European",
|
872 |
-
"flores_path":
|
873 |
"fleurs_tag":null,
|
874 |
"commonvoice_hours":null,
|
875 |
"commonvoice_locale":null,
|
876 |
-
"in_benchmark":
|
877 |
},
|
878 |
{
|
879 |
-
"bcp_47":"
|
880 |
"speakers":15913080,
|
881 |
-
"language_name":"
|
882 |
-
"autonym":"
|
883 |
"family":"Indo-European",
|
884 |
-
"flores_path":
|
885 |
"fleurs_tag":null,
|
886 |
"commonvoice_hours":null,
|
887 |
"commonvoice_locale":null,
|
888 |
-
"in_benchmark":
|
889 |
},
|
890 |
{
|
891 |
"bcp_47":"bgc",
|
@@ -1183,7 +1183,7 @@
|
|
1183 |
"family":"Indo-European",
|
1184 |
"flores_path":"bel_Cyrl",
|
1185 |
"fleurs_tag":"be_by",
|
1186 |
-
"commonvoice_hours":
|
1187 |
"commonvoice_locale":"be",
|
1188 |
"in_benchmark":true
|
1189 |
},
|
@@ -1195,7 +1195,7 @@
|
|
1195 |
"family":"Atlantic-Congo",
|
1196 |
"flores_path":"lua_Latn",
|
1197 |
"fleurs_tag":null,
|
1198 |
-
"commonvoice_hours":2.
|
1199 |
"commonvoice_locale":"lua",
|
1200 |
"in_benchmark":true
|
1201 |
},
|
@@ -1955,18 +1955,6 @@
|
|
1955 |
"commonvoice_locale":"gom",
|
1956 |
"in_benchmark":true
|
1957 |
},
|
1958 |
-
{
|
1959 |
-
"bcp_47":"kln",
|
1960 |
-
"speakers":4068120,
|
1961 |
-
"language_name":"Kalenjin",
|
1962 |
-
"autonym":"Kalenjin",
|
1963 |
-
"family":"Nilotic",
|
1964 |
-
"flores_path":null,
|
1965 |
-
"fleurs_tag":null,
|
1966 |
-
"commonvoice_hours":43.0,
|
1967 |
-
"commonvoice_locale":"kln",
|
1968 |
-
"in_benchmark":false
|
1969 |
-
},
|
1970 |
{
|
1971 |
"bcp_47":"kam",
|
1972 |
"speakers":4068120,
|
@@ -1979,6 +1967,18 @@
|
|
1979 |
"commonvoice_locale":"kam",
|
1980 |
"in_benchmark":true
|
1981 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1982 |
{
|
1983 |
"bcp_47":"bjn",
|
1984 |
"speakers":4010288,
|
@@ -2124,10 +2124,10 @@
|
|
2124 |
"in_benchmark":true
|
2125 |
},
|
2126 |
{
|
2127 |
-
"bcp_47":"
|
2128 |
"speakers":3580443,
|
2129 |
-
"language_name":"
|
2130 |
-
"autonym":"
|
2131 |
"family":"Indo-European",
|
2132 |
"flores_path":null,
|
2133 |
"fleurs_tag":null,
|
@@ -2136,10 +2136,10 @@
|
|
2136 |
"in_benchmark":false
|
2137 |
},
|
2138 |
{
|
2139 |
-
"bcp_47":"
|
2140 |
"speakers":3580443,
|
2141 |
-
"language_name":"
|
2142 |
-
"autonym":"
|
2143 |
"family":"Indo-European",
|
2144 |
"flores_path":null,
|
2145 |
"fleurs_tag":null,
|
@@ -2352,27 +2352,27 @@
|
|
2352 |
"in_benchmark":true
|
2353 |
},
|
2354 |
{
|
2355 |
-
"bcp_47":"
|
2356 |
"speakers":2996392,
|
2357 |
-
"language_name":"
|
2358 |
-
"autonym":"
|
2359 |
"family":"Atlantic-Congo",
|
2360 |
"flores_path":null,
|
2361 |
"fleurs_tag":null,
|
2362 |
-
"commonvoice_hours":
|
2363 |
-
"commonvoice_locale":
|
2364 |
"in_benchmark":false
|
2365 |
},
|
2366 |
{
|
2367 |
-
"bcp_47":"
|
2368 |
"speakers":2996392,
|
2369 |
-
"language_name":"
|
2370 |
-
"autonym":"
|
2371 |
"family":"Atlantic-Congo",
|
2372 |
"flores_path":null,
|
2373 |
"fleurs_tag":null,
|
2374 |
-
"commonvoice_hours":
|
2375 |
-
"commonvoice_locale":
|
2376 |
"in_benchmark":false
|
2377 |
},
|
2378 |
{
|
@@ -2484,10 +2484,10 @@
|
|
2484 |
"in_benchmark":false
|
2485 |
},
|
2486 |
{
|
2487 |
-
"bcp_47":"
|
2488 |
"speakers":2511163,
|
2489 |
-
"language_name":"
|
2490 |
-
"autonym":"
|
2491 |
"family":"Austronesian",
|
2492 |
"flores_path":null,
|
2493 |
"fleurs_tag":null,
|
@@ -2496,10 +2496,10 @@
|
|
2496 |
"in_benchmark":false
|
2497 |
},
|
2498 |
{
|
2499 |
-
"bcp_47":"
|
2500 |
"speakers":2511163,
|
2501 |
-
"language_name":"
|
2502 |
-
"autonym":"
|
2503 |
"family":"Austronesian",
|
2504 |
"flores_path":null,
|
2505 |
"fleurs_tag":null,
|
@@ -2724,10 +2724,10 @@
|
|
2724 |
"in_benchmark":false
|
2725 |
},
|
2726 |
{
|
2727 |
-
"bcp_47":"
|
2728 |
"speakers":1989135,
|
2729 |
-
"language_name":"
|
2730 |
-
"autonym":"
|
2731 |
"family":"Indo-European",
|
2732 |
"flores_path":null,
|
2733 |
"fleurs_tag":null,
|
@@ -2748,10 +2748,10 @@
|
|
2748 |
"in_benchmark":false
|
2749 |
},
|
2750 |
{
|
2751 |
-
"bcp_47":"
|
2752 |
"speakers":1989135,
|
2753 |
-
"language_name":"
|
2754 |
-
"autonym":"
|
2755 |
"family":"Indo-European",
|
2756 |
"flores_path":null,
|
2757 |
"fleurs_tag":null,
|
@@ -3535,7 +3535,7 @@
|
|
3535 |
"family":null,
|
3536 |
"flores_path":"eus_Latn",
|
3537 |
"fleurs_tag":null,
|
3538 |
-
"commonvoice_hours":
|
3539 |
"commonvoice_locale":"eu",
|
3540 |
"in_benchmark":true
|
3541 |
},
|
@@ -3559,7 +3559,7 @@
|
|
3559 |
"family":"Abkhaz-Adyge",
|
3560 |
"flores_path":null,
|
3561 |
"fleurs_tag":null,
|
3562 |
-
"commonvoice_hours":
|
3563 |
"commonvoice_locale":"kbd",
|
3564 |
"in_benchmark":false
|
3565 |
},
|
@@ -4715,18 +4715,6 @@
|
|
4715 |
"commonvoice_locale":null,
|
4716 |
"in_benchmark":false
|
4717 |
},
|
4718 |
-
{
|
4719 |
-
"bcp_47":"jmc",
|
4720 |
-
"speakers":433291,
|
4721 |
-
"language_name":"Machame",
|
4722 |
-
"autonym":"Kimachame",
|
4723 |
-
"family":"Atlantic-Congo",
|
4724 |
-
"flores_path":null,
|
4725 |
-
"fleurs_tag":null,
|
4726 |
-
"commonvoice_hours":null,
|
4727 |
-
"commonvoice_locale":null,
|
4728 |
-
"in_benchmark":false
|
4729 |
-
},
|
4730 |
{
|
4731 |
"bcp_47":"vun",
|
4732 |
"speakers":433291,
|
@@ -4747,10 +4735,22 @@
|
|
4747 |
"family":"Atlantic-Congo",
|
4748 |
"flores_path":null,
|
4749 |
"fleurs_tag":null,
|
4750 |
-
"commonvoice_hours":2.
|
4751 |
"commonvoice_locale":"rof",
|
4752 |
"in_benchmark":false
|
4753 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4754 |
{
|
4755 |
"bcp_47":"kjg",
|
4756 |
"speakers":431949,
|
@@ -5904,10 +5904,10 @@
|
|
5904 |
"in_benchmark":false
|
5905 |
},
|
5906 |
{
|
5907 |
-
"bcp_47":"
|
5908 |
"speakers":149823,
|
5909 |
-
"language_name":"
|
5910 |
-
"autonym":"
|
5911 |
"family":"Atlantic-Congo",
|
5912 |
"flores_path":null,
|
5913 |
"fleurs_tag":null,
|
@@ -5916,10 +5916,10 @@
|
|
5916 |
"in_benchmark":false
|
5917 |
},
|
5918 |
{
|
5919 |
-
"bcp_47":"
|
5920 |
"speakers":149823,
|
5921 |
-
"language_name":"
|
5922 |
-
"autonym":"
|
5923 |
"family":"Atlantic-Congo",
|
5924 |
"flores_path":null,
|
5925 |
"fleurs_tag":null,
|
@@ -6240,11 +6240,11 @@
|
|
6240 |
"in_benchmark":false
|
6241 |
},
|
6242 |
{
|
6243 |
-
"bcp_47":"
|
6244 |
"speakers":90466,
|
6245 |
-
"language_name":"
|
6246 |
-
"autonym":"
|
6247 |
-
"family":"
|
6248 |
"flores_path":null,
|
6249 |
"fleurs_tag":null,
|
6250 |
"commonvoice_hours":null,
|
@@ -6252,11 +6252,11 @@
|
|
6252 |
"in_benchmark":false
|
6253 |
},
|
6254 |
{
|
6255 |
-
"bcp_47":"
|
6256 |
"speakers":90466,
|
6257 |
-
"language_name":"
|
6258 |
-
"autonym":"
|
6259 |
-
"family":"
|
6260 |
"flores_path":null,
|
6261 |
"fleurs_tag":null,
|
6262 |
"commonvoice_hours":null,
|
@@ -6367,7 +6367,7 @@
|
|
6367 |
"family":"Indo-European",
|
6368 |
"flores_path":null,
|
6369 |
"fleurs_tag":null,
|
6370 |
-
"commonvoice_hours":
|
6371 |
"commonvoice_locale":"btv",
|
6372 |
"in_benchmark":false
|
6373 |
},
|
@@ -7247,6 +7247,18 @@
|
|
7247 |
"commonvoice_locale":null,
|
7248 |
"in_benchmark":false
|
7249 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7250 |
{
|
7251 |
"bcp_47":"ik",
|
7252 |
"speakers":7983,
|
@@ -7260,11 +7272,11 @@
|
|
7260 |
"in_benchmark":false
|
7261 |
},
|
7262 |
{
|
7263 |
-
"bcp_47":"
|
7264 |
-
"speakers":
|
7265 |
-
"language_name":"
|
7266 |
-
"autonym":"
|
7267 |
-
"family":"
|
7268 |
"flores_path":null,
|
7269 |
"fleurs_tag":null,
|
7270 |
"commonvoice_hours":null,
|
@@ -7283,18 +7295,6 @@
|
|
7283 |
"commonvoice_locale":null,
|
7284 |
"in_benchmark":false
|
7285 |
},
|
7286 |
-
{
|
7287 |
-
"bcp_47":"twq",
|
7288 |
-
"speakers":7970,
|
7289 |
-
"language_name":"Tasawaq",
|
7290 |
-
"autonym":"Tasawaq Senni",
|
7291 |
-
"family":"Songhay",
|
7292 |
-
"flores_path":null,
|
7293 |
-
"fleurs_tag":null,
|
7294 |
-
"commonvoice_hours":null,
|
7295 |
-
"commonvoice_locale":null,
|
7296 |
-
"in_benchmark":false
|
7297 |
-
},
|
7298 |
{
|
7299 |
"bcp_47":"mic",
|
7300 |
"speakers":7916,
|
@@ -7567,7 +7567,7 @@
|
|
7567 |
"family":"Atlantic-Congo",
|
7568 |
"flores_path":null,
|
7569 |
"fleurs_tag":null,
|
7570 |
-
"commonvoice_hours":2.
|
7571 |
"commonvoice_locale":"yav",
|
7572 |
"in_benchmark":false
|
7573 |
},
|
@@ -7967,6 +7967,18 @@
|
|
7967 |
"commonvoice_locale":"ie",
|
7968 |
"in_benchmark":false
|
7969 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7970 |
{
|
7971 |
"bcp_47":"sgs",
|
7972 |
"speakers":0,
|
@@ -7991,18 +8003,6 @@
|
|
7991 |
"commonvoice_locale":null,
|
7992 |
"in_benchmark":false
|
7993 |
},
|
7994 |
-
{
|
7995 |
-
"bcp_47":"ann",
|
7996 |
-
"speakers":0,
|
7997 |
-
"language_name":"Obolo",
|
7998 |
-
"autonym":"Obolo",
|
7999 |
-
"family":"Atlantic-Congo",
|
8000 |
-
"flores_path":null,
|
8001 |
-
"fleurs_tag":null,
|
8002 |
-
"commonvoice_hours":null,
|
8003 |
-
"commonvoice_locale":null,
|
8004 |
-
"in_benchmark":false
|
8005 |
-
},
|
8006 |
{
|
8007 |
"bcp_47":"pfl",
|
8008 |
"speakers":0,
|
@@ -8028,11 +8028,11 @@
|
|
8028 |
"in_benchmark":false
|
8029 |
},
|
8030 |
{
|
8031 |
-
"bcp_47":"
|
8032 |
"speakers":0,
|
8033 |
-
"language_name":"
|
8034 |
-
"autonym":"
|
8035 |
-
"family":"
|
8036 |
"flores_path":null,
|
8037 |
"fleurs_tag":null,
|
8038 |
"commonvoice_hours":null,
|
@@ -8052,11 +8052,11 @@
|
|
8052 |
"in_benchmark":false
|
8053 |
},
|
8054 |
{
|
8055 |
-
"bcp_47":"
|
8056 |
"speakers":0,
|
8057 |
-
"language_name":"
|
8058 |
-
"autonym":"
|
8059 |
-
"family":"
|
8060 |
"flores_path":null,
|
8061 |
"fleurs_tag":null,
|
8062 |
"commonvoice_hours":null,
|
@@ -8075,6 +8075,18 @@
|
|
8075 |
"commonvoice_locale":null,
|
8076 |
"in_benchmark":false
|
8077 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8078 |
{
|
8079 |
"bcp_47":"gez",
|
8080 |
"speakers":0,
|
@@ -8111,18 +8123,6 @@
|
|
8111 |
"commonvoice_locale":null,
|
8112 |
"in_benchmark":false
|
8113 |
},
|
8114 |
-
{
|
8115 |
-
"bcp_47":"vot",
|
8116 |
-
"speakers":0,
|
8117 |
-
"language_name":"Votic",
|
8118 |
-
"autonym":"Votic",
|
8119 |
-
"family":"Uralic",
|
8120 |
-
"flores_path":null,
|
8121 |
-
"fleurs_tag":null,
|
8122 |
-
"commonvoice_hours":0.1,
|
8123 |
-
"commonvoice_locale":"vot",
|
8124 |
-
"in_benchmark":false
|
8125 |
-
},
|
8126 |
{
|
8127 |
"bcp_47":"cad",
|
8128 |
"speakers":0,
|
|
|
319 |
"family":"Indo-European",
|
320 |
"flores_path":"ita_Latn",
|
321 |
"fleurs_tag":"it_it",
|
322 |
+
"commonvoice_hours":362.0,
|
323 |
"commonvoice_locale":"it",
|
324 |
"in_benchmark":true
|
325 |
},
|
|
|
864 |
"in_benchmark":false
|
865 |
},
|
866 |
{
|
867 |
+
"bcp_47":"mwr",
|
868 |
"speakers":15913080,
|
869 |
+
"language_name":"Marwari",
|
870 |
+
"autonym":"Marwari",
|
871 |
"family":"Indo-European",
|
872 |
+
"flores_path":null,
|
873 |
"fleurs_tag":null,
|
874 |
"commonvoice_hours":null,
|
875 |
"commonvoice_locale":null,
|
876 |
+
"in_benchmark":false
|
877 |
},
|
878 |
{
|
879 |
+
"bcp_47":"mag",
|
880 |
"speakers":15913080,
|
881 |
+
"language_name":"Magahi",
|
882 |
+
"autonym":"Magahi",
|
883 |
"family":"Indo-European",
|
884 |
+
"flores_path":"mag_Deva",
|
885 |
"fleurs_tag":null,
|
886 |
"commonvoice_hours":null,
|
887 |
"commonvoice_locale":null,
|
888 |
+
"in_benchmark":true
|
889 |
},
|
890 |
{
|
891 |
"bcp_47":"bgc",
|
|
|
1183 |
"family":"Indo-European",
|
1184 |
"flores_path":"bel_Cyrl",
|
1185 |
"fleurs_tag":"be_by",
|
1186 |
+
"commonvoice_hours":1807.0,
|
1187 |
"commonvoice_locale":"be",
|
1188 |
"in_benchmark":true
|
1189 |
},
|
|
|
1195 |
"family":"Atlantic-Congo",
|
1196 |
"flores_path":"lua_Latn",
|
1197 |
"fleurs_tag":null,
|
1198 |
+
"commonvoice_hours":2.2,
|
1199 |
"commonvoice_locale":"lua",
|
1200 |
"in_benchmark":true
|
1201 |
},
|
|
|
1955 |
"commonvoice_locale":"gom",
|
1956 |
"in_benchmark":true
|
1957 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1958 |
{
|
1959 |
"bcp_47":"kam",
|
1960 |
"speakers":4068120,
|
|
|
1967 |
"commonvoice_locale":"kam",
|
1968 |
"in_benchmark":true
|
1969 |
},
|
1970 |
+
{
|
1971 |
+
"bcp_47":"kln",
|
1972 |
+
"speakers":4068120,
|
1973 |
+
"language_name":"Kalenjin",
|
1974 |
+
"autonym":"Kalenjin",
|
1975 |
+
"family":"Nilotic",
|
1976 |
+
"flores_path":null,
|
1977 |
+
"fleurs_tag":null,
|
1978 |
+
"commonvoice_hours":43.0,
|
1979 |
+
"commonvoice_locale":"kln",
|
1980 |
+
"in_benchmark":false
|
1981 |
+
},
|
1982 |
{
|
1983 |
"bcp_47":"bjn",
|
1984 |
"speakers":4010288,
|
|
|
2124 |
"in_benchmark":true
|
2125 |
},
|
2126 |
{
|
2127 |
+
"bcp_47":"lmn",
|
2128 |
"speakers":3580443,
|
2129 |
+
"language_name":"Lambadi",
|
2130 |
+
"autonym":"Lambadi",
|
2131 |
"family":"Indo-European",
|
2132 |
"flores_path":null,
|
2133 |
"fleurs_tag":null,
|
|
|
2136 |
"in_benchmark":false
|
2137 |
},
|
2138 |
{
|
2139 |
+
"bcp_47":"gbm",
|
2140 |
"speakers":3580443,
|
2141 |
+
"language_name":"Garhwali",
|
2142 |
+
"autonym":"Garhwali",
|
2143 |
"family":"Indo-European",
|
2144 |
"flores_path":null,
|
2145 |
"fleurs_tag":null,
|
|
|
2352 |
"in_benchmark":true
|
2353 |
},
|
2354 |
{
|
2355 |
+
"bcp_47":"ibb",
|
2356 |
"speakers":2996392,
|
2357 |
+
"language_name":"Ibibio",
|
2358 |
+
"autonym":"Ibibio",
|
2359 |
"family":"Atlantic-Congo",
|
2360 |
"flores_path":null,
|
2361 |
"fleurs_tag":null,
|
2362 |
+
"commonvoice_hours":11.0,
|
2363 |
+
"commonvoice_locale":"ibb",
|
2364 |
"in_benchmark":false
|
2365 |
},
|
2366 |
{
|
2367 |
+
"bcp_47":"efi",
|
2368 |
"speakers":2996392,
|
2369 |
+
"language_name":"Efik",
|
2370 |
+
"autonym":"Efik",
|
2371 |
"family":"Atlantic-Congo",
|
2372 |
"flores_path":null,
|
2373 |
"fleurs_tag":null,
|
2374 |
+
"commonvoice_hours":null,
|
2375 |
+
"commonvoice_locale":null,
|
2376 |
"in_benchmark":false
|
2377 |
},
|
2378 |
{
|
|
|
2484 |
"in_benchmark":false
|
2485 |
},
|
2486 |
{
|
2487 |
+
"bcp_47":"fbl",
|
2488 |
"speakers":2511163,
|
2489 |
+
"language_name":"West Albay Bikol",
|
2490 |
+
"autonym":"West Albay Bikol",
|
2491 |
"family":"Austronesian",
|
2492 |
"flores_path":null,
|
2493 |
"fleurs_tag":null,
|
|
|
2496 |
"in_benchmark":false
|
2497 |
},
|
2498 |
{
|
2499 |
+
"bcp_47":"pam",
|
2500 |
"speakers":2511163,
|
2501 |
+
"language_name":"Pampanga",
|
2502 |
+
"autonym":"Pampanga",
|
2503 |
"family":"Austronesian",
|
2504 |
"flores_path":null,
|
2505 |
"fleurs_tag":null,
|
|
|
2724 |
"in_benchmark":false
|
2725 |
},
|
2726 |
{
|
2727 |
+
"bcp_47":"wbr",
|
2728 |
"speakers":1989135,
|
2729 |
+
"language_name":"Wagdi",
|
2730 |
+
"autonym":"Wagdi",
|
2731 |
"family":"Indo-European",
|
2732 |
"flores_path":null,
|
2733 |
"fleurs_tag":null,
|
|
|
2748 |
"in_benchmark":false
|
2749 |
},
|
2750 |
{
|
2751 |
+
"bcp_47":"khn",
|
2752 |
"speakers":1989135,
|
2753 |
+
"language_name":"Khandesi",
|
2754 |
+
"autonym":"Khandesi",
|
2755 |
"family":"Indo-European",
|
2756 |
"flores_path":null,
|
2757 |
"fleurs_tag":null,
|
|
|
3535 |
"family":null,
|
3536 |
"flores_path":"eus_Latn",
|
3537 |
"fleurs_tag":null,
|
3538 |
+
"commonvoice_hours":379.0,
|
3539 |
"commonvoice_locale":"eu",
|
3540 |
"in_benchmark":true
|
3541 |
},
|
|
|
3559 |
"family":"Abkhaz-Adyge",
|
3560 |
"flores_path":null,
|
3561 |
"fleurs_tag":null,
|
3562 |
+
"commonvoice_hours":62.0,
|
3563 |
"commonvoice_locale":"kbd",
|
3564 |
"in_benchmark":false
|
3565 |
},
|
|
|
4715 |
"commonvoice_locale":null,
|
4716 |
"in_benchmark":false
|
4717 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4718 |
{
|
4719 |
"bcp_47":"vun",
|
4720 |
"speakers":433291,
|
|
|
4735 |
"family":"Atlantic-Congo",
|
4736 |
"flores_path":null,
|
4737 |
"fleurs_tag":null,
|
4738 |
+
"commonvoice_hours":2.5,
|
4739 |
"commonvoice_locale":"rof",
|
4740 |
"in_benchmark":false
|
4741 |
},
|
4742 |
+
{
|
4743 |
+
"bcp_47":"jmc",
|
4744 |
+
"speakers":433291,
|
4745 |
+
"language_name":"Machame",
|
4746 |
+
"autonym":"Kimachame",
|
4747 |
+
"family":"Atlantic-Congo",
|
4748 |
+
"flores_path":null,
|
4749 |
+
"fleurs_tag":null,
|
4750 |
+
"commonvoice_hours":null,
|
4751 |
+
"commonvoice_locale":null,
|
4752 |
+
"in_benchmark":false
|
4753 |
+
},
|
4754 |
{
|
4755 |
"bcp_47":"kjg",
|
4756 |
"speakers":431949,
|
|
|
5904 |
"in_benchmark":false
|
5905 |
},
|
5906 |
{
|
5907 |
+
"bcp_47":"kkj",
|
5908 |
"speakers":149823,
|
5909 |
+
"language_name":"Kako",
|
5910 |
+
"autonym":"Kakɔ",
|
5911 |
"family":"Atlantic-Congo",
|
5912 |
"flores_path":null,
|
5913 |
"fleurs_tag":null,
|
|
|
5916 |
"in_benchmark":false
|
5917 |
},
|
5918 |
{
|
5919 |
+
"bcp_47":"bss",
|
5920 |
"speakers":149823,
|
5921 |
+
"language_name":"Akoose",
|
5922 |
+
"autonym":"Akoose",
|
5923 |
"family":"Atlantic-Congo",
|
5924 |
"flores_path":null,
|
5925 |
"fleurs_tag":null,
|
|
|
6240 |
"in_benchmark":false
|
6241 |
},
|
6242 |
{
|
6243 |
+
"bcp_47":"pdt",
|
6244 |
"speakers":90466,
|
6245 |
+
"language_name":"Plautdietsch",
|
6246 |
+
"autonym":"Plautdietsch",
|
6247 |
+
"family":"Indo-European",
|
6248 |
"flores_path":null,
|
6249 |
"fleurs_tag":null,
|
6250 |
"commonvoice_hours":null,
|
|
|
6252 |
"in_benchmark":false
|
6253 |
},
|
6254 |
{
|
6255 |
+
"bcp_47":"iu",
|
6256 |
"speakers":90466,
|
6257 |
+
"language_name":"Inuktitut",
|
6258 |
+
"autonym":"Inuktitut",
|
6259 |
+
"family":"Eskimo-Aleut",
|
6260 |
"flores_path":null,
|
6261 |
"fleurs_tag":null,
|
6262 |
"commonvoice_hours":null,
|
|
|
6367 |
"family":"Indo-European",
|
6368 |
"flores_path":null,
|
6369 |
"fleurs_tag":null,
|
6370 |
+
"commonvoice_hours":4.6,
|
6371 |
"commonvoice_locale":"btv",
|
6372 |
"in_benchmark":false
|
6373 |
},
|
|
|
7247 |
"commonvoice_locale":null,
|
7248 |
"in_benchmark":false
|
7249 |
},
|
7250 |
+
{
|
7251 |
+
"bcp_47":"gbz",
|
7252 |
+
"speakers":7983,
|
7253 |
+
"language_name":"Zoroastrian Dari",
|
7254 |
+
"autonym":"Zoroastrian Dari",
|
7255 |
+
"family":"Indo-European",
|
7256 |
+
"flores_path":null,
|
7257 |
+
"fleurs_tag":null,
|
7258 |
+
"commonvoice_hours":null,
|
7259 |
+
"commonvoice_locale":null,
|
7260 |
+
"in_benchmark":false
|
7261 |
+
},
|
7262 |
{
|
7263 |
"bcp_47":"ik",
|
7264 |
"speakers":7983,
|
|
|
7272 |
"in_benchmark":false
|
7273 |
},
|
7274 |
{
|
7275 |
+
"bcp_47":"twq",
|
7276 |
+
"speakers":7970,
|
7277 |
+
"language_name":"Tasawaq",
|
7278 |
+
"autonym":"Tasawaq Senni",
|
7279 |
+
"family":"Songhay",
|
7280 |
"flores_path":null,
|
7281 |
"fleurs_tag":null,
|
7282 |
"commonvoice_hours":null,
|
|
|
7295 |
"commonvoice_locale":null,
|
7296 |
"in_benchmark":false
|
7297 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7298 |
{
|
7299 |
"bcp_47":"mic",
|
7300 |
"speakers":7916,
|
|
|
7567 |
"family":"Atlantic-Congo",
|
7568 |
"flores_path":null,
|
7569 |
"fleurs_tag":null,
|
7570 |
+
"commonvoice_hours":2.3,
|
7571 |
"commonvoice_locale":"yav",
|
7572 |
"in_benchmark":false
|
7573 |
},
|
|
|
7967 |
"commonvoice_locale":"ie",
|
7968 |
"in_benchmark":false
|
7969 |
},
|
7970 |
+
{
|
7971 |
+
"bcp_47":"vot",
|
7972 |
+
"speakers":0,
|
7973 |
+
"language_name":"Votic",
|
7974 |
+
"autonym":"Votic",
|
7975 |
+
"family":"Uralic",
|
7976 |
+
"flores_path":null,
|
7977 |
+
"fleurs_tag":null,
|
7978 |
+
"commonvoice_hours":0.1,
|
7979 |
+
"commonvoice_locale":"vot",
|
7980 |
+
"in_benchmark":false
|
7981 |
+
},
|
7982 |
{
|
7983 |
"bcp_47":"sgs",
|
7984 |
"speakers":0,
|
|
|
8003 |
"commonvoice_locale":null,
|
8004 |
"in_benchmark":false
|
8005 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8006 |
{
|
8007 |
"bcp_47":"pfl",
|
8008 |
"speakers":0,
|
|
|
8028 |
"in_benchmark":false
|
8029 |
},
|
8030 |
{
|
8031 |
+
"bcp_47":"ann",
|
8032 |
"speakers":0,
|
8033 |
+
"language_name":"Obolo",
|
8034 |
+
"autonym":"Obolo",
|
8035 |
+
"family":"Atlantic-Congo",
|
8036 |
"flores_path":null,
|
8037 |
"fleurs_tag":null,
|
8038 |
"commonvoice_hours":null,
|
|
|
8052 |
"in_benchmark":false
|
8053 |
},
|
8054 |
{
|
8055 |
+
"bcp_47":"lzh",
|
8056 |
"speakers":0,
|
8057 |
+
"language_name":"Literary Chinese",
|
8058 |
+
"autonym":"Literary Chinese",
|
8059 |
+
"family":"Sino-Tibetan",
|
8060 |
"flores_path":null,
|
8061 |
"fleurs_tag":null,
|
8062 |
"commonvoice_hours":null,
|
|
|
8075 |
"commonvoice_locale":null,
|
8076 |
"in_benchmark":false
|
8077 |
},
|
8078 |
+
{
|
8079 |
+
"bcp_47":"io",
|
8080 |
+
"speakers":0,
|
8081 |
+
"language_name":"Ido",
|
8082 |
+
"autonym":"Ido",
|
8083 |
+
"family":"Artificial Language",
|
8084 |
+
"flores_path":null,
|
8085 |
+
"fleurs_tag":null,
|
8086 |
+
"commonvoice_hours":null,
|
8087 |
+
"commonvoice_locale":null,
|
8088 |
+
"in_benchmark":false
|
8089 |
+
},
|
8090 |
{
|
8091 |
"bcp_47":"gez",
|
8092 |
"speakers":0,
|
|
|
8123 |
"commonvoice_locale":null,
|
8124 |
"in_benchmark":false
|
8125 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8126 |
{
|
8127 |
"bcp_47":"cad",
|
8128 |
"speakers":0,
|
models.json
CHANGED
@@ -10,28 +10,6 @@
|
|
10 |
"license":null,
|
11 |
"creation_date":1733356800000
|
12 |
},
|
13 |
-
{
|
14 |
-
"id":"anthropic\/claude-3-haiku",
|
15 |
-
"name":"Claude 3 Haiku (self-moderated)",
|
16 |
-
"provider_name":"Anthropic",
|
17 |
-
"cost":1.25,
|
18 |
-
"hf_id":null,
|
19 |
-
"size":null,
|
20 |
-
"type":"Commercial",
|
21 |
-
"license":null,
|
22 |
-
"creation_date":1710288000000
|
23 |
-
},
|
24 |
-
{
|
25 |
-
"id":"cohere\/command-r",
|
26 |
-
"name":"Command R",
|
27 |
-
"provider_name":"Cohere",
|
28 |
-
"cost":1.5,
|
29 |
-
"hf_id":null,
|
30 |
-
"size":null,
|
31 |
-
"type":"Commercial",
|
32 |
-
"license":null,
|
33 |
-
"creation_date":1710374400000
|
34 |
-
},
|
35 |
{
|
36 |
"id":"deepseek\/deepseek-chat",
|
37 |
"name":"DeepSeek V3",
|
@@ -131,17 +109,6 @@
|
|
131 |
"license":null,
|
132 |
"creation_date":1727913600000
|
133 |
},
|
134 |
-
{
|
135 |
-
"id":"google\/gemma-2-9b-it",
|
136 |
-
"name":"Gemma 2 9B",
|
137 |
-
"provider_name":"Google",
|
138 |
-
"cost":0.0,
|
139 |
-
"hf_id":"google\/gemma-2-9b-it",
|
140 |
-
"size":9241705984.0,
|
141 |
-
"type":"Open",
|
142 |
-
"license":"Gemma",
|
143 |
-
"creation_date":1719187200000
|
144 |
-
},
|
145 |
{
|
146 |
"id":"google\/gemma-3-27b-it",
|
147 |
"name":"Gemma 3 27B",
|
@@ -175,17 +142,6 @@
|
|
175 |
"license":"Llama3",
|
176 |
"creation_date":1713312000000
|
177 |
},
|
178 |
-
{
|
179 |
-
"id":"meta-llama\/llama-3-8b-instruct",
|
180 |
-
"name":"Llama 3 8B Instruct",
|
181 |
-
"provider_name":"Meta",
|
182 |
-
"cost":0.06,
|
183 |
-
"hf_id":"meta-llama\/Meta-Llama-3-8B-Instruct",
|
184 |
-
"size":8030261248.0,
|
185 |
-
"type":"Open",
|
186 |
-
"license":"Llama3",
|
187 |
-
"creation_date":1713312000000
|
188 |
-
},
|
189 |
{
|
190 |
"id":"meta-llama\/llama-3.1-70b-instruct",
|
191 |
"name":"Llama 3.1 70B Instruct",
|
@@ -263,28 +219,6 @@
|
|
263 |
"license":"Mit",
|
264 |
"creation_date":1740355200000
|
265 |
},
|
266 |
-
{
|
267 |
-
"id":"microsoft\/wizardlm-2-8x22b",
|
268 |
-
"name":"WizardLM-2 8x22B",
|
269 |
-
"provider_name":"WizardLM-2 8x22B",
|
270 |
-
"cost":0.5,
|
271 |
-
"hf_id":null,
|
272 |
-
"size":null,
|
273 |
-
"type":"Commercial",
|
274 |
-
"license":null,
|
275 |
-
"creation_date":1713225600000
|
276 |
-
},
|
277 |
-
{
|
278 |
-
"id":"mistralai\/mistral-7b-instruct",
|
279 |
-
"name":"Mistral 7B Instruct",
|
280 |
-
"provider_name":"Mistral",
|
281 |
-
"cost":0.0,
|
282 |
-
"hf_id":"mistralai\/Mistral-7B-Instruct-v0.3",
|
283 |
-
"size":7248023552.0,
|
284 |
-
"type":"Open",
|
285 |
-
"license":"Apache 2.0",
|
286 |
-
"creation_date":1716336000000
|
287 |
-
},
|
288 |
{
|
289 |
"id":"mistralai\/mistral-nemo",
|
290 |
"name":"Mistral Nemo",
|
@@ -318,28 +252,6 @@
|
|
318 |
"license":"Apache 2.0",
|
319 |
"creation_date":1741651200000
|
320 |
},
|
321 |
-
{
|
322 |
-
"id":"mistralai\/mistral-tiny",
|
323 |
-
"name":"Mistral Tiny",
|
324 |
-
"provider_name":"Mistral Tiny",
|
325 |
-
"cost":0.25,
|
326 |
-
"hf_id":null,
|
327 |
-
"size":null,
|
328 |
-
"type":"Commercial",
|
329 |
-
"license":null,
|
330 |
-
"creation_date":1704844800000
|
331 |
-
},
|
332 |
-
{
|
333 |
-
"id":"nousresearch\/hermes-3-llama-3.1-405b",
|
334 |
-
"name":"Hermes 3 405B Instruct",
|
335 |
-
"provider_name":"Nous",
|
336 |
-
"cost":0.8,
|
337 |
-
"hf_id":"NousResearch\/Hermes-3-Llama-3.1-405B",
|
338 |
-
"size":405853388800.0,
|
339 |
-
"type":"Open",
|
340 |
-
"license":"Llama3",
|
341 |
-
"creation_date":1723507200000
|
342 |
-
},
|
343 |
{
|
344 |
"id":"openai\/gpt-4.1-mini",
|
345 |
"name":"GPT-4.1 Mini",
|
@@ -372,16 +284,5 @@
|
|
372 |
"type":"Commercial",
|
373 |
"license":null,
|
374 |
"creation_date":1721260800000
|
375 |
-
},
|
376 |
-
{
|
377 |
-
"id":"openai\/gpt-4o-mini-2024-07-18",
|
378 |
-
"name":"GPT-4o-mini (2024-07-18)",
|
379 |
-
"provider_name":"OpenAI",
|
380 |
-
"cost":0.6,
|
381 |
-
"hf_id":null,
|
382 |
-
"size":null,
|
383 |
-
"type":"Commercial",
|
384 |
-
"license":null,
|
385 |
-
"creation_date":1721260800000
|
386 |
}
|
387 |
]
|
|
|
10 |
"license":null,
|
11 |
"creation_date":1733356800000
|
12 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
{
|
14 |
"id":"deepseek\/deepseek-chat",
|
15 |
"name":"DeepSeek V3",
|
|
|
109 |
"license":null,
|
110 |
"creation_date":1727913600000
|
111 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
{
|
113 |
"id":"google\/gemma-3-27b-it",
|
114 |
"name":"Gemma 3 27B",
|
|
|
142 |
"license":"Llama3",
|
143 |
"creation_date":1713312000000
|
144 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
145 |
{
|
146 |
"id":"meta-llama\/llama-3.1-70b-instruct",
|
147 |
"name":"Llama 3.1 70B Instruct",
|
|
|
219 |
"license":"Mit",
|
220 |
"creation_date":1740355200000
|
221 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
222 |
{
|
223 |
"id":"mistralai\/mistral-nemo",
|
224 |
"name":"Mistral Nemo",
|
|
|
252 |
"license":"Apache 2.0",
|
253 |
"creation_date":1741651200000
|
254 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
255 |
{
|
256 |
"id":"openai\/gpt-4.1-mini",
|
257 |
"name":"GPT-4.1 Mini",
|
|
|
284 |
"type":"Commercial",
|
285 |
"license":null,
|
286 |
"creation_date":1721260800000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
287 |
}
|
288 |
]
|
results.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|