Upload from GitHub Actions: New results
Browse files- evals/download_data.py +1 -1
- evals/languages.py +3 -3
- evals/main.py +31 -20
- evals/models.py +42 -42
- languages.json +217 -217
- models.json +171 -127
- results.json +0 -0
evals/download_data.py
CHANGED
@@ -15,7 +15,7 @@ from datasets_.fleurs import fleurs
|
|
15 |
|
16 |
|
17 |
# Add project root to sys.path (still useful for potential future imports if needed)
|
18 |
-
project_root = Path(__file__).resolve().parent
|
19 |
if str(project_root) not in sys.path:
|
20 |
sys.path.append(str(project_root))
|
21 |
|
|
|
15 |
|
16 |
|
17 |
# Add project root to sys.path (still useful for potential future imports if needed)
|
18 |
+
project_root = Path(__file__).resolve().parent.parent
|
19 |
if str(project_root) not in sys.path:
|
20 |
sys.path.append(str(project_root))
|
21 |
|
evals/languages.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
import re
|
2 |
|
3 |
import pandas as pd
|
4 |
-
from
|
5 |
-
from
|
6 |
-
from
|
7 |
from joblib.memory import Memory
|
8 |
from langcodes import Language, standardize_tag
|
9 |
from language_data.population_data import LANGUAGE_SPEAKING_POPULATION
|
|
|
1 |
import re
|
2 |
|
3 |
import pandas as pd
|
4 |
+
from datasets_.commonvoice import commonvoice
|
5 |
+
from datasets_.fleurs import fleurs
|
6 |
+
from datasets_.flores import flores
|
7 |
from joblib.memory import Memory
|
8 |
from langcodes import Language, standardize_tag
|
9 |
from language_data.population_data import LANGUAGE_SPEAKING_POPULATION
|
evals/main.py
CHANGED
@@ -1,11 +1,10 @@
|
|
1 |
import asyncio
|
2 |
|
3 |
import pandas as pd
|
4 |
-
from tqdm.asyncio import tqdm_asyncio
|
5 |
-
|
6 |
from languages import languages
|
7 |
from models import models
|
8 |
from tasks import tasks
|
|
|
9 |
|
10 |
# ===== config =====
|
11 |
|
@@ -17,32 +16,44 @@ n_models = 25
|
|
17 |
|
18 |
|
19 |
async def evaluate():
|
|
|
|
|
|
|
|
|
20 |
print("running evaluations")
|
21 |
old_results = pd.read_json("results.json")
|
22 |
-
|
23 |
-
|
|
|
24 |
for task_name, task in tasks.items()
|
25 |
-
for i in range(n_sentences)
|
26 |
for lang in languages.iloc[:n_languages].itertuples()
|
27 |
for model in models["id"].iloc[:n_models]
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
|
|
|
|
37 |
]
|
38 |
results = await tqdm_asyncio.gather(*results, miniters=1)
|
39 |
results = [r for group in results for r in group]
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
|
48 |
if __name__ == "__main__":
|
|
|
1 |
import asyncio
|
2 |
|
3 |
import pandas as pd
|
|
|
|
|
4 |
from languages import languages
|
5 |
from models import models
|
6 |
from tasks import tasks
|
7 |
+
from tqdm.asyncio import tqdm_asyncio
|
8 |
|
9 |
# ===== config =====
|
10 |
|
|
|
16 |
|
17 |
|
18 |
async def evaluate():
|
19 |
+
# save up-to-date info on models and languages
|
20 |
+
args = dict(orient="records", indent=2, force_ascii=False)
|
21 |
+
pd.DataFrame(models).to_json("models.json", **args)
|
22 |
+
pd.DataFrame(languages).to_json("languages.json", **args)
|
23 |
print("running evaluations")
|
24 |
old_results = pd.read_json("results.json")
|
25 |
+
# get all combinations of model, language and task
|
26 |
+
combis = [
|
27 |
+
(model, lang.bcp_47, task_name)
|
28 |
for task_name, task in tasks.items()
|
|
|
29 |
for lang in languages.iloc[:n_languages].itertuples()
|
30 |
for model in models["id"].iloc[:n_models]
|
31 |
+
]
|
32 |
+
# filter out combinations that have already been evaluated
|
33 |
+
combis = pd.DataFrame(combis, columns=["model", "bcp_47", "task"])
|
34 |
+
combis = combis.merge(old_results, on=["model", "bcp_47", "task"], how="left")
|
35 |
+
combis = combis[combis["metric"].isna()][["model", "bcp_47", "task"]]
|
36 |
+
print(combis["model"].unique())
|
37 |
+
# run evaluations
|
38 |
+
results = [
|
39 |
+
tasks[task_name](model, bcp_47, i)
|
40 |
+
for i in range(n_sentences)
|
41 |
+
for model, bcp_47, task_name in combis.itertuples(index=False)
|
42 |
]
|
43 |
results = await tqdm_asyncio.gather(*results, miniters=1)
|
44 |
results = [r for group in results for r in group]
|
45 |
+
if results:
|
46 |
+
# aggregate results
|
47 |
+
results = pd.DataFrame(results)
|
48 |
+
results = (
|
49 |
+
results.groupby(["model", "bcp_47", "task", "metric"])
|
50 |
+
.agg({"score": "mean"})
|
51 |
+
.reset_index()
|
52 |
+
)
|
53 |
+
# save results
|
54 |
+
results = pd.concat([old_results, results])
|
55 |
+
results = results.sort_values(by=["model", "bcp_47", "task", "metric"])
|
56 |
+
results.to_json("results.json", **args)
|
57 |
|
58 |
|
59 |
if __name__ == "__main__":
|
evals/models.py
CHANGED
@@ -15,7 +15,7 @@ from requests import HTTPError, get
|
|
15 |
|
16 |
# for development purposes, all languages will be evaluated on the fast models
|
17 |
# and only a sample of languages will be evaluated on all models
|
18 |
-
|
19 |
"meta-llama/llama-4-maverick", # 0.6$
|
20 |
"meta-llama/llama-3.3-70b-instruct", # 0.3$
|
21 |
"meta-llama/llama-3.1-70b-instruct", # 0.3$
|
@@ -24,8 +24,8 @@ models = [
|
|
24 |
"openai/gpt-4.1-mini", # 1.6$
|
25 |
"openai/gpt-4.1-nano", # 0.4$
|
26 |
"openai/gpt-4o-mini", # 0.6$
|
27 |
-
"openai/gpt-3.5-turbo-0613", # 2$
|
28 |
-
"openai/gpt-3.5-turbo", # 1.5$
|
29 |
# "anthropic/claude-3.5-haiku", # 4$ -> too expensive for dev
|
30 |
"mistralai/mistral-small-3.1-24b-instruct", # 0.3$
|
31 |
"mistralai/mistral-saba", # 0.6$
|
@@ -65,8 +65,10 @@ def get_models(date: date):
|
|
65 |
|
66 |
def get_model(permaslug):
|
67 |
models = get_models(date.today())
|
68 |
-
slugs = [m for m in models if m["permaslug"] == permaslug]
|
69 |
-
|
|
|
|
|
70 |
|
71 |
|
72 |
@cache
|
@@ -81,7 +83,8 @@ def get_historical_popular_models(date: date):
|
|
81 |
continue
|
82 |
counts[model.split(":")[0]] += count
|
83 |
counts = sorted(counts.items(), key=lambda x: x[1], reverse=True)
|
84 |
-
|
|
|
85 |
|
86 |
|
87 |
@cache
|
@@ -90,23 +93,10 @@ def get_current_popular_models(date: date):
|
|
90 |
data = re.search(r'{\\"rankMap\\":(.*)\}\]\\n"\]\)</script>', raw).group(1)
|
91 |
data = json.loads(data.replace("\\", ""))["day"]
|
92 |
data = sorted(data, key=lambda x: x["total_prompt_tokens"], reverse=True)
|
93 |
-
|
|
|
94 |
|
95 |
|
96 |
-
popular_models = (
|
97 |
-
get_historical_popular_models(date.today())[:5]
|
98 |
-
+ get_current_popular_models(date.today())[:5]
|
99 |
-
)
|
100 |
-
popular_models = [get_model(m) for m in popular_models if get_model(m)]
|
101 |
-
popular_models = [
|
102 |
-
m for m in popular_models if m["endpoint"] and not m["endpoint"]["is_free"]
|
103 |
-
]
|
104 |
-
popular_models = [m["slug"] for m in popular_models]
|
105 |
-
popular_models = [
|
106 |
-
m for m in popular_models if m and m not in models and m not in blocklist
|
107 |
-
]
|
108 |
-
models += popular_models
|
109 |
-
|
110 |
load_dotenv()
|
111 |
client = AsyncOpenAI(
|
112 |
base_url="https://openrouter.ai/api/v1",
|
@@ -158,9 +148,6 @@ async def transcribe(path, model="elevenlabs/scribe_v1"):
|
|
158 |
raise ValueError(f"Model {model} not supported")
|
159 |
|
160 |
|
161 |
-
models = pd.DataFrame(models, columns=["id"])
|
162 |
-
|
163 |
-
|
164 |
def get_or_metadata(id):
|
165 |
# get metadata from OpenRouter
|
166 |
models = get_models(date.today())
|
@@ -210,21 +197,34 @@ def get_cost(row):
|
|
210 |
return round(cost * 1_000_000, 2)
|
211 |
|
212 |
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
)
|
219 |
-
|
220 |
-
models =
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
# for development purposes, all languages will be evaluated on the fast models
|
17 |
# and only a sample of languages will be evaluated on all models
|
18 |
+
important_models = [
|
19 |
"meta-llama/llama-4-maverick", # 0.6$
|
20 |
"meta-llama/llama-3.3-70b-instruct", # 0.3$
|
21 |
"meta-llama/llama-3.1-70b-instruct", # 0.3$
|
|
|
24 |
"openai/gpt-4.1-mini", # 1.6$
|
25 |
"openai/gpt-4.1-nano", # 0.4$
|
26 |
"openai/gpt-4o-mini", # 0.6$
|
27 |
+
# "openai/gpt-3.5-turbo-0613", # 2$
|
28 |
+
# "openai/gpt-3.5-turbo", # 1.5$
|
29 |
# "anthropic/claude-3.5-haiku", # 4$ -> too expensive for dev
|
30 |
"mistralai/mistral-small-3.1-24b-instruct", # 0.3$
|
31 |
"mistralai/mistral-saba", # 0.6$
|
|
|
65 |
|
66 |
def get_model(permaslug):
|
67 |
models = get_models(date.today())
|
68 |
+
slugs = [m for m in models if m["permaslug"] == permaslug and m["endpoint"] and not m["endpoint"]["is_free"]]
|
69 |
+
if len(slugs) == 0:
|
70 |
+
print(f"no model found for {permaslug}")
|
71 |
+
return slugs[0] if len(slugs) >= 1 else None
|
72 |
|
73 |
|
74 |
@cache
|
|
|
83 |
continue
|
84 |
counts[model.split(":")[0]] += count
|
85 |
counts = sorted(counts.items(), key=lambda x: x[1], reverse=True)
|
86 |
+
models = [get_model(model) for model, _ in counts]
|
87 |
+
return [m for m in models if m]
|
88 |
|
89 |
|
90 |
@cache
|
|
|
93 |
data = re.search(r'{\\"rankMap\\":(.*)\}\]\\n"\]\)</script>', raw).group(1)
|
94 |
data = json.loads(data.replace("\\", ""))["day"]
|
95 |
data = sorted(data, key=lambda x: x["total_prompt_tokens"], reverse=True)
|
96 |
+
models = [get_model(model["model_permaslug"]) for model in data]
|
97 |
+
return [m for m in models if m]
|
98 |
|
99 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
100 |
load_dotenv()
|
101 |
client = AsyncOpenAI(
|
102 |
base_url="https://openrouter.ai/api/v1",
|
|
|
148 |
raise ValueError(f"Model {model} not supported")
|
149 |
|
150 |
|
|
|
|
|
|
|
151 |
def get_or_metadata(id):
|
152 |
# get metadata from OpenRouter
|
153 |
models = get_models(date.today())
|
|
|
197 |
return round(cost * 1_000_000, 2)
|
198 |
|
199 |
|
200 |
+
@cache
|
201 |
+
def load_models(date: date):
|
202 |
+
popular_models = (
|
203 |
+
get_historical_popular_models(date.today())[:10]
|
204 |
+
+ get_current_popular_models(date.today())[:10]
|
205 |
+
)
|
206 |
+
popular_models = [m["slug"] for m in popular_models]
|
207 |
+
models = set(important_models + popular_models) - set(blocklist)
|
208 |
+
models = pd.DataFrame(sorted(list(models)), columns=["id"])
|
209 |
+
or_metadata = models["id"].apply(get_or_metadata)
|
210 |
+
hf_metadata = or_metadata.apply(get_hf_metadata)
|
211 |
+
creation_date_hf = pd.to_datetime(hf_metadata.str["creation_date"]).dt.date
|
212 |
+
creation_date_or = pd.to_datetime(
|
213 |
+
or_metadata.str["created_at"].str.split("T").str[0]
|
214 |
+
).dt.date
|
215 |
+
|
216 |
+
models = models.assign(
|
217 |
+
name=or_metadata.str["short_name"],
|
218 |
+
provider_name=or_metadata.str["name"].str.split(": ").str[0],
|
219 |
+
cost=or_metadata.apply(get_cost),
|
220 |
+
hf_id=hf_metadata.str["hf_id"],
|
221 |
+
size=hf_metadata.str["size"],
|
222 |
+
type=hf_metadata.str["type"],
|
223 |
+
license=hf_metadata.str["license"],
|
224 |
+
creation_date=creation_date_hf.combine_first(creation_date_or),
|
225 |
+
)
|
226 |
+
models = models[models["cost"] <= 2.0].reset_index(drop=True)
|
227 |
+
return models
|
228 |
+
|
229 |
+
|
230 |
+
models = load_models(date.today())
|
languages.json
CHANGED
@@ -163,7 +163,7 @@
|
|
163 |
"family":"Indo-European",
|
164 |
"flores_path":"deu_Latn",
|
165 |
"fleurs_tag":"de_de",
|
166 |
-
"commonvoice_hours":
|
167 |
"commonvoice_locale":"de",
|
168 |
"in_benchmark":true
|
169 |
},
|
@@ -269,11 +269,11 @@
|
|
269 |
"language_name":"Wu Chinese",
|
270 |
"autonym":"Wu Chinese",
|
271 |
"family":"Sino-Tibetan",
|
272 |
-
"flores_path":
|
273 |
"fleurs_tag":null,
|
274 |
"commonvoice_hours":null,
|
275 |
"commonvoice_locale":null,
|
276 |
-
"in_benchmark":
|
277 |
},
|
278 |
{
|
279 |
"bcp_47":"tr",
|
@@ -485,11 +485,11 @@
|
|
485 |
"language_name":"North Levantine Arabic",
|
486 |
"autonym":"العامية",
|
487 |
"family":"Afro-Asiatic",
|
488 |
-
"flores_path":
|
489 |
"fleurs_tag":null,
|
490 |
"commonvoice_hours":null,
|
491 |
"commonvoice_locale":null,
|
492 |
-
"in_benchmark":
|
493 |
},
|
494 |
{
|
495 |
"bcp_47":"ms",
|
@@ -619,7 +619,7 @@
|
|
619 |
"family":"Indo-European",
|
620 |
"flores_path":"nld_Latn",
|
621 |
"fleurs_tag":"nl_nl",
|
622 |
-
"commonvoice_hours":
|
623 |
"commonvoice_locale":"nl",
|
624 |
"in_benchmark":true
|
625 |
},
|
@@ -876,10 +876,10 @@
|
|
876 |
"in_benchmark":true
|
877 |
},
|
878 |
{
|
879 |
-
"bcp_47":"
|
880 |
"speakers":15913080,
|
881 |
-
"language_name":"
|
882 |
-
"autonym":"
|
883 |
"family":"Indo-European",
|
884 |
"flores_path":null,
|
885 |
"fleurs_tag":null,
|
@@ -888,10 +888,10 @@
|
|
888 |
"in_benchmark":false
|
889 |
},
|
890 |
{
|
891 |
-
"bcp_47":"
|
892 |
"speakers":15913080,
|
893 |
-
"language_name":"
|
894 |
-
"autonym":"
|
895 |
"family":"Indo-European",
|
896 |
"flores_path":null,
|
897 |
"fleurs_tag":null,
|
@@ -1073,11 +1073,11 @@
|
|
1073 |
"language_name":"Akan",
|
1074 |
"autonym":"Akan",
|
1075 |
"family":"Atlantic-Congo",
|
1076 |
-
"flores_path":
|
1077 |
"fleurs_tag":null,
|
1078 |
"commonvoice_hours":0.2,
|
1079 |
"commonvoice_locale":"tw",
|
1080 |
-
"in_benchmark":
|
1081 |
},
|
1082 |
{
|
1083 |
"bcp_47":"qu",
|
@@ -1195,7 +1195,7 @@
|
|
1195 |
"family":"Atlantic-Congo",
|
1196 |
"flores_path":"lua_Latn",
|
1197 |
"fleurs_tag":null,
|
1198 |
-
"commonvoice_hours":0.
|
1199 |
"commonvoice_locale":"lua",
|
1200 |
"in_benchmark":true
|
1201 |
},
|
@@ -1303,7 +1303,7 @@
|
|
1303 |
"family":"Afro-Asiatic",
|
1304 |
"flores_path":"heb_Hebr",
|
1305 |
"fleurs_tag":"he_il",
|
1306 |
-
"commonvoice_hours":1.
|
1307 |
"commonvoice_locale":"he",
|
1308 |
"in_benchmark":true
|
1309 |
},
|
@@ -1375,7 +1375,7 @@
|
|
1375 |
"family":"Turkic",
|
1376 |
"flores_path":"uig_Arab",
|
1377 |
"fleurs_tag":null,
|
1378 |
-
"commonvoice_hours":
|
1379 |
"commonvoice_locale":"ug",
|
1380 |
"in_benchmark":true
|
1381 |
},
|
@@ -1555,7 +1555,7 @@
|
|
1555 |
"family":"Indo-European",
|
1556 |
"flores_path":"slk_Latn",
|
1557 |
"fleurs_tag":"sk_sk",
|
1558 |
-
"commonvoice_hours":
|
1559 |
"commonvoice_locale":"sk",
|
1560 |
"in_benchmark":true
|
1561 |
},
|
@@ -1955,18 +1955,6 @@
|
|
1955 |
"commonvoice_locale":"gom",
|
1956 |
"in_benchmark":true
|
1957 |
},
|
1958 |
-
{
|
1959 |
-
"bcp_47":"kln",
|
1960 |
-
"speakers":4068120,
|
1961 |
-
"language_name":"Kalenjin",
|
1962 |
-
"autonym":"Kalenjin",
|
1963 |
-
"family":"Nilotic",
|
1964 |
-
"flores_path":null,
|
1965 |
-
"fleurs_tag":null,
|
1966 |
-
"commonvoice_hours":43.0,
|
1967 |
-
"commonvoice_locale":"kln",
|
1968 |
-
"in_benchmark":false
|
1969 |
-
},
|
1970 |
{
|
1971 |
"bcp_47":"kam",
|
1972 |
"speakers":4068120,
|
@@ -1979,6 +1967,18 @@
|
|
1979 |
"commonvoice_locale":"kam",
|
1980 |
"in_benchmark":true
|
1981 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1982 |
{
|
1983 |
"bcp_47":"bjn",
|
1984 |
"speakers":4010288,
|
@@ -2009,11 +2009,11 @@
|
|
2009 |
"language_name":"Makhuwa",
|
2010 |
"autonym":"Emakhuwa",
|
2011 |
"family":"Atlantic-Congo",
|
2012 |
-
"flores_path":
|
2013 |
"fleurs_tag":null,
|
2014 |
"commonvoice_hours":0.0,
|
2015 |
"commonvoice_locale":"vmw",
|
2016 |
-
"in_benchmark":
|
2017 |
},
|
2018 |
{
|
2019 |
"bcp_47":"glk",
|
@@ -2124,10 +2124,10 @@
|
|
2124 |
"in_benchmark":true
|
2125 |
},
|
2126 |
{
|
2127 |
-
"bcp_47":"
|
2128 |
"speakers":3580443,
|
2129 |
-
"language_name":"
|
2130 |
-
"autonym":"
|
2131 |
"family":"Indo-European",
|
2132 |
"flores_path":null,
|
2133 |
"fleurs_tag":null,
|
@@ -2136,10 +2136,10 @@
|
|
2136 |
"in_benchmark":false
|
2137 |
},
|
2138 |
{
|
2139 |
-
"bcp_47":"
|
2140 |
"speakers":3580443,
|
2141 |
-
"language_name":"
|
2142 |
-
"autonym":"
|
2143 |
"family":"Indo-European",
|
2144 |
"flores_path":null,
|
2145 |
"fleurs_tag":null,
|
@@ -2335,7 +2335,7 @@
|
|
2335 |
"family":"Atlantic-Congo",
|
2336 |
"flores_path":null,
|
2337 |
"fleurs_tag":null,
|
2338 |
-
"commonvoice_hours":
|
2339 |
"commonvoice_locale":"bci",
|
2340 |
"in_benchmark":false
|
2341 |
},
|
@@ -2352,27 +2352,27 @@
|
|
2352 |
"in_benchmark":true
|
2353 |
},
|
2354 |
{
|
2355 |
-
"bcp_47":"
|
2356 |
"speakers":2996392,
|
2357 |
-
"language_name":"
|
2358 |
-
"autonym":"
|
2359 |
"family":"Atlantic-Congo",
|
2360 |
"flores_path":null,
|
2361 |
"fleurs_tag":null,
|
2362 |
-
"commonvoice_hours":
|
2363 |
-
"commonvoice_locale":
|
2364 |
"in_benchmark":false
|
2365 |
},
|
2366 |
{
|
2367 |
-
"bcp_47":"
|
2368 |
"speakers":2996392,
|
2369 |
-
"language_name":"
|
2370 |
-
"autonym":"
|
2371 |
"family":"Atlantic-Congo",
|
2372 |
"flores_path":null,
|
2373 |
"fleurs_tag":null,
|
2374 |
-
"commonvoice_hours":
|
2375 |
-
"commonvoice_locale":
|
2376 |
"in_benchmark":false
|
2377 |
},
|
2378 |
{
|
@@ -2544,11 +2544,11 @@
|
|
2544 |
"in_benchmark":false
|
2545 |
},
|
2546 |
{
|
2547 |
-
"bcp_47":"
|
2548 |
"speakers":2386962,
|
2549 |
-
"language_name":"
|
2550 |
-
"autonym":"
|
2551 |
-
"family":"
|
2552 |
"flores_path":null,
|
2553 |
"fleurs_tag":null,
|
2554 |
"commonvoice_hours":null,
|
@@ -2556,11 +2556,11 @@
|
|
2556 |
"in_benchmark":false
|
2557 |
},
|
2558 |
{
|
2559 |
-
"bcp_47":"
|
2560 |
"speakers":2386962,
|
2561 |
-
"language_name":"
|
2562 |
-
"autonym":"
|
2563 |
-
"family":"
|
2564 |
"flores_path":null,
|
2565 |
"fleurs_tag":null,
|
2566 |
"commonvoice_hours":null,
|
@@ -2724,10 +2724,10 @@
|
|
2724 |
"in_benchmark":false
|
2725 |
},
|
2726 |
{
|
2727 |
-
"bcp_47":"
|
2728 |
"speakers":1989135,
|
2729 |
-
"language_name":"
|
2730 |
-
"autonym":"
|
2731 |
"family":"Indo-European",
|
2732 |
"flores_path":null,
|
2733 |
"fleurs_tag":null,
|
@@ -2748,10 +2748,10 @@
|
|
2748 |
"in_benchmark":false
|
2749 |
},
|
2750 |
{
|
2751 |
-
"bcp_47":"
|
2752 |
"speakers":1989135,
|
2753 |
-
"language_name":"
|
2754 |
-
"autonym":"
|
2755 |
"family":"Indo-European",
|
2756 |
"flores_path":null,
|
2757 |
"fleurs_tag":null,
|
@@ -3283,7 +3283,7 @@
|
|
3283 |
"family":"Atlantic-Congo",
|
3284 |
"flores_path":null,
|
3285 |
"fleurs_tag":null,
|
3286 |
-
"commonvoice_hours":
|
3287 |
"commonvoice_locale":"bum",
|
3288 |
"in_benchmark":false
|
3289 |
},
|
@@ -3535,7 +3535,7 @@
|
|
3535 |
"family":null,
|
3536 |
"flores_path":"eus_Latn",
|
3537 |
"fleurs_tag":null,
|
3538 |
-
"commonvoice_hours":
|
3539 |
"commonvoice_locale":"eu",
|
3540 |
"in_benchmark":true
|
3541 |
},
|
@@ -3559,7 +3559,7 @@
|
|
3559 |
"family":"Abkhaz-Adyge",
|
3560 |
"flores_path":null,
|
3561 |
"fleurs_tag":null,
|
3562 |
-
"commonvoice_hours":
|
3563 |
"commonvoice_locale":"kbd",
|
3564 |
"in_benchmark":false
|
3565 |
},
|
@@ -3679,15 +3679,15 @@
|
|
3679 |
"family":"Indo-European",
|
3680 |
"flores_path":"ydd_Hebr",
|
3681 |
"fleurs_tag":null,
|
3682 |
-
"commonvoice_hours":0.
|
3683 |
"commonvoice_locale":"yi",
|
3684 |
"in_benchmark":true
|
3685 |
},
|
3686 |
{
|
3687 |
-
"bcp_47":"
|
3688 |
"speakers":995398,
|
3689 |
-
"language_name":"
|
3690 |
-
"autonym":"
|
3691 |
"family":"Atlantic-Congo",
|
3692 |
"flores_path":null,
|
3693 |
"fleurs_tag":null,
|
@@ -3696,10 +3696,10 @@
|
|
3696 |
"in_benchmark":false
|
3697 |
},
|
3698 |
{
|
3699 |
-
"bcp_47":"
|
3700 |
"speakers":995398,
|
3701 |
-
"language_name":"
|
3702 |
-
"autonym":"
|
3703 |
"family":"Atlantic-Congo",
|
3704 |
"flores_path":null,
|
3705 |
"fleurs_tag":null,
|
@@ -4512,27 +4512,27 @@
|
|
4512 |
"in_benchmark":false
|
4513 |
},
|
4514 |
{
|
4515 |
-
"bcp_47":"
|
4516 |
"speakers":501735,
|
4517 |
-
"language_name":"
|
4518 |
-
"autonym":"
|
4519 |
"family":"Uto-Aztecan",
|
4520 |
"flores_path":null,
|
4521 |
"fleurs_tag":null,
|
4522 |
-
"commonvoice_hours":
|
4523 |
-
"commonvoice_locale":
|
4524 |
"in_benchmark":false
|
4525 |
},
|
4526 |
{
|
4527 |
-
"bcp_47":"
|
4528 |
"speakers":501735,
|
4529 |
-
"language_name":"
|
4530 |
-
"autonym":"
|
4531 |
"family":"Uto-Aztecan",
|
4532 |
"flores_path":null,
|
4533 |
"fleurs_tag":null,
|
4534 |
-
"commonvoice_hours":
|
4535 |
-
"commonvoice_locale":
|
4536 |
"in_benchmark":false
|
4537 |
},
|
4538 |
{
|
@@ -4651,7 +4651,7 @@
|
|
4651 |
"family":"Abkhaz-Adyge",
|
4652 |
"flores_path":null,
|
4653 |
"fleurs_tag":null,
|
4654 |
-
"commonvoice_hours":
|
4655 |
"commonvoice_locale":"ady",
|
4656 |
"in_benchmark":false
|
4657 |
},
|
@@ -4675,7 +4675,7 @@
|
|
4675 |
"family":"Kartvelian",
|
4676 |
"flores_path":null,
|
4677 |
"fleurs_tag":null,
|
4678 |
-
"commonvoice_hours":
|
4679 |
"commonvoice_locale":"xmf",
|
4680 |
"in_benchmark":false
|
4681 |
},
|
@@ -4685,11 +4685,11 @@
|
|
4685 |
"language_name":"Erzya",
|
4686 |
"autonym":"Эрзянь Кель",
|
4687 |
"family":"Uralic",
|
4688 |
-
"flores_path":
|
4689 |
"fleurs_tag":null,
|
4690 |
"commonvoice_hours":3.8,
|
4691 |
"commonvoice_locale":"myv",
|
4692 |
-
"in_benchmark":
|
4693 |
},
|
4694 |
{
|
4695 |
"bcp_47":"dav",
|
@@ -4715,18 +4715,6 @@
|
|
4715 |
"commonvoice_locale":null,
|
4716 |
"in_benchmark":false
|
4717 |
},
|
4718 |
-
{
|
4719 |
-
"bcp_47":"jmc",
|
4720 |
-
"speakers":433291,
|
4721 |
-
"language_name":"Machame",
|
4722 |
-
"autonym":"Kimachame",
|
4723 |
-
"family":"Atlantic-Congo",
|
4724 |
-
"flores_path":null,
|
4725 |
-
"fleurs_tag":null,
|
4726 |
-
"commonvoice_hours":null,
|
4727 |
-
"commonvoice_locale":null,
|
4728 |
-
"in_benchmark":false
|
4729 |
-
},
|
4730 |
{
|
4731 |
"bcp_47":"vun",
|
4732 |
"speakers":433291,
|
@@ -4751,6 +4739,18 @@
|
|
4751 |
"commonvoice_locale":"rof",
|
4752 |
"in_benchmark":false
|
4753 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4754 |
{
|
4755 |
"bcp_47":"kjg",
|
4756 |
"speakers":431949,
|
@@ -5009,11 +5009,11 @@
|
|
5009 |
"language_name":"Dargwa",
|
5010 |
"autonym":"Dargwa",
|
5011 |
"family":"Nakh-Daghestanian",
|
5012 |
-
"flores_path":
|
5013 |
"fleurs_tag":null,
|
5014 |
"commonvoice_hours":0.0,
|
5015 |
"commonvoice_locale":"dar",
|
5016 |
-
"in_benchmark":
|
5017 |
},
|
5018 |
{
|
5019 |
"bcp_47":"lif",
|
@@ -5124,27 +5124,27 @@
|
|
5124 |
"in_benchmark":false
|
5125 |
},
|
5126 |
{
|
5127 |
-
"bcp_47":"
|
5128 |
"speakers":332940,
|
5129 |
-
"language_name":"
|
5130 |
-
"autonym":"
|
5131 |
"family":"Atlantic-Congo",
|
5132 |
"flores_path":null,
|
5133 |
"fleurs_tag":null,
|
5134 |
-
"commonvoice_hours":
|
5135 |
-
"commonvoice_locale":"
|
5136 |
"in_benchmark":false
|
5137 |
},
|
5138 |
{
|
5139 |
-
"bcp_47":"
|
5140 |
"speakers":332940,
|
5141 |
-
"language_name":"
|
5142 |
-
"autonym":"
|
5143 |
"family":"Atlantic-Congo",
|
5144 |
"flores_path":null,
|
5145 |
"fleurs_tag":null,
|
5146 |
-
"commonvoice_hours":
|
5147 |
-
"commonvoice_locale":"
|
5148 |
"in_benchmark":false
|
5149 |
},
|
5150 |
{
|
@@ -5232,11 +5232,11 @@
|
|
5232 |
"in_benchmark":false
|
5233 |
},
|
5234 |
{
|
5235 |
-
"bcp_47":"
|
5236 |
"speakers":305001,
|
5237 |
-
"language_name":"
|
5238 |
-
"autonym":"
|
5239 |
-
"family":"
|
5240 |
"flores_path":null,
|
5241 |
"fleurs_tag":null,
|
5242 |
"commonvoice_hours":null,
|
@@ -5244,11 +5244,11 @@
|
|
5244 |
"in_benchmark":false
|
5245 |
},
|
5246 |
{
|
5247 |
-
"bcp_47":"
|
5248 |
"speakers":305001,
|
5249 |
-
"language_name":"
|
5250 |
-
"autonym":"
|
5251 |
-
"family":"
|
5252 |
"flores_path":null,
|
5253 |
"fleurs_tag":null,
|
5254 |
"commonvoice_hours":null,
|
@@ -5388,10 +5388,10 @@
|
|
5388 |
"in_benchmark":false
|
5389 |
},
|
5390 |
{
|
5391 |
-
"bcp_47":"
|
5392 |
"speakers":264864,
|
5393 |
-
"language_name":"
|
5394 |
-
"autonym":"
|
5395 |
"family":"Tai-Kadai",
|
5396 |
"flores_path":null,
|
5397 |
"fleurs_tag":null,
|
@@ -5400,10 +5400,10 @@
|
|
5400 |
"in_benchmark":false
|
5401 |
},
|
5402 |
{
|
5403 |
-
"bcp_47":"
|
5404 |
"speakers":264864,
|
5405 |
-
"language_name":"
|
5406 |
-
"autonym":"
|
5407 |
"family":"Tai-Kadai",
|
5408 |
"flores_path":null,
|
5409 |
"fleurs_tag":null,
|
@@ -5508,10 +5508,10 @@
|
|
5508 |
"in_benchmark":true
|
5509 |
},
|
5510 |
{
|
5511 |
-
"bcp_47":"
|
5512 |
"speakers":245664,
|
5513 |
-
"language_name":"
|
5514 |
-
"autonym":"
|
5515 |
"family":"Austronesian",
|
5516 |
"flores_path":null,
|
5517 |
"fleurs_tag":null,
|
@@ -5520,10 +5520,10 @@
|
|
5520 |
"in_benchmark":false
|
5521 |
},
|
5522 |
{
|
5523 |
-
"bcp_47":"
|
5524 |
"speakers":245664,
|
5525 |
-
"language_name":"
|
5526 |
-
"autonym":"
|
5527 |
"family":"Austronesian",
|
5528 |
"flores_path":null,
|
5529 |
"fleurs_tag":null,
|
@@ -5753,11 +5753,11 @@
|
|
5753 |
"language_name":"Tuvinian",
|
5754 |
"autonym":"Tuvinian",
|
5755 |
"family":"Turkic",
|
5756 |
-
"flores_path":
|
5757 |
"fleurs_tag":null,
|
5758 |
"commonvoice_hours":0.0,
|
5759 |
"commonvoice_locale":"tyv",
|
5760 |
-
"in_benchmark":
|
5761 |
},
|
5762 |
{
|
5763 |
"bcp_47":"dtp",
|
@@ -5904,10 +5904,10 @@
|
|
5904 |
"in_benchmark":false
|
5905 |
},
|
5906 |
{
|
5907 |
-
"bcp_47":"
|
5908 |
"speakers":149823,
|
5909 |
-
"language_name":"
|
5910 |
-
"autonym":"
|
5911 |
"family":"Atlantic-Congo",
|
5912 |
"flores_path":null,
|
5913 |
"fleurs_tag":null,
|
@@ -5916,10 +5916,10 @@
|
|
5916 |
"in_benchmark":false
|
5917 |
},
|
5918 |
{
|
5919 |
-
"bcp_47":"
|
5920 |
"speakers":149823,
|
5921 |
-
"language_name":"
|
5922 |
-
"autonym":"
|
5923 |
"family":"Atlantic-Congo",
|
5924 |
"flores_path":null,
|
5925 |
"fleurs_tag":null,
|
@@ -6223,7 +6223,7 @@
|
|
6223 |
"family":"Abkhaz-Adyge",
|
6224 |
"flores_path":null,
|
6225 |
"fleurs_tag":null,
|
6226 |
-
"commonvoice_hours":
|
6227 |
"commonvoice_locale":"ab",
|
6228 |
"in_benchmark":false
|
6229 |
},
|
@@ -6821,11 +6821,11 @@
|
|
6821 |
"language_name":"Aragonese",
|
6822 |
"autonym":"Aragonés",
|
6823 |
"family":"Indo-European",
|
6824 |
-
"flores_path":
|
6825 |
"fleurs_tag":null,
|
6826 |
"commonvoice_hours":17.0,
|
6827 |
"commonvoice_locale":"an",
|
6828 |
-
"in_benchmark":
|
6829 |
},
|
6830 |
{
|
6831 |
"bcp_47":"chr",
|
@@ -7272,11 +7272,11 @@
|
|
7272 |
"in_benchmark":false
|
7273 |
},
|
7274 |
{
|
7275 |
-
"bcp_47":"
|
7276 |
"speakers":7970,
|
7277 |
-
"language_name":"
|
7278 |
-
"autonym":"
|
7279 |
-
"family":"
|
7280 |
"flores_path":null,
|
7281 |
"fleurs_tag":null,
|
7282 |
"commonvoice_hours":null,
|
@@ -7284,11 +7284,11 @@
|
|
7284 |
"in_benchmark":false
|
7285 |
},
|
7286 |
{
|
7287 |
-
"bcp_47":"
|
7288 |
"speakers":7970,
|
7289 |
-
"language_name":"
|
7290 |
-
"autonym":"
|
7291 |
-
"family":"
|
7292 |
"flores_path":null,
|
7293 |
"fleurs_tag":null,
|
7294 |
"commonvoice_hours":null,
|
@@ -7567,7 +7567,7 @@
|
|
7567 |
"family":"Atlantic-Congo",
|
7568 |
"flores_path":null,
|
7569 |
"fleurs_tag":null,
|
7570 |
-
"commonvoice_hours":
|
7571 |
"commonvoice_locale":"yav",
|
7572 |
"in_benchmark":false
|
7573 |
},
|
@@ -7836,11 +7836,11 @@
|
|
7836 |
"in_benchmark":false
|
7837 |
},
|
7838 |
{
|
7839 |
-
"bcp_47":"
|
7840 |
"speakers":377,
|
7841 |
-
"language_name":"
|
7842 |
-
"autonym":"
|
7843 |
-
"family":"
|
7844 |
"flores_path":null,
|
7845 |
"fleurs_tag":null,
|
7846 |
"commonvoice_hours":null,
|
@@ -7848,11 +7848,11 @@
|
|
7848 |
"in_benchmark":false
|
7849 |
},
|
7850 |
{
|
7851 |
-
"bcp_47":"
|
7852 |
"speakers":377,
|
7853 |
-
"language_name":"
|
7854 |
-
"autonym":"
|
7855 |
-
"family":"
|
7856 |
"flores_path":null,
|
7857 |
"fleurs_tag":null,
|
7858 |
"commonvoice_hours":null,
|
@@ -7968,11 +7968,11 @@
|
|
7968 |
"in_benchmark":false
|
7969 |
},
|
7970 |
{
|
7971 |
-
"bcp_47":"
|
7972 |
"speakers":0,
|
7973 |
-
"language_name":"
|
7974 |
-
"autonym":"
|
7975 |
-
"family":"
|
7976 |
"flores_path":null,
|
7977 |
"fleurs_tag":null,
|
7978 |
"commonvoice_hours":null,
|
@@ -7980,11 +7980,11 @@
|
|
7980 |
"in_benchmark":false
|
7981 |
},
|
7982 |
{
|
7983 |
-
"bcp_47":"
|
7984 |
"speakers":0,
|
7985 |
-
"language_name":"
|
7986 |
-
"autonym":"
|
7987 |
-
"family":"
|
7988 |
"flores_path":null,
|
7989 |
"fleurs_tag":null,
|
7990 |
"commonvoice_hours":null,
|
@@ -7992,22 +7992,22 @@
|
|
7992 |
"in_benchmark":false
|
7993 |
},
|
7994 |
{
|
7995 |
-
"bcp_47":"
|
7996 |
"speakers":0,
|
7997 |
-
"language_name":"
|
7998 |
-
"autonym":"
|
7999 |
-
"family":"
|
8000 |
"flores_path":null,
|
8001 |
"fleurs_tag":null,
|
8002 |
-
"commonvoice_hours":
|
8003 |
-
"commonvoice_locale":
|
8004 |
"in_benchmark":false
|
8005 |
},
|
8006 |
{
|
8007 |
-
"bcp_47":"
|
8008 |
"speakers":0,
|
8009 |
-
"language_name":"
|
8010 |
-
"autonym":"
|
8011 |
"family":"Indo-European",
|
8012 |
"flores_path":null,
|
8013 |
"fleurs_tag":null,
|
@@ -8016,23 +8016,23 @@
|
|
8016 |
"in_benchmark":false
|
8017 |
},
|
8018 |
{
|
8019 |
-
"bcp_47":"
|
8020 |
"speakers":0,
|
8021 |
-
"language_name":"
|
8022 |
-
"autonym":"
|
8023 |
-
"family":"
|
8024 |
"flores_path":null,
|
8025 |
"fleurs_tag":null,
|
8026 |
-
"commonvoice_hours":
|
8027 |
-
"commonvoice_locale":
|
8028 |
"in_benchmark":false
|
8029 |
},
|
8030 |
{
|
8031 |
-
"bcp_47":"
|
8032 |
"speakers":0,
|
8033 |
-
"language_name":"
|
8034 |
-
"autonym":"
|
8035 |
-
"family":"
|
8036 |
"flores_path":null,
|
8037 |
"fleurs_tag":null,
|
8038 |
"commonvoice_hours":null,
|
@@ -8040,23 +8040,23 @@
|
|
8040 |
"in_benchmark":false
|
8041 |
},
|
8042 |
{
|
8043 |
-
"bcp_47":"
|
8044 |
"speakers":0,
|
8045 |
-
"language_name":"
|
8046 |
-
"autonym":"
|
8047 |
-
"family":"
|
8048 |
"flores_path":null,
|
8049 |
"fleurs_tag":null,
|
8050 |
-
"commonvoice_hours":
|
8051 |
-
"commonvoice_locale":
|
8052 |
"in_benchmark":false
|
8053 |
},
|
8054 |
{
|
8055 |
-
"bcp_47":"
|
8056 |
"speakers":0,
|
8057 |
-
"language_name":"
|
8058 |
-
"autonym":"
|
8059 |
-
"family":"
|
8060 |
"flores_path":null,
|
8061 |
"fleurs_tag":null,
|
8062 |
"commonvoice_hours":null,
|
@@ -8064,10 +8064,10 @@
|
|
8064 |
"in_benchmark":false
|
8065 |
},
|
8066 |
{
|
8067 |
-
"bcp_47":"
|
8068 |
"speakers":0,
|
8069 |
-
"language_name":"
|
8070 |
-
"autonym":"
|
8071 |
"family":"Indo-European",
|
8072 |
"flores_path":null,
|
8073 |
"fleurs_tag":null,
|
@@ -8076,11 +8076,11 @@
|
|
8076 |
"in_benchmark":false
|
8077 |
},
|
8078 |
{
|
8079 |
-
"bcp_47":"
|
8080 |
"speakers":0,
|
8081 |
-
"language_name":"
|
8082 |
-
"autonym":"
|
8083 |
-
"family":"
|
8084 |
"flores_path":null,
|
8085 |
"fleurs_tag":null,
|
8086 |
"commonvoice_hours":null,
|
@@ -8088,11 +8088,11 @@
|
|
8088 |
"in_benchmark":false
|
8089 |
},
|
8090 |
{
|
8091 |
-
"bcp_47":"
|
8092 |
"speakers":0,
|
8093 |
-
"language_name":"
|
8094 |
-
"autonym":"
|
8095 |
-
"family":"
|
8096 |
"flores_path":null,
|
8097 |
"fleurs_tag":null,
|
8098 |
"commonvoice_hours":null,
|
@@ -8112,23 +8112,23 @@
|
|
8112 |
"in_benchmark":false
|
8113 |
},
|
8114 |
{
|
8115 |
-
"bcp_47":"
|
8116 |
"speakers":0,
|
8117 |
-
"language_name":"
|
8118 |
-
"autonym":"
|
8119 |
-
"family":"
|
8120 |
"flores_path":null,
|
8121 |
"fleurs_tag":null,
|
8122 |
-
"commonvoice_hours":
|
8123 |
-
"commonvoice_locale":
|
8124 |
"in_benchmark":false
|
8125 |
},
|
8126 |
{
|
8127 |
-
"bcp_47":"
|
8128 |
"speakers":0,
|
8129 |
-
"language_name":"
|
8130 |
-
"autonym":"
|
8131 |
-
"family":"
|
8132 |
"flores_path":null,
|
8133 |
"fleurs_tag":null,
|
8134 |
"commonvoice_hours":null,
|
|
|
163 |
"family":"Indo-European",
|
164 |
"flores_path":"deu_Latn",
|
165 |
"fleurs_tag":"de_de",
|
166 |
+
"commonvoice_hours":1363.0,
|
167 |
"commonvoice_locale":"de",
|
168 |
"in_benchmark":true
|
169 |
},
|
|
|
269 |
"language_name":"Wu Chinese",
|
270 |
"autonym":"Wu Chinese",
|
271 |
"family":"Sino-Tibetan",
|
272 |
+
"flores_path":null,
|
273 |
"fleurs_tag":null,
|
274 |
"commonvoice_hours":null,
|
275 |
"commonvoice_locale":null,
|
276 |
+
"in_benchmark":false
|
277 |
},
|
278 |
{
|
279 |
"bcp_47":"tr",
|
|
|
485 |
"language_name":"North Levantine Arabic",
|
486 |
"autonym":"العامية",
|
487 |
"family":"Afro-Asiatic",
|
488 |
+
"flores_path":null,
|
489 |
"fleurs_tag":null,
|
490 |
"commonvoice_hours":null,
|
491 |
"commonvoice_locale":null,
|
492 |
+
"in_benchmark":false
|
493 |
},
|
494 |
{
|
495 |
"bcp_47":"ms",
|
|
|
619 |
"family":"Indo-European",
|
620 |
"flores_path":"nld_Latn",
|
621 |
"fleurs_tag":"nl_nl",
|
622 |
+
"commonvoice_hours":120.0,
|
623 |
"commonvoice_locale":"nl",
|
624 |
"in_benchmark":true
|
625 |
},
|
|
|
876 |
"in_benchmark":true
|
877 |
},
|
878 |
{
|
879 |
+
"bcp_47":"bgc",
|
880 |
"speakers":15913080,
|
881 |
+
"language_name":"Haryanvi",
|
882 |
+
"autonym":"हरियाणवी",
|
883 |
"family":"Indo-European",
|
884 |
"flores_path":null,
|
885 |
"fleurs_tag":null,
|
|
|
888 |
"in_benchmark":false
|
889 |
},
|
890 |
{
|
891 |
+
"bcp_47":"mwr",
|
892 |
"speakers":15913080,
|
893 |
+
"language_name":"Marwari",
|
894 |
+
"autonym":"Marwari",
|
895 |
"family":"Indo-European",
|
896 |
"flores_path":null,
|
897 |
"fleurs_tag":null,
|
|
|
1073 |
"language_name":"Akan",
|
1074 |
"autonym":"Akan",
|
1075 |
"family":"Atlantic-Congo",
|
1076 |
+
"flores_path":null,
|
1077 |
"fleurs_tag":null,
|
1078 |
"commonvoice_hours":0.2,
|
1079 |
"commonvoice_locale":"tw",
|
1080 |
+
"in_benchmark":false
|
1081 |
},
|
1082 |
{
|
1083 |
"bcp_47":"qu",
|
|
|
1195 |
"family":"Atlantic-Congo",
|
1196 |
"flores_path":"lua_Latn",
|
1197 |
"fleurs_tag":null,
|
1198 |
+
"commonvoice_hours":0.8,
|
1199 |
"commonvoice_locale":"lua",
|
1200 |
"in_benchmark":true
|
1201 |
},
|
|
|
1303 |
"family":"Afro-Asiatic",
|
1304 |
"flores_path":"heb_Hebr",
|
1305 |
"fleurs_tag":"he_il",
|
1306 |
+
"commonvoice_hours":1.3,
|
1307 |
"commonvoice_locale":"he",
|
1308 |
"in_benchmark":true
|
1309 |
},
|
|
|
1375 |
"family":"Turkic",
|
1376 |
"flores_path":"uig_Arab",
|
1377 |
"fleurs_tag":null,
|
1378 |
+
"commonvoice_hours":372.0,
|
1379 |
"commonvoice_locale":"ug",
|
1380 |
"in_benchmark":true
|
1381 |
},
|
|
|
1555 |
"family":"Indo-European",
|
1556 |
"flores_path":"slk_Latn",
|
1557 |
"fleurs_tag":"sk_sk",
|
1558 |
+
"commonvoice_hours":49.0,
|
1559 |
"commonvoice_locale":"sk",
|
1560 |
"in_benchmark":true
|
1561 |
},
|
|
|
1955 |
"commonvoice_locale":"gom",
|
1956 |
"in_benchmark":true
|
1957 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1958 |
{
|
1959 |
"bcp_47":"kam",
|
1960 |
"speakers":4068120,
|
|
|
1967 |
"commonvoice_locale":"kam",
|
1968 |
"in_benchmark":true
|
1969 |
},
|
1970 |
+
{
|
1971 |
+
"bcp_47":"kln",
|
1972 |
+
"speakers":4068120,
|
1973 |
+
"language_name":"Kalenjin",
|
1974 |
+
"autonym":"Kalenjin",
|
1975 |
+
"family":"Nilotic",
|
1976 |
+
"flores_path":null,
|
1977 |
+
"fleurs_tag":null,
|
1978 |
+
"commonvoice_hours":43.0,
|
1979 |
+
"commonvoice_locale":"kln",
|
1980 |
+
"in_benchmark":false
|
1981 |
+
},
|
1982 |
{
|
1983 |
"bcp_47":"bjn",
|
1984 |
"speakers":4010288,
|
|
|
2009 |
"language_name":"Makhuwa",
|
2010 |
"autonym":"Emakhuwa",
|
2011 |
"family":"Atlantic-Congo",
|
2012 |
+
"flores_path":null,
|
2013 |
"fleurs_tag":null,
|
2014 |
"commonvoice_hours":0.0,
|
2015 |
"commonvoice_locale":"vmw",
|
2016 |
+
"in_benchmark":false
|
2017 |
},
|
2018 |
{
|
2019 |
"bcp_47":"glk",
|
|
|
2124 |
"in_benchmark":true
|
2125 |
},
|
2126 |
{
|
2127 |
+
"bcp_47":"lmn",
|
2128 |
"speakers":3580443,
|
2129 |
+
"language_name":"Lambadi",
|
2130 |
+
"autonym":"Lambadi",
|
2131 |
"family":"Indo-European",
|
2132 |
"flores_path":null,
|
2133 |
"fleurs_tag":null,
|
|
|
2136 |
"in_benchmark":false
|
2137 |
},
|
2138 |
{
|
2139 |
+
"bcp_47":"gbm",
|
2140 |
"speakers":3580443,
|
2141 |
+
"language_name":"Garhwali",
|
2142 |
+
"autonym":"Garhwali",
|
2143 |
"family":"Indo-European",
|
2144 |
"flores_path":null,
|
2145 |
"fleurs_tag":null,
|
|
|
2335 |
"family":"Atlantic-Congo",
|
2336 |
"flores_path":null,
|
2337 |
"fleurs_tag":null,
|
2338 |
+
"commonvoice_hours":4.5,
|
2339 |
"commonvoice_locale":"bci",
|
2340 |
"in_benchmark":false
|
2341 |
},
|
|
|
2352 |
"in_benchmark":true
|
2353 |
},
|
2354 |
{
|
2355 |
+
"bcp_47":"ibb",
|
2356 |
"speakers":2996392,
|
2357 |
+
"language_name":"Ibibio",
|
2358 |
+
"autonym":"Ibibio",
|
2359 |
"family":"Atlantic-Congo",
|
2360 |
"flores_path":null,
|
2361 |
"fleurs_tag":null,
|
2362 |
+
"commonvoice_hours":5.5,
|
2363 |
+
"commonvoice_locale":"ibb",
|
2364 |
"in_benchmark":false
|
2365 |
},
|
2366 |
{
|
2367 |
+
"bcp_47":"efi",
|
2368 |
"speakers":2996392,
|
2369 |
+
"language_name":"Efik",
|
2370 |
+
"autonym":"Efik",
|
2371 |
"family":"Atlantic-Congo",
|
2372 |
"flores_path":null,
|
2373 |
"fleurs_tag":null,
|
2374 |
+
"commonvoice_hours":null,
|
2375 |
+
"commonvoice_locale":null,
|
2376 |
"in_benchmark":false
|
2377 |
},
|
2378 |
{
|
|
|
2544 |
"in_benchmark":false
|
2545 |
},
|
2546 |
{
|
2547 |
+
"bcp_47":"sck",
|
2548 |
"speakers":2386962,
|
2549 |
+
"language_name":"Sadri",
|
2550 |
+
"autonym":"Sadri",
|
2551 |
+
"family":"Indo-European",
|
2552 |
"flores_path":null,
|
2553 |
"fleurs_tag":null,
|
2554 |
"commonvoice_hours":null,
|
|
|
2556 |
"in_benchmark":false
|
2557 |
},
|
2558 |
{
|
2559 |
+
"bcp_47":"wbq",
|
2560 |
"speakers":2386962,
|
2561 |
+
"language_name":"Waddar",
|
2562 |
+
"autonym":"Waddar",
|
2563 |
+
"family":"Dravidian",
|
2564 |
"flores_path":null,
|
2565 |
"fleurs_tag":null,
|
2566 |
"commonvoice_hours":null,
|
|
|
2724 |
"in_benchmark":false
|
2725 |
},
|
2726 |
{
|
2727 |
+
"bcp_47":"wbr",
|
2728 |
"speakers":1989135,
|
2729 |
+
"language_name":"Wagdi",
|
2730 |
+
"autonym":"Wagdi",
|
2731 |
"family":"Indo-European",
|
2732 |
"flores_path":null,
|
2733 |
"fleurs_tag":null,
|
|
|
2748 |
"in_benchmark":false
|
2749 |
},
|
2750 |
{
|
2751 |
+
"bcp_47":"khn",
|
2752 |
"speakers":1989135,
|
2753 |
+
"language_name":"Khandesi",
|
2754 |
+
"autonym":"Khandesi",
|
2755 |
"family":"Indo-European",
|
2756 |
"flores_path":null,
|
2757 |
"fleurs_tag":null,
|
|
|
3283 |
"family":"Atlantic-Congo",
|
3284 |
"flores_path":null,
|
3285 |
"fleurs_tag":null,
|
3286 |
+
"commonvoice_hours":11.0,
|
3287 |
"commonvoice_locale":"bum",
|
3288 |
"in_benchmark":false
|
3289 |
},
|
|
|
3535 |
"family":null,
|
3536 |
"flores_path":"eus_Latn",
|
3537 |
"fleurs_tag":null,
|
3538 |
+
"commonvoice_hours":365.0,
|
3539 |
"commonvoice_locale":"eu",
|
3540 |
"in_benchmark":true
|
3541 |
},
|
|
|
3559 |
"family":"Abkhaz-Adyge",
|
3560 |
"flores_path":null,
|
3561 |
"fleurs_tag":null,
|
3562 |
+
"commonvoice_hours":58.0,
|
3563 |
"commonvoice_locale":"kbd",
|
3564 |
"in_benchmark":false
|
3565 |
},
|
|
|
3679 |
"family":"Indo-European",
|
3680 |
"flores_path":"ydd_Hebr",
|
3681 |
"fleurs_tag":null,
|
3682 |
+
"commonvoice_hours":0.6,
|
3683 |
"commonvoice_locale":"yi",
|
3684 |
"in_benchmark":true
|
3685 |
},
|
3686 |
{
|
3687 |
+
"bcp_47":"bez",
|
3688 |
"speakers":995398,
|
3689 |
+
"language_name":"Bena",
|
3690 |
+
"autonym":"Hibena",
|
3691 |
"family":"Atlantic-Congo",
|
3692 |
"flores_path":null,
|
3693 |
"fleurs_tag":null,
|
|
|
3696 |
"in_benchmark":false
|
3697 |
},
|
3698 |
{
|
3699 |
+
"bcp_47":"ksb",
|
3700 |
"speakers":995398,
|
3701 |
+
"language_name":"Shambala",
|
3702 |
+
"autonym":"Kishambaa",
|
3703 |
"family":"Atlantic-Congo",
|
3704 |
"flores_path":null,
|
3705 |
"fleurs_tag":null,
|
|
|
4512 |
"in_benchmark":false
|
4513 |
},
|
4514 |
{
|
4515 |
+
"bcp_47":"nhe",
|
4516 |
"speakers":501735,
|
4517 |
+
"language_name":"Eastern Huasteca Nahuatl",
|
4518 |
+
"autonym":"Eastern Huasteca Nahuatl",
|
4519 |
"family":"Uto-Aztecan",
|
4520 |
"flores_path":null,
|
4521 |
"fleurs_tag":null,
|
4522 |
+
"commonvoice_hours":0.0,
|
4523 |
+
"commonvoice_locale":"nhe",
|
4524 |
"in_benchmark":false
|
4525 |
},
|
4526 |
{
|
4527 |
+
"bcp_47":"nhw",
|
4528 |
"speakers":501735,
|
4529 |
+
"language_name":"Western Huasteca Nahuatl",
|
4530 |
+
"autonym":"Western Huasteca Nahuatl",
|
4531 |
"family":"Uto-Aztecan",
|
4532 |
"flores_path":null,
|
4533 |
"fleurs_tag":null,
|
4534 |
+
"commonvoice_hours":null,
|
4535 |
+
"commonvoice_locale":null,
|
4536 |
"in_benchmark":false
|
4537 |
},
|
4538 |
{
|
|
|
4651 |
"family":"Abkhaz-Adyge",
|
4652 |
"flores_path":null,
|
4653 |
"fleurs_tag":null,
|
4654 |
+
"commonvoice_hours":16.0,
|
4655 |
"commonvoice_locale":"ady",
|
4656 |
"in_benchmark":false
|
4657 |
},
|
|
|
4675 |
"family":"Kartvelian",
|
4676 |
"flores_path":null,
|
4677 |
"fleurs_tag":null,
|
4678 |
+
"commonvoice_hours":12.0,
|
4679 |
"commonvoice_locale":"xmf",
|
4680 |
"in_benchmark":false
|
4681 |
},
|
|
|
4685 |
"language_name":"Erzya",
|
4686 |
"autonym":"Эрзянь Кель",
|
4687 |
"family":"Uralic",
|
4688 |
+
"flores_path":null,
|
4689 |
"fleurs_tag":null,
|
4690 |
"commonvoice_hours":3.8,
|
4691 |
"commonvoice_locale":"myv",
|
4692 |
+
"in_benchmark":false
|
4693 |
},
|
4694 |
{
|
4695 |
"bcp_47":"dav",
|
|
|
4715 |
"commonvoice_locale":null,
|
4716 |
"in_benchmark":false
|
4717 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4718 |
{
|
4719 |
"bcp_47":"vun",
|
4720 |
"speakers":433291,
|
|
|
4739 |
"commonvoice_locale":"rof",
|
4740 |
"in_benchmark":false
|
4741 |
},
|
4742 |
+
{
|
4743 |
+
"bcp_47":"jmc",
|
4744 |
+
"speakers":433291,
|
4745 |
+
"language_name":"Machame",
|
4746 |
+
"autonym":"Kimachame",
|
4747 |
+
"family":"Atlantic-Congo",
|
4748 |
+
"flores_path":null,
|
4749 |
+
"fleurs_tag":null,
|
4750 |
+
"commonvoice_hours":null,
|
4751 |
+
"commonvoice_locale":null,
|
4752 |
+
"in_benchmark":false
|
4753 |
+
},
|
4754 |
{
|
4755 |
"bcp_47":"kjg",
|
4756 |
"speakers":431949,
|
|
|
5009 |
"language_name":"Dargwa",
|
5010 |
"autonym":"Dargwa",
|
5011 |
"family":"Nakh-Daghestanian",
|
5012 |
+
"flores_path":null,
|
5013 |
"fleurs_tag":null,
|
5014 |
"commonvoice_hours":0.0,
|
5015 |
"commonvoice_locale":"dar",
|
5016 |
+
"in_benchmark":false
|
5017 |
},
|
5018 |
{
|
5019 |
"bcp_47":"lif",
|
|
|
5124 |
"in_benchmark":false
|
5125 |
},
|
5126 |
{
|
5127 |
+
"bcp_47":"bax",
|
5128 |
"speakers":332940,
|
5129 |
+
"language_name":"Bamun",
|
5130 |
+
"autonym":"Bamun",
|
5131 |
"family":"Atlantic-Congo",
|
5132 |
"flores_path":null,
|
5133 |
"fleurs_tag":null,
|
5134 |
+
"commonvoice_hours":11.0,
|
5135 |
+
"commonvoice_locale":"bax",
|
5136 |
"in_benchmark":false
|
5137 |
},
|
5138 |
{
|
5139 |
+
"bcp_47":"bas",
|
5140 |
"speakers":332940,
|
5141 |
+
"language_name":"Basaa",
|
5142 |
+
"autonym":"Ɓàsàa",
|
5143 |
"family":"Atlantic-Congo",
|
5144 |
"flores_path":null,
|
5145 |
"fleurs_tag":null,
|
5146 |
+
"commonvoice_hours":12.0,
|
5147 |
+
"commonvoice_locale":"bas",
|
5148 |
"in_benchmark":false
|
5149 |
},
|
5150 |
{
|
|
|
5232 |
"in_benchmark":false
|
5233 |
},
|
5234 |
{
|
5235 |
+
"bcp_47":"njo",
|
5236 |
"speakers":305001,
|
5237 |
+
"language_name":"Ao Naga",
|
5238 |
+
"autonym":"Ao Naga",
|
5239 |
+
"family":"Sino-Tibetan",
|
5240 |
"flores_path":null,
|
5241 |
"fleurs_tag":null,
|
5242 |
"commonvoice_hours":null,
|
|
|
5244 |
"in_benchmark":false
|
5245 |
},
|
5246 |
{
|
5247 |
+
"bcp_47":"bfq",
|
5248 |
"speakers":305001,
|
5249 |
+
"language_name":"Badaga",
|
5250 |
+
"autonym":"Badaga",
|
5251 |
+
"family":"Dravidian",
|
5252 |
"flores_path":null,
|
5253 |
"fleurs_tag":null,
|
5254 |
"commonvoice_hours":null,
|
|
|
5388 |
"in_benchmark":false
|
5389 |
},
|
5390 |
{
|
5391 |
+
"bcp_47":"khb",
|
5392 |
"speakers":264864,
|
5393 |
+
"language_name":"Lü",
|
5394 |
+
"autonym":"Lü",
|
5395 |
"family":"Tai-Kadai",
|
5396 |
"flores_path":null,
|
5397 |
"fleurs_tag":null,
|
|
|
5400 |
"in_benchmark":false
|
5401 |
},
|
5402 |
{
|
5403 |
+
"bcp_47":"tdd",
|
5404 |
"speakers":264864,
|
5405 |
+
"language_name":"Tai Nüa",
|
5406 |
+
"autonym":"Tai Nüa",
|
5407 |
"family":"Tai-Kadai",
|
5408 |
"flores_path":null,
|
5409 |
"fleurs_tag":null,
|
|
|
5508 |
"in_benchmark":true
|
5509 |
},
|
5510 |
{
|
5511 |
+
"bcp_47":"mdr",
|
5512 |
"speakers":245664,
|
5513 |
+
"language_name":"Mandar",
|
5514 |
+
"autonym":"Mandar",
|
5515 |
"family":"Austronesian",
|
5516 |
"flores_path":null,
|
5517 |
"fleurs_tag":null,
|
|
|
5520 |
"in_benchmark":false
|
5521 |
},
|
5522 |
{
|
5523 |
+
"bcp_47":"sxn",
|
5524 |
"speakers":245664,
|
5525 |
+
"language_name":"Sangir",
|
5526 |
+
"autonym":"Sangir",
|
5527 |
"family":"Austronesian",
|
5528 |
"flores_path":null,
|
5529 |
"fleurs_tag":null,
|
|
|
5753 |
"language_name":"Tuvinian",
|
5754 |
"autonym":"Tuvinian",
|
5755 |
"family":"Turkic",
|
5756 |
+
"flores_path":null,
|
5757 |
"fleurs_tag":null,
|
5758 |
"commonvoice_hours":0.0,
|
5759 |
"commonvoice_locale":"tyv",
|
5760 |
+
"in_benchmark":false
|
5761 |
},
|
5762 |
{
|
5763 |
"bcp_47":"dtp",
|
|
|
5904 |
"in_benchmark":false
|
5905 |
},
|
5906 |
{
|
5907 |
+
"bcp_47":"kkj",
|
5908 |
"speakers":149823,
|
5909 |
+
"language_name":"Kako",
|
5910 |
+
"autonym":"Kakɔ",
|
5911 |
"family":"Atlantic-Congo",
|
5912 |
"flores_path":null,
|
5913 |
"fleurs_tag":null,
|
|
|
5916 |
"in_benchmark":false
|
5917 |
},
|
5918 |
{
|
5919 |
+
"bcp_47":"bss",
|
5920 |
"speakers":149823,
|
5921 |
+
"language_name":"Akoose",
|
5922 |
+
"autonym":"Akoose",
|
5923 |
"family":"Atlantic-Congo",
|
5924 |
"flores_path":null,
|
5925 |
"fleurs_tag":null,
|
|
|
6223 |
"family":"Abkhaz-Adyge",
|
6224 |
"flores_path":null,
|
6225 |
"fleurs_tag":null,
|
6226 |
+
"commonvoice_hours":67.0,
|
6227 |
"commonvoice_locale":"ab",
|
6228 |
"in_benchmark":false
|
6229 |
},
|
|
|
6821 |
"language_name":"Aragonese",
|
6822 |
"autonym":"Aragonés",
|
6823 |
"family":"Indo-European",
|
6824 |
+
"flores_path":null,
|
6825 |
"fleurs_tag":null,
|
6826 |
"commonvoice_hours":17.0,
|
6827 |
"commonvoice_locale":"an",
|
6828 |
+
"in_benchmark":false
|
6829 |
},
|
6830 |
{
|
6831 |
"bcp_47":"chr",
|
|
|
7272 |
"in_benchmark":false
|
7273 |
},
|
7274 |
{
|
7275 |
+
"bcp_47":"twq",
|
7276 |
"speakers":7970,
|
7277 |
+
"language_name":"Tasawaq",
|
7278 |
+
"autonym":"Tasawaq Senni",
|
7279 |
+
"family":"Songhay",
|
7280 |
"flores_path":null,
|
7281 |
"fleurs_tag":null,
|
7282 |
"commonvoice_hours":null,
|
|
|
7284 |
"in_benchmark":false
|
7285 |
},
|
7286 |
{
|
7287 |
+
"bcp_47":"bku",
|
7288 |
"speakers":7970,
|
7289 |
+
"language_name":"Buhid",
|
7290 |
+
"autonym":"Buhid",
|
7291 |
+
"family":"Austronesian",
|
7292 |
"flores_path":null,
|
7293 |
"fleurs_tag":null,
|
7294 |
"commonvoice_hours":null,
|
|
|
7567 |
"family":"Atlantic-Congo",
|
7568 |
"flores_path":null,
|
7569 |
"fleurs_tag":null,
|
7570 |
+
"commonvoice_hours":2.4,
|
7571 |
"commonvoice_locale":"yav",
|
7572 |
"in_benchmark":false
|
7573 |
},
|
|
|
7836 |
"in_benchmark":false
|
7837 |
},
|
7838 |
{
|
7839 |
+
"bcp_47":"kwk",
|
7840 |
"speakers":377,
|
7841 |
+
"language_name":"Kwakʼwala",
|
7842 |
+
"autonym":"KwakʼWala",
|
7843 |
+
"family":"Wakashan",
|
7844 |
"flores_path":null,
|
7845 |
"fleurs_tag":null,
|
7846 |
"commonvoice_hours":null,
|
|
|
7848 |
"in_benchmark":false
|
7849 |
},
|
7850 |
{
|
7851 |
+
"bcp_47":"crl",
|
7852 |
"speakers":377,
|
7853 |
+
"language_name":"Northern East Cree",
|
7854 |
+
"autonym":"Northern East Cree",
|
7855 |
+
"family":"Algic",
|
7856 |
"flores_path":null,
|
7857 |
"fleurs_tag":null,
|
7858 |
"commonvoice_hours":null,
|
|
|
7968 |
"in_benchmark":false
|
7969 |
},
|
7970 |
{
|
7971 |
+
"bcp_47":"lzh",
|
7972 |
"speakers":0,
|
7973 |
+
"language_name":"Literary Chinese",
|
7974 |
+
"autonym":"Literary Chinese",
|
7975 |
+
"family":"Sino-Tibetan",
|
7976 |
"flores_path":null,
|
7977 |
"fleurs_tag":null,
|
7978 |
"commonvoice_hours":null,
|
|
|
7980 |
"in_benchmark":false
|
7981 |
},
|
7982 |
{
|
7983 |
+
"bcp_47":"io",
|
7984 |
"speakers":0,
|
7985 |
+
"language_name":"Ido",
|
7986 |
+
"autonym":"Ido",
|
7987 |
+
"family":"Artificial Language",
|
7988 |
"flores_path":null,
|
7989 |
"fleurs_tag":null,
|
7990 |
"commonvoice_hours":null,
|
|
|
7992 |
"in_benchmark":false
|
7993 |
},
|
7994 |
{
|
7995 |
+
"bcp_47":"jbo",
|
7996 |
"speakers":0,
|
7997 |
+
"language_name":"Lojban",
|
7998 |
+
"autonym":"La .Lojban.",
|
7999 |
+
"family":"Artificial Language",
|
8000 |
"flores_path":null,
|
8001 |
"fleurs_tag":null,
|
8002 |
+
"commonvoice_hours":0.0,
|
8003 |
+
"commonvoice_locale":"jbo",
|
8004 |
"in_benchmark":false
|
8005 |
},
|
8006 |
{
|
8007 |
+
"bcp_47":"jut",
|
8008 |
"speakers":0,
|
8009 |
+
"language_name":"Jutish",
|
8010 |
+
"autonym":"Jutish",
|
8011 |
"family":"Indo-European",
|
8012 |
"flores_path":null,
|
8013 |
"fleurs_tag":null,
|
|
|
8016 |
"in_benchmark":false
|
8017 |
},
|
8018 |
{
|
8019 |
+
"bcp_47":"vot",
|
8020 |
"speakers":0,
|
8021 |
+
"language_name":"Votic",
|
8022 |
+
"autonym":"Votic",
|
8023 |
+
"family":"Uralic",
|
8024 |
"flores_path":null,
|
8025 |
"fleurs_tag":null,
|
8026 |
+
"commonvoice_hours":0.1,
|
8027 |
+
"commonvoice_locale":"vot",
|
8028 |
"in_benchmark":false
|
8029 |
},
|
8030 |
{
|
8031 |
+
"bcp_47":"gez",
|
8032 |
"speakers":0,
|
8033 |
+
"language_name":"Geez",
|
8034 |
+
"autonym":"Geez",
|
8035 |
+
"family":"Afro-Asiatic",
|
8036 |
"flores_path":null,
|
8037 |
"fleurs_tag":null,
|
8038 |
"commonvoice_hours":null,
|
|
|
8040 |
"in_benchmark":false
|
8041 |
},
|
8042 |
{
|
8043 |
+
"bcp_47":"osa",
|
8044 |
"speakers":0,
|
8045 |
+
"language_name":"Osage",
|
8046 |
+
"autonym":"𐓏𐓘𐓻𐓘𐓻𐓟",
|
8047 |
+
"family":"Siouan",
|
8048 |
"flores_path":null,
|
8049 |
"fleurs_tag":null,
|
8050 |
+
"commonvoice_hours":null,
|
8051 |
+
"commonvoice_locale":null,
|
8052 |
"in_benchmark":false
|
8053 |
},
|
8054 |
{
|
8055 |
+
"bcp_47":"rgn",
|
8056 |
"speakers":0,
|
8057 |
+
"language_name":"Romagnol",
|
8058 |
+
"autonym":"Romagnol",
|
8059 |
+
"family":"Indo-European",
|
8060 |
"flores_path":null,
|
8061 |
"fleurs_tag":null,
|
8062 |
"commonvoice_hours":null,
|
|
|
8064 |
"in_benchmark":false
|
8065 |
},
|
8066 |
{
|
8067 |
+
"bcp_47":"cu",
|
8068 |
"speakers":0,
|
8069 |
+
"language_name":"Church Slavic",
|
8070 |
+
"autonym":"Church Slavic",
|
8071 |
"family":"Indo-European",
|
8072 |
"flores_path":null,
|
8073 |
"fleurs_tag":null,
|
|
|
8076 |
"in_benchmark":false
|
8077 |
},
|
8078 |
{
|
8079 |
+
"bcp_47":"sgs",
|
8080 |
"speakers":0,
|
8081 |
+
"language_name":"Samogitian",
|
8082 |
+
"autonym":"Samogitian",
|
8083 |
+
"family":"Indo-European",
|
8084 |
"flores_path":null,
|
8085 |
"fleurs_tag":null,
|
8086 |
"commonvoice_hours":null,
|
|
|
8088 |
"in_benchmark":false
|
8089 |
},
|
8090 |
{
|
8091 |
+
"bcp_47":"ann",
|
8092 |
"speakers":0,
|
8093 |
+
"language_name":"Obolo",
|
8094 |
+
"autonym":"Obolo",
|
8095 |
+
"family":"Atlantic-Congo",
|
8096 |
"flores_path":null,
|
8097 |
"fleurs_tag":null,
|
8098 |
"commonvoice_hours":null,
|
|
|
8112 |
"in_benchmark":false
|
8113 |
},
|
8114 |
{
|
8115 |
+
"bcp_47":"cad",
|
8116 |
"speakers":0,
|
8117 |
+
"language_name":"Caddo",
|
8118 |
+
"autonym":"Caddo",
|
8119 |
+
"family":"Caddoan",
|
8120 |
"flores_path":null,
|
8121 |
"fleurs_tag":null,
|
8122 |
+
"commonvoice_hours":null,
|
8123 |
+
"commonvoice_locale":null,
|
8124 |
"in_benchmark":false
|
8125 |
},
|
8126 |
{
|
8127 |
+
"bcp_47":"pfl",
|
8128 |
"speakers":0,
|
8129 |
+
"language_name":"Palatine German",
|
8130 |
+
"autonym":"Palatine German",
|
8131 |
+
"family":"Indo-European",
|
8132 |
"flores_path":null,
|
8133 |
"fleurs_tag":null,
|
8134 |
"commonvoice_hours":null,
|
models.json
CHANGED
@@ -1,157 +1,102 @@
|
|
1 |
[
|
2 |
{
|
3 |
-
"id":"
|
4 |
-
"name":"
|
5 |
-
"provider_name":"
|
6 |
-
"cost":0.
|
7 |
-
"hf_id":
|
8 |
-
"size":
|
9 |
-
"type":"
|
10 |
-
"license":
|
11 |
-
"creation_date":
|
12 |
},
|
13 |
{
|
14 |
-
"id":"
|
15 |
-
"name":"
|
16 |
-
"provider_name":"
|
17 |
"cost":0.0,
|
18 |
-
"hf_id":"
|
19 |
-
"size":
|
20 |
"type":"Open",
|
21 |
-
"license":"
|
22 |
-
"creation_date":
|
23 |
},
|
24 |
{
|
25 |
-
"id":"
|
26 |
-
"name":"
|
27 |
-
"provider_name":"
|
28 |
-
"cost":0.
|
29 |
-
"hf_id":"
|
30 |
-
"size":
|
31 |
"type":"Open",
|
32 |
-
"license":"
|
33 |
-
"creation_date":
|
34 |
},
|
35 |
{
|
36 |
-
"id":"
|
37 |
-
"name":"
|
38 |
-
"provider_name":"
|
39 |
-
"cost":0.
|
40 |
-
"hf_id":"
|
41 |
-
"size":
|
42 |
"type":"Open",
|
43 |
-
"license":"
|
44 |
-
"creation_date":
|
45 |
-
},
|
46 |
-
{
|
47 |
-
"id":"openai\/gpt-4.1-mini",
|
48 |
-
"name":"GPT-4.1 Mini",
|
49 |
-
"provider_name":"OpenAI",
|
50 |
-
"cost":1.6,
|
51 |
-
"hf_id":null,
|
52 |
-
"size":null,
|
53 |
-
"type":"Commercial",
|
54 |
-
"license":null,
|
55 |
-
"creation_date":1744588800000
|
56 |
},
|
57 |
{
|
58 |
-
"id":"
|
59 |
-
"name":"
|
60 |
-
"provider_name":"
|
61 |
"cost":0.4,
|
62 |
"hf_id":null,
|
63 |
"size":null,
|
64 |
"type":"Commercial",
|
65 |
"license":null,
|
66 |
-
"creation_date":
|
67 |
-
},
|
68 |
-
{
|
69 |
-
"id":"openai\/gpt-4o-mini",
|
70 |
-
"name":"GPT-4o-mini",
|
71 |
-
"provider_name":"OpenAI",
|
72 |
-
"cost":0.6,
|
73 |
-
"hf_id":null,
|
74 |
-
"size":null,
|
75 |
-
"type":"Commercial",
|
76 |
-
"license":null,
|
77 |
-
"creation_date":1721260800000
|
78 |
-
},
|
79 |
-
{
|
80 |
-
"id":"openai\/gpt-3.5-turbo-0613",
|
81 |
-
"name":"GPT-3.5 Turbo (older v0613)",
|
82 |
-
"provider_name":"OpenAI",
|
83 |
-
"cost":2.0,
|
84 |
-
"hf_id":null,
|
85 |
-
"size":null,
|
86 |
-
"type":"Commercial",
|
87 |
-
"license":null,
|
88 |
-
"creation_date":1706140800000
|
89 |
},
|
90 |
{
|
91 |
-
"id":"
|
92 |
-
"name":"
|
93 |
-
"provider_name":"
|
94 |
-
"cost":
|
95 |
"hf_id":null,
|
96 |
"size":null,
|
97 |
"type":"Commercial",
|
98 |
"license":null,
|
99 |
-
"creation_date":
|
100 |
-
},
|
101 |
-
{
|
102 |
-
"id":"mistralai\/mistral-small-3.1-24b-instruct",
|
103 |
-
"name":"Mistral Small 3.1 24B (free)",
|
104 |
-
"provider_name":"Mistral",
|
105 |
-
"cost":0.0,
|
106 |
-
"hf_id":"mistralai\/Mistral-Small-3.1-24B-Instruct-2503",
|
107 |
-
"size":24011361280.0,
|
108 |
-
"type":"Open",
|
109 |
-
"license":"Apache 2.0",
|
110 |
-
"creation_date":1741651200000
|
111 |
},
|
112 |
{
|
113 |
-
"id":"
|
114 |
-
"name":"
|
115 |
-
"provider_name":"
|
116 |
"cost":0.6,
|
117 |
"hf_id":null,
|
118 |
"size":null,
|
119 |
"type":"Commercial",
|
120 |
"license":null,
|
121 |
-
"creation_date":
|
122 |
-
},
|
123 |
-
{
|
124 |
-
"id":"mistralai\/mistral-nemo",
|
125 |
-
"name":"Mistral Nemo (free)",
|
126 |
-
"provider_name":"Mistral",
|
127 |
-
"cost":0.0,
|
128 |
-
"hf_id":"mistralai\/Mistral-Nemo-Instruct-2407",
|
129 |
-
"size":12247782400.0,
|
130 |
-
"type":"Open",
|
131 |
-
"license":"Apache 2.0",
|
132 |
-
"creation_date":1721174400000
|
133 |
},
|
134 |
{
|
135 |
-
"id":"google\/gemini-
|
136 |
-
"name":"Gemini
|
137 |
"provider_name":"Google",
|
138 |
-
"cost":0.
|
139 |
"hf_id":null,
|
140 |
"size":null,
|
141 |
"type":"Commercial",
|
142 |
"license":null,
|
143 |
-
"creation_date":
|
144 |
},
|
145 |
{
|
146 |
-
"id":"google\/gemini-
|
147 |
-
"name":"Gemini
|
148 |
"provider_name":"Google",
|
149 |
-
"cost":0.
|
150 |
"hf_id":null,
|
151 |
"size":null,
|
152 |
"type":"Commercial",
|
153 |
"license":null,
|
154 |
-
"creation_date":
|
155 |
},
|
156 |
{
|
157 |
"id":"google\/gemma-3-27b-it",
|
@@ -165,26 +110,48 @@
|
|
165 |
"creation_date":1740787200000
|
166 |
},
|
167 |
{
|
168 |
-
"id":"
|
169 |
-
"name":"
|
170 |
-
"provider_name":"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
"cost":0.0,
|
172 |
-
"hf_id":"
|
173 |
-
"size":
|
174 |
"type":"Open",
|
175 |
-
"license":"
|
176 |
-
"creation_date":
|
177 |
},
|
178 |
{
|
179 |
-
"id":"
|
180 |
-
"name":"
|
181 |
-
"provider_name":"
|
182 |
"cost":0.0,
|
183 |
-
"hf_id":"
|
184 |
-
"size":
|
185 |
"type":"Open",
|
186 |
-
"license":"",
|
187 |
-
"creation_date":
|
188 |
},
|
189 |
{
|
190 |
"id":"microsoft\/phi-4",
|
@@ -209,14 +176,91 @@
|
|
209 |
"creation_date":1740355200000
|
210 |
},
|
211 |
{
|
212 |
-
"id":"
|
213 |
-
"name":"
|
214 |
-
"provider_name":"
|
215 |
-
"cost":0.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
216 |
"hf_id":null,
|
217 |
"size":null,
|
218 |
"type":"Commercial",
|
219 |
"license":null,
|
220 |
-
"creation_date":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
221 |
}
|
222 |
]
|
|
|
1 |
[
|
2 |
{
|
3 |
+
"id":"amazon\/nova-micro-v1",
|
4 |
+
"name":"Nova Micro 1.0",
|
5 |
+
"provider_name":"Amazon",
|
6 |
+
"cost":0.14,
|
7 |
+
"hf_id":null,
|
8 |
+
"size":null,
|
9 |
+
"type":"Commercial",
|
10 |
+
"license":null,
|
11 |
+
"creation_date":1733356800000
|
12 |
},
|
13 |
{
|
14 |
+
"id":"deepseek\/deepseek-chat",
|
15 |
+
"name":"DeepSeek V3 (free)",
|
16 |
+
"provider_name":"DeepSeek",
|
17 |
"cost":0.0,
|
18 |
+
"hf_id":"deepseek-ai\/DeepSeek-V3",
|
19 |
+
"size":684531386000.0,
|
20 |
"type":"Open",
|
21 |
+
"license":"",
|
22 |
+
"creation_date":1735084800000
|
23 |
},
|
24 |
{
|
25 |
+
"id":"deepseek\/deepseek-chat-v3-0324",
|
26 |
+
"name":"DeepSeek V3 0324 (free)",
|
27 |
+
"provider_name":"DeepSeek",
|
28 |
+
"cost":0.0,
|
29 |
+
"hf_id":"deepseek-ai\/DeepSeek-V3-0324",
|
30 |
+
"size":684531386000.0,
|
31 |
"type":"Open",
|
32 |
+
"license":"Mit",
|
33 |
+
"creation_date":1742774400000
|
34 |
},
|
35 |
{
|
36 |
+
"id":"deepseek\/deepseek-r1",
|
37 |
+
"name":"R1 (free)",
|
38 |
+
"provider_name":"DeepSeek",
|
39 |
+
"cost":0.0,
|
40 |
+
"hf_id":"deepseek-ai\/DeepSeek-R1",
|
41 |
+
"size":684531386000.0,
|
42 |
"type":"Open",
|
43 |
+
"license":"Mit",
|
44 |
+
"creation_date":1737331200000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
},
|
46 |
{
|
47 |
+
"id":"google\/gemini-2.0-flash-001",
|
48 |
+
"name":"Gemini 2.0 Flash",
|
49 |
+
"provider_name":"Google",
|
50 |
"cost":0.4,
|
51 |
"hf_id":null,
|
52 |
"size":null,
|
53 |
"type":"Commercial",
|
54 |
"license":null,
|
55 |
+
"creation_date":1738713600000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
},
|
57 |
{
|
58 |
+
"id":"google\/gemini-2.0-flash-lite-001",
|
59 |
+
"name":"Gemini 2.0 Flash Lite",
|
60 |
+
"provider_name":"Google",
|
61 |
+
"cost":0.3,
|
62 |
"hf_id":null,
|
63 |
"size":null,
|
64 |
"type":"Commercial",
|
65 |
"license":null,
|
66 |
+
"creation_date":1740441600000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
},
|
68 |
{
|
69 |
+
"id":"google\/gemini-2.5-flash-preview",
|
70 |
+
"name":"Gemini 2.5 Flash Preview",
|
71 |
+
"provider_name":"Google",
|
72 |
"cost":0.6,
|
73 |
"hf_id":null,
|
74 |
"size":null,
|
75 |
"type":"Commercial",
|
76 |
"license":null,
|
77 |
+
"creation_date":1744848000000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
},
|
79 |
{
|
80 |
+
"id":"google\/gemini-flash-1.5",
|
81 |
+
"name":"Gemini 1.5 Flash ",
|
82 |
"provider_name":"Google",
|
83 |
+
"cost":0.3,
|
84 |
"hf_id":null,
|
85 |
"size":null,
|
86 |
"type":"Commercial",
|
87 |
"license":null,
|
88 |
+
"creation_date":1715644800000
|
89 |
},
|
90 |
{
|
91 |
+
"id":"google\/gemini-flash-1.5-8b",
|
92 |
+
"name":"Gemini 1.5 Flash 8B",
|
93 |
"provider_name":"Google",
|
94 |
+
"cost":0.15,
|
95 |
"hf_id":null,
|
96 |
"size":null,
|
97 |
"type":"Commercial",
|
98 |
"license":null,
|
99 |
+
"creation_date":1727913600000
|
100 |
},
|
101 |
{
|
102 |
"id":"google\/gemma-3-27b-it",
|
|
|
110 |
"creation_date":1740787200000
|
111 |
},
|
112 |
{
|
113 |
+
"id":"meta-llama\/llama-3-70b-instruct",
|
114 |
+
"name":"Llama 3 70B Instruct",
|
115 |
+
"provider_name":"Meta",
|
116 |
+
"cost":0.4,
|
117 |
+
"hf_id":"meta-llama\/Meta-Llama-3-70B-Instruct",
|
118 |
+
"size":70553706496.0,
|
119 |
+
"type":"Open",
|
120 |
+
"license":"Llama3",
|
121 |
+
"creation_date":1713312000000
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"id":"meta-llama\/llama-3.1-70b-instruct",
|
125 |
+
"name":"Llama 3.1 70B Instruct",
|
126 |
+
"provider_name":"Meta",
|
127 |
+
"cost":0.28,
|
128 |
+
"hf_id":"meta-llama\/Llama-3.1-70B-Instruct",
|
129 |
+
"size":70553706496.0,
|
130 |
+
"type":"Open",
|
131 |
+
"license":"Llama3.1",
|
132 |
+
"creation_date":1721088000000
|
133 |
+
},
|
134 |
+
{
|
135 |
+
"id":"meta-llama\/llama-3.3-70b-instruct",
|
136 |
+
"name":"Llama 3.3 70B Instruct (free)",
|
137 |
+
"provider_name":"Meta",
|
138 |
"cost":0.0,
|
139 |
+
"hf_id":"meta-llama\/Llama-3.3-70B-Instruct",
|
140 |
+
"size":70553706496.0,
|
141 |
"type":"Open",
|
142 |
+
"license":"Llama3.3",
|
143 |
+
"creation_date":1732579200000
|
144 |
},
|
145 |
{
|
146 |
+
"id":"meta-llama\/llama-4-maverick",
|
147 |
+
"name":"Llama 4 Maverick (free)",
|
148 |
+
"provider_name":"Meta",
|
149 |
"cost":0.0,
|
150 |
+
"hf_id":"meta-llama\/Llama-4-Maverick-17B-128E-Instruct",
|
151 |
+
"size":401583781376.0,
|
152 |
"type":"Open",
|
153 |
+
"license":"Other",
|
154 |
+
"creation_date":1743465600000
|
155 |
},
|
156 |
{
|
157 |
"id":"microsoft\/phi-4",
|
|
|
176 |
"creation_date":1740355200000
|
177 |
},
|
178 |
{
|
179 |
+
"id":"mistralai\/mistral-nemo",
|
180 |
+
"name":"Mistral Nemo (free)",
|
181 |
+
"provider_name":"Mistral",
|
182 |
+
"cost":0.0,
|
183 |
+
"hf_id":"mistralai\/Mistral-Nemo-Instruct-2407",
|
184 |
+
"size":12247782400.0,
|
185 |
+
"type":"Open",
|
186 |
+
"license":"Apache 2.0",
|
187 |
+
"creation_date":1721174400000
|
188 |
+
},
|
189 |
+
{
|
190 |
+
"id":"mistralai\/mistral-saba",
|
191 |
+
"name":"Saba",
|
192 |
+
"provider_name":"Mistral",
|
193 |
+
"cost":0.6,
|
194 |
"hf_id":null,
|
195 |
"size":null,
|
196 |
"type":"Commercial",
|
197 |
"license":null,
|
198 |
+
"creation_date":1739750400000
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"id":"mistralai\/mistral-small-3.1-24b-instruct",
|
202 |
+
"name":"Mistral Small 3.1 24B (free)",
|
203 |
+
"provider_name":"Mistral",
|
204 |
+
"cost":0.0,
|
205 |
+
"hf_id":"mistralai\/Mistral-Small-3.1-24B-Instruct-2503",
|
206 |
+
"size":24011361280.0,
|
207 |
+
"type":"Open",
|
208 |
+
"license":"Apache 2.0",
|
209 |
+
"creation_date":1741651200000
|
210 |
+
},
|
211 |
+
{
|
212 |
+
"id":"openai\/gpt-3.5-turbo",
|
213 |
+
"name":"GPT-3.5 Turbo",
|
214 |
+
"provider_name":"OpenAI",
|
215 |
+
"cost":1.5,
|
216 |
+
"hf_id":null,
|
217 |
+
"size":null,
|
218 |
+
"type":"Commercial",
|
219 |
+
"license":null,
|
220 |
+
"creation_date":1685232000000
|
221 |
+
},
|
222 |
+
{
|
223 |
+
"id":"openai\/gpt-3.5-turbo-0613",
|
224 |
+
"name":"GPT-3.5 Turbo (older v0613)",
|
225 |
+
"provider_name":"OpenAI",
|
226 |
+
"cost":2.0,
|
227 |
+
"hf_id":null,
|
228 |
+
"size":null,
|
229 |
+
"type":"Commercial",
|
230 |
+
"license":null,
|
231 |
+
"creation_date":1706140800000
|
232 |
+
},
|
233 |
+
{
|
234 |
+
"id":"openai\/gpt-4.1-mini",
|
235 |
+
"name":"GPT-4.1 Mini",
|
236 |
+
"provider_name":"OpenAI",
|
237 |
+
"cost":1.6,
|
238 |
+
"hf_id":null,
|
239 |
+
"size":null,
|
240 |
+
"type":"Commercial",
|
241 |
+
"license":null,
|
242 |
+
"creation_date":1744588800000
|
243 |
+
},
|
244 |
+
{
|
245 |
+
"id":"openai\/gpt-4.1-nano",
|
246 |
+
"name":"GPT-4.1 Nano",
|
247 |
+
"provider_name":"OpenAI",
|
248 |
+
"cost":0.4,
|
249 |
+
"hf_id":null,
|
250 |
+
"size":null,
|
251 |
+
"type":"Commercial",
|
252 |
+
"license":null,
|
253 |
+
"creation_date":1744588800000
|
254 |
+
},
|
255 |
+
{
|
256 |
+
"id":"openai\/gpt-4o-mini",
|
257 |
+
"name":"GPT-4o-mini",
|
258 |
+
"provider_name":"OpenAI",
|
259 |
+
"cost":0.6,
|
260 |
+
"hf_id":null,
|
261 |
+
"size":null,
|
262 |
+
"type":"Commercial",
|
263 |
+
"license":null,
|
264 |
+
"creation_date":1721260800000
|
265 |
}
|
266 |
]
|
results.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|