David Pomerenke
commited on
Commit
·
6b6f157
1
Parent(s):
86b8b3a
Display all languages and translate from multiple languages
Browse files- bibliography.bib +6 -0
- index.html +15 -18
- languagebench.py +131 -75
- pyproject.toml +5 -0
- results.json +0 -0
- results_summary.json +0 -1202
- uv.lock +119 -0
bibliography.bib
CHANGED
@@ -243,6 +243,12 @@
|
|
243 |
file = {/Users/david/Zotero/storage/VU6IFENR/Siminyu et al. - 2021 - AI4D -- African Language Program.pdf;/Users/david/Zotero/storage/7TV2PS8J/2104.html}
|
244 |
}
|
245 |
|
|
|
|
|
|
|
|
|
|
|
|
|
246 |
@misc{teamNoLanguageLeft2022,
|
247 |
title = {No {{Language Left Behind}}: {{Scaling Human-Centered Machine Translation}}},
|
248 |
shorttitle = {No {{Language Left Behind}}},
|
|
|
243 |
file = {/Users/david/Zotero/storage/VU6IFENR/Siminyu et al. - 2021 - AI4D -- African Language Program.pdf;/Users/david/Zotero/storage/7TV2PS8J/2104.html}
|
244 |
}
|
245 |
|
246 |
+
@misc{Tatoeba,
|
247 |
+
title = {Tatoeba},
|
248 |
+
urldate = {2024-11-03},
|
249 |
+
file = {/Users/david/Zotero/storage/4NDTCGWG/sentences_by_language.html}
|
250 |
+
}
|
251 |
+
|
252 |
@misc{teamNoLanguageLeft2022,
|
253 |
title = {No {{Language Left Behind}}: {{Scaling Human-Centered Machine Translation}}},
|
254 |
shorttitle = {No {{Language Left Behind}}},
|
index.html
CHANGED
@@ -39,42 +39,43 @@
|
|
39 |
const scoreName = "BLEU Score"
|
40 |
const chartsDiv = document.getElementById('charts');
|
41 |
|
42 |
-
const
|
43 |
-
const
|
44 |
// Format captions
|
45 |
-
const
|
|
|
46 |
|
47 |
// Create summary plot
|
48 |
const summaryPlot = Plot.plot({
|
49 |
-
width:
|
50 |
height: 400,
|
51 |
marginBottom: 100,
|
52 |
x: { label: "Number of speakers", axis: null },
|
53 |
y: { label: `${scoreName} (average across models)` },
|
54 |
// color: { scheme: "BrBG" },
|
55 |
marks: [
|
56 |
-
Plot.rectY(
|
57 |
x: "speakers",
|
58 |
order: scoreKey,
|
59 |
reverse: true,
|
60 |
y2: scoreKey, // y2 to avoid stacking by y
|
61 |
title: formatTitle,
|
62 |
tip: true,
|
63 |
-
|
64 |
})),
|
65 |
-
Plot.rectY(
|
66 |
x: "speakers",
|
67 |
order: scoreKey,
|
68 |
reverse: true,
|
69 |
y2: scoreKey, // y2 to avoid stacking by y
|
70 |
fill: "grey",
|
71 |
}))),
|
72 |
-
Plot.text(
|
73 |
x: "speakers",
|
74 |
y2: scoreKey,
|
75 |
order: scoreKey,
|
76 |
reverse: true,
|
77 |
-
text: "
|
78 |
frameAnchor: "bottom",
|
79 |
textAnchor: "end",
|
80 |
dy: 10,
|
@@ -87,14 +88,11 @@
|
|
87 |
// Add summary plot at the top
|
88 |
chartsDiv.insertBefore(summaryPlot, chartsDiv.firstChild);
|
89 |
|
90 |
-
const response = await fetch('results.json');
|
91 |
-
const results = await response.json();
|
92 |
-
|
93 |
// Get unique languages with their speaker counts
|
94 |
const languageMap = new Map();
|
95 |
-
|
96 |
-
if (!languageMap.has(r.
|
97 |
-
languageMap.set(r.
|
98 |
}
|
99 |
});
|
100 |
|
@@ -122,7 +120,7 @@
|
|
122 |
headerDiv.appendChild(speakerP);
|
123 |
chartsDiv.appendChild(headerDiv);
|
124 |
|
125 |
-
const languageData =
|
126 |
|
127 |
const descriptor = code => {
|
128 |
let [org, model] = code.split("/")
|
@@ -130,8 +128,7 @@
|
|
130 |
}
|
131 |
|
132 |
// Plot for how well the models perform on this language
|
133 |
-
if (languageData.length >
|
134 |
-
console.log(languageData);
|
135 |
const plot = Plot.plot({
|
136 |
width: 400,
|
137 |
height: 200,
|
|
|
39 |
const scoreName = "BLEU Score"
|
40 |
const chartsDiv = document.getElementById('charts');
|
41 |
|
42 |
+
const response = await fetch('results.json');
|
43 |
+
const data = await response.json();
|
44 |
// Format captions
|
45 |
+
const formatScore = (score) => score > 0 ? score.toFixed(2) : "No benchmark available!"
|
46 |
+
const formatTitle = d => (d.language_name + "\n" + parseInt(d.speakers / 1_000_00) / 10 + "M speakers\n" + scoreName + ": " + formatScore(d[scoreKey]))
|
47 |
|
48 |
// Create summary plot
|
49 |
const summaryPlot = Plot.plot({
|
50 |
+
width: chartsDiv.clientWidth,
|
51 |
height: 400,
|
52 |
marginBottom: 100,
|
53 |
x: { label: "Number of speakers", axis: null },
|
54 |
y: { label: `${scoreName} (average across models)` },
|
55 |
// color: { scheme: "BrBG" },
|
56 |
marks: [
|
57 |
+
Plot.rectY(data, Plot.stackX({
|
58 |
x: "speakers",
|
59 |
order: scoreKey,
|
60 |
reverse: true,
|
61 |
y2: scoreKey, // y2 to avoid stacking by y
|
62 |
title: formatTitle,
|
63 |
tip: true,
|
64 |
+
fill: d => d[scoreKey] > 0 ? "black" : "pink"
|
65 |
})),
|
66 |
+
Plot.rectY(data, Plot.pointerX(Plot.stackX({
|
67 |
x: "speakers",
|
68 |
order: scoreKey,
|
69 |
reverse: true,
|
70 |
y2: scoreKey, // y2 to avoid stacking by y
|
71 |
fill: "grey",
|
72 |
}))),
|
73 |
+
Plot.text(data, Plot.stackX({
|
74 |
x: "speakers",
|
75 |
y2: scoreKey,
|
76 |
order: scoreKey,
|
77 |
reverse: true,
|
78 |
+
text: "language_name",
|
79 |
frameAnchor: "bottom",
|
80 |
textAnchor: "end",
|
81 |
dy: 10,
|
|
|
88 |
// Add summary plot at the top
|
89 |
chartsDiv.insertBefore(summaryPlot, chartsDiv.firstChild);
|
90 |
|
|
|
|
|
|
|
91 |
// Get unique languages with their speaker counts
|
92 |
const languageMap = new Map();
|
93 |
+
data.forEach(r => {
|
94 |
+
if (!languageMap.has(r.language_name)) {
|
95 |
+
languageMap.set(r.language_name, r.speakers);
|
96 |
}
|
97 |
});
|
98 |
|
|
|
120 |
headerDiv.appendChild(speakerP);
|
121 |
chartsDiv.appendChild(headerDiv);
|
122 |
|
123 |
+
const languageData = data.filter(r => r.language_name === language)[0]["scores"];
|
124 |
|
125 |
const descriptor = code => {
|
126 |
let [org, model] = code.split("/")
|
|
|
128 |
}
|
129 |
|
130 |
// Plot for how well the models perform on this language
|
131 |
+
if (languageData && languageData.length > 1) {
|
|
|
132 |
const plot = Plot.plot({
|
133 |
width: 400,
|
134 |
height: 200,
|
languagebench.py
CHANGED
@@ -1,7 +1,6 @@
|
|
1 |
import asyncio
|
2 |
import json
|
3 |
import os
|
4 |
-
import random
|
5 |
from os import getenv
|
6 |
|
7 |
import evaluate
|
@@ -12,22 +11,19 @@ from dotenv import load_dotenv
|
|
12 |
from joblib.memory import Memory
|
13 |
from openai import AsyncOpenAI
|
14 |
from tqdm.asyncio import tqdm_asyncio
|
|
|
15 |
|
16 |
# config
|
17 |
models = [
|
18 |
-
"openai/gpt-4o
|
19 |
"anthropic/claude-3.5-sonnet",
|
20 |
-
"meta-llama/llama-3.1-
|
21 |
-
"mistralai/mistral-
|
22 |
# "google/gemini-flash-1.5", # very fast
|
23 |
"qwen/qwen-2.5-72b-instruct", # somewhat slow
|
24 |
]
|
25 |
fast_model = "anthropic/claude-3.5-sonnet"
|
26 |
-
|
27 |
-
dataset = "floresp-v2.0-rc.3/dev"
|
28 |
-
random.seed(42)
|
29 |
-
target_languages = [f.split(".")[1] for f in os.listdir(dataset)]
|
30 |
-
detailed_target_languages = random.choices(target_languages, k=5)
|
31 |
|
32 |
# setup
|
33 |
load_dotenv()
|
@@ -36,9 +32,10 @@ client = AsyncOpenAI(
|
|
36 |
api_key=getenv("OPENROUTER_API_KEY"),
|
37 |
)
|
38 |
cache = Memory(location=".cache", verbose=0).cache
|
39 |
-
bleu = evaluate.load("
|
40 |
bertscore = evaluate.load("bertscore")
|
41 |
-
|
|
|
42 |
|
43 |
|
44 |
def reorder(language_name):
|
@@ -47,10 +44,65 @@ def reorder(language_name):
|
|
47 |
return language_name
|
48 |
|
49 |
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
|
55 |
|
56 |
# utils
|
@@ -94,73 +146,77 @@ async def translate(model, target_language, target_script, sentence):
|
|
94 |
return reply.choices[0].message.content
|
95 |
|
96 |
|
97 |
-
def
|
98 |
-
|
99 |
-
script = script.split("_", 1)[0]
|
100 |
-
stats = language_stats[language_stats["iso639_3"] == lang]
|
101 |
-
if not stats.empty:
|
102 |
-
stats = stats.iloc[0].to_dict()
|
103 |
-
else:
|
104 |
-
stats = dict()
|
105 |
-
stats["script"] = script_names[script_names["Code"] == script]["English Name"].iloc[
|
106 |
-
0
|
107 |
-
]
|
108 |
-
name_series = language_names[language_names["LangID"] == lang]["Name"]
|
109 |
-
stats["name"] = (
|
110 |
-
name_series.iloc[0]
|
111 |
-
if not name_series.empty
|
112 |
-
else stats.get("itemLabel_en") or stats.get("itemLabel", lang)
|
113 |
-
)
|
114 |
-
return stats
|
115 |
|
116 |
|
117 |
-
def
|
118 |
-
return
|
|
|
|
|
119 |
|
120 |
|
121 |
# evaluation!
|
122 |
async def main():
|
123 |
-
n = 30
|
124 |
results = []
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
164 |
|
165 |
|
166 |
if __name__ == "__main__":
|
|
|
1 |
import asyncio
|
2 |
import json
|
3 |
import os
|
|
|
4 |
from os import getenv
|
5 |
|
6 |
import evaluate
|
|
|
11 |
from joblib.memory import Memory
|
12 |
from openai import AsyncOpenAI
|
13 |
from tqdm.asyncio import tqdm_asyncio
|
14 |
+
from transformers import NllbTokenizer
|
15 |
|
16 |
# config
|
17 |
models = [
|
18 |
+
"openai/gpt-4o",
|
19 |
"anthropic/claude-3.5-sonnet",
|
20 |
+
"meta-llama/llama-3.1-405b-instruct", # lots of slow repetitions for LRLs
|
21 |
+
"mistralai/mistral-large",
|
22 |
# "google/gemini-flash-1.5", # very fast
|
23 |
"qwen/qwen-2.5-72b-instruct", # somewhat slow
|
24 |
]
|
25 |
fast_model = "anthropic/claude-3.5-sonnet"
|
26 |
+
n_sentences = 30
|
|
|
|
|
|
|
|
|
27 |
|
28 |
# setup
|
29 |
load_dotenv()
|
|
|
32 |
api_key=getenv("OPENROUTER_API_KEY"),
|
33 |
)
|
34 |
cache = Memory(location=".cache", verbose=0).cache
|
35 |
+
bleu = evaluate.load("bleu")
|
36 |
bertscore = evaluate.load("bertscore")
|
37 |
+
tokenizer = NllbTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
|
38 |
+
rate_limit = AsyncLimiter(max_rate=20, time_period=1)
|
39 |
|
40 |
|
41 |
def reorder(language_name):
|
|
|
44 |
return language_name
|
45 |
|
46 |
|
47 |
+
# load benchmark languages and scripts
|
48 |
+
benchmark_dir = "floresp-v2.0-rc.3/dev"
|
49 |
+
benchmark_languages = pd.DataFrame(
|
50 |
+
[f.split(".")[1].split("_", 1) for f in os.listdir(benchmark_dir)],
|
51 |
+
columns=["language_code", "script_code"],
|
52 |
+
)
|
53 |
+
# hack: drop additional script codes for languages with multiple scripts
|
54 |
+
benchmark_languages = benchmark_languages.groupby("language_code").head(1)
|
55 |
+
benchmark_languages["in_benchmark"] = True
|
56 |
+
|
57 |
+
# load Ethnologue language names
|
58 |
+
language_names = (
|
59 |
+
pd.read_csv("LanguageCodes.tab", sep="\t")
|
60 |
+
.rename(columns={"LangID": "language_code", "Name": "language_name"})[
|
61 |
+
["language_code", "language_name"]
|
62 |
+
]
|
63 |
+
.assign(language_name=lambda df: df["language_name"].apply(reorder).str.strip())
|
64 |
+
)
|
65 |
+
|
66 |
+
# load Wikidata speaker stats
|
67 |
+
language_stats = (
|
68 |
+
pd.read_csv("languages.tsv", sep="\t")
|
69 |
+
.rename(columns={"iso639_3": "language_code", "maxSpeakers": "speakers"})[
|
70 |
+
["language_code", "speakers"]
|
71 |
+
]
|
72 |
+
.dropna(subset=["language_code"])
|
73 |
+
)
|
74 |
+
language_stats["speakers"] = pd.to_numeric(language_stats["speakers"], errors="coerce")
|
75 |
+
ignored_languages = [
|
76 |
+
"zho", # Chinese -> use Mandarin (cmn) instead
|
77 |
+
"ara", # Arabic -> use Standard Arabic (arb) instead
|
78 |
+
"pus", # Pashto -> use Nothern / Central / Southern Pashto instead (pbt / pst / pbu)
|
79 |
+
"fas", # Persian -> use Iranian Persian (pes) instead
|
80 |
+
"msa", # Malay -> use Indonesian (ind) instead
|
81 |
+
]
|
82 |
+
language_stats = language_stats[
|
83 |
+
~language_stats["language_code"].isin(ignored_languages)
|
84 |
+
]
|
85 |
+
|
86 |
+
# load unicode script names
|
87 |
+
script_names = pd.read_csv("ScriptCodes.csv").rename(
|
88 |
+
columns={"Code": "script_code", "English Name": "script_name"}
|
89 |
+
)[["script_code", "script_name"]]
|
90 |
+
|
91 |
+
# merge data
|
92 |
+
languages = pd.merge(language_stats, language_names, on="language_code", how="outer")
|
93 |
+
languages = pd.merge(benchmark_languages, languages, on="language_code", how="outer")
|
94 |
+
languages = pd.merge(languages, script_names, on="script_code", how="left")
|
95 |
+
languages["in_benchmark"] = languages["in_benchmark"].fillna(False)
|
96 |
+
languages = languages.sort_values(by="speakers", ascending=False)
|
97 |
+
|
98 |
+
# sample languages to translate from
|
99 |
+
original_languages = languages[languages["in_benchmark"]].sample(
|
100 |
+
n=n_sentences, weights="speakers", replace=True, random_state=42
|
101 |
+
)
|
102 |
+
# sample languages to analyze with all models
|
103 |
+
detailed_target_languages = languages[languages["in_benchmark"]].sample(
|
104 |
+
n=25, random_state=42
|
105 |
+
)
|
106 |
|
107 |
|
108 |
# utils
|
|
|
146 |
return reply.choices[0].message.content
|
147 |
|
148 |
|
149 |
+
def mean(l):
|
150 |
+
return sum(l) / len(l) if l else 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
151 |
|
152 |
|
153 |
+
def load_sentences(language):
|
154 |
+
return open(
|
155 |
+
f"{benchmark_dir}/dev.{language.language_code}_{language.script_code}"
|
156 |
+
).readlines()
|
157 |
|
158 |
|
159 |
# evaluation!
|
160 |
async def main():
|
|
|
161 |
results = []
|
162 |
+
for language in languages.itertuples():
|
163 |
+
name = (
|
164 |
+
language.language_name
|
165 |
+
if not pd.isna(language.language_name)
|
166 |
+
else language.language_code
|
167 |
+
)
|
168 |
+
print(name)
|
169 |
+
scores = []
|
170 |
+
if language.in_benchmark:
|
171 |
+
target_sentences = load_sentences(language)[:n_sentences]
|
172 |
+
for model in models:
|
173 |
+
if (
|
174 |
+
model != fast_model
|
175 |
+
and language.language_code
|
176 |
+
not in detailed_target_languages.language_code.values
|
177 |
+
):
|
178 |
+
continue
|
179 |
+
original_sentences = [
|
180 |
+
load_sentences(lang)[i]
|
181 |
+
for i, lang in enumerate(original_languages.itertuples())
|
182 |
+
]
|
183 |
+
print(model)
|
184 |
+
predictions = [
|
185 |
+
translate(
|
186 |
+
model, language.language_name, language.script_name, sentence
|
187 |
+
)
|
188 |
+
for sentence in original_sentences
|
189 |
+
]
|
190 |
+
predictions = await tqdm_asyncio.gather(*predictions, miniters=1)
|
191 |
+
metrics_bleu = bleu.compute(
|
192 |
+
predictions=predictions,
|
193 |
+
references=target_sentences,
|
194 |
+
tokenizer=tokenizer.tokenize,
|
195 |
+
)
|
196 |
+
# metrics_bert = bertscore.compute(
|
197 |
+
# predictions=predictions,
|
198 |
+
# references=target_sentences,
|
199 |
+
# model_type="distilbert-base-uncased",
|
200 |
+
# )
|
201 |
+
scores.append(
|
202 |
+
{
|
203 |
+
"model": model,
|
204 |
+
"bleu": metrics_bleu["bleu"],
|
205 |
+
# "bert_score": mean(metrics_bert["f1"]),
|
206 |
+
}
|
207 |
+
)
|
208 |
+
results.append(
|
209 |
+
{
|
210 |
+
"language_name": name,
|
211 |
+
"language_code": language.language_code,
|
212 |
+
"speakers": language.speakers if not pd.isna(language.speakers) else 0,
|
213 |
+
"scores": scores,
|
214 |
+
"bleu": mean([s["bleu"] for s in scores]) or -0.02,
|
215 |
+
# "bert_score": mean([s["bert_score"] for s in scores]),
|
216 |
+
}
|
217 |
+
)
|
218 |
+
with open("results.json", "w") as f:
|
219 |
+
json.dump(results, f, indent=2, ensure_ascii=False)
|
220 |
|
221 |
|
222 |
if __name__ == "__main__":
|
pyproject.toml
CHANGED
@@ -9,9 +9,14 @@ dependencies = [
|
|
9 |
"bert-score>=0.3.13",
|
10 |
"evaluate>=0.4.3",
|
11 |
"joblib>=1.4.2",
|
|
|
12 |
"openai>=1.52.2",
|
13 |
"pandas>=2.2.3",
|
|
|
14 |
"python-dotenv>=1.0.1",
|
15 |
"sacrebleu>=2.4.3",
|
|
|
|
|
16 |
"tqdm>=4.66.6",
|
|
|
17 |
]
|
|
|
9 |
"bert-score>=0.3.13",
|
10 |
"evaluate>=0.4.3",
|
11 |
"joblib>=1.4.2",
|
12 |
+
"nltk>=3.9.1",
|
13 |
"openai>=1.52.2",
|
14 |
"pandas>=2.2.3",
|
15 |
+
"protobuf>=5.28.3",
|
16 |
"python-dotenv>=1.0.1",
|
17 |
"sacrebleu>=2.4.3",
|
18 |
+
"sentencepiece>=0.2.0",
|
19 |
+
"tiktoken>=0.8.0",
|
20 |
"tqdm>=4.66.6",
|
21 |
+
"transformers>=4.46.1",
|
22 |
]
|
results.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
results_summary.json
DELETED
@@ -1,1202 +0,0 @@
|
|
1 |
-
[
|
2 |
-
{
|
3 |
-
"target_language_name":"Aceh",
|
4 |
-
"bleu":39.1659660901,
|
5 |
-
"bert_score":0.8998966595,
|
6 |
-
"speakers":3500032.0
|
7 |
-
},
|
8 |
-
{
|
9 |
-
"target_language_name":"Afrikaans",
|
10 |
-
"bleu":76.8900540777,
|
11 |
-
"bert_score":0.9481831173,
|
12 |
-
"speakers":10300000.0
|
13 |
-
},
|
14 |
-
{
|
15 |
-
"target_language_name":"Amharic",
|
16 |
-
"bleu":43.1544568697,
|
17 |
-
"bert_score":0.989116921,
|
18 |
-
"speakers":25000000.0
|
19 |
-
},
|
20 |
-
{
|
21 |
-
"target_language_name":"Armenian",
|
22 |
-
"bleu":64.6804400806,
|
23 |
-
"bert_score":0.9550812801,
|
24 |
-
"speakers":6700000.0
|
25 |
-
},
|
26 |
-
{
|
27 |
-
"target_language_name":"Assamese",
|
28 |
-
"bleu":47.0351331605,
|
29 |
-
"bert_score":0.928119574,
|
30 |
-
"speakers":15300000.0
|
31 |
-
},
|
32 |
-
{
|
33 |
-
"target_language_name":"Asturian",
|
34 |
-
"bleu":71.3445623493,
|
35 |
-
"bert_score":0.931475842,
|
36 |
-
"speakers":450000.0
|
37 |
-
},
|
38 |
-
{
|
39 |
-
"target_language_name":"Awadhi",
|
40 |
-
"bleu":46.0797144146,
|
41 |
-
"bert_score":0.9333642821,
|
42 |
-
"speakers":22000000.0
|
43 |
-
},
|
44 |
-
{
|
45 |
-
"target_language_name":"Ayacucho Quechua",
|
46 |
-
"bleu":45.6534927028,
|
47 |
-
"bert_score":0.8731370111,
|
48 |
-
"speakers":918200.0
|
49 |
-
},
|
50 |
-
{
|
51 |
-
"target_language_name":"Bali (Indonesia)",
|
52 |
-
"bleu":52.8752419159,
|
53 |
-
"bert_score":0.8934772114,
|
54 |
-
"speakers":4000000.0
|
55 |
-
},
|
56 |
-
{
|
57 |
-
"target_language_name":"Bamanankan",
|
58 |
-
"bleu":38.6939091408,
|
59 |
-
"bert_score":0.8872043769,
|
60 |
-
"speakers":2700000.0
|
61 |
-
},
|
62 |
-
{
|
63 |
-
"target_language_name":"Banjar",
|
64 |
-
"bleu":46.5453977487,
|
65 |
-
"bert_score":0.91599799,
|
66 |
-
"speakers":3500000.0
|
67 |
-
},
|
68 |
-
{
|
69 |
-
"target_language_name":"Bashkort",
|
70 |
-
"bleu":57.5453842927,
|
71 |
-
"bert_score":0.9298217595,
|
72 |
-
"speakers":1200000.0
|
73 |
-
},
|
74 |
-
{
|
75 |
-
"target_language_name":"Basque",
|
76 |
-
"bleu":65.8968721377,
|
77 |
-
"bert_score":0.9192741295,
|
78 |
-
"speakers":750000.0
|
79 |
-
},
|
80 |
-
{
|
81 |
-
"target_language_name":"Belarusian",
|
82 |
-
"bleu":54.5195166442,
|
83 |
-
"bert_score":0.9329862595,
|
84 |
-
"speakers":7900000.0
|
85 |
-
},
|
86 |
-
{
|
87 |
-
"target_language_name":"Bemba",
|
88 |
-
"bleu":47.8068548956,
|
89 |
-
"bert_score":0.889907831,
|
90 |
-
"speakers":3600000.0
|
91 |
-
},
|
92 |
-
{
|
93 |
-
"target_language_name":"Bengali",
|
94 |
-
"bleu":57.1417588816,
|
95 |
-
"bert_score":0.9483523647,
|
96 |
-
"speakers":300000000.0
|
97 |
-
},
|
98 |
-
{
|
99 |
-
"target_language_name":"Bhojpuri",
|
100 |
-
"bleu":44.5412337907,
|
101 |
-
"bert_score":0.9288184981,
|
102 |
-
"speakers":52200000.0
|
103 |
-
},
|
104 |
-
{
|
105 |
-
"target_language_name":"Bokm\u00e5l",
|
106 |
-
"bleu":77.4939513016,
|
107 |
-
"bert_score":0.9550971886,
|
108 |
-
"speakers":4000000.0
|
109 |
-
},
|
110 |
-
{
|
111 |
-
"target_language_name":"Boro (India)",
|
112 |
-
"bleu":36.1100474969,
|
113 |
-
"bert_score":0.925187854,
|
114 |
-
"speakers":1482929.0
|
115 |
-
},
|
116 |
-
{
|
117 |
-
"target_language_name":"Bosnian",
|
118 |
-
"bleu":72.5488027131,
|
119 |
-
"bert_score":0.947693936,
|
120 |
-
"speakers":3500000.0
|
121 |
-
},
|
122 |
-
{
|
123 |
-
"target_language_name":"Bugis",
|
124 |
-
"bleu":44.8388170031,
|
125 |
-
"bert_score":0.8647923966,
|
126 |
-
"speakers":5017800.0
|
127 |
-
},
|
128 |
-
{
|
129 |
-
"target_language_name":"Bulgarian",
|
130 |
-
"bleu":72.9695925131,
|
131 |
-
"bert_score":0.9545443177,
|
132 |
-
"speakers":9000000.0
|
133 |
-
},
|
134 |
-
{
|
135 |
-
"target_language_name":"Burmese",
|
136 |
-
"bleu":55.7235911677,
|
137 |
-
"bert_score":0.9759751062,
|
138 |
-
"speakers":32900000.0
|
139 |
-
},
|
140 |
-
{
|
141 |
-
"target_language_name":"Catalan",
|
142 |
-
"bleu":74.4595007932,
|
143 |
-
"bert_score":0.9464139263,
|
144 |
-
"speakers":5100000.0
|
145 |
-
},
|
146 |
-
{
|
147 |
-
"target_language_name":"Cebuano",
|
148 |
-
"bleu":69.4557958655,
|
149 |
-
"bert_score":0.9321281234,
|
150 |
-
"speakers":15900000.0
|
151 |
-
},
|
152 |
-
{
|
153 |
-
"target_language_name":"Central Aymara",
|
154 |
-
"bleu":42.7698436669,
|
155 |
-
"bert_score":0.8625142018,
|
156 |
-
"speakers":0.0
|
157 |
-
},
|
158 |
-
{
|
159 |
-
"target_language_name":"Central Kurdish",
|
160 |
-
"bleu":59.1927910692,
|
161 |
-
"bert_score":0.9332568824,
|
162 |
-
"speakers":7250000.0
|
163 |
-
},
|
164 |
-
{
|
165 |
-
"target_language_name":"Central Tibetan",
|
166 |
-
"bleu":51.349075274,
|
167 |
-
"bert_score":0.967157503,
|
168 |
-
"speakers":1200000.0
|
169 |
-
},
|
170 |
-
{
|
171 |
-
"target_language_name":"Chhattisgarhi",
|
172 |
-
"bleu":47.9797501304,
|
173 |
-
"bert_score":0.9363766015,
|
174 |
-
"speakers":16300000.0
|
175 |
-
},
|
176 |
-
{
|
177 |
-
"target_language_name":"Chichewa",
|
178 |
-
"bleu":59.7601680161,
|
179 |
-
"bert_score":0.9069253902,
|
180 |
-
"speakers":12000000.0
|
181 |
-
},
|
182 |
-
{
|
183 |
-
"target_language_name":"Chokwe",
|
184 |
-
"bleu":10.1864074161,
|
185 |
-
"bert_score":0.727788798,
|
186 |
-
"speakers":0.0
|
187 |
-
},
|
188 |
-
{
|
189 |
-
"target_language_name":"Chuvash",
|
190 |
-
"bleu":45.0546658723,
|
191 |
-
"bert_score":0.9203916192,
|
192 |
-
"speakers":1279650.0
|
193 |
-
},
|
194 |
-
{
|
195 |
-
"target_language_name":"Crimean Tatar",
|
196 |
-
"bleu":52.7050249448,
|
197 |
-
"bert_score":0.8972040812,
|
198 |
-
"speakers":552740.0
|
199 |
-
},
|
200 |
-
{
|
201 |
-
"target_language_name":"Croatian",
|
202 |
-
"bleu":69.5456983662,
|
203 |
-
"bert_score":0.9444877982,
|
204 |
-
"speakers":7000000.0
|
205 |
-
},
|
206 |
-
{
|
207 |
-
"target_language_name":"Czech",
|
208 |
-
"bleu":69.7112290599,
|
209 |
-
"bert_score":0.9384464244,
|
210 |
-
"speakers":10700000.0
|
211 |
-
},
|
212 |
-
{
|
213 |
-
"target_language_name":"Danish",
|
214 |
-
"bleu":78.0935433284,
|
215 |
-
"bert_score":0.9506490747,
|
216 |
-
"speakers":6000000.0
|
217 |
-
},
|
218 |
-
{
|
219 |
-
"target_language_name":"Dari",
|
220 |
-
"bleu":52.5539795795,
|
221 |
-
"bert_score":0.9466466506,
|
222 |
-
"speakers":9600000.0
|
223 |
-
},
|
224 |
-
{
|
225 |
-
"target_language_name":"Dholuo",
|
226 |
-
"bleu":46.4119479071,
|
227 |
-
"bert_score":0.8803233822,
|
228 |
-
"speakers":3000000.0
|
229 |
-
},
|
230 |
-
{
|
231 |
-
"target_language_name":"Dogri",
|
232 |
-
"bleu":44.9153535278,
|
233 |
-
"bert_score":0.934070154,
|
234 |
-
"speakers":2000000.0
|
235 |
-
},
|
236 |
-
{
|
237 |
-
"target_language_name":"Dutch",
|
238 |
-
"bleu":71.1849326315,
|
239 |
-
"bert_score":0.9376831949,
|
240 |
-
"speakers":23100000.0
|
241 |
-
},
|
242 |
-
{
|
243 |
-
"target_language_name":"Dzongkha",
|
244 |
-
"bleu":44.3573814017,
|
245 |
-
"bert_score":0.9664796074,
|
246 |
-
"speakers":237080.0
|
247 |
-
},
|
248 |
-
{
|
249 |
-
"target_language_name":"Eastern Punjabi",
|
250 |
-
"bleu":60.468441109,
|
251 |
-
"bert_score":0.988244007,
|
252 |
-
"speakers":125000000.0
|
253 |
-
},
|
254 |
-
{
|
255 |
-
"target_language_name":"Eastern Yiddish",
|
256 |
-
"bleu":47.5562009325,
|
257 |
-
"bert_score":0.9590989411,
|
258 |
-
"speakers":0.0
|
259 |
-
},
|
260 |
-
{
|
261 |
-
"target_language_name":"Egyptian Arabic",
|
262 |
-
"bleu":53.6818081038,
|
263 |
-
"bert_score":0.9394114673,
|
264 |
-
"speakers":100542400.0
|
265 |
-
},
|
266 |
-
{
|
267 |
-
"target_language_name":"English",
|
268 |
-
"bleu":75.3501173486,
|
269 |
-
"bert_score":0.8807334363,
|
270 |
-
"speakers":1132366680.0
|
271 |
-
},
|
272 |
-
{
|
273 |
-
"target_language_name":"Esperanto",
|
274 |
-
"bleu":69.6056577554,
|
275 |
-
"bert_score":0.9302131255,
|
276 |
-
"speakers":2000000.0
|
277 |
-
},
|
278 |
-
{
|
279 |
-
"target_language_name":"Faroese",
|
280 |
-
"bleu":65.9147902483,
|
281 |
-
"bert_score":0.9332413753,
|
282 |
-
"speakers":69150.0
|
283 |
-
},
|
284 |
-
{
|
285 |
-
"target_language_name":"Fijian",
|
286 |
-
"bleu":58.2892667246,
|
287 |
-
"bert_score":0.9183188617,
|
288 |
-
"speakers":341270.0
|
289 |
-
},
|
290 |
-
{
|
291 |
-
"target_language_name":"Filipino",
|
292 |
-
"bleu":70.1928498378,
|
293 |
-
"bert_score":0.9269425154,
|
294 |
-
"speakers":90000000.0
|
295 |
-
},
|
296 |
-
{
|
297 |
-
"target_language_name":"Finnish",
|
298 |
-
"bleu":70.9425029518,
|
299 |
-
"bert_score":0.9320579688,
|
300 |
-
"speakers":5413380.0
|
301 |
-
},
|
302 |
-
{
|
303 |
-
"target_language_name":"Fon",
|
304 |
-
"bleu":25.2797773666,
|
305 |
-
"bert_score":0.8664443592,
|
306 |
-
"speakers":1935500.0
|
307 |
-
},
|
308 |
-
{
|
309 |
-
"target_language_name":"French",
|
310 |
-
"bleu":79.3023871219,
|
311 |
-
"bert_score":0.9554367423,
|
312 |
-
"speakers":208157220.0
|
313 |
-
},
|
314 |
-
{
|
315 |
-
"target_language_name":"Friulian",
|
316 |
-
"bleu":66.5488092372,
|
317 |
-
"bert_score":0.9255799611,
|
318 |
-
"speakers":300000.0
|
319 |
-
},
|
320 |
-
{
|
321 |
-
"target_language_name":"Galician",
|
322 |
-
"bleu":68.7024786904,
|
323 |
-
"bert_score":0.9283550183,
|
324 |
-
"speakers":2500000.0
|
325 |
-
},
|
326 |
-
{
|
327 |
-
"target_language_name":"Ganda",
|
328 |
-
"bleu":45.8693322936,
|
329 |
-
"bert_score":0.88344028,
|
330 |
-
"speakers":4100000.0
|
331 |
-
},
|
332 |
-
{
|
333 |
-
"target_language_name":"Georgian",
|
334 |
-
"bleu":61.0166361442,
|
335 |
-
"bert_score":0.9546662311,
|
336 |
-
"speakers":3700000.0
|
337 |
-
},
|
338 |
-
{
|
339 |
-
"target_language_name":"Gikuyu",
|
340 |
-
"bleu":40.9288275291,
|
341 |
-
"bert_score":0.8850945433,
|
342 |
-
"speakers":6623000.0
|
343 |
-
},
|
344 |
-
{
|
345 |
-
"target_language_name":"Goan Konkani",
|
346 |
-
"bleu":47.1084017945,
|
347 |
-
"bert_score":0.9314287245,
|
348 |
-
"speakers":3633900.0
|
349 |
-
},
|
350 |
-
{
|
351 |
-
"target_language_name":"Greek",
|
352 |
-
"bleu":66.2347782153,
|
353 |
-
"bert_score":0.9577525119,
|
354 |
-
"speakers":15000000.0
|
355 |
-
},
|
356 |
-
{
|
357 |
-
"target_language_name":"Gujarati",
|
358 |
-
"bleu":55.5884513452,
|
359 |
-
"bert_score":0.9753397226,
|
360 |
-
"speakers":56400000.0
|
361 |
-
},
|
362 |
-
{
|
363 |
-
"target_language_name":"Haitian Creole",
|
364 |
-
"bleu":63.8532187591,
|
365 |
-
"bert_score":0.93236112,
|
366 |
-
"speakers":9600000.0
|
367 |
-
},
|
368 |
-
{
|
369 |
-
"target_language_name":"Halh Mongolian",
|
370 |
-
"bleu":58.5037789971,
|
371 |
-
"bert_score":0.9380823056,
|
372 |
-
"speakers":2704030.0
|
373 |
-
},
|
374 |
-
{
|
375 |
-
"target_language_name":"Hausa",
|
376 |
-
"bleu":56.3431957901,
|
377 |
-
"bert_score":0.9012877802,
|
378 |
-
"speakers":43900000.0
|
379 |
-
},
|
380 |
-
{
|
381 |
-
"target_language_name":"Hebrew",
|
382 |
-
"bleu":72.0702990513,
|
383 |
-
"bert_score":0.964064618,
|
384 |
-
"speakers":9303950.0
|
385 |
-
},
|
386 |
-
{
|
387 |
-
"target_language_name":"Hindi",
|
388 |
-
"bleu":64.9362166898,
|
389 |
-
"bert_score":0.9463364283,
|
390 |
-
"speakers":341000000.0
|
391 |
-
},
|
392 |
-
{
|
393 |
-
"target_language_name":"Hungarian",
|
394 |
-
"bleu":66.1301119408,
|
395 |
-
"bert_score":0.9249218643,
|
396 |
-
"speakers":12600000.0
|
397 |
-
},
|
398 |
-
{
|
399 |
-
"target_language_name":"Icelandic",
|
400 |
-
"bleu":54.4330055353,
|
401 |
-
"bert_score":0.9120460276,
|
402 |
-
"speakers":358000.0
|
403 |
-
},
|
404 |
-
{
|
405 |
-
"target_language_name":"Igbo",
|
406 |
-
"bleu":46.4017344934,
|
407 |
-
"bert_score":0.9137314638,
|
408 |
-
"speakers":27000000.0
|
409 |
-
},
|
410 |
-
{
|
411 |
-
"target_language_name":"Ilocano",
|
412 |
-
"bleu":62.6058864594,
|
413 |
-
"bert_score":0.9115280092,
|
414 |
-
"speakers":9100000.0
|
415 |
-
},
|
416 |
-
{
|
417 |
-
"target_language_name":"Indonesian",
|
418 |
-
"bleu":72.9087066262,
|
419 |
-
"bert_score":0.9301403503,
|
420 |
-
"speakers":198996550.0
|
421 |
-
},
|
422 |
-
{
|
423 |
-
"target_language_name":"Iranian Persian",
|
424 |
-
"bleu":57.6444169698,
|
425 |
-
"bert_score":0.9476486345,
|
426 |
-
"speakers":52800000.0
|
427 |
-
},
|
428 |
-
{
|
429 |
-
"target_language_name":"Irish",
|
430 |
-
"bleu":69.9725194524,
|
431 |
-
"bert_score":0.9440232972,
|
432 |
-
"speakers":1030000.0
|
433 |
-
},
|
434 |
-
{
|
435 |
-
"target_language_name":"Italian",
|
436 |
-
"bleu":69.1588343572,
|
437 |
-
"bert_score":0.9358606537,
|
438 |
-
"speakers":64819790.0
|
439 |
-
},
|
440 |
-
{
|
441 |
-
"target_language_name":"Japanese",
|
442 |
-
"bleu":49.9166135693,
|
443 |
-
"bert_score":0.9425287286,
|
444 |
-
"speakers":128000000.0
|
445 |
-
},
|
446 |
-
{
|
447 |
-
"target_language_name":"Javanese",
|
448 |
-
"bleu":60.440335299,
|
449 |
-
"bert_score":0.9125308077,
|
450 |
-
"speakers":84308740.0
|
451 |
-
},
|
452 |
-
{
|
453 |
-
"target_language_name":"Jingpho",
|
454 |
-
"bleu":43.5500581403,
|
455 |
-
"bert_score":0.8727998992,
|
456 |
-
"speakers":940000.0
|
457 |
-
},
|
458 |
-
{
|
459 |
-
"target_language_name":"Jula",
|
460 |
-
"bleu":29.5415180297,
|
461 |
-
"bert_score":0.822332112,
|
462 |
-
"speakers":2700000.0
|
463 |
-
},
|
464 |
-
{
|
465 |
-
"target_language_name":"Kabiy\u00e8",
|
466 |
-
"bleu":22.5498504655,
|
467 |
-
"bert_score":0.8587520639,
|
468 |
-
"speakers":1000000.0
|
469 |
-
},
|
470 |
-
{
|
471 |
-
"target_language_name":"Kabuverdianu",
|
472 |
-
"bleu":65.1106010391,
|
473 |
-
"bert_score":0.9213403026,
|
474 |
-
"speakers":871000.0
|
475 |
-
},
|
476 |
-
{
|
477 |
-
"target_language_name":"Kabyle",
|
478 |
-
"bleu":41.1442992587,
|
479 |
-
"bert_score":0.8803219795,
|
480 |
-
"speakers":5586000.0
|
481 |
-
},
|
482 |
-
{
|
483 |
-
"target_language_name":"Kamba",
|
484 |
-
"bleu":41.733489671,
|
485 |
-
"bert_score":0.8780206362,
|
486 |
-
"speakers":3893000.0
|
487 |
-
},
|
488 |
-
{
|
489 |
-
"target_language_name":"Kannada",
|
490 |
-
"bleu":60.0142028332,
|
491 |
-
"bert_score":0.9730932295,
|
492 |
-
"speakers":43600000.0
|
493 |
-
},
|
494 |
-
{
|
495 |
-
"target_language_name":"Kashmiri",
|
496 |
-
"bleu":22.3019416547,
|
497 |
-
"bert_score":0.8984790143,
|
498 |
-
"speakers":6900000.0
|
499 |
-
},
|
500 |
-
{
|
501 |
-
"target_language_name":"Kazakh",
|
502 |
-
"bleu":61.1251621375,
|
503 |
-
"bert_score":0.9379647116,
|
504 |
-
"speakers":13161980.0
|
505 |
-
},
|
506 |
-
{
|
507 |
-
"target_language_name":"Khmer",
|
508 |
-
"bleu":49.2098257043,
|
509 |
-
"bert_score":0.8907732884,
|
510 |
-
"speakers":16600000.0
|
511 |
-
},
|
512 |
-
{
|
513 |
-
"target_language_name":"Kimbundu",
|
514 |
-
"bleu":5.8523457224,
|
515 |
-
"bert_score":0.6849321783,
|
516 |
-
"speakers":0.0
|
517 |
-
},
|
518 |
-
{
|
519 |
-
"target_language_name":"Kinyarwanda",
|
520 |
-
"bleu":57.2410626756,
|
521 |
-
"bert_score":0.906923449,
|
522 |
-
"speakers":12100000.0
|
523 |
-
},
|
524 |
-
{
|
525 |
-
"target_language_name":"Kituba (Democratic Republic of the Congo)",
|
526 |
-
"bleu":52.8484601602,
|
527 |
-
"bert_score":0.9017938395,
|
528 |
-
"speakers":0.0
|
529 |
-
},
|
530 |
-
{
|
531 |
-
"target_language_name":"Korean",
|
532 |
-
"bleu":43.6872285974,
|
533 |
-
"bert_score":0.9579092761,
|
534 |
-
"speakers":77300000.0
|
535 |
-
},
|
536 |
-
{
|
537 |
-
"target_language_name":"Kyrgyz",
|
538 |
-
"bleu":57.0824422453,
|
539 |
-
"bert_score":0.9317750076,
|
540 |
-
"speakers":4568480.0
|
541 |
-
},
|
542 |
-
{
|
543 |
-
"target_language_name":"Lao",
|
544 |
-
"bleu":60.0210909677,
|
545 |
-
"bert_score":0.904438438,
|
546 |
-
"speakers":5225552.0
|
547 |
-
},
|
548 |
-
{
|
549 |
-
"target_language_name":"Latgalian",
|
550 |
-
"bleu":56.4843556524,
|
551 |
-
"bert_score":0.9078494012,
|
552 |
-
"speakers":200000.0
|
553 |
-
},
|
554 |
-
{
|
555 |
-
"target_language_name":"Levantine Arabic",
|
556 |
-
"bleu":56.0898634013,
|
557 |
-
"bert_score":0.9437467565,
|
558 |
-
"speakers":44000000.0
|
559 |
-
},
|
560 |
-
{
|
561 |
-
"target_language_name":"Ligurian",
|
562 |
-
"bleu":55.8530636302,
|
563 |
-
"bert_score":0.9047620773,
|
564 |
-
"speakers":500000.0
|
565 |
-
},
|
566 |
-
{
|
567 |
-
"target_language_name":"Limburgish",
|
568 |
-
"bleu":59.4485504982,
|
569 |
-
"bert_score":0.8987095455,
|
570 |
-
"speakers":1600000.0
|
571 |
-
},
|
572 |
-
{
|
573 |
-
"target_language_name":"Lingala",
|
574 |
-
"bleu":30.4322896531,
|
575 |
-
"bert_score":0.8553236572,
|
576 |
-
"speakers":20000000.0
|
577 |
-
},
|
578 |
-
{
|
579 |
-
"target_language_name":"Lithuanian",
|
580 |
-
"bleu":67.1625695571,
|
581 |
-
"bert_score":0.9154702902,
|
582 |
-
"speakers":4000000.0
|
583 |
-
},
|
584 |
-
{
|
585 |
-
"target_language_name":"Lombard",
|
586 |
-
"bleu":46.3884402674,
|
587 |
-
"bert_score":0.8643471499,
|
588 |
-
"speakers":3900000.0
|
589 |
-
},
|
590 |
-
{
|
591 |
-
"target_language_name":"Luba-Kasai",
|
592 |
-
"bleu":45.0655291655,
|
593 |
-
"bert_score":0.8749240279,
|
594 |
-
"speakers":6300000.0
|
595 |
-
},
|
596 |
-
{
|
597 |
-
"target_language_name":"Luxembourgish",
|
598 |
-
"bleu":70.8338190438,
|
599 |
-
"bert_score":0.9297492107,
|
600 |
-
"speakers":391200.0
|
601 |
-
},
|
602 |
-
{
|
603 |
-
"target_language_name":"Macedonian",
|
604 |
-
"bleu":72.2733471437,
|
605 |
-
"bert_score":0.9558346649,
|
606 |
-
"speakers":2000000.0
|
607 |
-
},
|
608 |
-
{
|
609 |
-
"target_language_name":"Magahi",
|
610 |
-
"bleu":58.5474221546,
|
611 |
-
"bert_score":0.9458349566,
|
612 |
-
"speakers":20700000.0
|
613 |
-
},
|
614 |
-
{
|
615 |
-
"target_language_name":"Maithili",
|
616 |
-
"bleu":54.6530071391,
|
617 |
-
"bert_score":0.9433513383,
|
618 |
-
"speakers":33900000.0
|
619 |
-
},
|
620 |
-
{
|
621 |
-
"target_language_name":"Malayalam",
|
622 |
-
"bleu":64.0655894091,
|
623 |
-
"bert_score":0.9803075671,
|
624 |
-
"speakers":37100000.0
|
625 |
-
},
|
626 |
-
{
|
627 |
-
"target_language_name":"Maltese",
|
628 |
-
"bleu":80.0866777263,
|
629 |
-
"bert_score":0.9520254652,
|
630 |
-
"speakers":570000.0
|
631 |
-
},
|
632 |
-
{
|
633 |
-
"target_language_name":"Mandarin Chinese",
|
634 |
-
"bleu":42.5300166785,
|
635 |
-
"bert_score":0.9634857118,
|
636 |
-
"speakers":1074000000.0
|
637 |
-
},
|
638 |
-
{
|
639 |
-
"target_language_name":"Maori",
|
640 |
-
"bleu":54.8319935643,
|
641 |
-
"bert_score":0.9185245017,
|
642 |
-
"speakers":160000.0
|
643 |
-
},
|
644 |
-
{
|
645 |
-
"target_language_name":"Marathi",
|
646 |
-
"bleu":57.4434090711,
|
647 |
-
"bert_score":0.9421781262,
|
648 |
-
"speakers":83100000.0
|
649 |
-
},
|
650 |
-
{
|
651 |
-
"target_language_name":"Meadow Mari",
|
652 |
-
"bleu":49.7911680582,
|
653 |
-
"bert_score":0.9295116961,
|
654 |
-
"speakers":482000.0
|
655 |
-
},
|
656 |
-
{
|
657 |
-
"target_language_name":"Meitei",
|
658 |
-
"bleu":41.2619945571,
|
659 |
-
"bert_score":0.9528288851,
|
660 |
-
"speakers":1470000.0
|
661 |
-
},
|
662 |
-
{
|
663 |
-
"target_language_name":"Merina Malagasy",
|
664 |
-
"bleu":61.0968434546,
|
665 |
-
"bert_score":0.9032936792,
|
666 |
-
"speakers":0.0
|
667 |
-
},
|
668 |
-
{
|
669 |
-
"target_language_name":"Mesopotamian Arabic",
|
670 |
-
"bleu":49.5184865297,
|
671 |
-
"bert_score":0.9382626355,
|
672 |
-
"speakers":15700000.0
|
673 |
-
},
|
674 |
-
{
|
675 |
-
"target_language_name":"Minangkabau",
|
676 |
-
"bleu":50.7407956197,
|
677 |
-
"bert_score":0.9252789746,
|
678 |
-
"speakers":5530000.0
|
679 |
-
},
|
680 |
-
{
|
681 |
-
"target_language_name":"Mizo",
|
682 |
-
"bleu":51.6558017488,
|
683 |
-
"bert_score":0.8875152906,
|
684 |
-
"speakers":500000.0
|
685 |
-
},
|
686 |
-
{
|
687 |
-
"target_language_name":"Moore",
|
688 |
-
"bleu":32.8458097983,
|
689 |
-
"bert_score":0.8583020627,
|
690 |
-
"speakers":7600000.0
|
691 |
-
},
|
692 |
-
{
|
693 |
-
"target_language_name":"Moroccan Arabic",
|
694 |
-
"bleu":49.3082976781,
|
695 |
-
"bert_score":0.9317501009,
|
696 |
-
"speakers":27500000.0
|
697 |
-
},
|
698 |
-
{
|
699 |
-
"target_language_name":"Najdi Arabic",
|
700 |
-
"bleu":46.4102430377,
|
701 |
-
"bert_score":0.9332984229,
|
702 |
-
"speakers":0.0
|
703 |
-
},
|
704 |
-
{
|
705 |
-
"target_language_name":"Nepali",
|
706 |
-
"bleu":55.2919347352,
|
707 |
-
"bert_score":0.9358912428,
|
708 |
-
"speakers":0.0
|
709 |
-
},
|
710 |
-
{
|
711 |
-
"target_language_name":"Nigerian Fulfulde",
|
712 |
-
"bleu":28.1761055913,
|
713 |
-
"bert_score":0.8343587597,
|
714 |
-
"speakers":14500000.0
|
715 |
-
},
|
716 |
-
{
|
717 |
-
"target_language_name":"North Azerbaijani",
|
718 |
-
"bleu":55.5265107063,
|
719 |
-
"bert_score":0.9145456314,
|
720 |
-
"speakers":9220610.0
|
721 |
-
},
|
722 |
-
{
|
723 |
-
"target_language_name":"Northern Kurdish",
|
724 |
-
"bleu":55.7965878227,
|
725 |
-
"bert_score":0.9104436457,
|
726 |
-
"speakers":14600000.0
|
727 |
-
},
|
728 |
-
{
|
729 |
-
"target_language_name":"Northern Sotho",
|
730 |
-
"bleu":62.8769401692,
|
731 |
-
"bert_score":0.9261207898,
|
732 |
-
"speakers":4100000.0
|
733 |
-
},
|
734 |
-
{
|
735 |
-
"target_language_name":"Northern Uzbek",
|
736 |
-
"bleu":63.205573851,
|
737 |
-
"bert_score":0.9120756924,
|
738 |
-
"speakers":26912410.0
|
739 |
-
},
|
740 |
-
{
|
741 |
-
"target_language_name":"Nuer",
|
742 |
-
"bleu":16.5796987951,
|
743 |
-
"bert_score":0.8528214693,
|
744 |
-
"speakers":900000.0
|
745 |
-
},
|
746 |
-
{
|
747 |
-
"target_language_name":"N\u2019Ko",
|
748 |
-
"bleu":32.483490799,
|
749 |
-
"bert_score":0.9823745767,
|
750 |
-
"speakers":0.0
|
751 |
-
},
|
752 |
-
{
|
753 |
-
"target_language_name":"Occitan",
|
754 |
-
"bleu":71.532740184,
|
755 |
-
"bert_score":0.9337525626,
|
756 |
-
"speakers":542000.0
|
757 |
-
},
|
758 |
-
{
|
759 |
-
"target_language_name":"Odia",
|
760 |
-
"bleu":57.3628096518,
|
761 |
-
"bert_score":0.9768644154,
|
762 |
-
"speakers":34500000.0
|
763 |
-
},
|
764 |
-
{
|
765 |
-
"target_language_name":"Pangasinan",
|
766 |
-
"bleu":56.0048183827,
|
767 |
-
"bert_score":0.8906280657,
|
768 |
-
"speakers":1100000.0
|
769 |
-
},
|
770 |
-
{
|
771 |
-
"target_language_name":"Papiamentu",
|
772 |
-
"bleu":69.7955328133,
|
773 |
-
"bert_score":0.9325902323,
|
774 |
-
"speakers":321300.0
|
775 |
-
},
|
776 |
-
{
|
777 |
-
"target_language_name":"Paraguayan Guaran\u00ed",
|
778 |
-
"bleu":41.7929863707,
|
779 |
-
"bert_score":0.8764786462,
|
780 |
-
"speakers":0.0
|
781 |
-
},
|
782 |
-
{
|
783 |
-
"target_language_name":"Polish",
|
784 |
-
"bleu":61.8768399674,
|
785 |
-
"bert_score":0.9179250948,
|
786 |
-
"speakers":40200000.0
|
787 |
-
},
|
788 |
-
{
|
789 |
-
"target_language_name":"Portuguese",
|
790 |
-
"bleu":77.4978074222,
|
791 |
-
"bert_score":0.9494876027,
|
792 |
-
"speakers":254300000.0
|
793 |
-
},
|
794 |
-
{
|
795 |
-
"target_language_name":"Romanian",
|
796 |
-
"bleu":76.4907159035,
|
797 |
-
"bert_score":0.9455295324,
|
798 |
-
"speakers":24300000.0
|
799 |
-
},
|
800 |
-
{
|
801 |
-
"target_language_name":"Rundi",
|
802 |
-
"bleu":48.943513629,
|
803 |
-
"bert_score":0.8933652222,
|
804 |
-
"speakers":10800000.0
|
805 |
-
},
|
806 |
-
{
|
807 |
-
"target_language_name":"Russian",
|
808 |
-
"bleu":71.1489441039,
|
809 |
-
"bert_score":0.9518508852,
|
810 |
-
"speakers":171428900.0
|
811 |
-
},
|
812 |
-
{
|
813 |
-
"target_language_name":"Samoan",
|
814 |
-
"bleu":56.7138831423,
|
815 |
-
"bert_score":0.9166683555,
|
816 |
-
"speakers":415720.0
|
817 |
-
},
|
818 |
-
{
|
819 |
-
"target_language_name":"Sango",
|
820 |
-
"bleu":34.8754222657,
|
821 |
-
"bert_score":0.8720244229,
|
822 |
-
"speakers":4600000.0
|
823 |
-
},
|
824 |
-
{
|
825 |
-
"target_language_name":"Sanskrit",
|
826 |
-
"bleu":32.7813249911,
|
827 |
-
"bert_score":0.8987655501,
|
828 |
-
"speakers":49736.0
|
829 |
-
},
|
830 |
-
{
|
831 |
-
"target_language_name":"Santhali",
|
832 |
-
"bleu":31.5119247269,
|
833 |
-
"bert_score":0.944095705,
|
834 |
-
"speakers":7200000.0
|
835 |
-
},
|
836 |
-
{
|
837 |
-
"target_language_name":"Sardinian",
|
838 |
-
"bleu":62.6903914771,
|
839 |
-
"bert_score":0.9118991812,
|
840 |
-
"speakers":1300000.0
|
841 |
-
},
|
842 |
-
{
|
843 |
-
"target_language_name":"Scottish Gaelic",
|
844 |
-
"bleu":62.6044371338,
|
845 |
-
"bert_score":0.9264988482,
|
846 |
-
"speakers":60130.0
|
847 |
-
},
|
848 |
-
{
|
849 |
-
"target_language_name":"Serbian",
|
850 |
-
"bleu":69.9691396176,
|
851 |
-
"bert_score":0.9582955678,
|
852 |
-
"speakers":9000000.0
|
853 |
-
},
|
854 |
-
{
|
855 |
-
"target_language_name":"Setswana",
|
856 |
-
"bleu":55.2288890228,
|
857 |
-
"bert_score":0.9117900888,
|
858 |
-
"speakers":4500000.0
|
859 |
-
},
|
860 |
-
{
|
861 |
-
"target_language_name":"Shan",
|
862 |
-
"bleu":29.2129948577,
|
863 |
-
"bert_score":0.9378574808,
|
864 |
-
"speakers":3000000.0
|
865 |
-
},
|
866 |
-
{
|
867 |
-
"target_language_name":"Shona",
|
868 |
-
"bleu":51.5592191405,
|
869 |
-
"bert_score":0.8798740129,
|
870 |
-
"speakers":9023000.0
|
871 |
-
},
|
872 |
-
{
|
873 |
-
"target_language_name":"Sicilian",
|
874 |
-
"bleu":58.5895359443,
|
875 |
-
"bert_score":0.90428345,
|
876 |
-
"speakers":4700000.0
|
877 |
-
},
|
878 |
-
{
|
879 |
-
"target_language_name":"Silesian",
|
880 |
-
"bleu":56.7836392069,
|
881 |
-
"bert_score":0.9106028736,
|
882 |
-
"speakers":522000.0
|
883 |
-
},
|
884 |
-
{
|
885 |
-
"target_language_name":"Sindhi",
|
886 |
-
"bleu":48.1876056648,
|
887 |
-
"bert_score":0.936702015,
|
888 |
-
"speakers":25000000.0
|
889 |
-
},
|
890 |
-
{
|
891 |
-
"target_language_name":"Sinhala",
|
892 |
-
"bleu":56.7567311796,
|
893 |
-
"bert_score":0.9713358581,
|
894 |
-
"speakers":15300000.0
|
895 |
-
},
|
896 |
-
{
|
897 |
-
"target_language_name":"Slovak",
|
898 |
-
"bleu":67.9284804086,
|
899 |
-
"bert_score":0.9360236605,
|
900 |
-
"speakers":6000000.0
|
901 |
-
},
|
902 |
-
{
|
903 |
-
"target_language_name":"Slovene",
|
904 |
-
"bleu":72.5691270757,
|
905 |
-
"bert_score":0.9432346245,
|
906 |
-
"speakers":2400000.0
|
907 |
-
},
|
908 |
-
{
|
909 |
-
"target_language_name":"Somali",
|
910 |
-
"bleu":55.3706496473,
|
911 |
-
"bert_score":0.908571593,
|
912 |
-
"speakers":16200000.0
|
913 |
-
},
|
914 |
-
{
|
915 |
-
"target_language_name":"South Azerbaijani",
|
916 |
-
"bleu":44.3712804302,
|
917 |
-
"bert_score":0.9420697371,
|
918 |
-
"speakers":15000000.0
|
919 |
-
},
|
920 |
-
{
|
921 |
-
"target_language_name":"Southern Pashto",
|
922 |
-
"bleu":38.3124819374,
|
923 |
-
"bert_score":0.921268179,
|
924 |
-
"speakers":10900000.0
|
925 |
-
},
|
926 |
-
{
|
927 |
-
"target_language_name":"Southern Sotho",
|
928 |
-
"bleu":56.735299554,
|
929 |
-
"bert_score":0.9102749407,
|
930 |
-
"speakers":6000000.0
|
931 |
-
},
|
932 |
-
{
|
933 |
-
"target_language_name":"Southwestern Dinka",
|
934 |
-
"bleu":17.5913281403,
|
935 |
-
"bert_score":0.8016291638,
|
936 |
-
"speakers":0.0
|
937 |
-
},
|
938 |
-
{
|
939 |
-
"target_language_name":"Spanish",
|
940 |
-
"bleu":63.8467073379,
|
941 |
-
"bert_score":0.9224406302,
|
942 |
-
"speakers":485000000.0
|
943 |
-
},
|
944 |
-
{
|
945 |
-
"target_language_name":"Standard Arabic",
|
946 |
-
"bleu":56.8831262708,
|
947 |
-
"bert_score":0.9168330083,
|
948 |
-
"speakers":0.0
|
949 |
-
},
|
950 |
-
{
|
951 |
-
"target_language_name":"Standard Estonian",
|
952 |
-
"bleu":67.4156919517,
|
953 |
-
"bert_score":0.9277306815,
|
954 |
-
"speakers":1164770.0
|
955 |
-
},
|
956 |
-
{
|
957 |
-
"target_language_name":"Standard German",
|
958 |
-
"bleu":77.1966515107,
|
959 |
-
"bert_score":0.9468763133,
|
960 |
-
"speakers":105000000.0
|
961 |
-
},
|
962 |
-
{
|
963 |
-
"target_language_name":"Standard Latvian",
|
964 |
-
"bleu":65.0833210037,
|
965 |
-
"bert_score":0.9217625757,
|
966 |
-
"speakers":0.0
|
967 |
-
},
|
968 |
-
{
|
969 |
-
"target_language_name":"Standard Malay",
|
970 |
-
"bleu":74.2657232798,
|
971 |
-
"bert_score":0.9445500493,
|
972 |
-
"speakers":0.0
|
973 |
-
},
|
974 |
-
{
|
975 |
-
"target_language_name":"Standard Moroccan Tamazight",
|
976 |
-
"bleu":35.6247648109,
|
977 |
-
"bert_score":0.9847298423,
|
978 |
-
"speakers":0.0
|
979 |
-
},
|
980 |
-
{
|
981 |
-
"target_language_name":"Sunda",
|
982 |
-
"bleu":56.4065999104,
|
983 |
-
"bert_score":0.9077177823,
|
984 |
-
"speakers":32400000.0
|
985 |
-
},
|
986 |
-
{
|
987 |
-
"target_language_name":"Swahili",
|
988 |
-
"bleu":73.5199042142,
|
989 |
-
"bert_score":0.9450787365,
|
990 |
-
"speakers":82300000.0
|
991 |
-
},
|
992 |
-
{
|
993 |
-
"target_language_name":"Swati",
|
994 |
-
"bleu":52.7746096439,
|
995 |
-
"bert_score":0.8899940272,
|
996 |
-
"speakers":2034200.0
|
997 |
-
},
|
998 |
-
{
|
999 |
-
"target_language_name":"Swedish",
|
1000 |
-
"bleu":77.421610247,
|
1001 |
-
"bert_score":0.9571870168,
|
1002 |
-
"speakers":9244250.0
|
1003 |
-
},
|
1004 |
-
{
|
1005 |
-
"target_language_name":"Tajik",
|
1006 |
-
"bleu":60.9783684158,
|
1007 |
-
"bert_score":0.9378365338,
|
1008 |
-
"speakers":14000000.0
|
1009 |
-
},
|
1010 |
-
{
|
1011 |
-
"target_language_name":"Tamasheq",
|
1012 |
-
"bleu":18.4319889721,
|
1013 |
-
"bert_score":0.8427422295,
|
1014 |
-
"speakers":500000.0
|
1015 |
-
},
|
1016 |
-
{
|
1017 |
-
"target_language_name":"Tamil",
|
1018 |
-
"bleu":65.7863221054,
|
1019 |
-
"bert_score":0.9536473691,
|
1020 |
-
"speakers":75000000.0
|
1021 |
-
},
|
1022 |
-
{
|
1023 |
-
"target_language_name":"Tatar",
|
1024 |
-
"bleu":60.3447467213,
|
1025 |
-
"bert_score":0.9364115715,
|
1026 |
-
"speakers":5427318.0
|
1027 |
-
},
|
1028 |
-
{
|
1029 |
-
"target_language_name":"Ta\u2019izzi-Adeni Arabic",
|
1030 |
-
"bleu":49.4139335281,
|
1031 |
-
"bert_score":0.9354432185,
|
1032 |
-
"speakers":10500000.0
|
1033 |
-
},
|
1034 |
-
{
|
1035 |
-
"target_language_name":"Telugu",
|
1036 |
-
"bleu":61.6352457629,
|
1037 |
-
"bert_score":0.9790697515,
|
1038 |
-
"speakers":82000000.0
|
1039 |
-
},
|
1040 |
-
{
|
1041 |
-
"target_language_name":"Thai",
|
1042 |
-
"bleu":62.8125360944,
|
1043 |
-
"bert_score":0.9225328485,
|
1044 |
-
"speakers":40000000.0
|
1045 |
-
},
|
1046 |
-
{
|
1047 |
-
"target_language_name":"Tigrigna",
|
1048 |
-
"bleu":32.8711961703,
|
1049 |
-
"bert_score":0.9852415164,
|
1050 |
-
"speakers":7507780.0
|
1051 |
-
},
|
1052 |
-
{
|
1053 |
-
"target_language_name":"Tok Pisin",
|
1054 |
-
"bleu":56.5407760367,
|
1055 |
-
"bert_score":0.9031182428,
|
1056 |
-
"speakers":4000000.0
|
1057 |
-
},
|
1058 |
-
{
|
1059 |
-
"target_language_name":"Tosk Albanian",
|
1060 |
-
"bleu":69.4218765092,
|
1061 |
-
"bert_score":0.9402680953,
|
1062 |
-
"speakers":3000000.0
|
1063 |
-
},
|
1064 |
-
{
|
1065 |
-
"target_language_name":"Tsonga",
|
1066 |
-
"bleu":58.3516573597,
|
1067 |
-
"bert_score":0.9134832978,
|
1068 |
-
"speakers":13000000.0
|
1069 |
-
},
|
1070 |
-
{
|
1071 |
-
"target_language_name":"Tumbuka",
|
1072 |
-
"bleu":44.0490017392,
|
1073 |
-
"bert_score":0.8865564326,
|
1074 |
-
"speakers":2680000.0
|
1075 |
-
},
|
1076 |
-
{
|
1077 |
-
"target_language_name":"Tunisian Arabic",
|
1078 |
-
"bleu":49.6714090744,
|
1079 |
-
"bert_score":0.9337966998,
|
1080 |
-
"speakers":11600000.0
|
1081 |
-
},
|
1082 |
-
{
|
1083 |
-
"target_language_name":"Turkish",
|
1084 |
-
"bleu":67.1600625676,
|
1085 |
-
"bert_score":0.9309494158,
|
1086 |
-
"speakers":82231620.0
|
1087 |
-
},
|
1088 |
-
{
|
1089 |
-
"target_language_name":"Turkmen",
|
1090 |
-
"bleu":60.5593705936,
|
1091 |
-
"bert_score":0.9125106474,
|
1092 |
-
"speakers":16000000.0
|
1093 |
-
},
|
1094 |
-
{
|
1095 |
-
"target_language_name":"Twi",
|
1096 |
-
"bleu":44.7976562068,
|
1097 |
-
"bert_score":0.8913615406,
|
1098 |
-
"speakers":3000000.0
|
1099 |
-
},
|
1100 |
-
{
|
1101 |
-
"target_language_name":"Ukrainian",
|
1102 |
-
"bleu":68.0976232544,
|
1103 |
-
"bert_score":0.9468558848,
|
1104 |
-
"speakers":34710100.0
|
1105 |
-
},
|
1106 |
-
{
|
1107 |
-
"target_language_name":"Umbundu",
|
1108 |
-
"bleu":21.0802775597,
|
1109 |
-
"bert_score":0.8461364289,
|
1110 |
-
"speakers":6000000.0
|
1111 |
-
},
|
1112 |
-
{
|
1113 |
-
"target_language_name":"Urdu",
|
1114 |
-
"bleu":61.1255457272,
|
1115 |
-
"bert_score":0.953888009,
|
1116 |
-
"speakers":94022900.0
|
1117 |
-
},
|
1118 |
-
{
|
1119 |
-
"target_language_name":"Uyghur",
|
1120 |
-
"bleu":53.5346877103,
|
1121 |
-
"bert_score":0.9397906005,
|
1122 |
-
"speakers":10400000.0
|
1123 |
-
},
|
1124 |
-
{
|
1125 |
-
"target_language_name":"Venetian",
|
1126 |
-
"bleu":60.6140876271,
|
1127 |
-
"bert_score":0.9080212533,
|
1128 |
-
"speakers":2000000.0
|
1129 |
-
},
|
1130 |
-
{
|
1131 |
-
"target_language_name":"Vietnamese",
|
1132 |
-
"bleu":70.3560749464,
|
1133 |
-
"bert_score":0.9527418713,
|
1134 |
-
"speakers":76000000.0
|
1135 |
-
},
|
1136 |
-
{
|
1137 |
-
"target_language_name":"Waray-Waray",
|
1138 |
-
"bleu":66.3850231243,
|
1139 |
-
"bert_score":0.920412008,
|
1140 |
-
"speakers":3100000.0
|
1141 |
-
},
|
1142 |
-
{
|
1143 |
-
"target_language_name":"Welsh",
|
1144 |
-
"bleu":83.3437724474,
|
1145 |
-
"bert_score":0.9662299534,
|
1146 |
-
"speakers":977366.0
|
1147 |
-
},
|
1148 |
-
{
|
1149 |
-
"target_language_name":"West Central Oromo",
|
1150 |
-
"bleu":46.9090350028,
|
1151 |
-
"bert_score":0.8845542371,
|
1152 |
-
"speakers":0.0
|
1153 |
-
},
|
1154 |
-
{
|
1155 |
-
"target_language_name":"Wolof",
|
1156 |
-
"bleu":42.6430127569,
|
1157 |
-
"bert_score":0.8762976408,
|
1158 |
-
"speakers":3700000.0
|
1159 |
-
},
|
1160 |
-
{
|
1161 |
-
"target_language_name":"Xhosa",
|
1162 |
-
"bleu":55.4688091009,
|
1163 |
-
"bert_score":0.9008744816,
|
1164 |
-
"speakers":11000000.0
|
1165 |
-
},
|
1166 |
-
{
|
1167 |
-
"target_language_name":"Yerwa Kanuri",
|
1168 |
-
"bleu":18.5081787556,
|
1169 |
-
"bert_score":0.839997381,
|
1170 |
-
"speakers":0.0
|
1171 |
-
},
|
1172 |
-
{
|
1173 |
-
"target_language_name":"Yoruba",
|
1174 |
-
"bleu":34.2642542268,
|
1175 |
-
"bert_score":0.9001545012,
|
1176 |
-
"speakers":40000000.0
|
1177 |
-
},
|
1178 |
-
{
|
1179 |
-
"target_language_name":"Yue Chinese",
|
1180 |
-
"bleu":34.5614651228,
|
1181 |
-
"bert_score":0.9634495397,
|
1182 |
-
"speakers":73100000.0
|
1183 |
-
},
|
1184 |
-
{
|
1185 |
-
"target_language_name":"Zulu",
|
1186 |
-
"bleu":59.1762078389,
|
1187 |
-
"bert_score":0.9099391103,
|
1188 |
-
"speakers":15700000.0
|
1189 |
-
},
|
1190 |
-
{
|
1191 |
-
"target_language_name":"nno",
|
1192 |
-
"bleu":71.8615646296,
|
1193 |
-
"bert_score":0.9335320314,
|
1194 |
-
"speakers":0.0
|
1195 |
-
},
|
1196 |
-
{
|
1197 |
-
"target_language_name":"\u00c9w\u00e9",
|
1198 |
-
"bleu":41.6614038791,
|
1199 |
-
"bert_score":0.8829316159,
|
1200 |
-
"speakers":3000000.0
|
1201 |
-
}
|
1202 |
-
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
uv.lock
CHANGED
@@ -253,6 +253,18 @@ wheels = [
|
|
253 |
{ url = "https://files.pythonhosted.org/packages/bf/9b/08c0432272d77b04803958a4598a51e2a4b51c06640af8b8f0f908c18bf2/charset_normalizer-3.4.0-py3-none-any.whl", hash = "sha256:fe9f97feb71aa9896b81973a7bbada8c49501dc73e58a10fcef6663af95e5079", size = 49446 },
|
254 |
]
|
255 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
256 |
[[package]]
|
257 |
name = "colorama"
|
258 |
version = "0.4.6"
|
@@ -767,11 +779,16 @@ dependencies = [
|
|
767 |
{ name = "bert-score" },
|
768 |
{ name = "evaluate" },
|
769 |
{ name = "joblib" },
|
|
|
770 |
{ name = "openai" },
|
771 |
{ name = "pandas" },
|
|
|
772 |
{ name = "python-dotenv" },
|
773 |
{ name = "sacrebleu" },
|
|
|
|
|
774 |
{ name = "tqdm" },
|
|
|
775 |
]
|
776 |
|
777 |
[package.metadata]
|
@@ -780,11 +797,16 @@ requires-dist = [
|
|
780 |
{ name = "bert-score", specifier = ">=0.3.13" },
|
781 |
{ name = "evaluate", specifier = ">=0.4.3" },
|
782 |
{ name = "joblib", specifier = ">=1.4.2" },
|
|
|
783 |
{ name = "openai", specifier = ">=1.52.2" },
|
784 |
{ name = "pandas", specifier = ">=2.2.3" },
|
|
|
785 |
{ name = "python-dotenv", specifier = ">=1.0.1" },
|
786 |
{ name = "sacrebleu", specifier = ">=2.4.3" },
|
|
|
|
|
787 |
{ name = "tqdm", specifier = ">=4.66.6" },
|
|
|
788 |
]
|
789 |
|
790 |
[[package]]
|
@@ -1083,6 +1105,21 @@ wheels = [
|
|
1083 |
{ url = "https://files.pythonhosted.org/packages/b9/54/dd730b32ea14ea797530a4479b2ed46a6fb250f682a9cfb997e968bf0261/networkx-3.4.2-py3-none-any.whl", hash = "sha256:df5d4365b724cf81b8c6a7312509d0c22386097011ad1abe274afd5e9d3bbc5f", size = 1723263 },
|
1084 |
]
|
1085 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1086 |
[[package]]
|
1087 |
name = "numpy"
|
1088 |
version = "2.1.2"
|
@@ -1491,6 +1528,20 @@ wheels = [
|
|
1491 |
{ url = "https://files.pythonhosted.org/packages/3d/b6/e6d98278f2d49b22b4d033c9f792eda783b9ab2094b041f013fc69bcde87/propcache-0.2.0-py3-none-any.whl", hash = "sha256:2ccc28197af5313706511fab3a8b66dcd6da067a1331372c82ea1cb74285e036", size = 11603 },
|
1492 |
]
|
1493 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1494 |
[[package]]
|
1495 |
name = "pyarrow"
|
1496 |
version = "18.0.0"
|
@@ -1878,6 +1929,38 @@ wheels = [
|
|
1878 |
{ url = "https://files.pythonhosted.org/packages/19/46/5d11dc300feaad285c2f1bd784ff3f689f5e0ab6be49aaf568f3a77019eb/safetensors-0.4.5-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:21742b391b859e67b26c0b2ac37f52c9c0944a879a25ad2f9f9f3cd61e7fda8f", size = 606660 },
|
1879 |
]
|
1880 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1881 |
[[package]]
|
1882 |
name = "setuptools"
|
1883 |
version = "75.3.0"
|
@@ -1926,6 +2009,42 @@ wheels = [
|
|
1926 |
{ url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252 },
|
1927 |
]
|
1928 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1929 |
[[package]]
|
1930 |
name = "tokenizers"
|
1931 |
version = "0.20.1"
|
|
|
253 |
{ url = "https://files.pythonhosted.org/packages/bf/9b/08c0432272d77b04803958a4598a51e2a4b51c06640af8b8f0f908c18bf2/charset_normalizer-3.4.0-py3-none-any.whl", hash = "sha256:fe9f97feb71aa9896b81973a7bbada8c49501dc73e58a10fcef6663af95e5079", size = 49446 },
|
254 |
]
|
255 |
|
256 |
+
[[package]]
|
257 |
+
name = "click"
|
258 |
+
version = "8.1.7"
|
259 |
+
source = { registry = "https://pypi.org/simple" }
|
260 |
+
dependencies = [
|
261 |
+
{ name = "colorama", marker = "platform_system == 'Windows'" },
|
262 |
+
]
|
263 |
+
sdist = { url = "https://files.pythonhosted.org/packages/96/d3/f04c7bfcf5c1862a2a5b845c6b2b360488cf47af55dfa79c98f6a6bf98b5/click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de", size = 336121 }
|
264 |
+
wheels = [
|
265 |
+
{ url = "https://files.pythonhosted.org/packages/00/2e/d53fa4befbf2cfa713304affc7ca780ce4fc1fd8710527771b58311a3229/click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28", size = 97941 },
|
266 |
+
]
|
267 |
+
|
268 |
[[package]]
|
269 |
name = "colorama"
|
270 |
version = "0.4.6"
|
|
|
779 |
{ name = "bert-score" },
|
780 |
{ name = "evaluate" },
|
781 |
{ name = "joblib" },
|
782 |
+
{ name = "nltk" },
|
783 |
{ name = "openai" },
|
784 |
{ name = "pandas" },
|
785 |
+
{ name = "protobuf" },
|
786 |
{ name = "python-dotenv" },
|
787 |
{ name = "sacrebleu" },
|
788 |
+
{ name = "sentencepiece" },
|
789 |
+
{ name = "tiktoken" },
|
790 |
{ name = "tqdm" },
|
791 |
+
{ name = "transformers" },
|
792 |
]
|
793 |
|
794 |
[package.metadata]
|
|
|
797 |
{ name = "bert-score", specifier = ">=0.3.13" },
|
798 |
{ name = "evaluate", specifier = ">=0.4.3" },
|
799 |
{ name = "joblib", specifier = ">=1.4.2" },
|
800 |
+
{ name = "nltk", specifier = ">=3.9.1" },
|
801 |
{ name = "openai", specifier = ">=1.52.2" },
|
802 |
{ name = "pandas", specifier = ">=2.2.3" },
|
803 |
+
{ name = "protobuf", specifier = ">=5.28.3" },
|
804 |
{ name = "python-dotenv", specifier = ">=1.0.1" },
|
805 |
{ name = "sacrebleu", specifier = ">=2.4.3" },
|
806 |
+
{ name = "sentencepiece", specifier = ">=0.2.0" },
|
807 |
+
{ name = "tiktoken", specifier = ">=0.8.0" },
|
808 |
{ name = "tqdm", specifier = ">=4.66.6" },
|
809 |
+
{ name = "transformers", specifier = ">=4.46.1" },
|
810 |
]
|
811 |
|
812 |
[[package]]
|
|
|
1105 |
{ url = "https://files.pythonhosted.org/packages/b9/54/dd730b32ea14ea797530a4479b2ed46a6fb250f682a9cfb997e968bf0261/networkx-3.4.2-py3-none-any.whl", hash = "sha256:df5d4365b724cf81b8c6a7312509d0c22386097011ad1abe274afd5e9d3bbc5f", size = 1723263 },
|
1106 |
]
|
1107 |
|
1108 |
+
[[package]]
|
1109 |
+
name = "nltk"
|
1110 |
+
version = "3.9.1"
|
1111 |
+
source = { registry = "https://pypi.org/simple" }
|
1112 |
+
dependencies = [
|
1113 |
+
{ name = "click" },
|
1114 |
+
{ name = "joblib" },
|
1115 |
+
{ name = "regex" },
|
1116 |
+
{ name = "tqdm" },
|
1117 |
+
]
|
1118 |
+
sdist = { url = "https://files.pythonhosted.org/packages/3c/87/db8be88ad32c2d042420b6fd9ffd4a149f9a0d7f0e86b3f543be2eeeedd2/nltk-3.9.1.tar.gz", hash = "sha256:87d127bd3de4bd89a4f81265e5fa59cb1b199b27440175370f7417d2bc7ae868", size = 2904691 }
|
1119 |
+
wheels = [
|
1120 |
+
{ url = "https://files.pythonhosted.org/packages/4d/66/7d9e26593edda06e8cb531874633f7c2372279c3b0f46235539fe546df8b/nltk-3.9.1-py3-none-any.whl", hash = "sha256:4fa26829c5b00715afe3061398a8989dc643b92ce7dd93fb4585a70930d168a1", size = 1505442 },
|
1121 |
+
]
|
1122 |
+
|
1123 |
[[package]]
|
1124 |
name = "numpy"
|
1125 |
version = "2.1.2"
|
|
|
1528 |
{ url = "https://files.pythonhosted.org/packages/3d/b6/e6d98278f2d49b22b4d033c9f792eda783b9ab2094b041f013fc69bcde87/propcache-0.2.0-py3-none-any.whl", hash = "sha256:2ccc28197af5313706511fab3a8b66dcd6da067a1331372c82ea1cb74285e036", size = 11603 },
|
1529 |
]
|
1530 |
|
1531 |
+
[[package]]
|
1532 |
+
name = "protobuf"
|
1533 |
+
version = "5.28.3"
|
1534 |
+
source = { registry = "https://pypi.org/simple" }
|
1535 |
+
sdist = { url = "https://files.pythonhosted.org/packages/74/6e/e69eb906fddcb38f8530a12f4b410699972ab7ced4e21524ece9d546ac27/protobuf-5.28.3.tar.gz", hash = "sha256:64badbc49180a5e401f373f9ce7ab1d18b63f7dd4a9cdc43c92b9f0b481cef7b", size = 422479 }
|
1536 |
+
wheels = [
|
1537 |
+
{ url = "https://files.pythonhosted.org/packages/d1/c5/05163fad52d7c43e124a545f1372d18266db36036377ad29de4271134a6a/protobuf-5.28.3-cp310-abi3-win32.whl", hash = "sha256:0c4eec6f987338617072592b97943fdbe30d019c56126493111cf24344c1cc24", size = 419624 },
|
1538 |
+
{ url = "https://files.pythonhosted.org/packages/9c/4c/4563ebe001ff30dca9d7ed12e471fa098d9759712980cde1fd03a3a44fb7/protobuf-5.28.3-cp310-abi3-win_amd64.whl", hash = "sha256:91fba8f445723fcf400fdbe9ca796b19d3b1242cd873907979b9ed71e4afe868", size = 431464 },
|
1539 |
+
{ url = "https://files.pythonhosted.org/packages/1c/f2/baf397f3dd1d3e4af7e3f5a0382b868d25ac068eefe1ebde05132333436c/protobuf-5.28.3-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:a3f6857551e53ce35e60b403b8a27b0295f7d6eb63d10484f12bc6879c715687", size = 414743 },
|
1540 |
+
{ url = "https://files.pythonhosted.org/packages/85/50/cd61a358ba1601f40e7d38bcfba22e053f40ef2c50d55b55926aecc8fec7/protobuf-5.28.3-cp38-abi3-manylinux2014_aarch64.whl", hash = "sha256:3fa2de6b8b29d12c61911505d893afe7320ce7ccba4df913e2971461fa36d584", size = 316511 },
|
1541 |
+
{ url = "https://files.pythonhosted.org/packages/5d/ae/3257b09328c0b4e59535e497b0c7537d4954038bdd53a2f0d2f49d15a7c4/protobuf-5.28.3-cp38-abi3-manylinux2014_x86_64.whl", hash = "sha256:712319fbdddb46f21abb66cd33cb9e491a5763b2febd8f228251add221981135", size = 316624 },
|
1542 |
+
{ url = "https://files.pythonhosted.org/packages/ad/c3/2377c159e28ea89a91cf1ca223f827ae8deccb2c9c401e5ca233cd73002f/protobuf-5.28.3-py3-none-any.whl", hash = "sha256:cee1757663fa32a1ee673434fcf3bf24dd54763c79690201208bafec62f19eed", size = 169511 },
|
1543 |
+
]
|
1544 |
+
|
1545 |
[[package]]
|
1546 |
name = "pyarrow"
|
1547 |
version = "18.0.0"
|
|
|
1929 |
{ url = "https://files.pythonhosted.org/packages/19/46/5d11dc300feaad285c2f1bd784ff3f689f5e0ab6be49aaf568f3a77019eb/safetensors-0.4.5-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:21742b391b859e67b26c0b2ac37f52c9c0944a879a25ad2f9f9f3cd61e7fda8f", size = 606660 },
|
1930 |
]
|
1931 |
|
1932 |
+
[[package]]
|
1933 |
+
name = "sentencepiece"
|
1934 |
+
version = "0.2.0"
|
1935 |
+
source = { registry = "https://pypi.org/simple" }
|
1936 |
+
sdist = { url = "https://files.pythonhosted.org/packages/c9/d2/b9c7ca067c26d8ff085d252c89b5f69609ca93fb85a00ede95f4857865d4/sentencepiece-0.2.0.tar.gz", hash = "sha256:a52c19171daaf2e697dc6cbe67684e0fa341b1248966f6aebb541de654d15843", size = 2632106 }
|
1937 |
+
wheels = [
|
1938 |
+
{ url = "https://files.pythonhosted.org/packages/f6/71/98648c3b64b23edb5403f74bcc906ad21766872a6e1ada26ea3f1eb941ab/sentencepiece-0.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:188779e1298a1c8b8253c7d3ad729cb0a9891e5cef5e5d07ce4592c54869e227", size = 2408979 },
|
1939 |
+
{ url = "https://files.pythonhosted.org/packages/77/9f/7efbaa6d4c0c718a9affbecc536b03ca62f99f421bdffb531c16030e2d2b/sentencepiece-0.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bed9cf85b296fa2b76fc2547b9cbb691a523864cebaee86304c43a7b4cb1b452", size = 1238845 },
|
1940 |
+
{ url = "https://files.pythonhosted.org/packages/1c/e4/c2541027a43ec6962ba9b601805d17ba3f86b38bdeae0e8ac65a2981e248/sentencepiece-0.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d7b67e724bead13f18db6e1d10b6bbdc454af574d70efbb36f27d90387be1ca3", size = 1181472 },
|
1941 |
+
{ url = "https://files.pythonhosted.org/packages/fd/46/316c1ba6c52b97de76aff7b9da678f7afbb52136afb2987c474d95630e65/sentencepiece-0.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2fde4b08cfe237be4484c6c7c2e2c75fb862cfeab6bd5449ce4caeafd97b767a", size = 1259151 },
|
1942 |
+
{ url = "https://files.pythonhosted.org/packages/aa/5a/3c48738a0835d76dd06c62b6ac48d39c923cde78dd0f587353bdcbb99851/sentencepiece-0.2.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c378492056202d1c48a4979650981635fd97875a00eabb1f00c6a236b013b5e", size = 1355931 },
|
1943 |
+
{ url = "https://files.pythonhosted.org/packages/a6/27/33019685023221ca8ed98e8ceb7ae5e166032686fa3662c68f1f1edf334e/sentencepiece-0.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1380ce6540a368de2ef6d7e6ba14ba8f3258df650d39ba7d833b79ee68a52040", size = 1301537 },
|
1944 |
+
{ url = "https://files.pythonhosted.org/packages/ca/e4/55f97cef14293171fef5f96e96999919ab5b4d1ce95b53547ad653d7e3bf/sentencepiece-0.2.0-cp310-cp310-win32.whl", hash = "sha256:a1151d6a6dd4b43e552394aed0edfe9292820272f0194bd56c7c1660a0c06c3d", size = 936747 },
|
1945 |
+
{ url = "https://files.pythonhosted.org/packages/85/f4/4ef1a6e0e9dbd8a60780a91df8b7452ada14cfaa0e17b3b8dfa42cecae18/sentencepiece-0.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:d490142b0521ef22bc1085f061d922a2a6666175bb6b42e588ff95c0db6819b2", size = 991525 },
|
1946 |
+
{ url = "https://files.pythonhosted.org/packages/32/43/8f8885168a47a02eba1455bd3f4f169f50ad5b8cebd2402d0f5e20854d04/sentencepiece-0.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:17982700c4f6dbb55fa3594f3d7e5dd1c8659a274af3738e33c987d2a27c9d5c", size = 2409036 },
|
1947 |
+
{ url = "https://files.pythonhosted.org/packages/0f/35/e63ba28062af0a3d688a9f128e407a1a2608544b2f480cb49bf7f4b1cbb9/sentencepiece-0.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7c867012c0e8bcd5bdad0f791609101cb5c66acb303ab3270218d6debc68a65e", size = 1238921 },
|
1948 |
+
{ url = "https://files.pythonhosted.org/packages/de/42/ae30952c4a0bd773e90c9bf2579f5533037c886dfc8ec68133d5694f4dd2/sentencepiece-0.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7fd6071249c74f779c5b27183295b9202f8dedb68034e716784364443879eaa6", size = 1181477 },
|
1949 |
+
{ url = "https://files.pythonhosted.org/packages/e3/ac/2f2ab1d60bb2d795d054eebe5e3f24b164bc21b5a9b75fba7968b3b91b5a/sentencepiece-0.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:27f90c55a65013cbb8f4d7aab0599bf925cde4adc67ae43a0d323677b5a1c6cb", size = 1259182 },
|
1950 |
+
{ url = "https://files.pythonhosted.org/packages/45/fb/14633c6ecf262c468759ffcdb55c3a7ee38fe4eda6a70d75ee7c7d63c58b/sentencepiece-0.2.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b293734059ef656dcd65be62ff771507bea8fed0a711b6733976e1ed3add4553", size = 1355537 },
|
1951 |
+
{ url = "https://files.pythonhosted.org/packages/fb/12/2f5c8d4764b00033cf1c935b702d3bb878d10be9f0b87f0253495832d85f/sentencepiece-0.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e58b47f933aca74c6a60a79dcb21d5b9e47416256c795c2d58d55cec27f9551d", size = 1301464 },
|
1952 |
+
{ url = "https://files.pythonhosted.org/packages/4e/b1/67afc0bde24f6dcb3acdea0dd8dcdf4b8b0db240f6bacd39378bd32d09f8/sentencepiece-0.2.0-cp311-cp311-win32.whl", hash = "sha256:c581258cf346b327c62c4f1cebd32691826306f6a41d8c4bec43b010dee08e75", size = 936749 },
|
1953 |
+
{ url = "https://files.pythonhosted.org/packages/a2/f6/587c62fd21fc988555b85351f50bbde43a51524caafd63bc69240ded14fd/sentencepiece-0.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:0993dbc665f4113017892f1b87c3904a44d0640eda510abcacdfb07f74286d36", size = 991520 },
|
1954 |
+
{ url = "https://files.pythonhosted.org/packages/27/5a/141b227ed54293360a9ffbb7bf8252b4e5efc0400cdeac5809340e5d2b21/sentencepiece-0.2.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:ea5f536e32ea8ec96086ee00d7a4a131ce583a1b18d130711707c10e69601cb2", size = 2409370 },
|
1955 |
+
{ url = "https://files.pythonhosted.org/packages/2e/08/a4c135ad6fc2ce26798d14ab72790d66e813efc9589fd30a5316a88ca8d5/sentencepiece-0.2.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d0cb51f53b6aae3c36bafe41e86167c71af8370a039f542c43b0cce5ef24a68c", size = 1239288 },
|
1956 |
+
{ url = "https://files.pythonhosted.org/packages/49/0a/2fe387f825ac5aad5a0bfe221904882106cac58e1b693ba7818785a882b6/sentencepiece-0.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3212121805afc58d8b00ab4e7dd1f8f76c203ddb9dc94aa4079618a31cf5da0f", size = 1181597 },
|
1957 |
+
{ url = "https://files.pythonhosted.org/packages/cc/38/e4698ee2293fe4835dc033c49796a39b3eebd8752098f6bd0aa53a14af1f/sentencepiece-0.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2a3149e3066c2a75e0d68a43eb632d7ae728c7925b517f4c05c40f6f7280ce08", size = 1259220 },
|
1958 |
+
{ url = "https://files.pythonhosted.org/packages/12/24/fd7ef967c9dad2f6e6e5386d0cadaf65cda8b7be6e3861a9ab3121035139/sentencepiece-0.2.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:632f3594d3e7ac8b367bca204cb3fd05a01d5b21455acd097ea4c0e30e2f63d7", size = 1355962 },
|
1959 |
+
{ url = "https://files.pythonhosted.org/packages/4f/d2/18246f43ca730bb81918f87b7e886531eda32d835811ad9f4657c54eee35/sentencepiece-0.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f295105c6bdbb05bd5e1b0cafbd78ff95036f5d3641e7949455a3f4e5e7c3109", size = 1301706 },
|
1960 |
+
{ url = "https://files.pythonhosted.org/packages/8a/47/ca237b562f420044ab56ddb4c278672f7e8c866e183730a20e413b38a989/sentencepiece-0.2.0-cp312-cp312-win32.whl", hash = "sha256:fb89f811e5efd18bab141afc3fea3de141c3f69f3fe9e898f710ae7fe3aab251", size = 936941 },
|
1961 |
+
{ url = "https://files.pythonhosted.org/packages/c6/97/d159c32642306ee2b70732077632895438867b3b6df282354bd550cf2a67/sentencepiece-0.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:7a673a72aab81fef5ebe755c6e0cc60087d1f3a4700835d40537183c1703a45f", size = 991994 },
|
1962 |
+
]
|
1963 |
+
|
1964 |
[[package]]
|
1965 |
name = "setuptools"
|
1966 |
version = "75.3.0"
|
|
|
2009 |
{ url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252 },
|
2010 |
]
|
2011 |
|
2012 |
+
[[package]]
|
2013 |
+
name = "tiktoken"
|
2014 |
+
version = "0.8.0"
|
2015 |
+
source = { registry = "https://pypi.org/simple" }
|
2016 |
+
dependencies = [
|
2017 |
+
{ name = "regex" },
|
2018 |
+
{ name = "requests" },
|
2019 |
+
]
|
2020 |
+
sdist = { url = "https://files.pythonhosted.org/packages/37/02/576ff3a6639e755c4f70997b2d315f56d6d71e0d046f4fb64cb81a3fb099/tiktoken-0.8.0.tar.gz", hash = "sha256:9ccbb2740f24542534369c5635cfd9b2b3c2490754a78ac8831d99f89f94eeb2", size = 35107 }
|
2021 |
+
wheels = [
|
2022 |
+
{ url = "https://files.pythonhosted.org/packages/c9/ba/a35fad753bbca8ba0cc1b0f3402a70256a110ced7ac332cf84ba89fc87ab/tiktoken-0.8.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b07e33283463089c81ef1467180e3e00ab00d46c2c4bbcef0acab5f771d6695e", size = 1039905 },
|
2023 |
+
{ url = "https://files.pythonhosted.org/packages/91/05/13dab8fd7460391c387b3e69e14bf1e51ff71fe0a202cd2933cc3ea93fb6/tiktoken-0.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9269348cb650726f44dd3bbb3f9110ac19a8dcc8f54949ad3ef652ca22a38e21", size = 982417 },
|
2024 |
+
{ url = "https://files.pythonhosted.org/packages/e9/98/18ec4a8351a6cf4537e40cd6e19a422c10cce1ef00a2fcb716e0a96af58b/tiktoken-0.8.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25e13f37bc4ef2d012731e93e0fef21dc3b7aea5bb9009618de9a4026844e560", size = 1144915 },
|
2025 |
+
{ url = "https://files.pythonhosted.org/packages/2e/28/cf3633018cbcc6deb7805b700ccd6085c9a5a7f72b38974ee0bffd56d311/tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f13d13c981511331eac0d01a59b5df7c0d4060a8be1e378672822213da51e0a2", size = 1177221 },
|
2026 |
+
{ url = "https://files.pythonhosted.org/packages/57/81/8a5be305cbd39d4e83a794f9e80c7f2c84b524587b7feb27c797b2046d51/tiktoken-0.8.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:6b2ddbc79a22621ce8b1166afa9f9a888a664a579350dc7c09346a3b5de837d9", size = 1237398 },
|
2027 |
+
{ url = "https://files.pythonhosted.org/packages/dc/da/8d1cc3089a83f5cf11c2e489332752981435280285231924557350523a59/tiktoken-0.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:d8c2d0e5ba6453a290b86cd65fc51fedf247e1ba170191715b049dac1f628005", size = 884215 },
|
2028 |
+
{ url = "https://files.pythonhosted.org/packages/f6/1e/ca48e7bfeeccaf76f3a501bd84db1fa28b3c22c9d1a1f41af9fb7579c5f6/tiktoken-0.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d622d8011e6d6f239297efa42a2657043aaed06c4f68833550cac9e9bc723ef1", size = 1039700 },
|
2029 |
+
{ url = "https://files.pythonhosted.org/packages/8c/f8/f0101d98d661b34534769c3818f5af631e59c36ac6d07268fbfc89e539ce/tiktoken-0.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2efaf6199717b4485031b4d6edb94075e4d79177a172f38dd934d911b588d54a", size = 982413 },
|
2030 |
+
{ url = "https://files.pythonhosted.org/packages/ac/3c/2b95391d9bd520a73830469f80a96e3790e6c0a5ac2444f80f20b4b31051/tiktoken-0.8.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5637e425ce1fc49cf716d88df3092048359a4b3bbb7da762840426e937ada06d", size = 1144242 },
|
2031 |
+
{ url = "https://files.pythonhosted.org/packages/01/c4/c4a4360de845217b6aa9709c15773484b50479f36bb50419c443204e5de9/tiktoken-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fb0e352d1dbe15aba082883058b3cce9e48d33101bdaac1eccf66424feb5b47", size = 1176588 },
|
2032 |
+
{ url = "https://files.pythonhosted.org/packages/f8/a3/ef984e976822cd6c2227c854f74d2e60cf4cd6fbfca46251199914746f78/tiktoken-0.8.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:56edfefe896c8f10aba372ab5706b9e3558e78db39dd497c940b47bf228bc419", size = 1237261 },
|
2033 |
+
{ url = "https://files.pythonhosted.org/packages/1e/86/eea2309dc258fb86c7d9b10db536434fc16420feaa3b6113df18b23db7c2/tiktoken-0.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:326624128590def898775b722ccc327e90b073714227175ea8febbc920ac0a99", size = 884537 },
|
2034 |
+
{ url = "https://files.pythonhosted.org/packages/c1/22/34b2e136a6f4af186b6640cbfd6f93400783c9ef6cd550d9eab80628d9de/tiktoken-0.8.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:881839cfeae051b3628d9823b2e56b5cc93a9e2efb435f4cf15f17dc45f21586", size = 1039357 },
|
2035 |
+
{ url = "https://files.pythonhosted.org/packages/04/d2/c793cf49c20f5855fd6ce05d080c0537d7418f22c58e71f392d5e8c8dbf7/tiktoken-0.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fe9399bdc3f29d428f16a2f86c3c8ec20be3eac5f53693ce4980371c3245729b", size = 982616 },
|
2036 |
+
{ url = "https://files.pythonhosted.org/packages/b3/a1/79846e5ef911cd5d75c844de3fa496a10c91b4b5f550aad695c5df153d72/tiktoken-0.8.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9a58deb7075d5b69237a3ff4bb51a726670419db6ea62bdcd8bd80c78497d7ab", size = 1144011 },
|
2037 |
+
{ url = "https://files.pythonhosted.org/packages/26/32/e0e3a859136e95c85a572e4806dc58bf1ddf651108ae8b97d5f3ebe1a244/tiktoken-0.8.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2908c0d043a7d03ebd80347266b0e58440bdef5564f84f4d29fb235b5df3b04", size = 1175432 },
|
2038 |
+
{ url = "https://files.pythonhosted.org/packages/c7/89/926b66e9025b97e9fbabeaa59048a736fe3c3e4530a204109571104f921c/tiktoken-0.8.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:294440d21a2a51e12d4238e68a5972095534fe9878be57d905c476017bff99fc", size = 1236576 },
|
2039 |
+
{ url = "https://files.pythonhosted.org/packages/45/e2/39d4aa02a52bba73b2cd21ba4533c84425ff8786cc63c511d68c8897376e/tiktoken-0.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:d8f3192733ac4d77977432947d563d7e1b310b96497acd3c196c9bddb36ed9db", size = 883824 },
|
2040 |
+
{ url = "https://files.pythonhosted.org/packages/e3/38/802e79ba0ee5fcbf240cd624143f57744e5d411d2e9d9ad2db70d8395986/tiktoken-0.8.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:02be1666096aff7da6cbd7cdaa8e7917bfed3467cd64b38b1f112e96d3b06a24", size = 1039648 },
|
2041 |
+
{ url = "https://files.pythonhosted.org/packages/b1/da/24cdbfc302c98663fbea66f5866f7fa1048405c7564ab88483aea97c3b1a/tiktoken-0.8.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c94ff53c5c74b535b2cbf431d907fc13c678bbd009ee633a2aca269a04389f9a", size = 982763 },
|
2042 |
+
{ url = "https://files.pythonhosted.org/packages/e4/f0/0ecf79a279dfa41fc97d00adccf976ecc2556d3c08ef3e25e45eb31f665b/tiktoken-0.8.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b231f5e8982c245ee3065cd84a4712d64692348bc609d84467c57b4b72dcbc5", size = 1144417 },
|
2043 |
+
{ url = "https://files.pythonhosted.org/packages/ab/d3/155d2d4514f3471a25dc1d6d20549ef254e2aa9bb5b1060809b1d3b03d3a/tiktoken-0.8.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4177faa809bd55f699e88c96d9bb4635d22e3f59d635ba6fd9ffedf7150b9953", size = 1175108 },
|
2044 |
+
{ url = "https://files.pythonhosted.org/packages/19/eb/5989e16821ee8300ef8ee13c16effc20dfc26c777d05fbb6825e3c037b81/tiktoken-0.8.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5376b6f8dc4753cd81ead935c5f518fa0fbe7e133d9e25f648d8c4dabdd4bad7", size = 1236520 },
|
2045 |
+
{ url = "https://files.pythonhosted.org/packages/40/59/14b20465f1d1cb89cfbc96ec27e5617b2d41c79da12b5e04e96d689be2a7/tiktoken-0.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:18228d624807d66c87acd8f25fc135665617cab220671eb65b50f5d70fa51f69", size = 883849 },
|
2046 |
+
]
|
2047 |
+
|
2048 |
[[package]]
|
2049 |
name = "tokenizers"
|
2050 |
version = "0.20.1"
|