Merge branch 'feature-observable'
Browse files
- .gitignore +6 -0
- README.md +1 -1
- index.html +0 -254
- observablehq.config.js +34 -0
- package-lock.json +0 -0
- package.json +22 -0
- results.json +0 -0
- src/.gitignore +1 -0
- src/compare-ai-models.md +65 -0
- src/compare-languages.md +20 -0
- src/components/language-chart.js +68 -0
- data.txt → src/data/data.txt +1 -1
- languagebench.py → src/data/languagebench.json.py +17 -16
- languages.rq → src/data/languages.rq +0 -0
- languages.tsv → src/data/languages.tsv +0 -0
- src/index.md +53 -0
- src/methodology.md +12 -0
.gitignore
CHANGED
@@ -4,6 +4,12 @@ ScriptCodes.csv
|
|
4 |
.cache
|
5 |
.env
|
6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
# Python-generated files
|
8 |
__pycache__/
|
9 |
*.py[oc]
|
|
|
4 |
.cache
|
5 |
.env
|
6 |
|
7 |
+
# Observable
|
8 |
+
.DS_Store
|
9 |
+
/dist/
|
10 |
+
node_modules/
|
11 |
+
yarn-error.log
|
12 |
+
|
13 |
# Python-generated files
|
14 |
__pycache__/
|
15 |
*.py[oc]
|
README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
---
|
2 |
title: AI Language Monitor
|
3 |
-
emoji:
|
4 |
colorFrom: purple
|
5 |
colorTo: pink
|
6 |
sdk: static
|
|
|
1 |
---
|
2 |
title: AI Language Monitor
|
3 |
+
emoji: π
|
4 |
colorFrom: purple
|
5 |
colorTo: pink
|
6 |
sdk: static
|
index.html
DELETED
@@ -1,254 +0,0 @@
|
|
1 |
-
<!DOCTYPE html>
|
2 |
-
<html>
|
3 |
-
|
4 |
-
<head>
|
5 |
-
<title>AI Language Monitor</title>
|
6 |
-
<script src="https://cdn.tailwindcss.com"></script>
|
7 |
-
<style>
|
8 |
-
body {
|
9 |
-
margin: 0 auto;
|
10 |
-
padding: 20px;
|
11 |
-
font-family: sans-serif;
|
12 |
-
}
|
13 |
-
|
14 |
-
.language-header {
|
15 |
-
margin-bottom: 10px;
|
16 |
-
}
|
17 |
-
|
18 |
-
.speaker-count {
|
19 |
-
font-size: 0.8em;
|
20 |
-
color: #666;
|
21 |
-
font-weight: normal;
|
22 |
-
margin: 0;
|
23 |
-
}
|
24 |
-
</style>
|
25 |
-
<link rel="icon"
|
26 |
-
href="data:image/svg+xml,<svg xmlns=%22http://www.w3.org/2000/svg%22 viewBox=%220 0 100 100%22><text y=%22.9em%22 font-size=%2290%22 fill=%22black%22>π</text></svg>">
|
27 |
-
</head>
|
28 |
-
|
29 |
-
<body>
|
30 |
-
<nav>
|
31 |
-
<div class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
|
32 |
-
<!-- Mobile menu button -->
|
33 |
-
<div class="sm:hidden absolute left-4 top-4">
|
34 |
-
<button onclick="toggleMobileMenu()" class="text-gray-500 hover:text-gray-700 focus:outline-none">
|
35 |
-
<svg class="h-6 w-6" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
36 |
-
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M4 6h16M4 12h16M4 18h16" />
|
37 |
-
</svg>
|
38 |
-
</button>
|
39 |
-
</div>
|
40 |
-
|
41 |
-
<!-- Mobile menu (hidden by default) -->
|
42 |
-
<div id="mobileMenu" class="hidden sm:hidden absolute left-0 top-16 bg-white shadow-lg py-4 mx-4 rounded-lg border border-gray-200">
|
43 |
-
<div class="flex flex-col">
|
44 |
-
<h3 class="px-6 py-2 text-gray-400 text-sm font-medium">Navigation</h3>
|
45 |
-
<a href="#" onclick="showSection('coverage'); toggleMobileMenu()" class="nav-link px-6 py-3 text-gray-600 hover:bg-gray-50">
|
46 |
-
Language Coverage
|
47 |
-
</a>
|
48 |
-
<a href="#" onclick="showSection('comparison'); toggleMobileMenu()" class="nav-link px-6 py-3 text-gray-600 hover:bg-gray-50">
|
49 |
-
LLM Comparison
|
50 |
-
</a>
|
51 |
-
<a href="#" onclick="showSection('results'); toggleMobileMenu()" class="nav-link px-6 py-3 text-gray-600 hover:bg-gray-50">
|
52 |
-
Results by Language
|
53 |
-
</a>
|
54 |
-
</div>
|
55 |
-
</div>
|
56 |
-
|
57 |
-
<!-- Desktop menu -->
|
58 |
-
<div class="hidden sm:flex justify-center h-16 border-b border-gray-200">
|
59 |
-
<div class="flex">
|
60 |
-
<div class="flex space-x-8">
|
61 |
-
<a href="#" onclick="showSection('coverage')" class="nav-link active inline-flex items-center px-1 pt-1 border-b-2 border-indigo-500 text-sm font-medium text-gray-900">
|
62 |
-
Language Coverage
|
63 |
-
</a>
|
64 |
-
<a href="#" onclick="showSection('comparison')" class="nav-link inline-flex items-center px-1 pt-1 border-b-2 border-transparent text-sm font-medium text-gray-500 hover:border-gray-300 hover:text-gray-700">
|
65 |
-
LLM Comparison
|
66 |
-
</a>
|
67 |
-
<a href="#" onclick="showSection('results')" class="nav-link inline-flex items-center px-1 pt-1 border-b-2 border-transparent text-sm font-medium text-gray-500 hover:border-gray-300 hover:text-gray-700">
|
68 |
-
Results by Language
|
69 |
-
</a>
|
70 |
-
</div>
|
71 |
-
</div>
|
72 |
-
</div>
|
73 |
-
</div>
|
74 |
-
</nav>
|
75 |
-
|
76 |
-
<div class="p-6">
|
77 |
-
<section id="coverage" class="section">
|
78 |
-
<div id="summary-chart"></div>
|
79 |
-
</section>
|
80 |
-
|
81 |
-
<section id="comparison" class="section hidden">
|
82 |
-
<p class="text-gray-600">Coming soon...</p>
|
83 |
-
<!--
|
84 |
-
- Leaderboard
|
85 |
-
- Filters
|
86 |
-
- commercial vs open source
|
87 |
-
- Eval results per task (across all languages)
|
88 |
-
- Timeline
|
89 |
-
-->
|
90 |
-
</section>
|
91 |
-
|
92 |
-
<section id="results" class="section hidden">
|
93 |
-
<div id="language-list"></div>
|
94 |
-
<!--
|
95 |
-
- Filters
|
96 |
-
- free-text search
|
97 |
-
- by continent, by language family
|
98 |
-
- sort by: population ><, performance ><, datasets ><
|
99 |
-
- Language list with details
|
100 |
-
- Eval results for each task and model
|
101 |
-
- Available datasets
|
102 |
-
- Form field to submit more datasets and custom models
|
103 |
-
-->
|
104 |
-
</section>
|
105 |
-
</div>
|
106 |
-
|
107 |
-
<script type="module">
|
108 |
-
// Import Plot using ESM
|
109 |
-
import * as Plot from "https://cdn.jsdelivr.net/npm/@observablehq/[email protected]/+esm";
|
110 |
-
|
111 |
-
function showSection(sectionId) {
|
112 |
-
// Update nav links
|
113 |
-
document.querySelectorAll('.nav-link').forEach(link => {
|
114 |
-
link.classList.remove('border-indigo-500', 'text-gray-900');
|
115 |
-
link.classList.add('border-transparent', 'text-gray-500');
|
116 |
-
});
|
117 |
-
const activeLink = document.querySelector(`[onclick="showSection('${sectionId}')"]`);
|
118 |
-
activeLink.classList.remove('border-transparent', 'text-gray-500');
|
119 |
-
activeLink.classList.add('border-indigo-500', 'text-gray-900');
|
120 |
-
|
121 |
-
// Show/hide sections
|
122 |
-
document.querySelectorAll('.section').forEach(section => {
|
123 |
-
section.classList.add('hidden');
|
124 |
-
});
|
125 |
-
document.getElementById(sectionId).classList.remove('hidden');
|
126 |
-
}
|
127 |
-
window.showSection = showSection;
|
128 |
-
|
129 |
-
function toggleMobileMenu() {
|
130 |
-
const mobileMenu = document.getElementById('mobileMenu');
|
131 |
-
mobileMenu.classList.toggle('hidden');
|
132 |
-
}
|
133 |
-
window.toggleMobileMenu = toggleMobileMenu;
|
134 |
-
|
135 |
-
async function init() {
|
136 |
-
const scoreKey = "bleu"
|
137 |
-
const scoreName = "BLEU Score"
|
138 |
-
const summaryChartDiv = document.getElementById('summary-chart');
|
139 |
-
const languageListDiv = document.getElementById('language-list');
|
140 |
-
|
141 |
-
const response = await fetch('results.json');
|
142 |
-
const data = await response.json();
|
143 |
-
// Format captions
|
144 |
-
const formatScore = (score) => score > 0 ? score.toFixed(2) : "No benchmark available!"
|
145 |
-
const formatTitle = d => (d.language_name + "\n" + parseInt(d.speakers / 1_000_00) / 10 + "M speakers\n" + scoreName + ": " + formatScore(d[scoreKey]))
|
146 |
-
|
147 |
-
// Create summary plot
|
148 |
-
const summaryPlot = Plot.plot({
|
149 |
-
width: summaryChartDiv.clientWidth,
|
150 |
-
height: 400,
|
151 |
-
marginBottom: 100,
|
152 |
-
x: { label: "Number of speakers", axis: null },
|
153 |
-
y: { label: `${scoreName} (average across models)` },
|
154 |
-
// color: { scheme: "BrBG" },
|
155 |
-
marks: [
|
156 |
-
Plot.rectY(data, Plot.stackX({
|
157 |
-
x: "speakers",
|
158 |
-
order: scoreKey,
|
159 |
-
reverse: true,
|
160 |
-
y2: scoreKey, // y2 to avoid stacking by y
|
161 |
-
title: formatTitle,
|
162 |
-
tip: true,
|
163 |
-
fill: d => d[scoreKey] > 0 ? "black" : "pink"
|
164 |
-
})),
|
165 |
-
Plot.rectY(data, Plot.pointerX(Plot.stackX({
|
166 |
-
x: "speakers",
|
167 |
-
order: scoreKey,
|
168 |
-
reverse: true,
|
169 |
-
y2: scoreKey, // y2 to avoid stacking by y
|
170 |
-
fill: "grey",
|
171 |
-
}))),
|
172 |
-
Plot.text(data, Plot.stackX({
|
173 |
-
x: "speakers",
|
174 |
-
y2: scoreKey,
|
175 |
-
order: scoreKey,
|
176 |
-
reverse: true,
|
177 |
-
text: "language_name",
|
178 |
-
frameAnchor: "bottom",
|
179 |
-
textAnchor: "end",
|
180 |
-
dy: 10,
|
181 |
-
rotate: 270,
|
182 |
-
opacity: (d) => d.speakers > 50_000_000 ? 1 : 0,
|
183 |
-
}))
|
184 |
-
]
|
185 |
-
});
|
186 |
-
|
187 |
-
// Add summary plot to the coverage section
|
188 |
-
summaryChartDiv.appendChild(summaryPlot);
|
189 |
-
|
190 |
-
// Get unique languages with their speaker counts
|
191 |
-
const languageMap = new Map();
|
192 |
-
data.forEach(r => {
|
193 |
-
if (!languageMap.has(r.language_name)) {
|
194 |
-
languageMap.set(r.language_name, r.speakers);
|
195 |
-
}
|
196 |
-
});
|
197 |
-
|
198 |
-
// Sort languages by speaker count (descending)
|
199 |
-
const languages = [...languageMap.entries()]
|
200 |
-
.sort((a, b) => b[1] - a[1])
|
201 |
-
.map(([lang]) => lang);
|
202 |
-
|
203 |
-
// Section for each language
|
204 |
-
languages.forEach(language => {
|
205 |
-
const headerDiv = document.createElement('div');
|
206 |
-
headerDiv.className = 'language-header';
|
207 |
-
|
208 |
-
const h2 = document.createElement('h2');
|
209 |
-
h2.textContent = language;
|
210 |
-
h2.style.marginBottom = '5px';
|
211 |
-
|
212 |
-
const speakerP = document.createElement('p');
|
213 |
-
speakerP.className = 'speaker-count';
|
214 |
-
const speakerCount = (languageMap.get(language) / 1_000_000).toFixed(1);
|
215 |
-
speakerP.textContent = `${speakerCount}M speakers`;
|
216 |
-
|
217 |
-
headerDiv.appendChild(h2);
|
218 |
-
headerDiv.appendChild(speakerP);
|
219 |
-
languageListDiv.appendChild(headerDiv);
|
220 |
-
|
221 |
-
const languageData = data.filter(r => r.language_name === language)[0]["scores"];
|
222 |
-
|
223 |
-
const descriptor = code => {
|
224 |
-
let [org, model] = code.split("/")
|
225 |
-
return model.split("-")[0]
|
226 |
-
}
|
227 |
-
|
228 |
-
// Plot for how well the models perform on this language
|
229 |
-
if (languageData && languageData.length > 1) {
|
230 |
-
const plot = Plot.plot({
|
231 |
-
width: 400,
|
232 |
-
height: 200,
|
233 |
-
margin: 30,
|
234 |
-
y: {
|
235 |
-
domain: [0, 1],
|
236 |
-
label: scoreName
|
237 |
-
},
|
238 |
-
marks: [
|
239 |
-
Plot.barY(languageData, {
|
240 |
-
x: d => descriptor(d.model),
|
241 |
-
y: scoreKey
|
242 |
-
})
|
243 |
-
]
|
244 |
-
});
|
245 |
-
languageListDiv.appendChild(plot);
|
246 |
-
}
|
247 |
-
});
|
248 |
-
}
|
249 |
-
|
250 |
-
init();
|
251 |
-
</script>
|
252 |
-
</body>
|
253 |
-
|
254 |
-
</html>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
observablehq.config.js
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
// See https://observablehq.com/framework/config for documentation.
|
2 |
+
export default {
|
3 |
+
// The app’s title; used in the sidebar and webpage titles.
|
4 |
+
title: "AI Language Monitor",
|
5 |
+
|
6 |
+
// The pages and sections in the sidebar. If you don’t specify this option,
|
7 |
+
// all pages will be listed in alphabetical order. Listing pages explicitly
|
8 |
+
// lets you organize them into sections and have unlisted pages.
|
9 |
+
pages: [
|
10 |
+
{ name: "Compare Languages", path: "/compare-languages" },
|
11 |
+
{ name: "Compare AI Models", path: "/compare-ai-models" },
|
12 |
+
{ name: "Methodology", path: "/methodology" },
|
13 |
+
],
|
14 |
+
|
15 |
+
// Content to add to the head of the page, e.g. for a favicon:
|
16 |
+
head: '<link rel="icon" href="data:image/svg+xml,<svg xmlns=%22http://www.w3.org/2000/svg%22 viewBox=%220 0 100 100%22><text y=%22.9em%22 font-size=%2290%22 fill=%22black%22>π</text></svg>">',
|
17 |
+
|
18 |
+
// The path to the source root.
|
19 |
+
root: "src",
|
20 |
+
|
21 |
+
// Some additional configuration options and their defaults:
|
22 |
+
// theme: "default", // try "light", "dark", "slate", etc.
|
23 |
+
// header: "", // what to show in the header (HTML)
|
24 |
+
// footer: "Built with Observable.", // what to show in the footer (HTML)
|
25 |
+
// sidebar: true, // whether to show the sidebar
|
26 |
+
// toc: true, // whether to show the table of contents
|
27 |
+
// pager: true, // whether to show previous & next links in the footer
|
28 |
+
// output: "dist", // path to the output root for build
|
29 |
+
// search: true, // activate search
|
30 |
+
// linkify: true, // convert URLs in Markdown to links
|
31 |
+
// typographer: false, // smart quotes and other typographic improvements
|
32 |
+
// preserveExtension: false, // drop .html from URLs
|
33 |
+
// preserveIndex: false, // drop /index from URLs
|
34 |
+
};
|
package-lock.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
package.json
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"type": "module",
|
3 |
+
"private": true,
|
4 |
+
"scripts": {
|
5 |
+
"clean": "rimraf src/.observablehq/cache",
|
6 |
+
"build": "observable build",
|
7 |
+
"dev": "observable preview",
|
8 |
+
"deploy": "observable deploy",
|
9 |
+
"observable": "observable"
|
10 |
+
},
|
11 |
+
"dependencies": {
|
12 |
+
"@observablehq/framework": "^1.13.2",
|
13 |
+
"d3-dsv": "^3.0.1",
|
14 |
+
"d3-time-format": "^4.1.0"
|
15 |
+
},
|
16 |
+
"devDependencies": {
|
17 |
+
"rimraf": "^5.0.5"
|
18 |
+
},
|
19 |
+
"engines": {
|
20 |
+
"node": ">=18"
|
21 |
+
}
|
22 |
+
}
|
results.json
DELETED
The diff for this file is too large to render.
See raw diff
|
|
src/.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
/.observablehq/cache/
|
src/compare-ai-models.md
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
theme: dashboard
|
3 |
+
title: Compare AI models
|
4 |
+
---
|
5 |
+
|
6 |
+
# Compare AI models
|
7 |
+
|
8 |
+
```js
|
9 |
+
const data = FileAttachment("data/languagebench.json").json();
|
10 |
+
```
|
11 |
+
|
12 |
+
```js
|
13 |
+
const scoreKey = "bleu"
|
14 |
+
const scoreName = "BLEU Score"
|
15 |
+
|
16 |
+
// Get unique languages with their speaker counts
|
17 |
+
const languageMap = new Map();
|
18 |
+
data.forEach(r => {
|
19 |
+
if (!languageMap.has(r.language_name)) {
|
20 |
+
languageMap.set(r.language_name, r.speakers);
|
21 |
+
}
|
22 |
+
});
|
23 |
+
|
24 |
+
// Sort languages by speaker count (descending)
|
25 |
+
const languages = [...languageMap.entries()]
|
26 |
+
.sort((a, b) => b[1] - a[1])
|
27 |
+
.map(([lang]) => lang);
|
28 |
+
|
29 |
+
// Section for each language
|
30 |
+
languages.forEach(language => {
|
31 |
+
display(html`<h2 class="language-header">${language}</h2>`)
|
32 |
+
|
33 |
+
const speakerCount = (languageMap.get(language) / 1_000_000).toFixed(1);
|
34 |
+
display(html`${speakerCount}M speakers`);
|
35 |
+
|
36 |
+
const languageData = data.filter(r => r.language_name === language)[0]["scores"];
|
37 |
+
console.log(languageData)
|
38 |
+
|
39 |
+
const descriptor = code => {
|
40 |
+
let [org, model] = code.split("/")
|
41 |
+
return model.split("-")[0]
|
42 |
+
}
|
43 |
+
|
44 |
+
// Plot for how well the models perform on this language
|
45 |
+
if (languageData && languageData.length >= 1) {
|
46 |
+
console.log("yes")
|
47 |
+
const chart = Plot.plot({
|
48 |
+
width: 400,
|
49 |
+
height: 200,
|
50 |
+
margin: 30,
|
51 |
+
y: {
|
52 |
+
domain: [0, 1],
|
53 |
+
label: scoreName
|
54 |
+
},
|
55 |
+
marks: [
|
56 |
+
Plot.barY(languageData, {
|
57 |
+
x: d => descriptor(d.model),
|
58 |
+
y: scoreKey
|
59 |
+
})
|
60 |
+
]
|
61 |
+
});
|
62 |
+
display(chart)
|
63 |
+
}
|
64 |
+
});
|
65 |
+
```
|
src/compare-languages.md
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
theme: dashboard
|
3 |
+
title: Compare languages
|
4 |
+
---
|
5 |
+
|
6 |
+
# Compare languages
|
7 |
+
|
8 |
+
```js
|
9 |
+
import { languageChart } from "./components/language-chart.js";
|
10 |
+
|
11 |
+
const data = FileAttachment("data/languagebench.json").json();
|
12 |
+
```
|
13 |
+
|
14 |
+
```js
|
15 |
+
const scoreKey = "bleu"
|
16 |
+
const scoreName = "BLEU Score"
|
17 |
+
|
18 |
+
// Create summary plot
|
19 |
+
display(languageChart(data, {width: 1000, height: 400, scoreKey: scoreKey, scoreName: scoreName}))
|
20 |
+
```
|
src/components/language-chart.js
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import * as Plot from "npm:@observablehq/plot";
|
2 |
+
|
3 |
+
export function languageChart(
|
4 |
+
languageData,
|
5 |
+
{ width, height, scoreKey, scoreName } = {}
|
6 |
+
) {
|
7 |
+
// Format captions
|
8 |
+
const formatScore = (score) =>
|
9 |
+
score > 0 ? score.toFixed(2) : "No benchmark available!";
|
10 |
+
const formatTitle = (d) =>
|
11 |
+
d.language_name +
|
12 |
+
"\n" +
|
13 |
+
parseInt(d.speakers / 1_000_00) / 10 +
|
14 |
+
"M speakers\n" +
|
15 |
+
scoreName +
|
16 |
+
": " +
|
17 |
+
formatScore(d[scoreKey]);
|
18 |
+
|
19 |
+
return Plot.plot({
|
20 |
+
width: width,
|
21 |
+
height: height,
|
22 |
+
marginBottom: 100,
|
23 |
+
x: { label: "Number of speakers", axis: null },
|
24 |
+
y: { label: `${scoreName} (average across models)` },
|
25 |
+
// color: { scheme: "BrBG" },
|
26 |
+
marks: [
|
27 |
+
Plot.rectY(
|
28 |
+
languageData,
|
29 |
+
Plot.stackX({
|
30 |
+
x: "speakers",
|
31 |
+
order: scoreKey,
|
32 |
+
reverse: true,
|
33 |
+
y2: scoreKey, // y2 to avoid stacking by y
|
34 |
+
title: formatTitle,
|
35 |
+
tip: true,
|
36 |
+
fill: (d) => (d[scoreKey] > 0 ? "black" : "pink"),
|
37 |
+
})
|
38 |
+
),
|
39 |
+
Plot.rectY(
|
40 |
+
languageData,
|
41 |
+
Plot.pointerX(
|
42 |
+
Plot.stackX({
|
43 |
+
x: "speakers",
|
44 |
+
order: scoreKey,
|
45 |
+
reverse: true,
|
46 |
+
y2: scoreKey, // y2 to avoid stacking by y
|
47 |
+
fill: "grey",
|
48 |
+
})
|
49 |
+
)
|
50 |
+
),
|
51 |
+
Plot.text(
|
52 |
+
languageData,
|
53 |
+
Plot.stackX({
|
54 |
+
x: "speakers",
|
55 |
+
y2: scoreKey,
|
56 |
+
order: scoreKey,
|
57 |
+
reverse: true,
|
58 |
+
text: "language_name",
|
59 |
+
frameAnchor: "bottom",
|
60 |
+
textAnchor: "end",
|
61 |
+
dy: 10,
|
62 |
+
rotate: 270,
|
63 |
+
opacity: (d) => (d.speakers > 50_000_000 ? 1 : 0),
|
64 |
+
})
|
65 |
+
),
|
66 |
+
],
|
67 |
+
});
|
68 |
+
}
|
data.txt → src/data/data.txt
RENAMED
@@ -1,4 +1,4 @@
|
|
1 |
floresp-v2.0-rc.3: https://github.com/openlanguagedata/flores
|
2 |
languages.csv: generated from https://query.wikidata.org/ using the languages.rq query
|
3 |
LanguageCodes.tab: https://www.ethnologue.com/
|
4 |
-
ScriptCodes.csv: https://www.unicode.org/iso15924/iso15924-codes.html
|
|
|
1 |
floresp-v2.0-rc.3: https://github.com/openlanguagedata/flores
|
2 |
languages.csv: generated from https://query.wikidata.org/ using the languages.rq query
|
3 |
LanguageCodes.tab: https://www.ethnologue.com/
|
4 |
+
ScriptCodes.csv: https://www.unicode.org/iso15924/iso15924-codes.html
|
languagebench.py → src/data/languagebench.json.py
RENAMED
@@ -1,7 +1,9 @@
|
|
1 |
import asyncio
|
2 |
import json
|
3 |
import os
|
|
|
4 |
from os import getenv
|
|
|
5 |
|
6 |
import evaluate
|
7 |
import pandas as pd
|
@@ -15,14 +17,14 @@ from transformers import NllbTokenizer
|
|
15 |
|
16 |
# config
|
17 |
models = [
|
18 |
-
"openai/gpt-4o",
|
19 |
-
"anthropic/claude-3.5-
|
20 |
-
"meta-llama/llama-3.1-405b-instruct", # lots of slow repetitions for LRLs
|
21 |
-
"mistralai/mistral-large",
|
22 |
# "google/gemini-flash-1.5", # very fast
|
23 |
-
"qwen/qwen-2.5-72b-instruct", # somewhat slow
|
24 |
]
|
25 |
-
fast_model = "anthropic/claude-3.5-
|
26 |
n_sentences = 30
|
27 |
|
28 |
# setup
|
@@ -43,9 +45,9 @@ def reorder(language_name):
|
|
43 |
return language_name.split(",")[1] + " " + language_name.split(",")[0]
|
44 |
return language_name
|
45 |
|
46 |
-
|
47 |
# load benchmark languages and scripts
|
48 |
-
|
|
|
49 |
benchmark_languages = pd.DataFrame(
|
50 |
[f.split(".")[1].split("_", 1) for f in os.listdir(benchmark_dir)],
|
51 |
columns=["language_code", "script_code"],
|
@@ -56,7 +58,7 @@ benchmark_languages["in_benchmark"] = True
|
|
56 |
|
57 |
# load Ethnologue language names
|
58 |
language_names = (
|
59 |
-
pd.read_csv("LanguageCodes.tab", sep="\t")
|
60 |
.rename(columns={"LangID": "language_code", "Name": "language_name"})[
|
61 |
["language_code", "language_name"]
|
62 |
]
|
@@ -65,7 +67,7 @@ language_names = (
|
|
65 |
|
66 |
# load Wikidata speaker stats
|
67 |
language_stats = (
|
68 |
-
pd.read_csv("languages.tsv", sep="\t")
|
69 |
.rename(columns={"iso639_3": "language_code", "maxSpeakers": "speakers"})[
|
70 |
["language_code", "speakers"]
|
71 |
]
|
@@ -84,7 +86,7 @@ language_stats = language_stats[
|
|
84 |
]
|
85 |
|
86 |
# load unicode script names
|
87 |
-
script_names = pd.read_csv("ScriptCodes.csv").rename(
|
88 |
columns={"Code": "script_code", "English Name": "script_name"}
|
89 |
)[["script_code", "script_name"]]
|
90 |
|
@@ -160,13 +162,13 @@ def load_sentences(language):
|
|
160 |
# evaluation!
|
161 |
async def main():
|
162 |
results = []
|
163 |
-
for language in languages.itertuples():
|
164 |
name = (
|
165 |
language.language_name
|
166 |
if not pd.isna(language.language_name)
|
167 |
else language.language_code
|
168 |
)
|
169 |
-
print(name)
|
170 |
scores = []
|
171 |
if language.in_benchmark:
|
172 |
target_sentences = load_sentences(language)[:n_sentences]
|
@@ -185,7 +187,7 @@ async def main():
|
|
185 |
load_sentences(lang)[i]
|
186 |
for i, lang in enumerate(_original_languages.itertuples())
|
187 |
]
|
188 |
-
print(model)
|
189 |
predictions = [
|
190 |
translate(
|
191 |
model, language.language_name, language.script_name, sentence
|
@@ -220,8 +222,7 @@ async def main():
|
|
220 |
# "bert_score": mean([s["bert_score"] for s in scores]),
|
221 |
}
|
222 |
)
|
223 |
-
|
224 |
-
json.dump(results, f, indent=2, ensure_ascii=False)
|
225 |
|
226 |
|
227 |
if __name__ == "__main__":
|
|
|
1 |
import asyncio
|
2 |
import json
|
3 |
import os
|
4 |
+
import sys
|
5 |
from os import getenv
|
6 |
+
from pathlib import Path
|
7 |
|
8 |
import evaluate
|
9 |
import pandas as pd
|
|
|
17 |
|
18 |
# config
|
19 |
models = [
|
20 |
+
"openai/gpt-4o-mini",
|
21 |
+
"anthropic/claude-3.5-haiku",
|
22 |
+
# "meta-llama/llama-3.1-405b-instruct", # lots of slow repetitions for LRLs
|
23 |
+
# "mistralai/mistral-large",
|
24 |
# "google/gemini-flash-1.5", # very fast
|
25 |
+
# "qwen/qwen-2.5-72b-instruct", # somewhat slow
|
26 |
]
|
27 |
+
fast_model = "anthropic/claude-3.5-haiku"
|
28 |
n_sentences = 30
|
29 |
|
30 |
# setup
|
|
|
45 |
return language_name.split(",")[1] + " " + language_name.split(",")[0]
|
46 |
return language_name
|
47 |
|
|
|
48 |
# load benchmark languages and scripts
|
49 |
+
data = Path("src/data")
|
50 |
+
benchmark_dir = data / "floresp-v2.0-rc.3/dev"
|
51 |
benchmark_languages = pd.DataFrame(
|
52 |
[f.split(".")[1].split("_", 1) for f in os.listdir(benchmark_dir)],
|
53 |
columns=["language_code", "script_code"],
|
|
|
58 |
|
59 |
# load Ethnologue language names
|
60 |
language_names = (
|
61 |
+
pd.read_csv(data / "LanguageCodes.tab", sep="\t")
|
62 |
.rename(columns={"LangID": "language_code", "Name": "language_name"})[
|
63 |
["language_code", "language_name"]
|
64 |
]
|
|
|
67 |
|
68 |
# load Wikidata speaker stats
|
69 |
language_stats = (
|
70 |
+
pd.read_csv(data / "languages.tsv", sep="\t")
|
71 |
.rename(columns={"iso639_3": "language_code", "maxSpeakers": "speakers"})[
|
72 |
["language_code", "speakers"]
|
73 |
]
|
|
|
86 |
]
|
87 |
|
88 |
# load unicode script names
|
89 |
+
script_names = pd.read_csv(data / "ScriptCodes.csv").rename(
|
90 |
columns={"Code": "script_code", "English Name": "script_name"}
|
91 |
)[["script_code", "script_name"]]
|
92 |
|
|
|
162 |
# evaluation!
|
163 |
async def main():
|
164 |
results = []
|
165 |
+
for language in list(languages.itertuples())[:5]:
|
166 |
name = (
|
167 |
language.language_name
|
168 |
if not pd.isna(language.language_name)
|
169 |
else language.language_code
|
170 |
)
|
171 |
+
print(name, file=sys.stderr)
|
172 |
scores = []
|
173 |
if language.in_benchmark:
|
174 |
target_sentences = load_sentences(language)[:n_sentences]
|
|
|
187 |
load_sentences(lang)[i]
|
188 |
for i, lang in enumerate(_original_languages.itertuples())
|
189 |
]
|
190 |
+
print(model, file=sys.stderr)
|
191 |
predictions = [
|
192 |
translate(
|
193 |
model, language.language_name, language.script_name, sentence
|
|
|
222 |
# "bert_score": mean([s["bert_score"] for s in scores]),
|
223 |
}
|
224 |
)
|
225 |
+
print(json.dumps(results, indent=2, ensure_ascii=False))
|
|
|
226 |
|
227 |
|
228 |
if __name__ == "__main__":
|
languages.rq → src/data/languages.rq
RENAMED
File without changes
|
languages.tsv → src/data/languages.tsv
RENAMED
File without changes
|
src/index.md
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
toc: false
|
3 |
+
---
|
4 |
+
|
5 |
+
<div class="hero">
|
6 |
+
<h1>AI Language Monitor</h1>
|
7 |
+
<h2>Benchmarking all big AI models on all benchmarkable languages.</h2>
|
8 |
+
</div>
|
9 |
+
|
10 |
+
```js
|
11 |
+
import { languageChart } from "./components/language-chart.js";
|
12 |
+
|
13 |
+
const data = FileAttachment("data/languagebench.json").json();
|
14 |
+
```
|
15 |
+
|
16 |
+
|
17 |
+
<div class="grid grid-cols-2" style="grid-auto-rows: 504px;">
|
18 |
+
<div class="card">
|
19 |
+
<h2 class="hero">Compare languages</h2>
|
20 |
+
${resize((width) => languageChart(data, {width: 1000, height: 400, scoreKey: "bleu", scoreName: "BLEU Score"}))}
|
21 |
+
</div>
|
22 |
+
<div class="card">
|
23 |
+
<h2 class="hero">Compare AI models</h2>
|
24 |
+
...
|
25 |
+
</div>
|
26 |
+
</div>
|
27 |
+
|
28 |
+
<style>
|
29 |
+
|
30 |
+
.hero {
|
31 |
+
display: flex;
|
32 |
+
flex-direction: column;
|
33 |
+
align-items: center;
|
34 |
+
font-family: var(--sans-serif);
|
35 |
+
margin: 4rem 0 8rem;
|
36 |
+
text-wrap: balance;
|
37 |
+
text-align: center;
|
38 |
+
}
|
39 |
+
|
40 |
+
.hero h1 {
|
41 |
+
margin: 1rem 0;
|
42 |
+
padding: 1rem 0;
|
43 |
+
max-width: none;
|
44 |
+
font-size: 90px;
|
45 |
+
font-weight: 900;
|
46 |
+
line-height: 1;
|
47 |
+
background: linear-gradient(30deg, var(--theme-foreground-focus), currentColor);
|
48 |
+
-webkit-background-clip: text;
|
49 |
+
-webkit-text-fill-color: transparent;
|
50 |
+
background-clip: text;
|
51 |
+
}
|
52 |
+
|
53 |
+
</style>
|
src/methodology.md
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: Methodology
|
3 |
+
---
|
4 |
+
|
5 |
+
# Methodology
|
6 |
+
|
7 |
+
Sources:
|
8 |
+
|
9 |
+
1. For AI models: [OpenRouter](https://openrouter.ai/)
|
10 |
+
2. For language benchmarks: [FLORES+](https://github.com/openlanguagedata/flores)
|
11 |
+
3. For language statistics: [Wikidata](https://gist.github.com/unhammer/3e8f2e0f79972bf5008a4c970081502d), [Ethnologue](https://www.ethnologue.com/browse/names/)
|
12 |
+
|