File size: 5,625 Bytes
544091e 50128d8 544091e d178010 544091e 1983d1c 544091e 86b8b3a b909c3a 6b6f157 5640508 6b6f157 b909c3a 6b6f157 b909c3a d178010 86b8b3a 5640508 b909c3a 6b6f157 b909c3a 86b8b3a b909c3a 86b8b3a d178010 6b6f157 b909c3a 6b6f157 d178010 86b8b3a d178010 86b8b3a d178010 6b6f157 d178010 86b8b3a d178010 6b6f157 d178010 9aa08d7 5640508 d178010 b909c3a 0e5691e 50128d8 6b6f157 50128d8 b909c3a 5640508 544091e 50128d8 544091e 50128d8 544091e 6b6f157 544091e 0e5691e 5640508 6b6f157 5640508 86b8b3a 5640508 86b8b3a 5640508 544091e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 |
<!DOCTYPE html>
<html>
<head>
<title>Language Bench</title>
<style>
body {
max-width: 1200px;
margin: 0 auto;
padding: 20px;
font-family: sans-serif;
}
.language-header {
margin-bottom: 10px;
}
.speaker-count {
font-size: 0.8em;
color: #666;
font-weight: normal;
margin: 0;
}
</style>
<link rel="icon"
href="data:image/svg+xml,<svg xmlns=%22http://www.w3.org/2000/svg%22 viewBox=%220 0 100 100%22><text y=%22.9em%22 font-size=%2290%22 fill=%22black%22>π</text></svg>">
</head>
<body>
<h1>Language Bench</h1>
<div id="charts"></div>
<script type="module">
// Import Plot using ESM
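// NOTE(review): the package/version segment of this URL was garbled by an
// email-obfuscation artifact in this copy; restored to plot@0.6 as used in
// Observable Plot's getting-started guide. Confirm the intended version.
import * as Plot from "https://cdn.jsdelivr.net/npm/@observablehq/plot@0.6/+esm";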
/**
 * Formats an absolute speaker count as millions with one decimal place.
 * Shared by the tooltip and the per-language header so both show the same text.
 *
 * @param {number} speakers - Absolute number of speakers.
 * @returns {string} For example "1.3M speakers".
 */
function formatSpeakers(speakers) {
  return `${(speakers / 1_000_000).toFixed(1)}M speakers`;
}

/**
 * Formats a score for the tooltip.
 *
 * @param {number} score - Score value; anything not greater than 0 (including
 *   a missing value) is reported as "no benchmark".
 * @returns {string} The score with two decimals, or a placeholder message.
 */
function formatScore(score) {
  return score > 0 ? score.toFixed(2) : "No benchmark available!";
}

/**
 * Derives the short x-axis label for a model identifier: the part after the
 * first "/" (or the whole id when there is no "/"), cut at the first "-".
 *
 * NOTE(review): models that share the text before the first "-" get the same
 * label, and Plot.barY stacks bars that share an x value. Confirm whether a
 * longer label is wanted.
 *
 * @param {string} code - Model identifier such as "org/model-variant".
 * @returns {string} Short label, e.g. "model".
 */
function modelLabel(code) {
  const [first, second] = code.split("/");
  // Ids without an organisation prefix have no second segment; fall back to
  // the id itself instead of crashing on `undefined.split`.
  const modelName = second ?? first;
  return modelName.split("-")[0];
}

/**
 * Loads results.json and renders the page: one summary chart across all
 * languages, followed by a header (name + speaker count) and, where more than
 * one model score exists, a per-model bar chart for each language.
 *
 * The code reads these fields from each record: `language_name`, `speakers`,
 * the score under `bleu`, and `scores` (entries with `model` and `bleu`).
 *
 * @returns {Promise<void>}
 * @throws {Error} If results.json cannot be fetched or is not an array.
 */
async function init() {
  const scoreKey = "bleu";
  const scoreName = "BLEU Score";
  const chartsDiv = document.getElementById("charts");

  const response = await fetch("results.json");
  if (!response.ok) {
    throw new Error(`Failed to load results.json: HTTP ${response.status}`);
  }
  const data = await response.json();
  if (!Array.isArray(data)) {
    throw new Error("results.json must contain an array of language records");
  }

  // Tooltip text: language name, speaker count, and score on separate lines.
  const formatTitle = (d) =>
    `${d.language_name}\n${formatSpeakers(d.speakers)}\n${scoreName}: ${formatScore(d[scoreKey])}`;

  // All three summary marks share the same layout: bar width is the number of
  // speakers, bars are ordered by descending score, and y2 (rather than y) is
  // used so that the score sets the bar height without being stacked.
  const stacked = (options) =>
    Plot.stackX({
      x: "speakers",
      order: scoreKey,
      reverse: true,
      y2: scoreKey,
      ...options,
    });

  const summaryPlot = Plot.plot({
    width: chartsDiv.clientWidth,
    height: 400,
    marginBottom: 100,
    x: { label: "Number of speakers", axis: null },
    y: { label: `${scoreName} (average across models)` },
    marks: [
      // One bar per language; languages without a score are drawn in pink.
      Plot.rectY(
        data,
        stacked({
          title: formatTitle,
          tip: true,
          fill: (d) => (d[scoreKey] > 0 ? "black" : "pink"),
        }),
      ),
      // Grey overlay on the bar under the pointer.
      Plot.rectY(data, Plot.pointerX(stacked({ fill: "grey" }))),
      // Rotated language names below the bars, visible only for languages
      // with more than 50 million speakers.
      Plot.text(
        data,
        stacked({
          text: "language_name",
          frameAnchor: "bottom",
          textAnchor: "end",
          dy: 10,
          rotate: 270,
          opacity: (d) => (d.speakers > 50_000_000 ? 1 : 0),
        }),
      ),
    ],
  });
  chartsDiv.insertBefore(summaryPlot, chartsDiv.firstChild);

  // Keep the first record seen for each language name; this replaces a
  // per-language re-scan of the whole data array.
  const firstRecordByLanguage = new Map();
  for (const record of data) {
    if (!firstRecordByLanguage.has(record.language_name)) {
      firstRecordByLanguage.set(record.language_name, record);
    }
  }

  // Largest languages first.
  const languageRecords = [...firstRecordByLanguage.values()].sort(
    (a, b) => b.speakers - a.speakers,
  );

  for (const record of languageRecords) {
    const headerDiv = document.createElement("div");
    headerDiv.className = "language-header";

    const h2 = document.createElement("h2");
    h2.textContent = record.language_name;
    h2.style.marginBottom = "5px";

    const speakerP = document.createElement("p");
    speakerP.className = "speaker-count";
    speakerP.textContent = formatSpeakers(record.speakers);

    headerDiv.appendChild(h2);
    headerDiv.appendChild(speakerP);
    chartsDiv.appendChild(headerDiv);

    // Per-model chart, drawn only when there is more than one score entry.
    const languageData = record.scores;
    if (languageData && languageData.length > 1) {
      const plot = Plot.plot({
        width: 400,
        height: 200,
        margin: 30,
        y: {
          domain: [0, 1],
          label: scoreName,
        },
        marks: [
          Plot.barY(languageData, {
            x: (d) => modelLabel(d.model),
            y: scoreKey,
          }),
        ],
      });
      chartsDiv.appendChild(plot);
    }
  }
}
// Start rendering. Failures (network error, malformed results.json, Plot
// errors) are logged and shown to the user rather than left as an unhandled
// promise rejection with a blank page.
init().catch((error) => {
  console.error("Failed to render Language Bench charts:", error);
  const notice = document.createElement("p");
  notice.textContent = "Could not load the benchmark results.";
  document.body.appendChild(notice);
});
</script>
</body>
</html> |