File size: 5,625 Bytes
544091e
 
 
 
 
 
 
 
 
 
 
 
50128d8
 
 
 
 
 
 
 
 
 
 
544091e
d178010
 
544091e
 
 
 
 
 
1983d1c
 
 
 
544091e
86b8b3a
 
b909c3a
 
6b6f157
 
5640508
6b6f157
 
b909c3a
 
 
6b6f157
b909c3a
d178010
 
86b8b3a
5640508
b909c3a
6b6f157
b909c3a
86b8b3a
b909c3a
86b8b3a
d178010
 
6b6f157
b909c3a
6b6f157
d178010
86b8b3a
d178010
86b8b3a
d178010
 
6b6f157
d178010
86b8b3a
 
d178010
6b6f157
d178010
 
 
9aa08d7
5640508
d178010
b909c3a
 
 
 
 
0e5691e
50128d8
 
6b6f157
 
 
50128d8
 
 
 
 
 
 
 
b909c3a
5640508
544091e
50128d8
 
 
544091e
 
50128d8
 
 
 
 
 
 
 
 
 
544091e
6b6f157
544091e
0e5691e
 
 
 
 
5640508
6b6f157
5640508
 
 
 
 
86b8b3a
 
5640508
 
 
 
86b8b3a
5640508
 
 
 
 
 
 
544091e
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
<!DOCTYPE html>
<html>

<head>
    <!-- Declare the encoding explicitly: the favicon below embeds a non-ASCII
         emoji, so the page must not depend on the browser guessing the charset. -->
    <meta charset="utf-8">
    <!-- The summary chart is sized from the container's clientWidth, so let
         narrow screens lay the page out at device width. -->
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>Language Bench</title>
    <style>
        /* Centered, fixed-max-width page layout. */
        body {
            max-width: 1200px;
            margin: 0 auto;
            padding: 20px;
            font-family: sans-serif;
        }

        /* Wrapper around each language's heading and speaker count. */
        .language-header {
            margin-bottom: 10px;
        }

        /* Smaller, muted line shown under each language heading. */
        .speaker-count {
            font-size: 0.8em;
            color: #666;
            font-weight: normal;
            margin: 0;
        }
    </style>
    <!-- Inline SVG favicon (globe emoji) supplied as a data URI. -->
    <link rel="icon"
        href="data:image/svg+xml,<svg xmlns=%22http://www.w3.org/2000/svg%22 viewBox=%220 0 100 100%22><text y=%22.9em%22 font-size=%2290%22 fill=%22black%22>🌍</text></svg>">
</head>

<body>
    <h1>Language Bench</h1>
    <div id="charts"></div>

    <script type="module">
        // Import Plot using ESM
        import * as Plot from "https://cdn.jsdelivr.net/npm/@observablehq/plot@0.6/+esm";

        /**
         * Formats a raw speaker count as millions with one decimal place.
         * Shared by the chart tooltip and the per-language header so both show
         * the same, consistently rounded figure.
         *
         * @param {number} speakers - Absolute number of speakers.
         * @returns {string} For example "120.0M speakers".
         */
        function formatSpeakers(speakers) {
            return `${(speakers / 1_000_000).toFixed(1)}M speakers`;
        }

        /**
         * Derives a short axis label from a model identifier: the text before
         * the first "-" of the segment following the first "/".
         * Identifiers without a "/" are used as a whole instead of throwing.
         *
         * @param {string} code - Model identifier, e.g. "org/name-variant".
         * @returns {string} Short label, e.g. "name".
         */
        function describeModel(code) {
            const parts = code.split("/");
            const model = parts.length > 1 ? parts[1] : parts[0];
            return model.split("-")[0];
        }

        /**
         * Fetches and parses a JSON resource, rejecting on HTTP error statuses.
         *
         * @param {string} url - Location of the JSON document.
         * @returns {Promise<any>} The parsed JSON payload.
         * @throws {Error} When the response status is not in the 2xx range.
         */
        async function loadResults(url) {
            const response = await fetch(url);
            if (!response.ok) {
                throw new Error(`Failed to load ${url}: HTTP ${response.status}`);
            }
            return response.json();
        }

        /**
         * Loads results.json and renders the page into the #charts element:
         * one summary plot across all languages, followed by a header for each
         * language (sorted by speaker count, descending) and, where more than
         * one model score exists, a bar chart of those scores.
         *
         * Each record is read for `language_name`, `speakers`, the score key
         * ("bleu") and `scores` (entries with `model` and the score key).
         *
         * @returns {Promise<void>}
         * @throws {Error} When the results cannot be loaded; the failure is
         *     also written into the #charts element before being rethrown.
         */
        async function init() {
            const scoreKey = "bleu";
            const scoreName = "BLEU Score";
            const chartsDiv = document.getElementById('charts');

            let data;
            try {
                data = await loadResults('results.json');
            } catch (err) {
                // Surface the failure on the page, then let the caller see it too.
                chartsDiv.textContent = `Could not load benchmark results: ${err.message}`;
                throw err;
            }

            // Format captions
            const formatScore = (score) => score > 0 ? score.toFixed(2) : "No benchmark available!";
            const formatTitle = (d) =>
                `${d.language_name}\n${formatSpeakers(d.speakers)}\n${scoreName}: ${formatScore(d[scoreKey])}`;

            // Channel options shared by every mark of the summary plot.
            const stacked = {
                x: "speakers",
                order: scoreKey,
                reverse: true,
                y2: scoreKey, // y2 to avoid stacking by y
            };

            // Create summary plot
            const summaryPlot = Plot.plot({
                width: chartsDiv.clientWidth,
                height: 400,
                marginBottom: 100,
                x: { label: "Number of speakers", axis: null },
                y: { label: `${scoreName} (average across models)` },
                marks: [
                    // One rectangle per record; pink marks records without a positive score.
                    Plot.rectY(data, Plot.stackX({
                        ...stacked,
                        title: formatTitle,
                        tip: true,
                        fill: (d) => d[scoreKey] > 0 ? "black" : "pink",
                    })),
                    // Grey overlay for the rectangle under the pointer.
                    Plot.rectY(data, Plot.pointerX(Plot.stackX({
                        ...stacked,
                        fill: "grey",
                    }))),
                    // Rotated language labels, visible only above 50M speakers.
                    Plot.text(data, Plot.stackX({
                        ...stacked,
                        text: "language_name",
                        frameAnchor: "bottom",
                        textAnchor: "end",
                        dy: 10,
                        rotate: 270,
                        opacity: (d) => d.speakers > 50_000_000 ? 1 : 0,
                    })),
                ],
            });

            // Add summary plot at the top
            chartsDiv.insertBefore(summaryPlot, chartsDiv.firstChild);

            // Keep the first record seen for each language name, so the loop
            // below does not have to rescan the whole dataset per language.
            const firstByLanguage = new Map();
            for (const record of data) {
                if (!firstByLanguage.has(record.language_name)) {
                    firstByLanguage.set(record.language_name, record);
                }
            }

            // Sort languages by speaker count (descending)
            const records = [...firstByLanguage.values()]
                .sort((a, b) => b.speakers - a.speakers);

            // Section for each language
            for (const record of records) {
                const headerDiv = document.createElement('div');
                headerDiv.className = 'language-header';

                const h2 = document.createElement('h2');
                h2.textContent = record.language_name;
                h2.style.marginBottom = '5px';

                const speakerP = document.createElement('p');
                speakerP.className = 'speaker-count';
                speakerP.textContent = formatSpeakers(record.speakers);

                headerDiv.appendChild(h2);
                headerDiv.appendChild(speakerP);
                chartsDiv.appendChild(headerDiv);

                const languageData = record.scores;

                // Plot for how well the models perform on this language.
                // Languages with fewer than two model scores get no chart.
                if (languageData && languageData.length > 1) {
                    const plot = Plot.plot({
                        width: 400,
                        height: 200,
                        margin: 30,
                        y: {
                            domain: [0, 1],
                            label: scoreName,
                        },
                        marks: [
                            Plot.barY(languageData, {
                                x: (d) => describeModel(d.model),
                                y: scoreKey,
                            }),
                        ],
                    });
                    chartsDiv.appendChild(plot);
                }
            }
        }

        // Start rendering; attach a handler so a failure is logged with
        // context instead of surfacing as an unhandled promise rejection.
        init().catch((err) => {
            console.error("Language Bench failed to initialise:", err);
        });
    </script>
</body>

</html>