Spaces:
Running
Running
Mark Duppenthaler
committed on
Commit
·
cf4e7fe
1
Parent(s):
f76d503
Model Filters
Browse files
frontend/src/components/LeaderboardFilter.tsx
CHANGED
@@ -8,14 +8,12 @@ interface FilterProps {
|
|
8 |
groups: Groups
|
9 |
selectedMetrics: Set<string>
|
10 |
setSelectedMetrics: (metrics: Set<string>) => void
|
11 |
-
defaultSelectedMetrics?: string[]
|
12 |
}
|
13 |
|
14 |
const LeaderboardFilter: React.FC<FilterProps> = ({
|
15 |
groups,
|
16 |
selectedMetrics,
|
17 |
setSelectedMetrics,
|
18 |
-
defaultSelectedMetrics,
|
19 |
}) => {
|
20 |
const [openGroups, setOpenGroups] = useState<{ [key: string]: boolean }>({})
|
21 |
const [openSubGroups, setOpenSubGroups] = useState<{ [key: string]: { [key: string]: boolean } }>(
|
@@ -23,30 +21,6 @@ const LeaderboardFilter: React.FC<FilterProps> = ({
|
|
23 |
)
|
24 |
const [searchTerm, setSearchTerm] = useState('')
|
25 |
|
26 |
-
// Initialize openGroups and openSubGroups based on defaultSelectedMetrics on page load
|
27 |
-
useEffect(() => {
|
28 |
-
if (!defaultSelectedMetrics) return
|
29 |
-
|
30 |
-
const initialOpenGroups: { [key: string]: boolean } = {}
|
31 |
-
const initialOpenSubGroups: { [key: string]: { [key: string]: boolean } } = {}
|
32 |
-
|
33 |
-
Object.entries(groups).forEach(([group, subGroups]) => {
|
34 |
-
let groupHasDefault = false
|
35 |
-
initialOpenSubGroups[group] = {}
|
36 |
-
|
37 |
-
Object.entries(subGroups).forEach(([subGroup, metrics]) => {
|
38 |
-
const hasDefault = metrics.some((metric) => defaultSelectedMetrics.includes(metric))
|
39 |
-
initialOpenSubGroups[group][subGroup] = hasDefault
|
40 |
-
if (hasDefault) groupHasDefault = true
|
41 |
-
})
|
42 |
-
|
43 |
-
initialOpenGroups[group] = groupHasDefault
|
44 |
-
})
|
45 |
-
|
46 |
-
setOpenGroups(initialOpenGroups)
|
47 |
-
setOpenSubGroups(initialOpenSubGroups)
|
48 |
-
}, [groups, defaultSelectedMetrics])
|
49 |
-
|
50 |
const toggleGroup = (group: string) => {
|
51 |
setOpenGroups((prev) => ({ ...prev, [group]: !prev[group] }))
|
52 |
}
|
@@ -141,25 +115,6 @@ const LeaderboardFilter: React.FC<FilterProps> = ({
|
|
141 |
}))
|
142 |
}
|
143 |
|
144 |
-
const selectDefaultsInFilter = () => {
|
145 |
-
if (!defaultSelectedMetrics) return
|
146 |
-
setSelectedMetrics(new Set(defaultSelectedMetrics))
|
147 |
-
const openGroups: { [key: string]: boolean } = {}
|
148 |
-
const openSubGroups: { [key: string]: { [key: string]: boolean } } = {}
|
149 |
-
Object.entries(groups).forEach(([group, subGroups]) => {
|
150 |
-
let groupHasDefault = false
|
151 |
-
openSubGroups[group] = {}
|
152 |
-
Object.entries(subGroups).forEach(([subGroup, metrics]) => {
|
153 |
-
const hasDefault = metrics.some((metric) => defaultSelectedMetrics.includes(metric))
|
154 |
-
openSubGroups[group][subGroup] = hasDefault
|
155 |
-
if (hasDefault) groupHasDefault = true
|
156 |
-
})
|
157 |
-
openGroups[group] = groupHasDefault
|
158 |
-
})
|
159 |
-
setOpenGroups(openGroups)
|
160 |
-
setOpenSubGroups(openSubGroups)
|
161 |
-
}
|
162 |
-
|
163 |
const selectAllGlobal = () => {
|
164 |
const allMetrics = Object.values(groups)
|
165 |
.flatMap((subGroups) => Object.values(subGroups).flat())
|
@@ -272,15 +227,6 @@ const LeaderboardFilter: React.FC<FilterProps> = ({
|
|
272 |
>
|
273 |
None
|
274 |
</button>
|
275 |
-
{defaultSelectedMetrics && (
|
276 |
-
<button
|
277 |
-
type="button"
|
278 |
-
className="text-xs px-3 py-1 border rounded font-semibold bg-base-200 cursor-pointer"
|
279 |
-
onClick={selectDefaultsInFilter}
|
280 |
-
>
|
281 |
-
Defaults
|
282 |
-
</button>
|
283 |
-
)}
|
284 |
</div>
|
285 |
<div className="flex flex-row flex-wrap gap-4 w-full items-start">
|
286 |
{Object.entries(groups).map(([group, subGroups]) => (
|
|
|
8 |
groups: Groups
|
9 |
selectedMetrics: Set<string>
|
10 |
setSelectedMetrics: (metrics: Set<string>) => void
|
|
|
11 |
}
|
12 |
|
13 |
const LeaderboardFilter: React.FC<FilterProps> = ({
|
14 |
groups,
|
15 |
selectedMetrics,
|
16 |
setSelectedMetrics,
|
|
|
17 |
}) => {
|
18 |
const [openGroups, setOpenGroups] = useState<{ [key: string]: boolean }>({})
|
19 |
const [openSubGroups, setOpenSubGroups] = useState<{ [key: string]: { [key: string]: boolean } }>(
|
|
|
21 |
)
|
22 |
const [searchTerm, setSearchTerm] = useState('')
|
23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
const toggleGroup = (group: string) => {
|
25 |
setOpenGroups((prev) => ({ ...prev, [group]: !prev[group] }))
|
26 |
}
|
|
|
115 |
}))
|
116 |
}
|
117 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
118 |
const selectAllGlobal = () => {
|
119 |
const allMetrics = Object.values(groups)
|
120 |
.flatMap((subGroups) => Object.values(subGroups).flat())
|
|
|
227 |
>
|
228 |
None
|
229 |
</button>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
230 |
</div>
|
231 |
<div className="flex flex-row flex-wrap gap-4 w-full items-start">
|
232 |
{Object.entries(groups).map(([group, subGroups]) => (
|
frontend/src/components/LeaderboardTable.tsx
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import React, { useEffect, useState } from 'react'
|
2 |
import API from '../API'
|
3 |
import LeaderboardFilter from './LeaderboardFilter'
|
|
|
4 |
|
5 |
interface LeaderboardTableProps {
|
6 |
file: string
|
@@ -32,7 +33,7 @@ const LeaderboardTable: React.FC<LeaderboardTableProps> = ({ file }) => {
|
|
32 |
)
|
33 |
|
34 |
const [selectedMetrics, setSelectedMetrics] = useState<Set<string>>(new Set())
|
35 |
-
const [
|
36 |
|
37 |
// To store the unique metrics from the Overall group
|
38 |
const [overallMetrics, setOverallMetrics] = useState<string[]>([])
|
@@ -42,10 +43,11 @@ const LeaderboardTable: React.FC<LeaderboardTableProps> = ({ file }) => {
|
|
42 |
.then((response) => {
|
43 |
const data = JSON.parse(response)
|
44 |
const rows: Row[] = data['rows']
|
45 |
-
|
46 |
-
|
47 |
-
//
|
48 |
-
|
|
|
49 |
const uniqueMetrics = new Set<string>()
|
50 |
|
51 |
overallGroup.forEach((metric) => {
|
@@ -110,8 +112,11 @@ const LeaderboardTable: React.FC<LeaderboardTableProps> = ({ file }) => {
|
|
110 |
})
|
111 |
})
|
112 |
|
113 |
-
|
114 |
-
|
|
|
|
|
|
|
115 |
setTableHeader(headers)
|
116 |
setTableRows(rows)
|
117 |
setGroups(groupsData)
|
@@ -125,10 +130,6 @@ const LeaderboardTable: React.FC<LeaderboardTableProps> = ({ file }) => {
|
|
125 |
})
|
126 |
}, [file])
|
127 |
|
128 |
-
const handleSelectDefaults = () => {
|
129 |
-
setSelectedMetrics(new Set(defaultSelectedMetrics))
|
130 |
-
}
|
131 |
-
|
132 |
const toggleGroup = (group: string) => {
|
133 |
setOpenGroups((prev) => ({ ...prev, [group]: !prev[group] }))
|
134 |
}
|
@@ -157,6 +158,23 @@ const LeaderboardTable: React.FC<LeaderboardTableProps> = ({ file }) => {
|
|
157 |
.map((row) => row.metric as string)
|
158 |
}
|
159 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
160 |
// Calculate average and standard deviation for a set of metrics for a specific column
|
161 |
const calculateStats = (
|
162 |
metricNames: string[],
|
@@ -216,301 +234,386 @@ const LeaderboardTable: React.FC<LeaderboardTableProps> = ({ file }) => {
|
|
216 |
|
217 |
{!loading && !error && (
|
218 |
<div className="overflow-x-auto">
|
219 |
-
<
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
242 |
</th>
|
243 |
))}
|
244 |
-
</
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
258 |
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
|
|
|
|
|
|
279 |
)
|
280 |
-
|
|
|
|
|
281 |
|
282 |
return (
|
283 |
-
<
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
return (
|
311 |
-
<React.Fragment key={`${group}-${subGroup}`}>
|
312 |
-
{/* Subgroup row with average stats for the subgroup */}
|
313 |
-
<tr
|
314 |
-
className="bg-base-100 cursor-pointer hover:bg-base-200"
|
315 |
-
onClick={() => toggleSubGroup(group, subGroup)}
|
316 |
-
>
|
317 |
-
<td className="pl-6 font-medium">
|
318 |
-
{openSubGroups[group]?.[subGroup] ? '▼ ' : '▶ '}
|
319 |
-
{subGroup}
|
320 |
-
</td>
|
321 |
-
{/* For each metric column */}
|
322 |
-
{overallMetrics.map((metric) => (
|
323 |
-
// Render sub-columns for each model
|
324 |
-
<React.Fragment key={`${group}-${subGroup}-${metric}`}>
|
325 |
-
{tableHeader.map((col) => {
|
326 |
-
// Find all metrics in this subgroup that match the current metric name
|
327 |
-
const allMetricsWithName = findAllMetricsForName(metric)
|
328 |
-
const metricsInSubgroupForThisMetric =
|
329 |
-
visibleSubgroupMetrics.filter((m) =>
|
330 |
-
allMetricsWithName.includes(m)
|
331 |
-
)
|
332 |
-
const stats = calculateStats(
|
333 |
-
metricsInSubgroupForThisMetric,
|
334 |
-
col
|
335 |
-
)
|
336 |
-
|
337 |
-
return (
|
338 |
-
<td
|
339 |
-
key={`${group}-${subGroup}-${metric}-${col}`}
|
340 |
-
className="font-medium text-center"
|
341 |
-
>
|
342 |
-
{!isNaN(stats.avg)
|
343 |
-
? `${stats.avg.toFixed(3)} ± ${stats.stdDev.toFixed(3)}`
|
344 |
-
: 'N/A'}
|
345 |
-
</td>
|
346 |
-
)
|
347 |
-
})}
|
348 |
-
</React.Fragment>
|
349 |
-
))}
|
350 |
-
</tr>
|
351 |
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
-
|
365 |
-
|
366 |
-
|
367 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
368 |
}
|
369 |
-
}
|
370 |
-
return ''
|
371 |
-
}
|
372 |
-
|
373 |
-
const overallA = getOverallMetricGroup(a)
|
374 |
-
const overallB = getOverallMetricGroup(b)
|
375 |
-
|
376 |
-
// Extract the strength (last part before the overall metric)
|
377 |
-
const stripOverall = (metric: string, overall: string) => {
|
378 |
-
if (metric.endsWith(`_${overall}`)) {
|
379 |
-
// Remove the overall metric group and any preceding underscore
|
380 |
-
const stripped = metric.slice(
|
381 |
-
0,
|
382 |
-
metric.length - overall.length - 1
|
383 |
-
)
|
384 |
-
const parts = stripped.split('_')
|
385 |
-
return parts.length > 0 ? parts[parts.length - 1] : ''
|
386 |
-
}
|
387 |
-
return metric
|
388 |
-
}
|
389 |
-
|
390 |
-
// Extract the category (what remains after removing strength and overall_metric_name)
|
391 |
-
const getCategory = (metric: string, overall: string) => {
|
392 |
-
if (metric.endsWith(`_${overall}`)) {
|
393 |
-
const stripped = metric.slice(
|
394 |
-
0,
|
395 |
-
metric.length - overall.length - 1
|
396 |
-
)
|
397 |
-
const parts = stripped.split('_')
|
398 |
-
// Remove the last part (strength) and join the rest (category)
|
399 |
-
return parts.length > 1
|
400 |
-
? parts.slice(0, parts.length - 1).join('_')
|
401 |
-
: ''
|
402 |
-
}
|
403 |
-
return metric
|
404 |
-
}
|
405 |
-
|
406 |
-
const categoryA = getCategory(a, overallA)
|
407 |
-
const categoryB = getCategory(b, overallB)
|
408 |
-
|
409 |
-
// First sort by category
|
410 |
-
if (categoryA !== categoryB) {
|
411 |
-
return categoryA.localeCompare(categoryB)
|
412 |
-
}
|
413 |
-
|
414 |
-
// Then sort by overall metric name
|
415 |
-
if (overallA !== overallB) {
|
416 |
-
return overallA.localeCompare(overallB)
|
417 |
-
}
|
418 |
-
|
419 |
-
// Finally sort by strength
|
420 |
-
const subA = stripOverall(a, overallA)
|
421 |
-
const subB = stripOverall(b, overallB)
|
422 |
-
|
423 |
-
// Try to parse subA and subB as numbers, handling k/m/b suffixes
|
424 |
-
const parseNumber = (str: string) => {
|
425 |
-
const match = str.match(/^(\d+(?:\.\d+)?)([kKmMbB]?)$/)
|
426 |
-
if (!match) return NaN
|
427 |
-
let [_, num, suffix] = match
|
428 |
-
let value = parseFloat(num)
|
429 |
-
switch (suffix.toLowerCase()) {
|
430 |
-
case 'k':
|
431 |
-
value *= 1e3
|
432 |
-
break
|
433 |
-
case 'm':
|
434 |
-
value *= 1e6
|
435 |
-
break
|
436 |
-
case 'b':
|
437 |
-
value *= 1e9
|
438 |
-
break
|
439 |
-
}
|
440 |
-
return value
|
441 |
-
}
|
442 |
-
|
443 |
-
const numA = parseNumber(subA)
|
444 |
-
const numB = parseNumber(subB)
|
445 |
-
|
446 |
-
if (!isNaN(numA) && !isNaN(numB)) {
|
447 |
-
return numA - numB
|
448 |
-
}
|
449 |
-
// Fallback to string comparison if not both numbers
|
450 |
-
return subA.localeCompare(subB)
|
451 |
-
})
|
452 |
-
.map((metric) => {
|
453 |
-
const row = tableRows.find((r) => r.metric === metric)
|
454 |
-
if (!row) return null
|
455 |
-
|
456 |
-
// Extract the metric name (after the underscore)
|
457 |
-
const metricName = metric.includes('_')
|
458 |
-
? metric.split('_').slice(1).join('_')
|
459 |
-
: metric
|
460 |
|
461 |
-
|
462 |
-
|
463 |
-
|
464 |
-
|
465 |
-
|
466 |
-
|
467 |
-
|
468 |
-
|
469 |
-
|
470 |
-
|
471 |
-
// Fill empty cells for non-matching metrics
|
472 |
-
return (
|
473 |
-
<React.Fragment key={`${metric}-${oMetric}`}>
|
474 |
-
{tableHeader.map((col) => (
|
475 |
-
<td
|
476 |
-
key={`${metric}-${oMetric}-${col}`}
|
477 |
-
className="text-center"
|
478 |
-
></td>
|
479 |
-
))}
|
480 |
-
</React.Fragment>
|
481 |
)
|
|
|
|
|
482 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
483 |
|
484 |
-
|
485 |
-
|
486 |
-
|
487 |
-
|
488 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
489 |
return (
|
490 |
-
<
|
491 |
-
|
492 |
-
|
493 |
-
|
494 |
-
|
495 |
-
|
496 |
-
|
497 |
-
|
|
|
|
|
498 |
)
|
499 |
-
}
|
500 |
-
|
501 |
-
|
502 |
-
|
503 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
504 |
)
|
505 |
})}
|
506 |
-
|
507 |
-
|
508 |
-
|
509 |
-
|
|
|
|
|
510 |
)
|
511 |
-
})}
|
512 |
-
|
513 |
-
|
514 |
</div>
|
515 |
)}
|
516 |
</div>
|
|
|
1 |
import React, { useEffect, useState } from 'react'
|
2 |
import API from '../API'
|
3 |
import LeaderboardFilter from './LeaderboardFilter'
|
4 |
+
import ModelFilter from './ModelFilter'
|
5 |
|
6 |
interface LeaderboardTableProps {
|
7 |
file: string
|
|
|
33 |
)
|
34 |
|
35 |
const [selectedMetrics, setSelectedMetrics] = useState<Set<string>>(new Set())
|
36 |
+
const [selectedModels, setSelectedModels] = useState<Set<string>>(new Set())
|
37 |
|
38 |
// To store the unique metrics from the Overall group
|
39 |
const [overallMetrics, setOverallMetrics] = useState<string[]>([])
|
|
|
43 |
.then((response) => {
|
44 |
const data = JSON.parse(response)
|
45 |
const rows: Row[] = data['rows']
|
46 |
+
// Split out the Overall group from groups
|
47 |
+
const allGroups = data['groups'] as { [key: string]: string[] }
|
48 |
+
// const overallGroup = allGroups['Overall'] || []
|
49 |
+
// Remove 'Overall' from groups
|
50 |
+
const { Overall: overallGroup, ...groups } = allGroups
|
51 |
const uniqueMetrics = new Set<string>()
|
52 |
|
53 |
overallGroup.forEach((metric) => {
|
|
|
112 |
})
|
113 |
})
|
114 |
|
115 |
+
// Get all metrics from all groups
|
116 |
+
const allMetrics = Object.values(groups).flat()
|
117 |
+
setSelectedMetrics(new Set(allMetrics))
|
118 |
+
// Initialize all models as selected
|
119 |
+
setSelectedModels(new Set(headers))
|
120 |
setTableHeader(headers)
|
121 |
setTableRows(rows)
|
122 |
setGroups(groupsData)
|
|
|
130 |
})
|
131 |
}, [file])
|
132 |
|
|
|
|
|
|
|
|
|
133 |
const toggleGroup = (group: string) => {
|
134 |
setOpenGroups((prev) => ({ ...prev, [group]: !prev[group] }))
|
135 |
}
|
|
|
158 |
.map((row) => row.metric as string)
|
159 |
}
|
160 |
|
161 |
+
// Identify metrics that don't belong to any overall metric group
|
162 |
+
const findStandaloneMetrics = (): string[] => {
|
163 |
+
// Get all metrics from the table rows
|
164 |
+
const allMetrics = tableRows.map((row) => row.metric as string)
|
165 |
+
|
166 |
+
// Filter to only include metrics that aren't part of any of the overall metrics
|
167 |
+
return allMetrics.filter((metric) => {
|
168 |
+
// Check if this metric is part of any of the overall metrics
|
169 |
+
for (const overall of overallMetrics) {
|
170 |
+
if (metric.endsWith(`_${overall}`) || metric === overall) {
|
171 |
+
return false // This metric belongs to an overall group
|
172 |
+
}
|
173 |
+
}
|
174 |
+
return true
|
175 |
+
})
|
176 |
+
}
|
177 |
+
|
178 |
// Calculate average and standard deviation for a set of metrics for a specific column
|
179 |
const calculateStats = (
|
180 |
metricNames: string[],
|
|
|
234 |
|
235 |
{!loading && !error && (
|
236 |
<div className="overflow-x-auto">
|
237 |
+
<div className="flex flex-col gap-4">
|
238 |
+
<ModelFilter
|
239 |
+
models={tableHeader}
|
240 |
+
selectedModels={selectedModels}
|
241 |
+
setSelectedModels={setSelectedModels}
|
242 |
+
/>
|
243 |
+
<LeaderboardFilter
|
244 |
+
groups={groups}
|
245 |
+
selectedMetrics={selectedMetrics}
|
246 |
+
setSelectedMetrics={setSelectedMetrics}
|
247 |
+
/>
|
248 |
+
</div>
|
249 |
+
|
250 |
+
{selectedModels.size === 0 || selectedMetrics.size === 0 ? (
|
251 |
+
<div className="text-center p-4 text-lg">
|
252 |
+
Please select at least one model and one metric to display the data
|
253 |
+
</div>
|
254 |
+
) : (
|
255 |
+
<>
|
256 |
+
<table className="table w-full">
|
257 |
+
<thead>
|
258 |
+
<tr>
|
259 |
+
<th>Group / Subgroup</th>
|
260 |
+
{overallMetrics.map((metric) => (
|
261 |
+
<th
|
262 |
+
key={metric}
|
263 |
+
colSpan={tableHeader.filter((model) => selectedModels.has(model)).length}
|
264 |
+
className="text-center border-x"
|
265 |
+
>
|
266 |
+
{metric}
|
267 |
</th>
|
268 |
))}
|
269 |
+
</tr>
|
270 |
+
<tr>
|
271 |
+
<th></th>
|
272 |
+
{overallMetrics.map((metric) => (
|
273 |
+
<React.Fragment key={`header-models-${metric}`}>
|
274 |
+
{tableHeader
|
275 |
+
.filter((model) => selectedModels.has(model))
|
276 |
+
.map((model) => (
|
277 |
+
<th key={`${metric}-${model}`} className="text-center text-xs">
|
278 |
+
{model}
|
279 |
+
</th>
|
280 |
+
))}
|
281 |
+
</React.Fragment>
|
282 |
+
))}
|
283 |
+
</tr>
|
284 |
+
</thead>
|
285 |
+
<tbody>
|
286 |
+
{/* First render each group */}
|
287 |
+
{Object.entries(groups).map(([group, subGroups]) => {
|
288 |
+
// Skip the "Overall" group completely
|
289 |
+
if (group === 'Overall') return null
|
290 |
+
|
291 |
+
// Get all metrics for this group
|
292 |
+
const allGroupMetrics = Object.values(subGroups).flat()
|
293 |
+
// Filter to only include selected metrics
|
294 |
+
const visibleGroupMetrics = filterMetricsByGroupAndSubgroup(
|
295 |
+
allGroupMetrics,
|
296 |
+
group
|
297 |
+
)
|
298 |
+
|
299 |
+
// Skip this group if no metrics are selected
|
300 |
+
if (visibleGroupMetrics.length === 0) return null
|
301 |
+
|
302 |
+
return (
|
303 |
+
<React.Fragment key={group}>
|
304 |
+
{/* Group row with average stats for the entire group */}
|
305 |
+
<tr
|
306 |
+
className="bg-base-200 cursor-pointer hover:bg-base-300"
|
307 |
+
onClick={() => toggleGroup(group)}
|
308 |
+
>
|
309 |
+
<td className="font-medium">
|
310 |
+
{openGroups[group] ? '▼ ' : '▶ '}
|
311 |
+
{group}
|
312 |
+
</td>
|
313 |
+
{/* For each metric column */}
|
314 |
+
{overallMetrics.map((metric) => (
|
315 |
+
// Render sub-columns for each model
|
316 |
+
<React.Fragment key={`${group}-${metric}`}>
|
317 |
+
{tableHeader
|
318 |
+
.filter((model) => selectedModels.has(model))
|
319 |
+
.map((col) => {
|
320 |
+
// Find all metrics in this group that match the current metric name
|
321 |
+
const allMetricsWithName = findAllMetricsForName(metric)
|
322 |
+
const metricsInGroupForThisMetric = visibleGroupMetrics.filter(
|
323 |
+
(m) => allMetricsWithName.includes(m)
|
324 |
+
)
|
325 |
+
const stats = calculateStats(metricsInGroupForThisMetric, col)
|
326 |
|
327 |
+
return (
|
328 |
+
<td
|
329 |
+
key={`${group}-${metric}-${col}`}
|
330 |
+
className="font-medium text-center"
|
331 |
+
>
|
332 |
+
{!isNaN(stats.avg)
|
333 |
+
? `${stats.avg.toFixed(3)} ± ${stats.stdDev.toFixed(3)}`
|
334 |
+
: 'N/A'}
|
335 |
+
</td>
|
336 |
+
)
|
337 |
+
})}
|
338 |
+
</React.Fragment>
|
339 |
+
))}
|
340 |
+
</tr>
|
341 |
+
|
342 |
+
{/* Only render subgroups if group is open */}
|
343 |
+
{openGroups[group] &&
|
344 |
+
Object.entries(subGroups).map(([subGroup, metrics]) => {
|
345 |
+
// Filter to only include selected metrics in this subgroup
|
346 |
+
const visibleSubgroupMetrics = filterMetricsByGroupAndSubgroup(
|
347 |
+
metrics,
|
348 |
+
group,
|
349 |
+
subGroup
|
350 |
)
|
351 |
+
|
352 |
+
// Skip this subgroup if no metrics are selected
|
353 |
+
if (visibleSubgroupMetrics.length === 0) return null
|
354 |
|
355 |
return (
|
356 |
+
<React.Fragment key={`${group}-${subGroup}`}>
|
357 |
+
{/* Subgroup row with average stats for the subgroup */}
|
358 |
+
<tr
|
359 |
+
className="bg-base-100 cursor-pointer hover:bg-base-200"
|
360 |
+
onClick={() => toggleSubGroup(group, subGroup)}
|
361 |
+
>
|
362 |
+
<td className="pl-6 font-medium">
|
363 |
+
{openSubGroups[group]?.[subGroup] ? '▼ ' : '▶ '}
|
364 |
+
{subGroup}
|
365 |
+
</td>
|
366 |
+
{/* For each metric column */}
|
367 |
+
{overallMetrics.map((metric) => (
|
368 |
+
// Render sub-columns for each model
|
369 |
+
<React.Fragment key={`${group}-${subGroup}-${metric}`}>
|
370 |
+
{tableHeader
|
371 |
+
.filter((model) => selectedModels.has(model))
|
372 |
+
.map((col) => {
|
373 |
+
// Find all metrics in this subgroup that match the current metric name
|
374 |
+
const allMetricsWithName = findAllMetricsForName(metric)
|
375 |
+
const metricsInSubgroupForThisMetric =
|
376 |
+
visibleSubgroupMetrics.filter((m) =>
|
377 |
+
allMetricsWithName.includes(m)
|
378 |
+
)
|
379 |
+
const stats = calculateStats(
|
380 |
+
metricsInSubgroupForThisMetric,
|
381 |
+
col
|
382 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
383 |
|
384 |
+
return (
|
385 |
+
<td
|
386 |
+
key={`${group}-${subGroup}-${metric}-${col}`}
|
387 |
+
className="font-medium text-center"
|
388 |
+
>
|
389 |
+
{!isNaN(stats.avg)
|
390 |
+
? `${stats.avg.toFixed(3)} ± ${stats.stdDev.toFixed(3)}`
|
391 |
+
: 'N/A'}
|
392 |
+
</td>
|
393 |
+
)
|
394 |
+
})}
|
395 |
+
</React.Fragment>
|
396 |
+
))}
|
397 |
+
</tr>
|
398 |
+
|
399 |
+
{/* Individual metric rows */}
|
400 |
+
{openSubGroups[group]?.[subGroup] &&
|
401 |
+
// Sort visibleSubgroupMetrics alphabetically by the clean metric name
|
402 |
+
[...visibleSubgroupMetrics]
|
403 |
+
.sort((a, b) => {
|
404 |
+
// For metrics with format {category}_{strength}_{overall_metric_name},
|
405 |
+
// First sort by category, then by overall_metric_name, then by strength
|
406 |
+
|
407 |
+
// First extract the overall metric group
|
408 |
+
const getOverallMetricGroup = (metric: string) => {
|
409 |
+
for (const overall of overallMetrics) {
|
410 |
+
if (
|
411 |
+
metric.endsWith(`_${overall}`) ||
|
412 |
+
metric === overall
|
413 |
+
) {
|
414 |
+
return overall
|
415 |
+
}
|
416 |
+
}
|
417 |
+
return ''
|
418 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
419 |
|
420 |
+
const overallA = getOverallMetricGroup(a)
|
421 |
+
const overallB = getOverallMetricGroup(b)
|
422 |
+
|
423 |
+
// Extract the strength (last part before the overall metric)
|
424 |
+
const stripOverall = (metric: string, overall: string) => {
|
425 |
+
if (metric.endsWith(`_${overall}`)) {
|
426 |
+
// Remove the overall metric group and any preceding underscore
|
427 |
+
const stripped = metric.slice(
|
428 |
+
0,
|
429 |
+
metric.length - overall.length - 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
430 |
)
|
431 |
+
const parts = stripped.split('_')
|
432 |
+
return parts.length > 0 ? parts[parts.length - 1] : ''
|
433 |
}
|
434 |
+
return metric
|
435 |
+
}
|
436 |
+
|
437 |
+
// Extract the category (what remains after removing strength and overall_metric_name)
|
438 |
+
const getCategory = (metric: string, overall: string) => {
|
439 |
+
if (metric.endsWith(`_${overall}`)) {
|
440 |
+
const stripped = metric.slice(
|
441 |
+
0,
|
442 |
+
metric.length - overall.length - 1
|
443 |
+
)
|
444 |
+
const parts = stripped.split('_')
|
445 |
+
// Remove the last part (strength) and join the rest (category)
|
446 |
+
return parts.length > 1
|
447 |
+
? parts.slice(0, parts.length - 1).join('_')
|
448 |
+
: ''
|
449 |
+
}
|
450 |
+
return metric
|
451 |
+
}
|
452 |
|
453 |
+
const categoryA = getCategory(a, overallA)
|
454 |
+
const categoryB = getCategory(b, overallB)
|
455 |
+
|
456 |
+
// First sort by category
|
457 |
+
if (categoryA !== categoryB) {
|
458 |
+
return categoryA.localeCompare(categoryB)
|
459 |
+
}
|
460 |
+
|
461 |
+
// Then sort by overall metric name
|
462 |
+
if (overallA !== overallB) {
|
463 |
+
return overallA.localeCompare(overallB)
|
464 |
+
}
|
465 |
+
|
466 |
+
// Finally sort by strength
|
467 |
+
const subA = stripOverall(a, overallA)
|
468 |
+
const subB = stripOverall(b, overallB)
|
469 |
+
|
470 |
+
// Try to parse subA and subB as numbers, handling k/m/b suffixes
|
471 |
+
const parseNumber = (str: string) => {
|
472 |
+
const match = str.match(/^(\d+(?:\.\d+)?)([kKmMbB]?)$/)
|
473 |
+
if (!match) return NaN
|
474 |
+
let [_, num, suffix] = match
|
475 |
+
let value = parseFloat(num)
|
476 |
+
switch (suffix.toLowerCase()) {
|
477 |
+
case 'k':
|
478 |
+
value *= 1e3
|
479 |
+
break
|
480 |
+
case 'm':
|
481 |
+
value *= 1e6
|
482 |
+
break
|
483 |
+
case 'b':
|
484 |
+
value *= 1e9
|
485 |
+
break
|
486 |
+
}
|
487 |
+
return value
|
488 |
+
}
|
489 |
+
|
490 |
+
const numA = parseNumber(subA)
|
491 |
+
const numB = parseNumber(subB)
|
492 |
+
|
493 |
+
if (!isNaN(numA) && !isNaN(numB)) {
|
494 |
+
return numA - numB
|
495 |
+
}
|
496 |
+
// Fallback to string comparison if not both numbers
|
497 |
+
return subA.localeCompare(subB)
|
498 |
+
})
|
499 |
+
.map((metric) => {
|
500 |
+
const row = tableRows.find((r) => r.metric === metric)
|
501 |
+
if (!row) return null
|
502 |
+
|
503 |
+
// Extract the metric name (after the underscore)
|
504 |
+
const metricName = metric.includes('_')
|
505 |
+
? metric.split('_').slice(1).join('_')
|
506 |
+
: metric
|
507 |
+
|
508 |
+
return (
|
509 |
+
<tr key={metric} className="hover:bg-base-100">
|
510 |
+
<td className="pl-10">{metric}</td>
|
511 |
+
{/* For each metric column */}
|
512 |
+
{overallMetrics.map((oMetric) => {
|
513 |
+
// Only show values for the matching metric
|
514 |
+
const isMatchingMetric =
|
515 |
+
findAllMetricsForName(oMetric).includes(metric)
|
516 |
+
|
517 |
+
if (!isMatchingMetric) {
|
518 |
+
// Fill empty cells for non-matching metrics
|
519 |
return (
|
520 |
+
<React.Fragment key={`${metric}-${oMetric}`}>
|
521 |
+
{tableHeader
|
522 |
+
.filter((model) => selectedModels.has(model))
|
523 |
+
.map((col) => (
|
524 |
+
<td
|
525 |
+
key={`${metric}-${oMetric}-${col}`}
|
526 |
+
className="text-center"
|
527 |
+
></td>
|
528 |
+
))}
|
529 |
+
</React.Fragment>
|
530 |
)
|
531 |
+
}
|
532 |
+
|
533 |
+
// Show values for the matching metric
|
534 |
+
return (
|
535 |
+
<React.Fragment key={`${metric}-${oMetric}`}>
|
536 |
+
{tableHeader
|
537 |
+
.filter((model) => selectedModels.has(model))
|
538 |
+
.map((col) => {
|
539 |
+
const cell = row[col]
|
540 |
+
return (
|
541 |
+
<td
|
542 |
+
key={`${metric}-${oMetric}-${col}`}
|
543 |
+
className="text-center"
|
544 |
+
>
|
545 |
+
{!isNaN(Number(cell))
|
546 |
+
? Number(Number(cell).toFixed(3))
|
547 |
+
: cell}
|
548 |
+
</td>
|
549 |
+
)
|
550 |
+
})}
|
551 |
+
</React.Fragment>
|
552 |
+
)
|
553 |
+
})}
|
554 |
+
</tr>
|
555 |
+
)
|
556 |
+
})}
|
557 |
+
</React.Fragment>
|
558 |
+
)
|
559 |
+
})}
|
560 |
+
</React.Fragment>
|
561 |
+
)
|
562 |
+
})}
|
563 |
+
</tbody>
|
564 |
+
</table>
|
565 |
+
|
566 |
+
{/* Separate table for metrics that don't belong to any overall group */}
|
567 |
+
{(() => {
|
568 |
+
const standaloneMetrics = findStandaloneMetrics()
|
569 |
+
if (standaloneMetrics.length === 0) return null
|
570 |
+
|
571 |
+
return (
|
572 |
+
<div className="mt-8">
|
573 |
+
<h4 className="font-bold mb-2">Other Metrics</h4>
|
574 |
+
<table className="table w-full">
|
575 |
+
<thead>
|
576 |
+
<tr>
|
577 |
+
<th>Metric</th>
|
578 |
+
{tableHeader
|
579 |
+
.filter((model) => selectedModels.has(model))
|
580 |
+
.map((model) => (
|
581 |
+
<th key={`standalone-${model}`} className="text-center text-xs">
|
582 |
+
{model}
|
583 |
+
</th>
|
584 |
+
))}
|
585 |
+
</tr>
|
586 |
+
</thead>
|
587 |
+
<tbody>
|
588 |
+
{standaloneMetrics.sort().map((metric) => {
|
589 |
+
const row = tableRows.find((r) => r.metric === metric)
|
590 |
+
if (!row) return null
|
591 |
+
|
592 |
+
return (
|
593 |
+
<tr key={`standalone-${metric}`} className="hover:bg-base-100">
|
594 |
+
<td>{metric}</td>
|
595 |
+
{tableHeader
|
596 |
+
.filter((model) => selectedModels.has(model))
|
597 |
+
.map((col) => {
|
598 |
+
const cell = row[col]
|
599 |
+
return (
|
600 |
+
<td key={`standalone-${metric}-${col}`} className="text-center">
|
601 |
+
{!isNaN(Number(cell))
|
602 |
+
? Number(Number(cell).toFixed(3))
|
603 |
+
: cell}
|
604 |
+
</td>
|
605 |
)
|
606 |
})}
|
607 |
+
</tr>
|
608 |
+
)
|
609 |
+
})}
|
610 |
+
</tbody>
|
611 |
+
</table>
|
612 |
+
</div>
|
613 |
)
|
614 |
+
})()}
|
615 |
+
</>
|
616 |
+
)}
|
617 |
</div>
|
618 |
)}
|
619 |
</div>
|
frontend/src/components/ModelFilter.tsx
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import React from 'react'
|
2 |
+
|
3 |
+
interface ModelFilterProps {
|
4 |
+
models: string[]
|
5 |
+
selectedModels: Set<string>
|
6 |
+
setSelectedModels: (models: Set<string>) => void
|
7 |
+
}
|
8 |
+
|
9 |
+
const ModelFilter: React.FC<ModelFilterProps> = ({ models, selectedModels, setSelectedModels }) => {
|
10 |
+
const toggleModel = (model: string) => {
|
11 |
+
const newSelected = new Set(selectedModels)
|
12 |
+
if (newSelected.has(model)) {
|
13 |
+
newSelected.delete(model)
|
14 |
+
} else {
|
15 |
+
newSelected.add(model)
|
16 |
+
}
|
17 |
+
setSelectedModels(newSelected)
|
18 |
+
}
|
19 |
+
|
20 |
+
return (
|
21 |
+
<div className="w-full mb-4">
|
22 |
+
<fieldset className="fieldset w-full p-4 rounded border">
|
23 |
+
<legend className="fieldset-legend font-semibold">
|
24 |
+
Models ({selectedModels.size}/{models.length})
|
25 |
+
</legend>
|
26 |
+
<div className="grid grid-cols-2 md:grid-cols-4 lg:grid-cols-6 gap-1 max-h-48 overflow-y-auto pr-2">
|
27 |
+
{models.map((model) => (
|
28 |
+
<label key={model} className="flex items-center gap-2 text-sm">
|
29 |
+
<input
|
30 |
+
type="checkbox"
|
31 |
+
className="form-checkbox h-4 w-4"
|
32 |
+
checked={selectedModels.has(model)}
|
33 |
+
onChange={() => toggleModel(model)}
|
34 |
+
/>
|
35 |
+
<span className="truncate" title={model}>
|
36 |
+
{model}
|
37 |
+
</span>
|
38 |
+
</label>
|
39 |
+
))}
|
40 |
+
</div>
|
41 |
+
</fieldset>
|
42 |
+
</div>
|
43 |
+
)
|
44 |
+
}
|
45 |
+
|
46 |
+
export default ModelFilter
|