import { Column } from 'primereact/column'
import ScoreField from './ScoreField'
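
// ScoreField (./ScoreField) is assumed to render a single score value,
// using minScore/maxScore as the normalization bounds for display.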
// Returns a PrimeReact column body renderer: it reads `field` from the row
// and renders it with ScoreField, normalized against [minScore, maxScore].
const scoreBodyTemplate = (field, options = {}) => {
  const { minScore = 0, maxScore = 1 } = options
  return rowData => {
    const score = rowData[field]
    return ScoreField(score, minScore, maxScore)
  }
}
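// Example (hypothetical data): scoreBodyTemplate('average', { minScore: 0.2, maxScore: 0.5 })
// applied to rowData = { average: 0.35 } renders ScoreField(0.35, 0.2, 0.5).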
const ScoreColumns = [
  <Column
    key='average'
    field='average'
    header='Proficiency'
    headerTooltip='Language Proficiency Score (average of the scores for each task, after min-max normalization)'
    sortable
    body={scoreBodyTemplate('average', { minScore: 0.2, maxScore: 0.5 })}
    style={{ minWidth: '5rem', maxWidth: '10rem' }}
  />,
  <Column
    key='translation_from_bleu'
    field='translation_from_bleu'
    header='Translation (from)'
    headerTooltip='Translation performance from a language to all other languages (spBLEU score on a sample of the FLORES+ benchmark)'
    sortable
    body={scoreBodyTemplate('translation_from_bleu', {
      minScore: 0,
      maxScore: 0.5
    })}
    style={{ minWidth: '5rem', maxWidth: '10rem' }}
  />,
  <Column
    key='translation_to_bleu'
    field='translation_to_bleu'
    header='Translation (to)'
    headerTooltip='Translation performance from all other languages to a language (spBLEU score on a sample of the FLORES+ benchmark)'
    sortable
    body={scoreBodyTemplate('translation_to_bleu', {
      minScore: 0,
      maxScore: 0.5
    })}
    style={{ minWidth: '5rem', maxWidth: '10rem' }}
  />,
  <Column
    key='classification_accuracy'
    field='classification_accuracy'
    header='Classification'
    headerTooltip='Classification performance (accuracy on a sample of the SIB-200 / FLORES+ classification benchmark)'
    sortable
    body={scoreBodyTemplate('classification_accuracy', {
      minScore: 0,
      maxScore: 0.5
    })}
    style={{ minWidth: '5rem', maxWidth: '10rem' }}
  />,
  // <Column
  //   field='language_modeling_chrf'
  //   header='Language Modeling'
  //   sortable
  //   body={scoreBodyTemplate('language_modeling_chrf', {
  //     minScore: 0.8,
  //     maxScore: 1
  //   })}
  //   style={{ minWidth: '5rem', maxWidth: '10rem' }}
  // />,
  <Column
    key='mmlu_accuracy'
    field='mmlu_accuracy'
    header='Q&A'
    headerTooltip='Question Answering performance (accuracy on a sample of multilingual versions of the MMLU benchmark)'
    sortable
    body={scoreBodyTemplate('mmlu_accuracy', {
      minScore: 0,
      maxScore: 1
    })}
    style={{ minWidth: '5rem', maxWidth: '10rem' }}
  />,
  <Column
    key='arc_accuracy'
    field='arc_accuracy'
    header='Advanced Q&A'
    headerTooltip='Advanced Question Answering performance (accuracy on a sample of multilingual versions of the ARC-Easy benchmark)'
    sortable
    body={scoreBodyTemplate('arc_accuracy', {
      minScore: 0,
      maxScore: 1
    })}
    style={{ minWidth: '5rem', maxWidth: '10rem' }}
  />,
  <Column
    key='mgsm_accuracy'
    field='mgsm_accuracy'
    header='Math'
    headerTooltip='Math Problem Solving performance (accuracy on a sample of multilingual versions of the GSM8K benchmark)'
    sortable
    body={scoreBodyTemplate('mgsm_accuracy', {
      minScore: 0,
      maxScore: 1
    })}
    style={{ minWidth: '5rem', maxWidth: '10rem' }}
  />
]
export default ScoreColumns
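
// Usage sketch (illustrative only; `scores` and `LeaderboardTable` are
// assumptions, not part of this module):
//
//   import { DataTable } from 'primereact/datatable'
//   import ScoreColumns from './ScoreColumns'
//
//   const LeaderboardTable = ({ scores }) => (
//     <DataTable value={scores} sortField='average' sortOrder={-1}>
//       {ScoreColumns}
//     </DataTable>
//   )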