Spaces:

jszheng
/

RACE_leaderboard

Runtime error

RACE_leaderboard / app.py

Jason Zheng

add links

c380c7e 12 months ago

6.62 kB

	import json

	import gradio as gr
	import pandas as pd

	from css_html import custom_css
	from text_content import ABOUT_TEXT, CITATION_BUTTON_TEXT, CITATION_BUTTON_LABEL, ACKNOWLEDGEMENT_TEXT, NOTES_TEXT
	from utils import (
	AutoEvalColumn,
	fields,
	)

	# Location of the benchmark results. The loop below reads the file as a
	# mapping of model name -> category -> {metric name: value}.
	result_path = './RESULTS.json'
	# Decode explicitly as UTF-8 so loading does not depend on the locale default.
	with open(result_path, 'r', encoding='utf-8') as f:
	    data = json.load(f)

	# Emoji prefix placed in front of every metric name of a result category.
	CATEGORY_SYMBOLS = {
	    'readability': '📖',
	    'maintainability': '🔨',
	    'efficiency': '🚀',
	    'correctness': '✅',
	    'overall': '💯',
	}

	# Flatten the nested results into one row per model, with one
	# "<symbol> <metric name>" column per metric.
	rows = []
	for col, subcols in data.items():
	    row = {"model": col}
	    for subcol, datas in subcols.items():
	        symbol = CATEGORY_SYMBOLS.get(subcol)
	        if symbol is None:
	            # No prefix is defined for this category. Skip it rather than
	            # filing its metrics under a prefix left over from another
	            # category (or failing because no prefix was ever assigned).
	            continue
	        for key, value in datas.items():
	            row[f'{symbol} {key}'] = value
	    rows.append(row)

	# Order the models by the '💯 RACE Score' column, highest score first.
	df = pd.DataFrame(rows)
	df = df.sort_values(by='💯 RACE Score', ascending=False)

	# Fields of AutoEvalColumn that are not flagged as hidden; the column names
	# and datatypes below are both derived from this list, in the same order.
	_visible_fields = [c for c in fields(AutoEvalColumn) if not c.hidden]
	COLS = [c.name for c in _visible_fields]
	TYPES = [c.type for c in _visible_fields]

	# Subset of the visible fields that is displayed by default.
	_default_fields = [c for c in _visible_fields if c.displayed_by_default]
	COLS_LITE = [c.name for c in _default_fields]
	TYPES_LITE = [c.type for c in _default_fields]

	def select_columns(df, columns):
	    """Return ``df`` limited to the model column plus the requested columns.

	    Args:
	        df: Table to filter; indexed with a list of column names.
	        columns: Names of the columns the caller wants to keep.

	    Returns:
	        ``df`` indexed by the model column followed by every entry of
	        ``COLS`` that exists in ``df`` and was requested, in ``COLS`` order.
	    """
	    # The model column is always kept, whatever the selection is.
	    keep = [AutoEvalColumn.model.name]
	    # Walk COLS (not `columns`) so the result follows the canonical order.
	    for name in COLS:
	        if name in df.columns and name in columns:
	            keep.append(name)
	    return df[keep]

	# Build the leaderboard page: title and introduction, the tabbed
	# table/about views, then the citation and acknowledgement panels.
	# NOTE(review): the container nesting below was reconstructed from the
	# statement order; confirm it against the intended page layout.
	demo = gr.Blocks(css=custom_css)
	with demo:
	    with gr.Column():
	        # Page title.
	        gr.Markdown(
	            """<div style="text-align: center;"><h1> 🏎️RACE Leaderboard</h1></div>\
	<br>\
	""",
	            elem_classes="markdown-text",
	        )

	        # Short introduction with links to the repository and the paper.
	        gr.Markdown(
	            """
	Based on the 🏎️RACE benchmark, we demonstrated the ability of different LLMs to generate code that is _correct_ and _meets the requirements of real-world development scenarios_.

	More details about how to evaluate the LLM are available in the [🏎️RACE GitHub repository](https://github.com/jszheng21/RACE). For a complete description of RACE benchmark and related experimental analysis, please refer to the paper: [Beyond Correctness: Benchmarking Multi-dimensional Code Generation for Large Language Models](https://arxiv.org/abs/2407.11470). [![](https://img.shields.io/badge/arXiv-2407.11470-b31b1b.svg)](https://arxiv.org/abs/2407.11470)
	""",
	            elem_classes="markdown-text",
	        )

	    with gr.Tabs(elem_classes="tab-buttons") as tabs:
	        with gr.Column():
	            with gr.Tabs(elem_classes="A100-tabs") as A100_tabs:
	                with gr.TabItem("🔍 Evaluation Table", id=0):
	                    with gr.Column():
	                        with gr.Accordion("⏬ Hidden Columns", open=False):
	                            # Column picker. The model column is left out of
	                            # the choices because it is always displayed.
	                            shown_columns = gr.CheckboxGroup(
	                                choices=[
	                                    c for c in COLS
	                                    if c != AutoEvalColumn.model.name
	                                ],
	                                value=[
	                                    c for c in COLS_LITE
	                                    if c != AutoEvalColumn.model.name
	                                ],
	                                label="",
	                                elem_id="column-select",
	                                interactive=True,
	                            )

	                        # Visible table: starts with the model column plus
	                        # the columns ticked by default.
	                        leaderboard_df = gr.components.Dataframe(
	                            value=df[
	                                [AutoEvalColumn.model.name] + shown_columns.value
	                            ],
	                            headers=COLS,
	                            datatype=TYPES,
	                            elem_id="leaderboard-table",
	                            interactive=False,
	                        )

	                        # Invisible copy of the complete table; it is the
	                        # input that select_columns filters on each change.
	                        hidden_leaderboard_df = gr.components.Dataframe(
	                            value=df,
	                            headers=COLS,
	                            datatype=["str"] * len(COLS),
	                            visible=False,
	                        )

	                        # Re-filter the visible table whenever the column
	                        # selection changes.
	                        shown_columns.change(
	                            select_columns,
	                            [hidden_leaderboard_df, shown_columns],
	                            leaderboard_df,
	                        )

	                        gr.Markdown(NOTES_TEXT, elem_classes="markdown-text")

	                with gr.TabItem("📝 About", id=1):
	                    gr.Markdown(ABOUT_TEXT, elem_classes="markdown-text")

	    with gr.Row():
	        with gr.Accordion("📙 Citation", open=False):
	            citation_button = gr.Textbox(
	                value=CITATION_BUTTON_TEXT,
	                label=CITATION_BUTTON_LABEL,
	                lines=10,
	                elem_id="citation-button",
	                show_copy_button=True,
	            )

	    with gr.Row():
	        with gr.Accordion("🙏 Acknowledgement", open=False):
	            gr.Markdown(ACKNOWLEDGEMENT_TEXT)

	demo.launch()