JavaBench / src /about.py
CPunisher's picture
Other than data
948d4dc
from dataclasses import dataclass
from enum import Enum
@dataclass
class Task:
    """Describe one benchmark task reported on the leaderboard.

    Instances are built positionally as ``Task(benchmark, metric, col_name)``,
    so the field order below is part of the interface.
    """

    benchmark: str  # task key as it appears in the results JSON file
    metric: str  # metric key as it appears in the results JSON file
    col_name: str  # name displayed for this task in the leaderboard
# Select your tasks here
# ---------------------------------------------------
class Tasks(Enum):
    """Benchmark tasks shown on the leaderboard."""

    # Each Task pairs the task key and the metric key found in the results
    # JSON file with the name to display in the leaderboard.
    # NOTE(review): ANLI / LogiQA look like template placeholders rather than
    # JavaBench tasks - confirm these are the intended entries.
    task0 = Task(benchmark="anli_r1", metric="acc", col_name="ANLI")
    task1 = Task(benchmark="logiqa", metric="acc_norm", col_name="LogiQA")
# Few-shot setting; change this to match your own evaluation setup.
NUM_FEWSHOT = 0
# ---------------------------------------------------
# Your leaderboard name
TITLE = '<h1 align="center" id="space-title">JavaBench Leaderboard</h1>'
# What does your leaderboard evaluate?
# HTML introduction: a one-line description followed by a paragraph of badge
# links (arXiv, GitHub, Hugging Face). One element per line of the markup.
INTRODUCTION_TEXT = "\n".join(
    (
        "",  # the text begins with a newline
        "<p>",
        "A Benchmark of Object-Oriented Code Generation for Evaluating Large Language Models",
        "</p>",
        '<p class="shields">',
        '<a href="https://arxiv.org/abs/2406.12902">',
        '<img src="https://img.shields.io/badge/arXiv-2406.12902-b31b1b.svg" />',
        "</a>",
        '<a href="https://github.com/java-bench/JavaBench">',
        '<img src="https://img.shields.io/badge/Github-JavaBench-white.svg" />',
        "</a>",
        '<a href="https://huggingface.co/spaces/CPunisher/JavaBench">',
        '<img src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-JavaBench-ffc107?color=ffc107&logoColor=white" />',
        "</a>",
        "</p>",
        "",  # ... and ends with one
    )
)
# Which evaluations are you running? How can people reproduce your results?
# Markdown text with the "How it works" and "Reproducibility" headings.
# Deliberately a plain (non-f) string: nothing is interpolated, and a literal
# `{` or `}` in commands added later must not be parsed as a replacement field.
LLM_BENCHMARKS_TEXT = """
## How it works
## Reproducibility
To reproduce our results, here are the commands you can run:
"""
# Markdown text inviting submissions; links to the usage section of the
# GitHub README. Adjacent literals are concatenated into one line of text
# wrapped in a leading and a trailing newline.
EVALUATION_QUEUE_TEXT = (
    "\n"
    "Thank you for your interest in JavaBench. "
    "We warmly welcome researchers to submit additional benchmarking results, "
    "as we believe that collaborative efforts can significantly advance the study of "
    "Large Language Models and software engineering. "
    "For submission guidelines, please refer to our "
    "[Github Repo](https://github.com/java-bench/JavaBench?tab=readme-ov-file#usage)."
    "\n"
)
# Label that accompanies the citation snippet.
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"

# BibTeX entry for the JavaBench paper, one element per line of the snippet.
# The text contains no backslashes, so ordinary (non-raw) literals are exact.
CITATION_BUTTON_TEXT = "\n".join(
    (
        "",  # the snippet begins with a newline
        "@misc{cao2024aibeatundergraduatesentrylevel,",
        "title={Can AI Beat Undergraduates in Entry-level Java Assignments? Benchmarking Large Language Models on JavaBench},",
        "author={Jialun Cao and Zhiyong Chen and Jiarong Wu and Shing-chi Cheung and Chang Xu},",
        "year={2024},",
        "eprint={2406.12902},",
        "archivePrefix={arXiv},",
        "primaryClass={cs.LG},",
        "url={https://arxiv.org/abs/2406.12902},",
        "}",
        "",  # ... and ends with one
    )
)