|
from dataclasses import dataclass |
|
from enum import Enum |
|
|
|
@dataclass
class Task:
    """Describe one evaluation task as a (benchmark, metric, column) triple.

    Attributes:
        benchmark: Identifier of the benchmark, e.g. ``"anli_r1"``.
        metric: Name of the metric reported for that benchmark, e.g. ``"acc"``.
        col_name: Short label for the task, e.g. ``"ANLI"``; presumably the
            column header shown in the leaderboard -- confirm against callers.
    """

    benchmark: str
    metric: str
    col_name: str
|
|
|
|
|
|
|
|
|
class Tasks(Enum):
    """Enumerate the tasks of this module; each member's value is a ``Task``.

    The ``Task`` arguments are, in order: benchmark, metric, col_name.
    """

    # NOTE(review): "anli_r1" and "logiqa" look like template placeholders
    # rather than JavaBench tasks -- confirm they are still intended.
    task0 = Task("anli_r1", "acc", "ANLI")
    task1 = Task("logiqa", "acc_norm", "LogiQA")
|
|
|
# Few-shot count setting; how it is consumed is not visible in this module.
NUM_FEWSHOT = 0
|
|
|
|
|
|
|
|
|
|
|
# Page heading as a centered HTML <h1> element.
TITLE = """<h1 align="center" id="space-title">JavaBench Leaderboard</h1>"""
|
|
|
|
|
# Introductory HTML: a one-line description of the benchmark followed by a
# paragraph of badge links (arXiv paper, GitHub repository, Hugging Face space).
INTRODUCTION_TEXT = """
<p>
A Benchmark of Object-Oriented Code Generation for Evaluating Large Language Models
</p>

<p class="shields">
<a href="https://arxiv.org/abs/2406.12902">
<img src="https://img.shields.io/badge/arXiv-2406.12902-b31b1b.svg" />
</a>
<a href="https://github.com/java-bench/JavaBench">
<img src="https://img.shields.io/badge/Github-JavaBench-white.svg" />
</a>
<a href="https://huggingface.co/spaces/CPunisher/JavaBench">
<img src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-JavaBench-ffc107?color=ffc107&logoColor=white" />
</a>
</p>
"""
|
|
|
|
|
# Markdown for the "about" text: two section headings whose bodies are still
# mostly empty. A plain string is used because the text has no placeholders
# to interpolate, so an f-string prefix would be misleading.
LLM_BENCHMARKS_TEXT = """
## How it works

## Reproducibility
To reproduce our results, here is the commands you can run:

"""
|
|
|
# Markdown inviting result submissions, with a link to the usage section of
# the GitHub README for the submission guidelines.
EVALUATION_QUEUE_TEXT = """
Thank you for your interest in JavaBench. We warmly welcome researchers to submit additional benchmarking results, as we believe that collaborative efforts can significantly advance the study of Large Language Models and software engineering. For submission guidelines, please refer to our [Github Repo](https://github.com/java-bench/JavaBench?tab=readme-ov-file#usage).
"""
|
|
|
# Label text accompanying the citation snippet below.
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"

# BibTeX entry for the JavaBench paper. Kept as a raw string so that any
# backslash in the entry would be taken literally rather than as an escape.
CITATION_BUTTON_TEXT = r"""
@misc{cao2024aibeatundergraduatesentrylevel,
title={Can AI Beat Undergraduates in Entry-level Java Assignments? Benchmarking Large Language Models on JavaBench},
author={Jialun Cao and Zhiyong Chen and Jiarong Wu and Shing-chi Cheung and Chang Xu},
year={2024},
eprint={2406.12902},
archivePrefix={arXiv},
primaryClass={cs.LG},
url={https://arxiv.org/abs/2406.12902},
}
"""
|
|