Update

Add a "Score" column to the leaderboard: each entry's fully-repaired issue count expressed as a percentage of the total number of issues in the llvm-apr-benchmark dataset. The total is computed in app.py and passed through get_leaderboard_df() into EvalResult.to_dict().

Files changed:
- app.py +1 -1
- src/display/utils.py +2 -1
- src/leaderboard/read_evals.py +2 -1
- src/populate.py +2 -2
app.py

@@ -41,7 +41,7 @@ except Exception:
     restart_space()

 total_issues = load_dataset("dtcxzyw/llvm-apr-benchmark").num_rows["test"]
-LEADERBOARD_DF = get_leaderboard_df(EVAL_REQUESTS_PATH, COLS)
+LEADERBOARD_DF = get_leaderboard_df(EVAL_REQUESTS_PATH, COLS, total_issues)


 def init_leaderboard(dataframe):
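For reference, a minimal sketch of where total_issues comes from, assuming the standard datasets-library API (the ds variable name is illustrative):

from datasets import load_dataset

ds = load_dataset("dtcxzyw/llvm-apr-benchmark")  # DatasetDict keyed by split name
print(ds.num_rows)                               # maps split name -> row count, e.g. {"test": ...}
total_issues = ds.num_rows["test"]               # one benchmark issue per row of the "test" split

This count is then passed as the new third argument of get_leaderboard_df, so every leaderboard entry is normalized against the same denominator.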
src/display/utils.py

@@ -28,7 +28,8 @@ auto_eval_column_dict.append(
 )
 auto_eval_column_dict.append(["model_name", ColumnContent, ColumnContent("Base Model", "markdown", True)])
 # Scores
-auto_eval_column_dict.append(["
+auto_eval_column_dict.append(["score", ColumnContent, ColumnContent("Score", "number", True)])
+auto_eval_column_dict.append(["full_pass_count", ColumnContent, ColumnContent("Repaired", "number", True)])
 auto_eval_column_dict.append(["fast_pass_count", ColumnContent, ColumnContent("Repaired (Fast)", "number", True)])
 auto_eval_column_dict.append(["with_hint", ColumnContent, ColumnContent("Hint", "str", True)])
 auto_eval_column_dict.append(["attempts", ColumnContent, ColumnContent("Number of attempts", "number", True)])
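Each entry in auto_eval_column_dict is a triple of attribute name, type, and a default ColumnContent describing one displayed column. A minimal sketch of what the two new declarations amount to, assuming ColumnContent follows the usual Hugging Face leaderboard template (a small frozen dataclass with name/type/displayed_by_default fields; this repo's exact definition may differ):

from dataclasses import dataclass, make_dataclass

@dataclass(frozen=True)
class ColumnContent:          # assumed shape, per the common leaderboard template
    name: str                 # column header shown in the UI
    type: str                 # display type ("number", "str", "markdown", ...)
    displayed_by_default: bool

auto_eval_column_dict = []
auto_eval_column_dict.append(["score", ColumnContent, ColumnContent("Score", "number", True)])
auto_eval_column_dict.append(["full_pass_count", ColumnContent, ColumnContent("Repaired", "number", True)])

# The list of [attr, type, default] triples is typically collapsed into a frozen dataclass,
# so downstream code can refer to the column headers by attribute:
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
print(AutoEvalColumn.score.name)            # "Score"
print(AutoEvalColumn.full_pass_count.name)  # "Repaired"

This is what lets read_evals.py below fill the dataframe row via AutoEvalColumn.score.name.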
src/leaderboard/read_evals.py

@@ -61,12 +61,13 @@ class EvalResult:
             full_pass_count_miscompilation=full_pass_count_cat.get("miscompilation", 0),
         )

-    def to_dict(self):
+    def to_dict(self, total_issues):
         """Converts the Eval Result to a dict compatible with our dataframe display"""
         data_dict = {
             AutoEvalColumn.method_name.name: make_hyperlink(self.method_url, self.method_name),
             AutoEvalColumn.model_name.name: make_hyperlink(self.model_url, self.model_name),
             AutoEvalColumn.with_hint.name: "w/ hint" if self.with_hint else "w/o hint",
+            AutoEvalColumn.score.name: round(self.full_pass_count * 100.0 / total_issues, 1),
             AutoEvalColumn.attempts.name: self.attempts,
             AutoEvalColumn.fast_pass_count.name: self.fast_pass_count,
             AutoEvalColumn.full_pass_count.name: self.full_pass_count,
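The new Score field is full_pass_count expressed as a percentage of the benchmark, rounded to one decimal place. A quick check with illustrative numbers (the real total_issues is read from the dataset at runtime, as in app.py above):

full_pass_count = 30   # illustrative value
total_issues = 295     # illustrative value; the real count comes from the benchmark's test split
score = round(full_pass_count * 100.0 / total_issues, 1)
print(score)           # 10.2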
src/populate.py

@@ -7,10 +7,10 @@ from src.display.utils import AutoEvalColumn
 from src.leaderboard.read_evals import get_raw_eval_results


-def get_leaderboard_df(requests_path: str, cols: list) -> pd.DataFrame:
+def get_leaderboard_df(requests_path: str, cols: list, total_issues: int) -> pd.DataFrame:
     """Creates a dataframe from all the individual experiment results"""
     raw_data = get_raw_eval_results(requests_path)
-    all_data_json = [v.to_dict() for v in raw_data]
+    all_data_json = [v.to_dict(total_issues) for v in raw_data]

     df = pd.DataFrame.from_records(all_data_json)
     df = df.sort_values(by=[AutoEvalColumn.full_pass_count.name], ascending=False)
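Note that the dataframe is still sorted by the absolute Repaired count rather than the new Score column. Since every row is divided by the same total_issues, the two orderings coincide, so sorting by Score directly would be an equivalent, purely cosmetic choice:

# Equivalent ordering: Score is full_pass_count scaled by the same constant for every row.
df = df.sort_values(by=[AutoEvalColumn.score.name], ascending=False)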