Commit ac176c3 by dtcxzyw (unverified) · Parent(s): 50181ba
app.py CHANGED
@@ -41,7 +41,7 @@ except Exception:
     restart_space()
 
 total_issues = load_dataset("dtcxzyw/llvm-apr-benchmark").num_rows["test"]
-LEADERBOARD_DF = get_leaderboard_df(EVAL_REQUESTS_PATH, COLS)
+LEADERBOARD_DF = get_leaderboard_df(EVAL_REQUESTS_PATH, COLS, total_issues)
 
 
 def init_leaderboard(dataframe):
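
Note: total_issues is the size of the benchmark's test split, fetched through the datasets library; DatasetDict.num_rows maps each split name to its row count. A minimal sketch of the lookup (the count shown is illustrative, not the real benchmark size):

# sketch: how app.py derives the score denominator
from datasets import load_dataset

ds = load_dataset("dtcxzyw/llvm-apr-benchmark")  # returns a DatasetDict of splits
total_issues = ds.num_rows["test"]               # e.g. {"test": 295}["test"] -> 295
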
src/display/utils.py CHANGED
@@ -28,7 +28,8 @@ auto_eval_column_dict.append(
 )
 auto_eval_column_dict.append(["model_name", ColumnContent, ColumnContent("Base Model", "markdown", True)])
 # Scores
-auto_eval_column_dict.append(["full_pass_count", ColumnContent, ColumnContent("Repaired ⬆️", "number", True)])
+auto_eval_column_dict.append(["score", ColumnContent, ColumnContent("Score", "number", True)])
+auto_eval_column_dict.append(["full_pass_count", ColumnContent, ColumnContent("Repaired", "number", True)])
 auto_eval_column_dict.append(["fast_pass_count", ColumnContent, ColumnContent("Repaired (Fast)", "number", True)])
 auto_eval_column_dict.append(["with_hint", ColumnContent, ColumnContent("Hint", "str", True)])
 auto_eval_column_dict.append(["attempts", ColumnContent, ColumnContent("Number of attempts", "number", True)])
src/leaderboard/read_evals.py CHANGED
@@ -61,12 +61,13 @@ class EvalResult:
             full_pass_count_miscompilation=full_pass_count_cat.get("miscompilation", 0),
         )
 
-    def to_dict(self):
+    def to_dict(self, total_issues):
         """Converts the Eval Result to a dict compatible with our dataframe display"""
         data_dict = {
             AutoEvalColumn.method_name.name: make_hyperlink(self.method_url, self.method_name),
             AutoEvalColumn.model_name.name: make_hyperlink(self.model_url, self.model_name),
             AutoEvalColumn.with_hint.name: "w/ hint" if self.with_hint else "w/o hint",
+            AutoEvalColumn.score.name: round(self.full_pass_count * 100.0 / total_issues, 1),
             AutoEvalColumn.attempts.name: self.attempts,
             AutoEvalColumn.fast_pass_count.name: self.fast_pass_count,
             AutoEvalColumn.full_pass_count.name: self.full_pass_count,
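
Note: the new Score column is just full_pass_count expressed as a percentage of the whole benchmark, rounded to one decimal place. A worked example with made-up numbers:

# sketch: the score formula with illustrative inputs
full_pass_count = 7   # hypothetical: issues fully repaired by one method
total_issues = 295    # hypothetical: rows in the benchmark's test split
score = round(full_pass_count * 100.0 / total_issues, 1)
print(score)          # 2.4 -> shown in the "Score" column
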
src/populate.py CHANGED
@@ -7,10 +7,10 @@ from src.display.utils import AutoEvalColumn
 from src.leaderboard.read_evals import get_raw_eval_results
 
 
-def get_leaderboard_df(requests_path: str, cols: list) -> pd.DataFrame:
+def get_leaderboard_df(requests_path: str, cols: list, total_issues: int) -> pd.DataFrame:
     """Creates a dataframe from all the individual experiment results"""
     raw_data = get_raw_eval_results(requests_path)
-    all_data_json = [v.to_dict() for v in raw_data]
+    all_data_json = [v.to_dict(total_issues) for v in raw_data]
 
     df = pd.DataFrame.from_records(all_data_json)
     df = df.sort_values(by=[AutoEvalColumn.full_pass_count.name], ascending=False)
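
Note: the sort key is still full_pass_count, but since every row is divided by the same total_issues, Score is a monotone function of full_pass_count and the row order is unchanged. The updated call chain end to end (EVAL_REQUESTS_PATH and COLS come from the surrounding app, as in app.py above):

# sketch: the call chain after this commit
total_issues = load_dataset("dtcxzyw/llvm-apr-benchmark").num_rows["test"]
df = get_leaderboard_df(EVAL_REQUESTS_PATH, COLS, total_issues)  # threads the denominator
# ... which internally calls EvalResult.to_dict(total_issues) per result
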