dtcxzyw committed
Commit f19f8d1 · unverified · parent: 171faf6
Files changed (3)
  1. app.py +1 -2
  2. src/display/utils.py +17 -64
  3. src/populate.py +2 -6
app.py CHANGED
@@ -14,7 +14,6 @@ from src.about import (
 )
 from src.display.css_html_js import custom_css
 from src.display.utils import (
-    BENCHMARK_COLS,
     COLS,
     AutoEvalColumn,
     fields,
@@ -42,7 +41,7 @@ except Exception:
     restart_space()
 
 total_issues = load_dataset("dtcxzyw/llvm-apr-benchmark").num_rows["test"]
-LEADERBOARD_DF = get_leaderboard_df(EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
+LEADERBOARD_DF = get_leaderboard_df(EVAL_REQUESTS_PATH, COLS)
 
 
 def init_leaderboard(dataframe):
src/display/utils.py CHANGED
@@ -3,7 +3,6 @@ from enum import Enum
 
 import pandas as pd
 
-from src.about import Tasks
 
 def fields(raw_class):
     return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
@@ -20,75 +19,29 @@ class ColumnContent:
     hidden: bool = False
     never_hidden: bool = False
 
+
 ## Leaderboard columns
 auto_eval_column_dict = []
 # Init
-auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
-auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
-#Scores
-auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
-# Model information
-auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
-auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
-auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
-auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
-auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
-auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
-auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
-auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
-auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
+auto_eval_column_dict.append(
+    ["method_name", ColumnContent, ColumnContent("Method", "markdown", True, never_hidden=True)]
+)
+auto_eval_column_dict.append(["model_name", ColumnContent, ColumnContent("Model", "markdown", True)])
+# Scores
+auto_eval_column_dict.append(["full_pass_count", ColumnContent, ColumnContent("Repaired ⬆️", "number", True)])
+auto_eval_column_dict.append(["fast_pass_count", ColumnContent, ColumnContent("Repaired (Fast)", "number", True)])
+auto_eval_column_dict.append(["with_hint", ColumnContent, ColumnContent("Repair with hint", "bool", True)])
+auto_eval_column_dict.append(["attempts", ColumnContent, ColumnContent("Number of attempts", "number", True)])
+auto_eval_column_dict.append(
+    ["full_pass_count_crash", ColumnContent, ColumnContent("Repaired (Crash)", "number", True)]
+)
+auto_eval_column_dict.append(
+    ["full_pass_count_miscompilation", ColumnContent, ColumnContent("Repaired (Miscompilation)", "number", True)]
+)
+auto_eval_column_dict.append(["full_pass_count_hang", ColumnContent, ColumnContent("Repaired (Hang)", "number", True)])
 
 # We use make dataclass to dynamically fill the scores from Tasks
 AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
 
-## All the model information that we might need
-@dataclass
-class ModelDetails:
-    name: str
-    display_name: str = ""
-    symbol: str = ""  # emoji
-
-
-class ModelType(Enum):
-    PT = ModelDetails(name="pretrained", symbol="🟢")
-    FT = ModelDetails(name="fine-tuned", symbol="🔶")
-    IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
-    RL = ModelDetails(name="RL-tuned", symbol="🟦")
-    Unknown = ModelDetails(name="", symbol="?")
-
-    def to_str(self, separator=" "):
-        return f"{self.value.symbol}{separator}{self.value.name}"
-
-    @staticmethod
-    def from_str(type):
-        if "fine-tuned" in type or "🔶" in type:
-            return ModelType.FT
-        if "pretrained" in type or "🟢" in type:
-            return ModelType.PT
-        if "RL-tuned" in type or "🟦" in type:
-            return ModelType.RL
-        if "instruction-tuned" in type or "⭕" in type:
-            return ModelType.IFT
-        return ModelType.Unknown
-
-class WeightType(Enum):
-    Adapter = ModelDetails("Adapter")
-    Original = ModelDetails("Original")
-    Delta = ModelDetails("Delta")
-
-class Precision(Enum):
-    float16 = ModelDetails("float16")
-    bfloat16 = ModelDetails("bfloat16")
-    Unknown = ModelDetails("?")
-
-    def from_str(precision):
-        if precision in ["torch.float16", "float16"]:
-            return Precision.float16
-        if precision in ["torch.bfloat16", "bfloat16"]:
-            return Precision.bfloat16
-        return Precision.Unknown
-
 # Column selection
 COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
-
-BENCHMARK_COLS = [t.value.col_name for t in Tasks]
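For context (not part of this commit): the column triples above are consumed by `make_dataclass`, which turns each `["attr", ColumnContent, default]` entry into a field of the frozen `AutoEvalColumn` class, and `COLS` then collects the display names of the non-hidden columns. A minimal, self-contained sketch, assuming `ColumnContent` is the frozen dataclass from the standard leaderboard template:

```python
# Illustrative sketch only -- not part of the commit. Assumes ColumnContent
# matches the frozen dataclass defined earlier in this file.
from dataclasses import dataclass, make_dataclass


@dataclass(frozen=True)
class ColumnContent:
    name: str                   # display name shown in the leaderboard
    type: str                   # column type ("markdown", "number", "bool", ...)
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False


def fields(raw_class):
    # Same helper as in utils.py: pick up non-dunder class attributes.
    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]


# Two of the new columns, in the same ["attr", type, default] triple format.
auto_eval_column_dict = [
    ["method_name", ColumnContent, ColumnContent("Method", "markdown", True, never_hidden=True)],
    ["full_pass_count", ColumnContent, ColumnContent("Repaired ⬆️", "number", True)],
]

# make_dataclass keeps each default ColumnContent as a class attribute,
# so fields() can read the display names back out.
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
print(COLS)  # ['Method', 'Repaired ⬆️']
```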
src/populate.py CHANGED
@@ -3,20 +3,16 @@ import os
 
 import pandas as pd
 
-from src.display.formatting import has_no_nan_values
 from src.display.utils import AutoEvalColumn
 from src.leaderboard.read_evals import get_raw_eval_results
 
 
-def get_leaderboard_df(requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
+def get_leaderboard_df(requests_path: str, cols: list) -> pd.DataFrame:
     """Creates a dataframe from all the individual experiment results"""
     raw_data = get_raw_eval_results(requests_path)
     all_data_json = [v.to_dict() for v in raw_data]
 
     df = pd.DataFrame.from_records(all_data_json)
-    df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
+    df = df.sort_values(by=[AutoEvalColumn.full_pass_count.name], ascending=False)
     df = df[cols].round(decimals=2)
-
-    # filter out if any of the benchmarks have not been produced
-    df = df[has_no_nan_values(df, benchmark_cols)]
     return df
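To make the effect of this change concrete, here is a toy run with made-up records (the method/model names and counts are hypothetical; the real rows come from `get_raw_eval_results`, which this commit does not touch). The leaderboard is now sorted by the "Repaired ⬆️" (`full_pass_count`) column instead of the removed "Average ⬆️" score, and the per-benchmark NaN filter is gone:

```python
# Toy illustration only -- records and names are made up.
import pandas as pd

records = [
    {"Method": "method-a", "Model": "model-x", "Repaired ⬆️": 12, "Repaired (Fast)": 9.0},
    {"Method": "method-b", "Model": "model-y", "Repaired ⬆️": 27, "Repaired (Fast)": 20.0},
]
cols = ["Method", "Model", "Repaired ⬆️", "Repaired (Fast)"]

df = pd.DataFrame.from_records(records)
# New sort key: AutoEvalColumn.full_pass_count.name == "Repaired ⬆️"
df = df.sort_values(by=["Repaired ⬆️"], ascending=False)
df = df[cols].round(decimals=2)
print(df)  # method-b ranks first; no has_no_nan_values filtering anymore
```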