Jacqueline Garrahan committed on
Commit 2df02bd
1 Parent(s): d0d0673

Check in failed eval col viewer

Files changed (3)
  1. app.py +18 -1
  2. src/display/utils.py +10 -1
  3. src/populate.py +4 -2
app.py CHANGED

@@ -18,6 +18,8 @@ from src.display.utils import (
     COLS,
     EVAL_COLS,
     EVAL_TYPES,
+    FAILED_EVAL_COLS,
+    FAILED_EVAL_TYPES,
     AutoEvalColumn,
     fields,
 )
@@ -52,7 +54,8 @@ LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS,
     finished_eval_queue_df,
     running_eval_queue_df,
     pending_eval_queue_df,
-) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
+    failed_eval_queue_df
+) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS, FAILED_EVAL_COLS)
 
 
 def init_leaderboard(dataframe):
@@ -132,6 +135,20 @@ with demo:
                                 datatype=EVAL_TYPES,
                                 row_count=5,
                             )
+
+
+                    with gr.Accordion(
+                        f"Failed ({len(failed_eval_queue_df)})",
+                        open=False,
+                    ):
+                        with gr.Row():
+                            failed_eval_table = gr.components.Dataframe(
+                                value=failed_eval_queue_df,
+                                headers=FAILED_EVAL_COLS,
+                                datatype=FAILED_EVAL_TYPES,
+                                row_count=5,
+                            )
+
             with gr.Row():
                 gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
 
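A minimal standalone sketch (not part of this commit) of what the new failed-queue accordion renders, using a hand-built stand-in for failed_eval_queue_df; the gradio calls mirror the ones added above.

import gradio as gr
import pandas as pd

# Stand-in for the real failed_eval_queue_df returned by get_evaluation_queue_df().
failed_eval_queue_df = pd.DataFrame(
    [{"model": "org/model-a", "private": False, "status": "FAILED", "reason": "OOM during eval"}]
)
FAILED_EVAL_COLS = ["model", "private", "status", "reason"]
FAILED_EVAL_TYPES = ["markdown", "bool", "str", "str"]

with gr.Blocks() as demo:
    # Collapsed accordion whose title shows how many evals failed.
    with gr.Accordion(f"Failed ({len(failed_eval_queue_df)})", open=False):
        with gr.Row():
            failed_eval_table = gr.components.Dataframe(
                value=failed_eval_queue_df,
                headers=FAILED_EVAL_COLS,
                datatype=FAILED_EVAL_TYPES,
                row_count=5,
            )

# demo.launch()  # uncomment to preview the accordion locally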
 
src/display/utils.py CHANGED

@@ -35,7 +35,6 @@ auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Avai
 auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("License", "str", False)])
 
 
-
 # We use make dataclass to dynamically fill the scores from Tasks
 AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
 
@@ -46,6 +45,13 @@ class EvalQueueColumn: # Queue column
     private = ColumnContent("private", "bool", True)
     status = ColumnContent("status", "str", True)
 
+@dataclass(frozen=True)
+class FailedEvalQueueColumn: # Queue column
+    model = ColumnContent("model", "markdown", True)
+    private = ColumnContent("private", "bool", True)
+    status = ColumnContent("status", "str", True)
+    reason = ColumnContent("reason", "str", True)
+
 ## All the model information that we might need
 @dataclass
 class ModelDetails:
@@ -59,4 +65,7 @@ COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
 EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
 EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
 
+FAILED_EVAL_COLS = [c.name for c in fields(FailedEvalQueueColumn)]
+FAILED_EVAL_TYPES = [c.type for c in fields(FailedEvalQueueColumn)]
+
 BENCHMARK_COLS = [t.value.col_name for t in Tasks]
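A small standalone sketch (not part of this commit) of how FAILED_EVAL_COLS and FAILED_EVAL_TYPES fall out of the new frozen dataclass. ColumnContent and the fields() helper are re-declared here only so the snippet runs on its own, assuming fields() simply collects the ColumnContent class attributes as elsewhere in this module.

from dataclasses import dataclass

@dataclass
class ColumnContent:  # minimal stand-in: column name, gradio datatype, shown by default
    name: str
    type: str
    displayed_by_default: bool

def fields(raw_class):
    # Assumed behaviour of the module's helper: return the ColumnContent class attributes.
    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]

@dataclass(frozen=True)
class FailedEvalQueueColumn:  # mirrors the class added in this commit
    model = ColumnContent("model", "markdown", True)
    private = ColumnContent("private", "bool", True)
    status = ColumnContent("status", "str", True)
    reason = ColumnContent("reason", "str", True)

FAILED_EVAL_COLS = [c.name for c in fields(FailedEvalQueueColumn)]
FAILED_EVAL_TYPES = [c.type for c in fields(FailedEvalQueueColumn)]
print(FAILED_EVAL_COLS)   # ['model', 'private', 'status', 'reason']
print(FAILED_EVAL_TYPES)  # ['markdown', 'bool', 'str', 'str']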
src/populate.py CHANGED

@@ -22,7 +22,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
     return df
 
 
-def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
+def get_evaluation_queue_df(save_path: str, cols: list, failed_cols: list) -> list[pd.DataFrame]:
     """Creates the different dataframes for the evaluation queues requestes"""
     entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
     all_evals = []
@@ -50,7 +50,9 @@ def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
     pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
     running_list = [e for e in all_evals if e["status"] == "RUNNING"]
     finished_list = [e for e in all_evals if e["status"].startswith("FINISHED") or e["status"] == "PENDING_NEW_EVAL"]
+    failed_list = [e for e in all_evals if e["status"] == "FAILED"]
     df_pending = pd.DataFrame.from_records(pending_list, columns=cols)
     df_running = pd.DataFrame.from_records(running_list, columns=cols)
     df_finished = pd.DataFrame.from_records(finished_list, columns=cols)
-    return df_finished[cols], df_running[cols], df_pending[cols]
+    df_failed = pd.DataFrame.from_records(failed_list, columns=failed_cols)
+    return df_finished[cols], df_running[cols], df_pending[cols], df_failed[failed_cols]
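A small standalone sketch (not part of this commit) of the new FAILED bucket in get_evaluation_queue_df, assuming each request record is a dict with at least model, private and status keys plus a reason for failures; the sample data is hypothetical.

import pandas as pd

# Hypothetical request records of the kind the function reads from save_path.
all_evals = [
    {"model": "org/model-a", "private": False, "status": "FINISHED"},
    {"model": "org/model-b", "private": False, "status": "FAILED", "reason": "OOM during eval"},
]
cols = ["model", "private", "status"]
failed_cols = ["model", "private", "status", "reason"]

# Same filter the commit adds: keep only records whose status is FAILED.
failed_list = [e for e in all_evals if e["status"] == "FAILED"]
df_failed = pd.DataFrame.from_records(failed_list, columns=failed_cols)

print(df_failed)  # one row: org/model-b with its failure reason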