Jacqueline Garrahan committed · Commit 2df02bd · 1 parent: d0d0673

Check in failed eval col viewer

Files changed:
- app.py (+18, -1)
- src/display/utils.py (+10, -1)
- src/populate.py (+4, -2)
app.py CHANGED

@@ -18,6 +18,8 @@ from src.display.utils import (
     COLS,
     EVAL_COLS,
     EVAL_TYPES,
+    FAILED_EVAL_COLS,
+    FAILED_EVAL_TYPES,
     AutoEvalColumn,
     fields,
 )
@@ -52,7 +54,8 @@ LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS,
     finished_eval_queue_df,
     running_eval_queue_df,
     pending_eval_queue_df,
-) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
+    failed_eval_queue_df
+) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS, FAILED_EVAL_COLS)
 
 
 def init_leaderboard(dataframe):
@@ -132,6 +135,20 @@ with demo:
                             datatype=EVAL_TYPES,
                             row_count=5,
                         )
+
+
+                with gr.Accordion(
+                    f"Failed ({len(failed_eval_queue_df)})",
+                    open=False,
+                ):
+                    with gr.Row():
+                        failed_eval_table = gr.components.Dataframe(
+                            value=failed_eval_queue_df,
+                            headers=FAILED_EVAL_COLS,
+                            datatype=FAILED_EVAL_TYPES,
+                            row_count=5,
+                        )
+
     with gr.Row():
         gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
 
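The new accordion mirrors the existing finished/running/pending queue viewers in the submission tab. Below is a minimal, self-contained sketch of the same pattern that runs outside this repo; the sample record and the hard-coded column lists are invented stand-ins for the real queue data.

# Standalone sketch of the "Failed" queue viewer pattern (assumes gradio
# and pandas are installed; the sample record below is invented).
import gradio as gr
import pandas as pd

FAILED_EVAL_COLS = ["model", "private", "status", "reason"]
FAILED_EVAL_TYPES = ["markdown", "bool", "str", "str"]

# Hypothetical stand-in for the real failed_eval_queue_df.
failed_eval_queue_df = pd.DataFrame(
    [{"model": "org/model-a", "private": False, "status": "FAILED", "reason": "OOM"}],
    columns=FAILED_EVAL_COLS,
)

with gr.Blocks() as demo:
    # Collapsed accordion whose title carries the failure count,
    # matching the finished/running/pending accordions above it.
    with gr.Accordion(f"Failed ({len(failed_eval_queue_df)})", open=False):
        with gr.Row():
            failed_eval_table = gr.components.Dataframe(
                value=failed_eval_queue_df,
                headers=FAILED_EVAL_COLS,
                datatype=FAILED_EVAL_TYPES,
                row_count=5,
            )

if __name__ == "__main__":
    demo.launch()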
src/display/utils.py CHANGED

@@ -35,7 +35,6 @@ auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Avai
 auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("License", "str", False)])
 
 
-
 # We use make dataclass to dynamically fill the scores from Tasks
 AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
 
@@ -46,6 +45,13 @@ class EvalQueueColumn: # Queue column
     private = ColumnContent("private", "bool", True)
     status = ColumnContent("status", "str", True)
 
+@dataclass(frozen=True)
+class FailedEvalQueueColumn:  # Queue column
+    model = ColumnContent("model", "markdown", True)
+    private = ColumnContent("private", "bool", True)
+    status = ColumnContent("status", "str", True)
+    reason = ColumnContent("reason", "str", True)
+
 ## All the model information that we might need
 @dataclass
 class ModelDetails:
@@ -59,4 +65,7 @@ COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
 EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
 EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
 
+FAILED_EVAL_COLS = [c.name for c in fields(FailedEvalQueueColumn)]
+FAILED_EVAL_TYPES = [c.type for c in fields(FailedEvalQueueColumn)]
+
 BENCHMARK_COLS = [t.value.col_name for t in Tasks]
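One subtlety worth noting: the queue column classes assign ColumnContent instances as plain class attributes without type annotations, so dataclasses.fields() would not see them; the fields() helper that app.py imports above walks the class __dict__ instead. The sketch below shows how the two new constants come out, reproducing ColumnContent and fields() as they appear in the standard leaderboard template this repo seems to be based on (an assumption on my part).

from dataclasses import dataclass

# Assumed to match the template's ColumnContent definition.
@dataclass
class ColumnContent:
    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False

# Assumed to match the template's fields() helper: it collects non-dunder
# class attributes from __dict__ rather than using dataclasses.fields().
def fields(raw_class):
    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]

@dataclass(frozen=True)
class FailedEvalQueueColumn:  # Queue column
    model = ColumnContent("model", "markdown", True)
    private = ColumnContent("private", "bool", True)
    status = ColumnContent("status", "str", True)
    reason = ColumnContent("reason", "str", True)

FAILED_EVAL_COLS = [c.name for c in fields(FailedEvalQueueColumn)]
FAILED_EVAL_TYPES = [c.type for c in fields(FailedEvalQueueColumn)]

print(FAILED_EVAL_COLS)   # ['model', 'private', 'status', 'reason']
print(FAILED_EVAL_TYPES)  # ['markdown', 'bool', 'str', 'str']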
src/populate.py CHANGED

@@ -22,7 +22,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
     return df
 
 
-def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
+def get_evaluation_queue_df(save_path: str, cols: list, failed_cols: list) -> list[pd.DataFrame]:
     """Creates the different dataframes for the evaluation queues requestes"""
     entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
     all_evals = []
@@ -50,7 +50,9 @@ def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
     pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
     running_list = [e for e in all_evals if e["status"] == "RUNNING"]
     finished_list = [e for e in all_evals if e["status"].startswith("FINISHED") or e["status"] == "PENDING_NEW_EVAL"]
+    failed_list = [e for e in all_evals if e["status"] == "FAILED"]
     df_pending = pd.DataFrame.from_records(pending_list, columns=cols)
     df_running = pd.DataFrame.from_records(running_list, columns=cols)
     df_finished = pd.DataFrame.from_records(finished_list, columns=cols)
-    return df_finished[cols], df_running[cols], df_pending[cols]
+    df_failed = pd.DataFrame.from_records(failed_list, columns=failed_cols)
+    return df_finished[cols], df_running[cols], df_pending[cols], df_failed[failed_cols]
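With this change the function partitions the request entries four ways and returns a fourth dataframe restricted to failed_cols, including the new "reason" column. A minimal in-memory sketch of the added filtering step (the sample records and failure reason are invented):

import pandas as pd

FAILED_EVAL_COLS = ["model", "private", "status", "reason"]

# Stand-in for the records the real function parses out of the JSON
# request files under save_path.
all_evals = [
    {"model": "org/model-a", "private": False, "status": "FINISHED"},
    {"model": "org/model-b", "private": False, "status": "FAILED", "reason": "CUDA OOM"},
]

# Same filter the diff adds: keep only entries whose status is FAILED.
failed_list = [e for e in all_evals if e["status"] == "FAILED"]
df_failed = pd.DataFrame.from_records(failed_list, columns=FAILED_EVAL_COLS)
print(df_failed)  # one row: org/model-b, False, FAILED, CUDA OOM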