Update
app.py CHANGED
@@ -71,6 +71,7 @@ for cat, bug_ids in bug_id_by_cat.items():
         timeline_cols.append(str(cat).capitalize())
         timeline_bugids.append(bug_id)
 LEADERBOARD_DF = get_leaderboard_df(EVAL_REQUESTS_PATH, total_issues)
+fixed_bug_ids = set()
 for row in LEADERBOARD_DF.itertuples():
     print(row)
     model_cnt += 1
@@ -78,6 +79,7 @@ for row in LEADERBOARD_DF.itertuples():
         timeline_ys.append(-model_cnt)
         timeline_cols.append(row.method_id)
         timeline_bugids.append(fix)
+        fixed_bug_ids.add(fix)
 timeline_bugtypes = []
 for bug_id in timeline_bugids:
     timeline_xs.append(bug_id_to_time[bug_id])
@@ -91,6 +93,20 @@ timeline_df = pd.DataFrame(
         "bug_type": timeline_bugtypes,
     }
 )
+fixed_by_cat = dict()
+for bug_id in fixed_bug_ids:
+    fixed_by_cat[bug_id_to_type[bug_id]] = fixed_by_cat.get(bug_id_to_type[bug_id], 0) + 1
+fixed_by_cat_df = pd.DataFrame(
+    {
+        "Category": [str(cat).capitalize() for cat in fixed_by_cat.keys()],
+        "Total": [len(bug_id_by_cat[cat]) for cat in fixed_by_cat.keys()],
+        "Repaired": list(fixed_by_cat.values()),
+        "Repair Rate (%)": [
+            round(fixed_by_cat[cat] / len(bug_id_by_cat[cat]) * 100, 1) for cat in fixed_by_cat.keys()
+        ],
+    }
+)
+fixed_by_cat_df.sort_values("Category", inplace=True)
 
 
 def init_leaderboard(dataframe):
@@ -133,6 +149,7 @@ with demo:
                 y_lim=(-model_cnt - 1, 4),
                 tooltip=["bug_id", "method_name", "time", "bug_type"],
             )
+            gr.Dataframe(fixed_by_cat_df)
 
         with gr.TabItem("🚀 Submission", elem_id="llm-benchmark-tab-table", id=1):
             gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
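
For readers skimming the diff, here is a minimal, standalone sketch of what the new per-category repair-rate table computes. Only the variable names (bug_id_by_cat, bug_id_to_type, fixed_bug_ids) come from app.py; the toy bug IDs, categories, and counts below are hypothetical and exist solely so the snippet runs on its own.

# Standalone sketch of the repair-rate table added in this commit; input data is invented.
import pandas as pd

bug_id_by_cat = {"crash": ["b1", "b2", "b3"], "hang": ["b4", "b5"]}  # hypothetical categories
bug_id_to_type = {b: cat for cat, ids in bug_id_by_cat.items() for b in ids}
fixed_bug_ids = {"b1", "b3", "b4"}  # hypothetical set of bugs repaired by at least one method

# Count repaired bugs per category (mirrors the loop the commit adds).
fixed_by_cat = {}
for bug_id in fixed_bug_ids:
    cat = bug_id_to_type[bug_id]
    fixed_by_cat[cat] = fixed_by_cat.get(cat, 0) + 1

fixed_by_cat_df = pd.DataFrame(
    {
        "Category": [str(cat).capitalize() for cat in fixed_by_cat],
        "Total": [len(bug_id_by_cat[cat]) for cat in fixed_by_cat],
        "Repaired": list(fixed_by_cat.values()),
        "Repair Rate (%)": [
            round(fixed_by_cat[cat] / len(bug_id_by_cat[cat]) * 100, 1) for cat in fixed_by_cat
        ],
    }
).sort_values("Category")

print(fixed_by_cat_df)
# -> Crash: 2/3 repaired (66.7%), Hang: 1/2 repaired (50.0%)

The same aggregation could be done with a pandas groupby, but the explicit dict-counting loop stays close to the style of the surrounding app.py code and needs no extra reshaping before handing the frame to gr.Dataframe.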