Update
- app.py +22 -3
- src/leaderboard/read_evals.py +10 -0
app.py
CHANGED
@@ -83,6 +83,7 @@ LEADERBOARD_DF = get_leaderboard_df(EVAL_REQUESTS_PATH, total_issues)
 fixed_bug_ids = set()
 fixed_bug_ids_count = dict()
 fixed_bug_ids_fast = set()
+bug_ids_to_patches = dict()
 for row in LEADERBOARD_DF.itertuples():
     print(row)
     model_cnt += 1
@@ -94,6 +95,7 @@ for row in LEADERBOARD_DF.itertuples():
         fixed_bug_ids_count[fix] = fixed_bug_ids_count.get(fix, 0) + 1
     for fix in row.fixed_bug_ids_fast:
         fixed_bug_ids_fast.add(fix)
+    bug_ids_to_patches[row.method_id] = row.patches
 unique_bug_ids = set([bug_id for bug_id, count in fixed_bug_ids_count.items() if count == 1])
 timeline_bugtypes = []
 for bug_id in timeline_bugids:
@@ -212,10 +214,27 @@ with demo:
             fixed_bug_title_id_pairs = [(bug_id_to_title[bug_id], bug_id) for bug_id in sorted(fixed_bug_ids)]
             inspect_issue = gr.Dropdown(fixed_bug_title_id_pairs, label="Inspect Issue", interactive=True)
             golden_patch = gr.Code("", language="cpp", label="Golden Patch")
+            inspect_fix = gr.Dropdown(list(bug_ids_to_patches.keys()), label="Method(Model)", interactive=True)
+            method_patch = gr.Code("", language="cpp", label="APR Patch")
             inspect_issue.change(
-                fn=lambda bug_id: bug_id_to_patch.get(bug_id, f"Not Available (bug_id = {bug_id})"),
-                inputs=inspect_issue,
-                outputs=golden_patch,
+                fn=lambda bug_id, method: (
+                    bug_id_to_patch.get(bug_id, f"Not Available (bug_id = {bug_id})"),
+                    bug_ids_to_patches.get(method, dict()).get(
+                        bug_id, f"Not Available (method = {method}, bug_id = {bug_id})"
+                    ),
+                ),
+                inputs=[inspect_issue, inspect_fix],
+                outputs=[golden_patch, method_patch],
+            )
+            inspect_fix.change(
+                fn=lambda bug_id, method: (
+                    bug_id_to_patch.get(bug_id, f"Not Available (bug_id = {bug_id})"),
+                    bug_ids_to_patches.get(method, dict()).get(
+                        bug_id, f"Not Available (method = {method}, bug_id = {bug_id})"
+                    ),
+                ),
+                inputs=[inspect_issue, inspect_fix],
+                outputs=[golden_patch, method_patch],
             )

         with gr.TabItem("🚀 Submission", elem_id="llm-benchmark-tab-table", id=1):
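The two new `.change` handlers are identical, so selecting either dropdown refreshes both code panes at once. Below is a minimal, runnable sketch of the same wiring with one shared named handler instead of the duplicated lambdas; the `bug_id_to_patch` and `bug_ids_to_patches` values are hypothetical stand-ins for the data app.py builds from LEADERBOARD_DF.

import gradio as gr

# Hypothetical stand-in data (the real values come from LEADERBOARD_DF):
# golden patches keyed by bug id, and per-method patch dicts keyed by method id.
bug_id_to_patch = {"bug-1": "--- a/foo.cpp\n+++ b/foo.cpp\n-old\n+new"}
bug_ids_to_patches = {"MethodA(model-x)": {"bug-1": "// Fast check: True\n-old\n+fix"}}

def show_patches(bug_id, method):
    # Return (golden patch, APR patch) for the current dropdown selections,
    # falling back to a "Not Available" note when a key is missing.
    golden = bug_id_to_patch.get(bug_id, f"Not Available (bug_id = {bug_id})")
    apr = bug_ids_to_patches.get(method, {}).get(
        bug_id, f"Not Available (method = {method}, bug_id = {bug_id})"
    )
    return golden, apr

with gr.Blocks() as demo:
    inspect_issue = gr.Dropdown(list(bug_id_to_patch), label="Inspect Issue", interactive=True)
    inspect_fix = gr.Dropdown(list(bug_ids_to_patches), label="Method(Model)", interactive=True)
    golden_patch = gr.Code("", language="cpp", label="Golden Patch")
    method_patch = gr.Code("", language="cpp", label="APR Patch")
    # One handler bound to both dropdowns; either change updates both panes.
    for dropdown in (inspect_issue, inspect_fix):
        dropdown.change(fn=show_patches, inputs=[inspect_issue, inspect_fix],
                        outputs=[golden_patch, method_patch])

demo.launch()

Factoring the lambda into a named function keeps the two event bindings in sync if the fallback text or lookup logic ever changes.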
src/leaderboard/read_evals.py
CHANGED
@@ -28,6 +28,7 @@ class EvalResult:
     sample_count: float
     fixed_bug_ids: list[str]
     fixed_bug_ids_fast: list[str]
+    patches: dict[str, str]

     @classmethod
     def init_from_json_file(self, json_filepath):
@@ -51,6 +52,7 @@ class EvalResult:
         fixed_bug_ids = []
         fixed_bug_ids_fast = []
         sample_count = 0
+        patches = {}
         for fix in fixes:
             bug_type = fix.get("bug_type", "")
             if fix.get("fast_check_pass", False):
@@ -65,6 +67,12 @@ class EvalResult:
             build_count += fix.get("build_count", 0)
             build_failure_count += fix.get("build_failure_count", 0)

+            patch = ""
+            patch += f"// Fast check: {fix.get('fast_check_pass', False)}\n"
+            patch += f"// Full check: {fix.get('full_check_pass', False)}\n"
+            patch += fix.get("patch", "")
+            patches[fix.get("bug_id", "")] = patch
+
         return self(
             method_name=method_name,
             method_url=method_url,
@@ -83,6 +91,7 @@ class EvalResult:
             fixed_bug_ids=fixed_bug_ids,
             fixed_bug_ids_fast=fixed_bug_ids_fast,
             sample_count=round(sample_count / full_pass_count, 1) if full_pass_count > 0 else 0,
+            patches=patches,
         )

     def to_dict(self, total_issues):
@@ -105,6 +114,7 @@ class EvalResult:
             "fixed_bug_ids": self.fixed_bug_ids,
             "fixed_bug_ids_fast": self.fixed_bug_ids_fast,
             "method_id": self.method_name + "(" + self.model_name + ")",
+            "patches": self.patches,
             AutoEvalColumn.sample_count.name: self.sample_count,
         }

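The new per-fix bookkeeping in init_from_json_file is easiest to see in isolation. A minimal sketch under assumed inputs follows: the `fixes` list is made up, with field names mirroring the JSON keys the method reads. It also shows why `patches` must be initialized as a dict, not a list, since it is indexed by bug id.

# Hypothetical parsed results; in read_evals.py the list comes from the JSON file.
fixes = [
    {"bug_id": "bug-1", "patch": "-old\n+new", "fast_check_pass": True, "full_check_pass": False},
    {"bug_id": "bug-2", "patch": "-a\n+b", "fast_check_pass": True, "full_check_pass": True},
]

patches: dict[str, str] = {}  # a dict keyed by bug id; a list here would raise TypeError
for fix in fixes:
    # Prefix each patch with its check outcomes as C++ comments, matching the
    # gr.Code(language="cpp") viewer in app.py that renders the result.
    patch = ""
    patch += f"// Fast check: {fix.get('fast_check_pass', False)}\n"
    patch += f"// Full check: {fix.get('full_check_pass', False)}\n"
    patch += fix.get("patch", "")
    patches[fix.get("bug_id", "")] = patch

print(patches["bug-2"].splitlines()[0])  # // Fast check: True

Because the check results are embedded as `//` comments, they display as part of the patch text in the leaderboard's code pane without any extra UI components.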