Alvinn-aai commited on
Commit
54e1175
·
1 Parent(s): 322ffad

Add Tier 1 and Tier 2 success-rate columns, drop the system-type field, handle empty-dataset edge cases

Browse files
app.py CHANGED
@@ -69,7 +69,7 @@ def init_leaderboard(dataframe: pd.DataFrame):
69
  cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
70
  label="Select Columns to Display:",
71
  ),
72
- search_columns=[AutoEvalColumn.system.name, AutoEvalColumn.system_type.name],
73
  hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
74
  bool_checkboxgroup_label="Hide models",
75
  interactive=False,
@@ -81,7 +81,6 @@ def init_leaderboard(dataframe: pd.DataFrame):
81
  def add_solution_cbk(
82
  system_name: str,
83
  org: str,
84
- sys_type: str,
85
  submission_path: str,
86
  profile: gr.OAuthProfile | None,
87
  oauth_token: gr.OAuthToken | None,
@@ -128,7 +127,6 @@ def add_solution_cbk(
128
  for val, val_name in [
129
  (system_name, "System name"),
130
  (org, "Organisation name"),
131
- (sys_type, "System type"),
132
  ]:
133
  if len(val) == 0:
134
  return styled_error(f"Please fill in the '{val_name}' field.")
@@ -149,7 +147,6 @@ def add_solution_cbk(
149
  user_id,
150
  system_name,
151
  org,
152
- sys_type,
153
  submission_path,
154
  is_warmup_dataset=(SPLIT == "warmup"),
155
  ensure_all_present=ENSURE_ALL_PRESENT,
@@ -258,13 +255,13 @@ with blocks:
258
  with gr.Column():
259
  system_name_textbox = gr.Textbox(label=AutoEvalColumn.system.name)
260
  org_textbox = gr.Textbox(label=AutoEvalColumn.organization.name)
261
- sys_type_dropdown = gr.Dropdown(
262
- choices=[t.to_str() for t in ModelType],
263
- label=AutoEvalColumn.system_type.name,
264
- multiselect=False,
265
- value=ModelType.LLM.to_str(),
266
- interactive=True,
267
- )
268
 
269
  submission_file = gr.File(label="JSONL solutions file", file_types=[".jsonl"])
270
 
@@ -278,7 +275,6 @@ with blocks:
278
  [
279
  system_name_textbox,
280
  org_textbox,
281
- sys_type_dropdown,
282
  submission_file,
283
  ],
284
  submission_result,
 
69
  cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
70
  label="Select Columns to Display:",
71
  ),
72
+ search_columns=[AutoEvalColumn.system.name, AutoEvalColumn.organization.name],
73
  hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
74
  bool_checkboxgroup_label="Hide models",
75
  interactive=False,
 
81
  def add_solution_cbk(
82
  system_name: str,
83
  org: str,
 
84
  submission_path: str,
85
  profile: gr.OAuthProfile | None,
86
  oauth_token: gr.OAuthToken | None,
 
127
  for val, val_name in [
128
  (system_name, "System name"),
129
  (org, "Organisation name"),
 
130
  ]:
131
  if len(val) == 0:
132
  return styled_error(f"Please fill in the '{val_name}' field.")
 
147
  user_id,
148
  system_name,
149
  org,
 
150
  submission_path,
151
  is_warmup_dataset=(SPLIT == "warmup"),
152
  ensure_all_present=ENSURE_ALL_PRESENT,
 
255
  with gr.Column():
256
  system_name_textbox = gr.Textbox(label=AutoEvalColumn.system.name)
257
  org_textbox = gr.Textbox(label=AutoEvalColumn.organization.name)
258
+ # sys_type_dropdown = gr.Dropdown(
259
+ # choices=[t.to_str() for t in ModelType],
260
+ # label=AutoEvalColumn.system_type.name,
261
+ # multiselect=False,
262
+ # value=ModelType.LLM.to_str(),
263
+ # interactive=True,
264
+ # )
265
 
266
  submission_file = gr.File(label="JSONL solutions file", file_types=[".jsonl"])
267
 
 
275
  [
276
  system_name_textbox,
277
  org_textbox,
 
278
  submission_file,
279
  ],
280
  submission_result,
dummy_submission.jsonl ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ {"problem_id": "142", "solution": "from dataclasses import dataclass\nfrom typing import TypeAlias\n\nfrom evaluation.graph_structure import Graph\n\nMOD = 1_000_000_007\n\n\n@dataclass(frozen=True)\nclass DPState:\n \"\"\"\n Dynamic Programming state for the Dominating Set problem on tree decompositions.\n This dataclass encapsulates the complete state information needed at each bag\n during the tree decomposition-based dynamic programming algorithm.\n\n Fields:\n ( 1 ) assign_mask : int Bitmask indicating which vertices in the current bag\n are assigned to the dominating set (1 = IN, 0 = OUT).\n Bit i corresponds to the i-th vertex in the sorted bag.\n\n ( 2 ) need_mask : int Bitmask indicating which OUT vertices in the current bag\n still require domination (1 = needs domination, 0 = already dominated).\n Only meaningful for OUT vertices (assign_mask bit = 0).\n IN vertices never have the need bit set since they dominate themselves.\n\n State Invariants:\n - For any bit position i: if (assign_mask >> i) & 1 == 1, then (need_mask >> i) & 1 == 0\n - The need_mask only tracks vertices that are OUT and not yet dominated by adjacent IN vertices\n - When a vertex is forgotten, it must not have the need bit set (invalid state otherwise)\n\n Usage in Algorithm:\n - LEAF: Initialize with vertex either IN (assign=1, need=0) or OUT (assign=0, need=1)\n - INTRODUCE: Insert new bit at appropriate position, update domination status\n - FORGET: Remove bit at appropriate position, reject states with undominated OUT vertices\n - JOIN: Merge compatible states (same assignment), combine need masks with AND operation\n\n All fields are immutable and hashable, making this object suitable as a dictionary key.\n \"\"\"\n\n assign_mask: int\n need_mask: int\n\n\n@dataclass\nclass DPValue:\n \"\"\"\n Dynamic Programming value representing the computational result for a given state.\n This dataclass replaces the previous Tuple[int, int] representation with a more\n structured and 
self-documenting approach for weighted model counting.\n\n Fields:\n ( 1 ) count : int Number of distinct vertex subsets (dominating sets) that\n achieve the current state configuration. This counts the\n multiplicity of solutions that lead to the same DP state.\n\n ( 2 ) weight : int Total weighted sum across all vertex subsets that achieve\n the current state configuration. Each dominating set contributes\n its total vertex weight sum to this field.\n\n Implementation Details:\n - Both fields are maintained modulo MOD (1,000,000,007)\n - The count field enables tracking the number of valid dominating sets\n - The weight field accumulates the total weight contribution from all valid sets\n - When combining values from different DP branches:\n * Counts are multiplied for independent choices\n * Weights are combined using inclusion-exclusion principle to avoid double-counting\n\n This structure enables simultaneous tracking of both solution count and cumulative\n weight during the tree decomposition DP computation.\n \"\"\"\n\n count: int\n weight: int\n\n\nBag: TypeAlias = set[int] # a bag is a set of vertices\n\n\ndef insert_bit(\n mask: int,\n pos: int,\n bit: int,\n) -> int:\n \"\"\"\n Insert `bit` (0/1) at position `pos` (LSB == position 0) in `mask`\n shifting higher bits left by one.\n \"\"\"\n lower = mask & ((1 << pos) - 1)\n higher = mask >> pos\n return lower | (bit << pos) | (higher << (pos + 1))\n\n\ndef remove_bit(\n mask: int,\n pos: int,\n) -> int:\n \"\"\"\n Delete the bit at position `pos` from `mask`, shifting higher bits right.\n \"\"\"\n lower = mask & ((1 << pos) - 1)\n higher = mask >> (pos + 1)\n return lower | (higher << pos)\n\n\ndef bag_tuple(bag: Bag) -> tuple[int, ...]:\n return tuple(sorted(bag))\n\n\ndef bag_selected_weight(\n assign_mask: int,\n bag_vertices: tuple[int, ...],\n vertex_weights: dict[int, int],\n) -> int:\n \"\"\"Sum of weights of vertices in the bag that are selected (IN).\"\"\"\n s = 0\n for idx, v in 
enumerate(bag_vertices):\n if (assign_mask >> idx) & 1:\n s += vertex_weights[v]\n return s % MOD\n\n\ndef accumulate(\n table: dict[DPState, DPValue],\n state: DPState,\n cnt: int,\n wsum: int,\n) -> None:\n \"\"\"Add (cnt, wsum) to existing entry of state inside table (MOD arithmetic).\"\"\"\n cnt %= MOD\n wsum %= MOD\n if state in table:\n existing = table[state]\n table[state] = DPValue(count=(existing.count + cnt) % MOD, weight=(existing.weight + wsum) % MOD)\n else:\n table[state] = DPValue(count=cnt, weight=wsum)\n\n\ndef leaf_callback(\n graph: Graph,\n cur_table: dict[DPState, DPValue],\n cur_bag_info: tuple[int, Bag],\n leaf_vertex: int,\n):\n bag_vertices = bag_tuple(cur_bag_info[1]) # (leaf_vertex,)\n assert len(bag_vertices) == 1 and bag_vertices[0] == leaf_vertex\n\n w_v = graph.vertex_weights[leaf_vertex] if graph.vertex_weights else 1\n\n # Case 1: vertex is IN the dominating set\n assign_mask = 1 # bit 0 == 1\n need_mask = 0 # IN vertices never need domination\n state_in = DPState(assign_mask=assign_mask, need_mask=need_mask)\n accumulate(cur_table, state_in, cnt=1, wsum=w_v % MOD)\n\n # Case 2: vertex is OUT - needs domination\n assign_mask = 0\n need_mask = 1 # NEEDS_DOMINATION\n state_out = DPState(assign_mask=assign_mask, need_mask=need_mask)\n accumulate(cur_table, state_out, cnt=1, wsum=0)\n\n\ndef introduce_callback(\n graph: Graph,\n cur_table: dict[DPState, DPValue],\n cur_bag_info: tuple[int, Bag],\n child_table: dict[DPState, DPValue],\n child_bag_info: tuple[int, Bag],\n introduced_vertex: int,\n):\n parent_vertices = bag_tuple(cur_bag_info[1])\n\n # index at which the new vertex was inserted\n idx_new = parent_vertices.index(introduced_vertex)\n w_new = graph.vertex_weights[introduced_vertex] if graph.vertex_weights else 1\n\n # pre-compute adjacency between introduced vertex and vertices in parent bag\n is_adj = [(v in graph.neighbors(introduced_vertex)) for v in parent_vertices]\n\n for child_state, dp_value in child_table.items():\n 
child_assign, child_need = child_state.assign_mask, child_state.need_mask\n cnt_child, wsum_child = dp_value.count, dp_value.weight\n # \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n # Choice A: new vertex is IN_X\n # \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n assign_in = insert_bit(child_assign, idx_new, 1)\n need_in = insert_bit(child_need, idx_new, 0)\n\n # when y is IN it may dominate some previously undominated OUT vertices\n for idx, adj in enumerate(is_adj):\n if idx == idx_new or not adj:\n continue\n # vertex idx is OUT?\n if (assign_in >> idx) & 1:\n continue # IN vertices never carry NEED flag\n # if it was NEED, clear it\n if (need_in >> idx) & 1:\n need_in &= ~(1 << idx)\n\n cnt_new = cnt_child\n wsum_new = (wsum_child + cnt_child * w_new) % MOD\n state_in = DPState(assign_mask=assign_in, need_mask=need_in)\n accumulate(cur_table, state_in, cnt_new, wsum_new)\n\n # \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n # Choice B: new vertex is NOT_IN_X\n # \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n assign_out = insert_bit(child_assign, idx_new, 0)\n\n # Determine if introduced vertex is already dominated by some\n # IN vertex present in the (extended) bag.\n dominated = False\n for idx, adj in enumerate(is_adj):\n if idx == idx_new or not adj:\n continue\n if (assign_out >> 
idx) & 1: # neighbor is IN\n dominated = True\n break\n need_bit = 0 if dominated else 1\n need_out = insert_bit(child_need, idx_new, need_bit)\n\n # ( no other vertices change status )\n state_out = DPState(assign_mask=assign_out, need_mask=need_out)\n accumulate(cur_table, state_out, cnt_child, wsum_child)\n\n\ndef forget_callback(\n graph: Graph,\n cur_table: dict[DPState, DPValue],\n cur_bag_info: tuple[int, Bag],\n child_table: dict[DPState, DPValue],\n child_bag_info: tuple[int, Bag],\n forgotten_vertex: int,\n):\n child_vertices = bag_tuple(child_bag_info[1])\n idx_forgot = child_vertices.index(forgotten_vertex)\n\n for child_state, dp_value in child_table.items():\n assign_child, need_child = child_state.assign_mask, child_state.need_mask\n cnt_child, wsum_child = dp_value.count, dp_value.weight\n bit_assign = (assign_child >> idx_forgot) & 1\n bit_need = (need_child >> idx_forgot) & 1\n\n # If forgotten vertex is OUT and still needs domination -> invalid state\n if bit_assign == 0 and bit_need == 1:\n continue\n\n assign_par = remove_bit(assign_child, idx_forgot)\n need_par = remove_bit(need_child, idx_forgot)\n\n state_par = DPState(assign_mask=assign_par, need_mask=need_par)\n accumulate(cur_table, state_par, cnt_child, wsum_child)\n\n\ndef join_callback(\n graph: Graph,\n cur_table: dict[DPState, DPValue],\n cur_bag_info: tuple[int, Bag],\n left_child_table: dict[DPState, DPValue],\n left_child_bag_info: tuple[int, Bag],\n right_child_table: dict[DPState, DPValue],\n right_child_bag_info: tuple[int, Bag],\n):\n bag_vertices = bag_tuple(cur_bag_info[1])\n vertex_weights = graph.vertex_weights\n assert vertex_weights is not None\n\n # Group right states by assignment mask for O(|L| + |R|) compatibility\n right_by_assign: dict[int, list[tuple[int, int, int]]] = {}\n for right_state, dp_value in right_child_table.items():\n assign_r, need_r = right_state.assign_mask, right_state.need_mask\n cnt_r, wsum_r = dp_value.count, dp_value.weight\n 
right_by_assign.setdefault(assign_r, []).append((need_r, cnt_r, wsum_r))\n\n for left_state, dp_value in left_child_table.items():\n assign_l, need_l = left_state.assign_mask, left_state.need_mask\n cnt_l, wsum_l = dp_value.count, dp_value.weight\n if assign_l not in right_by_assign:\n continue\n for need_r, cnt_r, wsum_r in right_by_assign[assign_l]:\n # Merge NEED flags: dominated if dominated in either side\n need_merge = need_l & need_r # bitwise AND keeps 1 only if both have NEED\n\n cnt_merge = (cnt_l * cnt_r) % MOD\n\n w_bag_sel = bag_selected_weight(assign_l, bag_vertices, vertex_weights)\n w_merge = (wsum_l * cnt_r + wsum_r * cnt_l - cnt_merge * w_bag_sel) % MOD\n if w_merge < 0:\n w_merge += MOD\n\n state_merge = DPState(assign_mask=assign_l, need_mask=need_merge)\n accumulate(cur_table, state_merge, cnt_merge, w_merge)\n\n\ndef extract_solution(root_table: dict[DPState, DPValue]) -> int:\n \"\"\"\n Sum the total weights of all globally valid dominating sets.\n Return -1 if none exist.\n \"\"\"\n answer = 0\n found = False\n for state, dp_value in root_table.items():\n assign_mask, need_mask = state.assign_mask, state.need_mask\n cnt, wsum = dp_value.count, dp_value.weight\n # Bag may be empty or not\n if assign_mask == 0 and need_mask == 0 and cnt == 0:\n # Defensive - shouldn't happen\n continue\n if need_mask != 0:\n # some vertex in root bag still needs domination -> invalid\n continue\n answer = (answer + wsum) % MOD\n found = True\n return answer if found else -1\n"}
2
+ {"problem_id": "120", "solution": "from dataclasses import dataclass\nfrom typing import TypeAlias\n\nfrom evaluation.graph_structure import Graph\n\nMOD = 1_000_000_007\n\n\n@dataclass(frozen=True)\nclass DPState:\n \"\"\"\n Dynamic Programming state for the Dominating Set problem on tree decompositions.\n This dataclass encapsulates the complete state information needed at each bag\n during the tree decomposition-based dynamic programming algorithm.\n\n Fields:\n ( 1 ) assign_mask : int Bitmask indicating which vertices in the current bag\n are assigned to the dominating set (1 = IN, 0 = OUT).\n Bit i corresponds to the i-th vertex in the sorted bag.\n\n ( 2 ) need_mask : int Bitmask indicating which OUT vertices in the current bag\n still require domination (1 = needs domination, 0 = already dominated).\n Only meaningful for OUT vertices (assign_mask bit = 0).\n IN vertices never have the need bit set since they dominate themselves.\n\n State Invariants:\n - For any bit position i: if (assign_mask >> i) & 1 == 1, then (need_mask >> i) & 1 == 0\n - The need_mask only tracks vertices that are OUT and not yet dominated by adjacent IN vertices\n - When a vertex is forgotten, it must not have the need bit set (invalid state otherwise)\n\n Usage in Algorithm:\n - LEAF: Initialize with vertex either IN (assign=1, need=0) or OUT (assign=0, need=1)\n - INTRODUCE: Insert new bit at appropriate position, update domination status\n - FORGET: Remove bit at appropriate position, reject states with undominated OUT vertices\n - JOIN: Merge compatible states (same assignment), combine need masks with AND operation\n\n All fields are immutable and hashable, making this object suitable as a dictionary key.\n \"\"\"\n\n assign_mask: int\n need_mask: int\n\n\n@dataclass\nclass DPValue:\n \"\"\"\n Dynamic Programming value representing the computational result for a given state.\n This dataclass replaces the previous Tuple[int, int] representation with a more\n structured and 
self-documenting approach for weighted model counting.\n\n Fields:\n ( 1 ) count : int Number of distinct vertex subsets (dominating sets) that\n achieve the current state configuration. This counts the\n multiplicity of solutions that lead to the same DP state.\n\n ( 2 ) weight : int Total weighted sum across all vertex subsets that achieve\n the current state configuration. Each dominating set contributes\n its total vertex weight sum to this field.\n\n Implementation Details:\n - Both fields are maintained modulo MOD (1,000,000,007)\n - The count field enables tracking the number of valid dominating sets\n - The weight field accumulates the total weight contribution from all valid sets\n - When combining values from different DP branches:\n * Counts are multiplied for independent choices\n * Weights are combined using inclusion-exclusion principle to avoid double-counting\n\n This structure enables simultaneous tracking of both solution count and cumulative\n weight during the tree decomposition DP computation.\n \"\"\"\n\n count: int\n weight: int\n\n\nBag: TypeAlias = set[int] # a bag is a set of vertices\n\n\ndef insert_bit(\n mask: int,\n pos: int,\n bit: int,\n) -> int:\n \"\"\"\n Insert `bit` (0/1) at position `pos` (LSB == position 0) in `mask`\n shifting higher bits left by one.\n \"\"\"\n lower = mask & ((1 << pos) - 1)\n higher = mask >> pos\n return lower | (bit << pos) | (higher << (pos + 1))\n\n\ndef remove_bit(\n mask: int,\n pos: int,\n) -> int:\n \"\"\"\n Delete the bit at position `pos` from `mask`, shifting higher bits right.\n \"\"\"\n lower = mask & ((1 << pos) - 1)\n higher = mask >> (pos + 1)\n return lower | (higher << pos)\n\n\ndef bag_tuple(bag: Bag) -> tuple[int, ...]:\n return tuple(sorted(bag))\n\n\ndef bag_selected_weight(\n assign_mask: int,\n bag_vertices: tuple[int, ...],\n vertex_weights: dict[int, int],\n) -> int:\n \"\"\"Sum of weights of vertices in the bag that are selected (IN).\"\"\"\n s = 0\n for idx, v in 
enumerate(bag_vertices):\n if (assign_mask >> idx) & 1:\n s += vertex_weights[v]\n return s % MOD\n\n\ndef accumulate(\n table: dict[DPState, DPValue],\n state: DPState,\n cnt: int,\n wsum: int,\n) -> None:\n \"\"\"Add (cnt, wsum) to existing entry of state inside table (MOD arithmetic).\"\"\"\n cnt %= MOD\n wsum %= MOD\n if state in table:\n existing = table[state]\n table[state] = DPValue(count=(existing.count + cnt) % MOD, weight=(existing.weight + wsum) % MOD)\n else:\n table[state] = DPValue(count=cnt, weight=wsum)\n\n\ndef leaf_callback(\n graph: Graph,\n cur_table: dict[DPState, DPValue],\n cur_bag_info: tuple[int, Bag],\n leaf_vertex: int,\n):\n bag_vertices = bag_tuple(cur_bag_info[1]) # (leaf_vertex,)\n assert len(bag_vertices) == 1 and bag_vertices[0] == leaf_vertex\n\n w_v = graph.vertex_weights[leaf_vertex] if graph.vertex_weights else 1\n\n # Case 1: vertex is IN the dominating set\n assign_mask = 1 # bit 0 == 1\n need_mask = 0 # IN vertices never need domination\n state_in = DPState(assign_mask=assign_mask, need_mask=need_mask)\n accumulate(cur_table, state_in, cnt=1, wsum=w_v % MOD)\n\n # Case 2: vertex is OUT - needs domination\n assign_mask = 0\n need_mask = 1 # NEEDS_DOMINATION\n state_out = DPState(assign_mask=assign_mask, need_mask=need_mask)\n accumulate(cur_table, state_out, cnt=1, wsum=0)\n\n\ndef introduce_callback(\n graph: Graph,\n cur_table: dict[DPState, DPValue],\n cur_bag_info: tuple[int, Bag],\n child_table: dict[DPState, DPValue],\n child_bag_info: tuple[int, Bag],\n introduced_vertex: int,\n):\n parent_vertices = bag_tuple(cur_bag_info[1])\n\n # index at which the new vertex was inserted\n idx_new = parent_vertices.index(introduced_vertex)\n w_new = graph.vertex_weights[introduced_vertex] if graph.vertex_weights else 1\n\n # pre-compute adjacency between introduced vertex and vertices in parent bag\n is_adj = [(v in graph.neighbors(introduced_vertex)) for v in parent_vertices]\n\n for child_state, dp_value in child_table.items():\n 
child_assign, child_need = child_state.assign_mask, child_state.need_mask\n cnt_child, wsum_child = dp_value.count, dp_value.weight\n # \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n # Choice A: new vertex is IN_X\n # \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n assign_in = insert_bit(child_assign, idx_new, 1)\n need_in = insert_bit(child_need, idx_new, 0)\n\n # when y is IN it may dominate some previously undominated OUT vertices\n for idx, adj in enumerate(is_adj):\n if idx == idx_new or not adj:\n continue\n # vertex idx is OUT?\n if (assign_in >> idx) & 1:\n continue # IN vertices never carry NEED flag\n # if it was NEED, clear it\n if (need_in >> idx) & 1:\n need_in &= ~(1 << idx)\n\n cnt_new = cnt_child\n wsum_new = (wsum_child + cnt_child * w_new) % MOD\n state_in = DPState(assign_mask=assign_in, need_mask=need_in)\n accumulate(cur_table, state_in, cnt_new, wsum_new)\n\n # \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n # Choice B: new vertex is NOT_IN_X\n # \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n assign_out = insert_bit(child_assign, idx_new, 0)\n\n # Determine if introduced vertex is already dominated by some\n # IN vertex present in the (extended) bag.\n dominated = False\n for idx, adj in enumerate(is_adj):\n if idx == idx_new or not adj:\n continue\n if (assign_out >> 
idx) & 1: # neighbor is IN\n dominated = True\n break\n need_bit = 0 if dominated else 1\n need_out = insert_bit(child_need, idx_new, need_bit)\n\n # ( no other vertices change status )\n state_out = DPState(assign_mask=assign_out, need_mask=need_out)\n accumulate(cur_table, state_out, cnt_child, wsum_child)\n\n\ndef forget_callback(\n graph: Graph,\n cur_table: dict[DPState, DPValue],\n cur_bag_info: tuple[int, Bag],\n child_table: dict[DPState, DPValue],\n child_bag_info: tuple[int, Bag],\n forgotten_vertex: int,\n):\n child_vertices = bag_tuple(child_bag_info[1])\n idx_forgot = child_vertices.index(forgotten_vertex)\n\n for child_state, dp_value in child_table.items():\n assign_child, need_child = child_state.assign_mask, child_state.need_mask\n cnt_child, wsum_child = dp_value.count, dp_value.weight\n bit_assign = (assign_child >> idx_forgot) & 1\n bit_need = (need_child >> idx_forgot) & 1\n\n # If forgotten vertex is OUT and still needs domination -> invalid state\n if bit_assign == 0 and bit_need == 1:\n continue\n\n assign_par = remove_bit(assign_child, idx_forgot)\n need_par = remove_bit(need_child, idx_forgot)\n\n state_par = DPState(assign_mask=assign_par, need_mask=need_par)\n accumulate(cur_table, state_par, cnt_child, wsum_child)\n\n\ndef join_callback(\n graph: Graph,\n cur_table: dict[DPState, DPValue],\n cur_bag_info: tuple[int, Bag],\n left_child_table: dict[DPState, DPValue],\n left_child_bag_info: tuple[int, Bag],\n right_child_table: dict[DPState, DPValue],\n right_child_bag_info: tuple[int, Bag],\n):\n bag_vertices = bag_tuple(cur_bag_info[1])\n vertex_weights = graph.vertex_weights\n assert vertex_weights is not None\n\n # Group right states by assignment mask for O(|L| + |R|) compatibility\n right_by_assign: dict[int, list[tuple[int, int, int]]] = {}\n for right_state, dp_value in right_child_table.items():\n assign_r, need_r = right_state.assign_mask, right_state.need_mask\n cnt_r, wsum_r = dp_value.count, dp_value.weight\n 
right_by_assign.setdefault(assign_r, []).append((need_r, cnt_r, wsum_r))\n\n for left_state, dp_value in left_child_table.items():\n assign_l, need_l = left_state.assign_mask, left_state.need_mask\n cnt_l, wsum_l = dp_value.count, dp_value.weight\n if assign_l not in right_by_assign:\n continue\n for need_r, cnt_r, wsum_r in right_by_assign[assign_l]:\n # Merge NEED flags: dominated if dominated in either side\n need_merge = need_l & need_r # bitwise AND keeps 1 only if both have NEED\n\n cnt_merge = (cnt_l * cnt_r) % MOD\n\n w_bag_sel = bag_selected_weight(assign_l, bag_vertices, vertex_weights)\n w_merge = (wsum_l * cnt_r + wsum_r * cnt_l - cnt_merge * w_bag_sel) % MOD\n if w_merge < 0:\n w_merge += MOD\n\n state_merge = DPState(assign_mask=assign_l, need_mask=need_merge)\n accumulate(cur_table, state_merge, cnt_merge, w_merge)\n\n\ndef extract_solution(root_table: dict[DPState, DPValue]) -> int:\n \"\"\"\n Sum the total weights of all globally valid dominating sets.\n Return -1 if none exist.\n \"\"\"\n answer = 0\n found = False\n for state, dp_value in root_table.items():\n assign_mask, need_mask = state.assign_mask, state.need_mask\n cnt, wsum = dp_value.count, dp_value.weight\n # Bag may be empty or not\n if assign_mask == 0 and need_mask == 0 and cnt == 0:\n # Defensive - shouldn't happen\n continue\n if need_mask != 0:\n # some vertex in root bag still needs domination -> invalid\n continue\n answer = (answer + wsum) % MOD\n found = True\n return answer if found else -1\n"}
src/display/utils.py CHANGED
@@ -22,10 +22,10 @@ class ColumnContent:
22
  @dataclass(frozen=True)
23
  class AutoEvalColumn:
24
  system = ColumnContent("System Name", "markdown", True, never_hidden=True)
25
- system_type = ColumnContent("System Type", "str", True)
26
  organization = ColumnContent("Organization", "str", True, never_hidden=True)
27
- success_rate = ColumnContent("Success Rate (%)", "number", True)
28
- problems_solved = ColumnContent("Problems Solved", "number", True)
 
29
  submitted_on = ColumnContent("Submitted On", "datetime", True)
30
 
31
 
 
22
  @dataclass(frozen=True)
23
  class AutoEvalColumn:
24
  system = ColumnContent("System Name", "markdown", True, never_hidden=True)
 
25
  organization = ColumnContent("Organization", "str", True, never_hidden=True)
26
+ success_rate_overall = ColumnContent("Overall Success (%)", "number", True)
27
+ success_rate_tier1 = ColumnContent("Tier 1 Success (%)", "number", True)
28
+ success_rate_tier2 = ColumnContent("Tier 2 Success (%)", "number", True)
29
  submitted_on = ColumnContent("Submitted On", "datetime", True)
30
 
31
 
src/populate.py CHANGED
@@ -15,6 +15,17 @@ def get_leaderboard_df(results_dataset_name: str) -> pd.DataFrame:
15
  @brief Creates a dataframe from all the individual experiment results.
16
  """
17
 
 
 
 
 
 
 
 
 
 
 
 
18
  try:
19
  configs = get_dataset_config_names(
20
  results_dataset_name,
@@ -24,16 +35,11 @@ def get_leaderboard_df(results_dataset_name: str) -> pd.DataFrame:
24
 
25
  # Return an empty DataFrame with expected columns
26
  logger.warning("Failed to load configuration", exc_info=True)
27
- return pd.DataFrame(
28
- columns=[
29
- "System Name",
30
- "System Type",
31
- "Organization",
32
- "Success Rate (%)",
33
- "Problems Solved",
34
- "Submitted On",
35
- ]
36
- )
37
 
38
  rows = []
39
  for submission_id in tqdm(
@@ -53,18 +59,20 @@ def get_leaderboard_df(results_dataset_name: str) -> pd.DataFrame:
53
  logger.warning(f"Skipping {submission_id} due to invalid did_pass values")
54
  continue
55
 
 
56
  success_rate = 100 * submission_df["did_pass"].mean()
57
- num_solved = submission_df["did_pass"].sum()
 
58
  first_row = submission_df.iloc[0]
59
 
60
  rows.append(
61
  {
62
- "System Name": first_row["system_name"],
63
- "System Type": first_row["system_type"],
64
- "Organization": first_row["organization"],
65
- "Success Rate (%)": success_rate,
66
- "Problems Solved": num_solved,
67
- "Submitted On": pd.to_datetime(first_row["submission_ts"]).strftime("%Y-%m-%d %H:%M"),
68
  }
69
  )
70
 
 
15
  @brief Creates a dataframe from all the individual experiment results.
16
  """
17
 
18
+ empty_df = pd.DataFrame(
19
+ columns=[
20
+ AutoEvalColumn.system.name,
21
+ AutoEvalColumn.organization.name,
22
+ AutoEvalColumn.success_rate_overall.name,
23
+ AutoEvalColumn.success_rate_tier1.name,
24
+ AutoEvalColumn.success_rate_tier2.name,
25
+ AutoEvalColumn.submitted_on.name,
26
+ ]
27
+ )
28
+
29
  try:
30
  configs = get_dataset_config_names(
31
  results_dataset_name,
 
35
 
36
  # Return an empty DataFrame with expected columns
37
  logger.warning("Failed to load configuration", exc_info=True)
38
+ return empty_df
39
+
40
+ if configs == ["default"]:
41
+ logger.info("Dataset has only default config — treating as empty")
42
+ return empty_df
 
 
 
 
 
43
 
44
  rows = []
45
  for submission_id in tqdm(
 
59
  logger.warning(f"Skipping {submission_id} due to invalid did_pass values")
60
  continue
61
 
62
+ assert submission_df["tier"].isin([1, 2]).all(), "Invalid tier values found in submission_df"
63
  success_rate = 100 * submission_df["did_pass"].mean()
64
+ tier1_success_rate = 100 * submission_df[submission_df["tier"] == 1]["did_pass"].mean()
65
+ tier2_success_rate = 100 * submission_df[submission_df["tier"] == 2]["did_pass"].mean()
66
  first_row = submission_df.iloc[0]
67
 
68
  rows.append(
69
  {
70
+ AutoEvalColumn.system.name: first_row["system_name"],
71
+ AutoEvalColumn.organization.name: first_row["organization"],
72
+ AutoEvalColumn.success_rate_overall.name: success_rate,
73
+ AutoEvalColumn.success_rate_tier1.name: tier1_success_rate,
74
+ AutoEvalColumn.success_rate_tier2.name: tier2_success_rate,
75
+ AutoEvalColumn.submitted_on.name: pd.to_datetime(first_row["submission_ts"]).strftime("%Y-%m-%d %H:%M"),
76
  }
77
  )
78
 
src/submission/submit.py CHANGED
@@ -33,7 +33,6 @@ def add_new_solutions(
33
  user_id: str,
34
  system_name: str,
35
  org: str,
36
- sys_type: str,
37
  submission_path: str,
38
  is_warmup_dataset: bool,
39
  ensure_all_present: bool = False,
@@ -44,6 +43,10 @@ def add_new_solutions(
44
  except (DatasetNotFoundError, FileNotFoundError):
45
  submitted_ids = []
46
 
 
 
 
 
47
  logger.info(f"Found {len(submitted_ids)} submissions")
48
 
49
  # Rate limits:
@@ -80,16 +83,14 @@ def add_new_solutions(
80
  return styled_error("The leaderboard has reached its submission capacity for now. Please try again later.")
81
 
82
  logger.info(
83
- f"Adding new submission: {system_name=}, {org=}, {sys_type=} and {submission_path=}",
84
  )
85
 
86
  # Double-checking.
87
- for val in [system_name, org, sys_type]:
88
  assert is_valid(val)
89
  assert is_submission_file_valid(submission_path, is_warmup_dataset=is_warmup_dataset)
90
 
91
- sys_type = ModelType.from_str(sys_type).name
92
-
93
  try:
94
  submission_df = pd.read_json(submission_path, lines=True)
95
  if ensure_all_present:
@@ -111,7 +112,6 @@ def add_new_solutions(
111
  **row,
112
  "system_name": system_name,
113
  "organization": org,
114
- "system_type": sys_type,
115
  "submission_id": submission_id,
116
  "submission_ts": submission_ts,
117
  "evaluation_id": "", # This will be set later when the evaluation is launched in the backend
@@ -173,6 +173,7 @@ def fetch_user_info(oauth_token: gr.OAuthToken | None) -> dict | None:
173
  logger.exception("Cannot get user info")
174
  return None
175
 
 
176
  def _validate_all_submissions_present(
177
  lbdb: F1Data,
178
  pd_ds: pd.DataFrame,
 
33
  user_id: str,
34
  system_name: str,
35
  org: str,
 
36
  submission_path: str,
37
  is_warmup_dataset: bool,
38
  ensure_all_present: bool = False,
 
43
  except (DatasetNotFoundError, FileNotFoundError):
44
  submitted_ids = []
45
 
46
+ if submitted_ids == ["default"]:
47
+ # means empty dataset
48
+ submitted_ids = []
49
+
50
  logger.info(f"Found {len(submitted_ids)} submissions")
51
 
52
  # Rate limits:
 
83
  return styled_error("The leaderboard has reached its submission capacity for now. Please try again later.")
84
 
85
  logger.info(
86
+ f"Adding new submission: {system_name=}, {org=}, and {submission_path=}",
87
  )
88
 
89
  # Double-checking.
90
+ for val in [system_name, org]:
91
  assert is_valid(val)
92
  assert is_submission_file_valid(submission_path, is_warmup_dataset=is_warmup_dataset)
93
 
 
 
94
  try:
95
  submission_df = pd.read_json(submission_path, lines=True)
96
  if ensure_all_present:
 
112
  **row,
113
  "system_name": system_name,
114
  "organization": org,
 
115
  "submission_id": submission_id,
116
  "submission_ts": submission_ts,
117
  "evaluation_id": "", # This will be set later when the evaluation is launched in the backend
 
173
  logger.exception("Cannot get user info")
174
  return None
175
 
176
+
177
  def _validate_all_submissions_present(
178
  lbdb: F1Data,
179
  pd_ds: pd.DataFrame,