Alvinn-aai commited on
Commit
54e1175
·
1 Parent(s): 322ffad

Add Tier 1 and Tier 2 success-rate columns, drop the system-type field, handle empty-dataset edge cases

Browse files
app.py CHANGED
@@ -69,7 +69,7 @@ def init_leaderboard(dataframe: pd.DataFrame):
69
  cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
70
  label="Select Columns to Display:",
71
  ),
72
- search_columns=[AutoEvalColumn.system.name, AutoEvalColumn.system_type.name],
73
  hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
74
  bool_checkboxgroup_label="Hide models",
75
  interactive=False,
@@ -81,7 +81,6 @@ def init_leaderboard(dataframe: pd.DataFrame):
81
  def add_solution_cbk(
82
  system_name: str,
83
  org: str,
84
- sys_type: str,
85
  submission_path: str,
86
  profile: gr.OAuthProfile | None,
87
  oauth_token: gr.OAuthToken | None,
@@ -128,7 +127,6 @@ def add_solution_cbk(
128
  for val, val_name in [
129
  (system_name, "System name"),
130
  (org, "Organisation name"),
131
- (sys_type, "System type"),
132
  ]:
133
  if len(val) == 0:
134
  return styled_error(f"Please fill in the '{val_name}' field.")
@@ -149,7 +147,6 @@ def add_solution_cbk(
149
  user_id,
150
  system_name,
151
  org,
152
- sys_type,
153
  submission_path,
154
  is_warmup_dataset=(SPLIT == "warmup"),
155
  ensure_all_present=ENSURE_ALL_PRESENT,
@@ -258,13 +255,13 @@ with blocks:
258
  with gr.Column():
259
  system_name_textbox = gr.Textbox(label=AutoEvalColumn.system.name)
260
  org_textbox = gr.Textbox(label=AutoEvalColumn.organization.name)
261
- sys_type_dropdown = gr.Dropdown(
262
- choices=[t.to_str() for t in ModelType],
263
- label=AutoEvalColumn.system_type.name,
264
- multiselect=False,
265
- value=ModelType.LLM.to_str(),
266
- interactive=True,
267
- )
268
 
269
  submission_file = gr.File(label="JSONL solutions file", file_types=[".jsonl"])
270
 
@@ -278,7 +275,6 @@ with blocks:
278
  [
279
  system_name_textbox,
280
  org_textbox,
281
- sys_type_dropdown,
282
  submission_file,
283
  ],
284
  submission_result,
 
69
  cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
70
  label="Select Columns to Display:",
71
  ),
72
+ search_columns=[AutoEvalColumn.system.name, AutoEvalColumn.organization.name],
73
  hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
74
  bool_checkboxgroup_label="Hide models",
75
  interactive=False,
 
81
  def add_solution_cbk(
82
  system_name: str,
83
  org: str,
 
84
  submission_path: str,
85
  profile: gr.OAuthProfile | None,
86
  oauth_token: gr.OAuthToken | None,
 
127
  for val, val_name in [
128
  (system_name, "System name"),
129
  (org, "Organisation name"),
 
130
  ]:
131
  if len(val) == 0:
132
  return styled_error(f"Please fill in the '{val_name}' field.")
 
147
  user_id,
148
  system_name,
149
  org,
 
150
  submission_path,
151
  is_warmup_dataset=(SPLIT == "warmup"),
152
  ensure_all_present=ENSURE_ALL_PRESENT,
 
255
  with gr.Column():
256
  system_name_textbox = gr.Textbox(label=AutoEvalColumn.system.name)
257
  org_textbox = gr.Textbox(label=AutoEvalColumn.organization.name)
258
+ # sys_type_dropdown = gr.Dropdown(
259
+ # choices=[t.to_str() for t in ModelType],
260
+ # label=AutoEvalColumn.system_type.name,
261
+ # multiselect=False,
262
+ # value=ModelType.LLM.to_str(),
263
+ # interactive=True,
264
+ # )
265
 
266
  submission_file = gr.File(label="JSONL solutions file", file_types=[".jsonl"])
267
 
 
275
  [
276
  system_name_textbox,
277
  org_textbox,
 
278
  submission_file,
279
  ],
280
  submission_result,
dummy_submission.jsonl ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ {"problem_id": "142", "solution": "from dataclasses import dataclass\nfrom typing import TypeAlias\n\nfrom evaluation.graph_structure import Graph\n\nMOD = 1_000_000_007\n\n\n@dataclass(frozen=True)\nclass DPState:\n \"\"\"\n Dynamic Programming state for the Dominating Set problem on tree decompositions.\n This dataclass encapsulates the complete state information needed at each bag\n during the tree decomposition-based dynamic programming algorithm.\n\n Fields:\n ( 1 ) assign_mask : int Bitmask indicating which vertices in the current bag\n are assigned to the dominating set (1 = IN, 0 = OUT).\n Bit i corresponds to the i-th vertex in the sorted bag.\n\n ( 2 ) need_mask : int Bitmask indicating which OUT vertices in the current bag\n still require domination (1 = needs domination, 0 = already dominated).\n Only meaningful for OUT vertices (assign_mask bit = 0).\n IN vertices never have the need bit set since they dominate themselves.\n\n State Invariants:\n - For any bit position i: if (assign_mask >> i) & 1 == 1, then (need_mask >> i) & 1 == 0\n - The need_mask only tracks vertices that are OUT and not yet dominated by adjacent IN vertices\n - When a vertex is forgotten, it must not have the need bit set (invalid state otherwise)\n\n Usage in Algorithm:\n - LEAF: Initialize with vertex either IN (assign=1, need=0) or OUT (assign=0, need=1)\n - INTRODUCE: Insert new bit at appropriate position, update domination status\n - FORGET: Remove bit at appropriate position, reject states with undominated OUT vertices\n - JOIN: Merge compatible states (same assignment), combine need masks with AND operation\n\n All fields are immutable and hashable, making this object suitable as a dictionary key.\n \"\"\"\n\n assign_mask: int\n need_mask: int\n\n\n@dataclass\nclass DPValue:\n \"\"\"\n Dynamic Programming value representing the computational result for a given state.\n This dataclass replaces the previous Tuple[int, int] representation with a more\n structured and 
self-documenting approach for weighted model counting.\n\n Fields:\n ( 1 ) count : int Number of distinct vertex subsets (dominating sets) that\n achieve the current state configuration. This counts the\n multiplicity of solutions that lead to the same DP state.\n\n ( 2 ) weight : int Total weighted sum across all vertex subsets that achieve\n the current state configuration. Each dominating set contributes\n its total vertex weight sum to this field.\n\n Implementation Details:\n - Both fields are maintained modulo MOD (1,000,000,007)\n - The count field enables tracking the number of valid dominating sets\n - The weight field accumulates the total weight contribution from all valid sets\n - When combining values from different DP branches:\n * Counts are multiplied for independent choices\n * Weights are combined using inclusion-exclusion principle to avoid double-counting\n\n This structure enables simultaneous tracking of both solution count and cumulative\n weight during the tree decomposition DP computation.\n \"\"\"\n\n count: int\n weight: int\n\n\nBag: TypeAlias = set[int] # a bag is a set of vertices\n\n\ndef insert_bit(\n mask: int,\n pos: int,\n bit: int,\n) -> int:\n \"\"\"\n Insert `bit` (0/1) at position `pos` (LSB == position 0) in `mask`\n shifting higher bits left by one.\n \"\"\"\n lower = mask & ((1 << pos) - 1)\n higher = mask >> pos\n return lower | (bit << pos) | (higher << (pos + 1))\n\n\ndef remove_bit(\n mask: int,\n pos: int,\n) -> int:\n \"\"\"\n Delete the bit at position `pos` from `mask`, shifting higher bits right.\n \"\"\"\n lower = mask & ((1 << pos) - 1)\n higher = mask >> (pos + 1)\n return lower | (higher << pos)\n\n\ndef bag_tuple(bag: Bag) -> tuple[int, ...]:\n return tuple(sorted(bag))\n\n\ndef bag_selected_weight(\n assign_mask: int,\n bag_vertices: tuple[int, ...],\n vertex_weights: dict[int, int],\n) -> int:\n \"\"\"Sum of weights of vertices in the bag that are selected (IN).\"\"\"\n s = 0\n for idx, v in 
enumerate(bag_vertices):\n if (assign_mask >> idx) & 1:\n s += vertex_weights[v]\n return s % MOD\n\n\ndef accumulate(\n table: dict[DPState, DPValue],\n state: DPState,\n cnt: int,\n wsum: int,\n) -> None:\n \"\"\"Add (cnt, wsum) to existing entry of state inside table (MOD arithmetic).\"\"\"\n cnt %= MOD\n wsum %= MOD\n if state in table:\n existing = table[state]\n table[state] = DPValue(count=(existing.count + cnt) % MOD, weight=(existing.weight + wsum) % MOD)\n else:\n table[state] = DPValue(count=cnt, weight=wsum)\n\n\ndef leaf_callback(\n graph: Graph,\n cur_table: dict[DPState, DPValue],\n cur_bag_info: tuple[int, Bag],\n leaf_vertex: int,\n):\n bag_vertices = bag_tuple(cur_bag_info[1]) # (leaf_vertex,)\n assert len(bag_vertices) == 1 and bag_vertices[0] == leaf_vertex\n\n w_v = graph.vertex_weights[leaf_vertex] if graph.vertex_weights else 1\n\n # Case 1: vertex is IN the dominating set\n assign_mask = 1 # bit 0 == 1\n need_mask = 0 # IN vertices never need domination\n state_in = DPState(assign_mask=assign_mask, need_mask=need_mask)\n accumulate(cur_table, state_in, cnt=1, wsum=w_v % MOD)\n\n # Case 2: vertex is OUT - needs domination\n assign_mask = 0\n need_mask = 1 # NEEDS_DOMINATION\n state_out = DPState(assign_mask=assign_mask, need_mask=need_mask)\n accumulate(cur_table, state_out, cnt=1, wsum=0)\n\n\ndef introduce_callback(\n graph: Graph,\n cur_table: dict[DPState, DPValue],\n cur_bag_info: tuple[int, Bag],\n child_table: dict[DPState, DPValue],\n child_bag_info: tuple[int, Bag],\n introduced_vertex: int,\n):\n parent_vertices = bag_tuple(cur_bag_info[1])\n\n # index at which the new vertex was inserted\n idx_new = parent_vertices.index(introduced_vertex)\n w_new = graph.vertex_weights[introduced_vertex] if graph.vertex_weights else 1\n\n # pre-compute adjacency between introduced vertex and vertices in parent bag\n is_adj = [(v in graph.neighbors(introduced_vertex)) for v in parent_vertices]\n\n for child_state, dp_value in child_table.items():\n 
child_assign, child_need = child_state.assign_mask, child_state.need_mask\n cnt_child, wsum_child = dp_value.count, dp_value.weight\n # \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n # Choice A: new vertex is IN_X\n # \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n assign_in = insert_bit(child_assign, idx_new, 1)\n need_in = insert_bit(child_need, idx_new, 0)\n\n # when y is IN it may dominate some previously undominated OUT vertices\n for idx, adj in enumerate(is_adj):\n if idx == idx_new or not adj:\n continue\n # vertex idx is OUT?\n if (assign_in >> idx) & 1:\n continue # IN vertices never carry NEED flag\n # if it was NEED, clear it\n if (need_in >> idx) & 1:\n need_in &= ~(1 << idx)\n\n cnt_new = cnt_child\n wsum_new = (wsum_child + cnt_child * w_new) % MOD\n state_in = DPState(assign_mask=assign_in, need_mask=need_in)\n accumulate(cur_table, state_in, cnt_new, wsum_new)\n\n # \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n # Choice B: new vertex is NOT_IN_X\n # \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n assign_out = insert_bit(child_assign, idx_new, 0)\n\n # Determine if introduced vertex is already dominated by some\n # IN vertex present in the (extended) bag.\n dominated = False\n for idx, adj in enumerate(is_adj):\n if idx == idx_new or not adj:\n continue\n if (assign_out >> 
idx) & 1: # neighbor is IN\n dominated = True\n break\n need_bit = 0 if dominated else 1\n need_out = insert_bit(child_need, idx_new, need_bit)\n\n # ( no other vertices change status )\n state_out = DPState(assign_mask=assign_out, need_mask=need_out)\n accumulate(cur_table, state_out, cnt_child, wsum_child)\n\n\ndef forget_callback(\n graph: Graph,\n cur_table: dict[DPState, DPValue],\n cur_bag_info: tuple[int, Bag],\n child_table: dict[DPState, DPValue],\n child_bag_info: tuple[int, Bag],\n forgotten_vertex: int,\n):\n child_vertices = bag_tuple(child_bag_info[1])\n idx_forgot = child_vertices.index(forgotten_vertex)\n\n for child_state, dp_value in child_table.items():\n assign_child, need_child = child_state.assign_mask, child_state.need_mask\n cnt_child, wsum_child = dp_value.count, dp_value.weight\n bit_assign = (assign_child >> idx_forgot) & 1\n bit_need = (need_child >> idx_forgot) & 1\n\n # If forgotten vertex is OUT and still needs domination -> invalid state\n if bit_assign == 0 and bit_need == 1:\n continue\n\n assign_par = remove_bit(assign_child, idx_forgot)\n need_par = remove_bit(need_child, idx_forgot)\n\n state_par = DPState(assign_mask=assign_par, need_mask=need_par)\n accumulate(cur_table, state_par, cnt_child, wsum_child)\n\n\ndef join_callback(\n graph: Graph,\n cur_table: dict[DPState, DPValue],\n cur_bag_info: tuple[int, Bag],\n left_child_table: dict[DPState, DPValue],\n left_child_bag_info: tuple[int, Bag],\n right_child_table: dict[DPState, DPValue],\n right_child_bag_info: tuple[int, Bag],\n):\n bag_vertices = bag_tuple(cur_bag_info[1])\n vertex_weights = graph.vertex_weights\n assert vertex_weights is not None\n\n # Group right states by assignment mask for O(|L| + |R|) compatibility\n right_by_assign: dict[int, list[tuple[int, int, int]]] = {}\n for right_state, dp_value in right_child_table.items():\n assign_r, need_r = right_state.assign_mask, right_state.need_mask\n cnt_r, wsum_r = dp_value.count, dp_value.weight\n 
right_by_assign.setdefault(assign_r, []).append((need_r, cnt_r, wsum_r))\n\n for left_state, dp_value in left_child_table.items():\n assign_l, need_l = left_state.assign_mask, left_state.need_mask\n cnt_l, wsum_l = dp_value.count, dp_value.weight\n if assign_l not in right_by_assign:\n continue\n for need_r, cnt_r, wsum_r in right_by_assign[assign_l]:\n # Merge NEED flags: dominated if dominated in either side\n need_merge = need_l & need_r # bitwise AND keeps 1 only if both have NEED\n\n cnt_merge = (cnt_l * cnt_r) % MOD\n\n w_bag_sel = bag_selected_weight(assign_l, bag_vertices, vertex_weights)\n w_merge = (wsum_l * cnt_r + wsum_r * cnt_l - cnt_merge * w_bag_sel) % MOD\n if w_merge < 0:\n w_merge += MOD\n\n state_merge = DPState(assign_mask=assign_l, need_mask=need_merge)\n accumulate(cur_table, state_merge, cnt_merge, w_merge)\n\n\ndef extract_solution(root_table: dict[DPState, DPValue]) -> int:\n \"\"\"\n Sum the total weights of all globally valid dominating sets.\n Return -1 if none exist.\n \"\"\"\n answer = 0\n found = False\n for state, dp_value in root_table.items():\n assign_mask, need_mask = state.assign_mask, state.need_mask\n cnt, wsum = dp_value.count, dp_value.weight\n # Bag may be empty or not\n if assign_mask == 0 and need_mask == 0 and cnt == 0:\n # Defensive - shouldn't happen\n continue\n if need_mask != 0:\n # some vertex in root bag still needs domination -> invalid\n continue\n answer = (answer + wsum) % MOD\n found = True\n return answer if found else -1\n"}
2
+ {"problem_id": "120", "solution": "from dataclasses import dataclass\nfrom typing import TypeAlias\n\nfrom evaluation.graph_structure import Graph\n\nMOD = 1_000_000_007\n\n\n@dataclass(frozen=True)\nclass DPState:\n \"\"\"\n Dynamic Programming state for the Dominating Set problem on tree decompositions.\n This dataclass encapsulates the complete state information needed at each bag\n during the tree decomposition-based dynamic programming algorithm.\n\n Fields:\n ( 1 ) assign_mask : int Bitmask indicating which vertices in the current bag\n are assigned to the dominating set (1 = IN, 0 = OUT).\n Bit i corresponds to the i-th vertex in the sorted bag.\n\n ( 2 ) need_mask : int Bitmask indicating which OUT vertices in the current bag\n still require domination (1 = needs domination, 0 = already dominated).\n Only meaningful for OUT vertices (assign_mask bit = 0).\n IN vertices never have the need bit set since they dominate themselves.\n\n State Invariants:\n - For any bit position i: if (assign_mask >> i) & 1 == 1, then (need_mask >> i) & 1 == 0\n - The need_mask only tracks vertices that are OUT and not yet dominated by adjacent IN vertices\n - When a vertex is forgotten, it must not have the need bit set (invalid state otherwise)\n\n Usage in Algorithm:\n - LEAF: Initialize with vertex either IN (assign=1, need=0) or OUT (assign=0, need=1)\n - INTRODUCE: Insert new bit at appropriate position, update domination status\n - FORGET: Remove bit at appropriate position, reject states with undominated OUT vertices\n - JOIN: Merge compatible states (same assignment), combine need masks with AND operation\n\n All fields are immutable and hashable, making this object suitable as a dictionary key.\n \"\"\"\n\n assign_mask: int\n need_mask: int\n\n\n@dataclass\nclass DPValue:\n \"\"\"\n Dynamic Programming value representing the computational result for a given state.\n This dataclass replaces the previous Tuple[int, int] representation with a more\n structured and 
self-documenting approach for weighted model counting.\n\n Fields:\n ( 1 ) count : int Number of distinct vertex subsets (dominating sets) that\n achieve the current state configuration. This counts the\n multiplicity of solutions that lead to the same DP state.\n\n ( 2 ) weight : int Total weighted sum across all vertex subsets that achieve\n the current state configuration. Each dominating set contributes\n its total vertex weight sum to this field.\n\n Implementation Details:\n - Both fields are maintained modulo MOD (1,000,000,007)\n - The count field enables tracking the number of valid dominating sets\n - The weight field accumulates the total weight contribution from all valid sets\n - When combining values from different DP branches:\n * Counts are multiplied for independent choices\n * Weights are combined using inclusion-exclusion principle to avoid double-counting\n\n This structure enables simultaneous tracking of both solution count and cumulative\n weight during the tree decomposition DP computation.\n \"\"\"\n\n count: int\n weight: int\n\n\nBag: TypeAlias = set[int] # a bag is a set of vertices\n\n\ndef insert_bit(\n mask: int,\n pos: int,\n bit: int,\n) -> int:\n \"\"\"\n Insert `bit` (0/1) at position `pos` (LSB == position 0) in `mask`\n shifting higher bits left by one.\n \"\"\"\n lower = mask & ((1 << pos) - 1)\n higher = mask >> pos\n return lower | (bit << pos) | (higher << (pos + 1))\n\n\ndef remove_bit(\n mask: int,\n pos: int,\n) -> int:\n \"\"\"\n Delete the bit at position `pos` from `mask`, shifting higher bits right.\n \"\"\"\n lower = mask & ((1 << pos) - 1)\n higher = mask >> (pos + 1)\n return lower | (higher << pos)\n\n\ndef bag_tuple(bag: Bag) -> tuple[int, ...]:\n return tuple(sorted(bag))\n\n\ndef bag_selected_weight(\n assign_mask: int,\n bag_vertices: tuple[int, ...],\n vertex_weights: dict[int, int],\n) -> int:\n \"\"\"Sum of weights of vertices in the bag that are selected (IN).\"\"\"\n s = 0\n for idx, v in 
enumerate(bag_vertices):\n if (assign_mask >> idx) & 1:\n s += vertex_weights[v]\n return s % MOD\n\n\ndef accumulate(\n table: dict[DPState, DPValue],\n state: DPState,\n cnt: int,\n wsum: int,\n) -> None:\n \"\"\"Add (cnt, wsum) to existing entry of state inside table (MOD arithmetic).\"\"\"\n cnt %= MOD\n wsum %= MOD\n if state in table:\n existing = table[state]\n table[state] = DPValue(count=(existing.count + cnt) % MOD, weight=(existing.weight + wsum) % MOD)\n else:\n table[state] = DPValue(count=cnt, weight=wsum)\n\n\ndef leaf_callback(\n graph: Graph,\n cur_table: dict[DPState, DPValue],\n cur_bag_info: tuple[int, Bag],\n leaf_vertex: int,\n):\n bag_vertices = bag_tuple(cur_bag_info[1]) # (leaf_vertex,)\n assert len(bag_vertices) == 1 and bag_vertices[0] == leaf_vertex\n\n w_v = graph.vertex_weights[leaf_vertex] if graph.vertex_weights else 1\n\n # Case 1: vertex is IN the dominating set\n assign_mask = 1 # bit 0 == 1\n need_mask = 0 # IN vertices never need domination\n state_in = DPState(assign_mask=assign_mask, need_mask=need_mask)\n accumulate(cur_table, state_in, cnt=1, wsum=w_v % MOD)\n\n # Case 2: vertex is OUT - needs domination\n assign_mask = 0\n need_mask = 1 # NEEDS_DOMINATION\n state_out = DPState(assign_mask=assign_mask, need_mask=need_mask)\n accumulate(cur_table, state_out, cnt=1, wsum=0)\n\n\ndef introduce_callback(\n graph: Graph,\n cur_table: dict[DPState, DPValue],\n cur_bag_info: tuple[int, Bag],\n child_table: dict[DPState, DPValue],\n child_bag_info: tuple[int, Bag],\n introduced_vertex: int,\n):\n parent_vertices = bag_tuple(cur_bag_info[1])\n\n # index at which the new vertex was inserted\n idx_new = parent_vertices.index(introduced_vertex)\n w_new = graph.vertex_weights[introduced_vertex] if graph.vertex_weights else 1\n\n # pre-compute adjacency between introduced vertex and vertices in parent bag\n is_adj = [(v in graph.neighbors(introduced_vertex)) for v in parent_vertices]\n\n for child_state, dp_value in child_table.items():\n 
child_assign, child_need = child_state.assign_mask, child_state.need_mask\n cnt_child, wsum_child = dp_value.count, dp_value.weight\n # \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n # Choice A: new vertex is IN_X\n # \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n assign_in = insert_bit(child_assign, idx_new, 1)\n need_in = insert_bit(child_need, idx_new, 0)\n\n # when y is IN it may dominate some previously undominated OUT vertices\n for idx, adj in enumerate(is_adj):\n if idx == idx_new or not adj:\n continue\n # vertex idx is OUT?\n if (assign_in >> idx) & 1:\n continue # IN vertices never carry NEED flag\n # if it was NEED, clear it\n if (need_in >> idx) & 1:\n need_in &= ~(1 << idx)\n\n cnt_new = cnt_child\n wsum_new = (wsum_child + cnt_child * w_new) % MOD\n state_in = DPState(assign_mask=assign_in, need_mask=need_in)\n accumulate(cur_table, state_in, cnt_new, wsum_new)\n\n # \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n # Choice B: new vertex is NOT_IN_X\n # \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n assign_out = insert_bit(child_assign, idx_new, 0)\n\n # Determine if introduced vertex is already dominated by some\n # IN vertex present in the (extended) bag.\n dominated = False\n for idx, adj in enumerate(is_adj):\n if idx == idx_new or not adj:\n continue\n if (assign_out >> 
idx) & 1: # neighbor is IN\n dominated = True\n break\n need_bit = 0 if dominated else 1\n need_out = insert_bit(child_need, idx_new, need_bit)\n\n # ( no other vertices change status )\n state_out = DPState(assign_mask=assign_out, need_mask=need_out)\n accumulate(cur_table, state_out, cnt_child, wsum_child)\n\n\ndef forget_callback(\n graph: Graph,\n cur_table: dict[DPState, DPValue],\n cur_bag_info: tuple[int, Bag],\n child_table: dict[DPState, DPValue],\n child_bag_info: tuple[int, Bag],\n forgotten_vertex: int,\n):\n child_vertices = bag_tuple(child_bag_info[1])\n idx_forgot = child_vertices.index(forgotten_vertex)\n\n for child_state, dp_value in child_table.items():\n assign_child, need_child = child_state.assign_mask, child_state.need_mask\n cnt_child, wsum_child = dp_value.count, dp_value.weight\n bit_assign = (assign_child >> idx_forgot) & 1\n bit_need = (need_child >> idx_forgot) & 1\n\n # If forgotten vertex is OUT and still needs domination -> invalid state\n if bit_assign == 0 and bit_need == 1:\n continue\n\n assign_par = remove_bit(assign_child, idx_forgot)\n need_par = remove_bit(need_child, idx_forgot)\n\n state_par = DPState(assign_mask=assign_par, need_mask=need_par)\n accumulate(cur_table, state_par, cnt_child, wsum_child)\n\n\ndef join_callback(\n graph: Graph,\n cur_table: dict[DPState, DPValue],\n cur_bag_info: tuple[int, Bag],\n left_child_table: dict[DPState, DPValue],\n left_child_bag_info: tuple[int, Bag],\n right_child_table: dict[DPState, DPValue],\n right_child_bag_info: tuple[int, Bag],\n):\n bag_vertices = bag_tuple(cur_bag_info[1])\n vertex_weights = graph.vertex_weights\n assert vertex_weights is not None\n\n # Group right states by assignment mask for O(|L| + |R|) compatibility\n right_by_assign: dict[int, list[tuple[int, int, int]]] = {}\n for right_state, dp_value in right_child_table.items():\n assign_r, need_r = right_state.assign_mask, right_state.need_mask\n cnt_r, wsum_r = dp_value.count, dp_value.weight\n 
right_by_assign.setdefault(assign_r, []).append((need_r, cnt_r, wsum_r))\n\n for left_state, dp_value in left_child_table.items():\n assign_l, need_l = left_state.assign_mask, left_state.need_mask\n cnt_l, wsum_l = dp_value.count, dp_value.weight\n if assign_l not in right_by_assign:\n continue\n for need_r, cnt_r, wsum_r in right_by_assign[assign_l]:\n # Merge NEED flags: dominated if dominated in either side\n need_merge = need_l & need_r # bitwise AND keeps 1 only if both have NEED\n\n cnt_merge = (cnt_l * cnt_r) % MOD\n\n w_bag_sel = bag_selected_weight(assign_l, bag_vertices, vertex_weights)\n w_merge = (wsum_l * cnt_r + wsum_r * cnt_l - cnt_merge * w_bag_sel) % MOD\n if w_merge < 0:\n w_merge += MOD\n\n state_merge = DPState(assign_mask=assign_l, need_mask=need_merge)\n accumulate(cur_table, state_merge, cnt_merge, w_merge)\n\n\ndef extract_solution(root_table: dict[DPState, DPValue]) -> int:\n \"\"\"\n Sum the total weights of all globally valid dominating sets.\n Return -1 if none exist.\n \"\"\"\n answer = 0\n found = False\n for state, dp_value in root_table.items():\n assign_mask, need_mask = state.assign_mask, state.need_mask\n cnt, wsum = dp_value.count, dp_value.weight\n # Bag may be empty or not\n if assign_mask == 0 and need_mask == 0 and cnt == 0:\n # Defensive - shouldn't happen\n continue\n if need_mask != 0:\n # some vertex in root bag still needs domination -> invalid\n continue\n answer = (answer + wsum) % MOD\n found = True\n return answer if found else -1\n"}
src/display/utils.py CHANGED
@@ -22,10 +22,10 @@ class ColumnContent:
22
  @dataclass(frozen=True)
23
  class AutoEvalColumn:
24
  system = ColumnContent("System Name", "markdown", True, never_hidden=True)
25
- system_type = ColumnContent("System Type", "str", True)
26
  organization = ColumnContent("Organization", "str", True, never_hidden=True)
27
- success_rate = ColumnContent("Success Rate (%)", "number", True)
28
- problems_solved = ColumnContent("Problems Solved", "number", True)
 
29
  submitted_on = ColumnContent("Submitted On", "datetime", True)
30
 
31
 
 
22
  @dataclass(frozen=True)
23
  class AutoEvalColumn:
24
  system = ColumnContent("System Name", "markdown", True, never_hidden=True)
 
25
  organization = ColumnContent("Organization", "str", True, never_hidden=True)
26
+ success_rate_overall = ColumnContent("Overall Success (%)", "number", True)
27
+ success_rate_tier1 = ColumnContent("Tier 1 Success (%)", "number", True)
28
+ success_rate_tier2 = ColumnContent("Tier 2 Success (%)", "number", True)
29
  submitted_on = ColumnContent("Submitted On", "datetime", True)
30
 
31
 
src/populate.py CHANGED
@@ -15,6 +15,17 @@ def get_leaderboard_df(results_dataset_name: str) -> pd.DataFrame:
15
  @brief Creates a dataframe from all the individual experiment results.
16
  """
17
 
 
 
 
 
 
 
 
 
 
 
 
18
  try:
19
  configs = get_dataset_config_names(
20
  results_dataset_name,
@@ -24,16 +35,11 @@ def get_leaderboard_df(results_dataset_name: str) -> pd.DataFrame:
24
 
25
  # Return an empty DataFrame with expected columns
26
  logger.warning("Failed to load configuration", exc_info=True)
27
- return pd.DataFrame(
28
- columns=[
29
- "System Name",
30
- "System Type",
31
- "Organization",
32
- "Success Rate (%)",
33
- "Problems Solved",
34
- "Submitted On",
35
- ]
36
- )
37
 
38
  rows = []
39
  for submission_id in tqdm(
@@ -53,18 +59,20 @@ def get_leaderboard_df(results_dataset_name: str) -> pd.DataFrame:
53
  logger.warning(f"Skipping {submission_id} due to invalid did_pass values")
54
  continue
55
 
 
56
  success_rate = 100 * submission_df["did_pass"].mean()
57
- num_solved = submission_df["did_pass"].sum()
 
58
  first_row = submission_df.iloc[0]
59
 
60
  rows.append(
61
  {
62
- "System Name": first_row["system_name"],
63
- "System Type": first_row["system_type"],
64
- "Organization": first_row["organization"],
65
- "Success Rate (%)": success_rate,
66
- "Problems Solved": num_solved,
67
- "Submitted On": pd.to_datetime(first_row["submission_ts"]).strftime("%Y-%m-%d %H:%M"),
68
  }
69
  )
70
 
 
15
  @brief Creates a dataframe from all the individual experiment results.
16
  """
17
 
18
+ empty_df = pd.DataFrame(
19
+ columns=[
20
+ AutoEvalColumn.system.name,
21
+ AutoEvalColumn.organization.name,
22
+ AutoEvalColumn.success_rate_overall.name,
23
+ AutoEvalColumn.success_rate_tier1.name,
24
+ AutoEvalColumn.success_rate_tier2.name,
25
+ AutoEvalColumn.submitted_on.name,
26
+ ]
27
+ )
28
+
29
  try:
30
  configs = get_dataset_config_names(
31
  results_dataset_name,
 
35
 
36
  # Return an empty DataFrame with expected columns
37
  logger.warning("Failed to load configuration", exc_info=True)
38
+ return empty_df
39
+
40
+ if configs == ["default"]:
41
+ logger.info("Dataset has only default config — treating as empty")
42
+ return empty_df
 
 
 
 
 
43
 
44
  rows = []
45
  for submission_id in tqdm(
 
59
  logger.warning(f"Skipping {submission_id} due to invalid did_pass values")
60
  continue
61
 
62
+ assert submission_df["tier"].isin([1, 2]).all(), "Invalid tier values found in submission_df"
63
  success_rate = 100 * submission_df["did_pass"].mean()
64
+ tier1_success_rate = 100 * submission_df[submission_df["tier"] == 1]["did_pass"].mean()
65
+ tier2_success_rate = 100 * submission_df[submission_df["tier"] == 2]["did_pass"].mean()
66
  first_row = submission_df.iloc[0]
67
 
68
  rows.append(
69
  {
70
+ AutoEvalColumn.system.name: first_row["system_name"],
71
+ AutoEvalColumn.organization.name: first_row["organization"],
72
+ AutoEvalColumn.success_rate_overall.name: success_rate,
73
+ AutoEvalColumn.success_rate_tier1.name: tier1_success_rate,
74
+ AutoEvalColumn.success_rate_tier2.name: tier2_success_rate,
75
+ AutoEvalColumn.submitted_on.name: pd.to_datetime(first_row["submission_ts"]).strftime("%Y-%m-%d %H:%M"),
76
  }
77
  )
78
 
src/submission/submit.py CHANGED
@@ -33,7 +33,6 @@ def add_new_solutions(
33
  user_id: str,
34
  system_name: str,
35
  org: str,
36
- sys_type: str,
37
  submission_path: str,
38
  is_warmup_dataset: bool,
39
  ensure_all_present: bool = False,
@@ -44,6 +43,10 @@ def add_new_solutions(
44
  except (DatasetNotFoundError, FileNotFoundError):
45
  submitted_ids = []
46
 
 
 
 
 
47
  logger.info(f"Found {len(submitted_ids)} submissions")
48
 
49
  # Rate limits:
@@ -80,16 +83,14 @@ def add_new_solutions(
80
  return styled_error("The leaderboard has reached its submission capacity for now. Please try again later.")
81
 
82
  logger.info(
83
- f"Adding new submission: {system_name=}, {org=}, {sys_type=} and {submission_path=}",
84
  )
85
 
86
  # Double-checking.
87
- for val in [system_name, org, sys_type]:
88
  assert is_valid(val)
89
  assert is_submission_file_valid(submission_path, is_warmup_dataset=is_warmup_dataset)
90
 
91
- sys_type = ModelType.from_str(sys_type).name
92
-
93
  try:
94
  submission_df = pd.read_json(submission_path, lines=True)
95
  if ensure_all_present:
@@ -111,7 +112,6 @@ def add_new_solutions(
111
  **row,
112
  "system_name": system_name,
113
  "organization": org,
114
- "system_type": sys_type,
115
  "submission_id": submission_id,
116
  "submission_ts": submission_ts,
117
  "evaluation_id": "", # This will be set later when the evaluation is launched in the backend
@@ -173,6 +173,7 @@ def fetch_user_info(oauth_token: gr.OAuthToken | None) -> dict | None:
173
  logger.exception("Cannot get user info")
174
  return None
175
 
 
176
  def _validate_all_submissions_present(
177
  lbdb: F1Data,
178
  pd_ds: pd.DataFrame,
 
33
  user_id: str,
34
  system_name: str,
35
  org: str,
 
36
  submission_path: str,
37
  is_warmup_dataset: bool,
38
  ensure_all_present: bool = False,
 
43
  except (DatasetNotFoundError, FileNotFoundError):
44
  submitted_ids = []
45
 
46
+ if submitted_ids == ["default"]:
47
+ # means empty dataset
48
+ submitted_ids = []
49
+
50
  logger.info(f"Found {len(submitted_ids)} submissions")
51
 
52
  # Rate limits:
 
83
  return styled_error("The leaderboard has reached its submission capacity for now. Please try again later.")
84
 
85
  logger.info(
86
+ f"Adding new submission: {system_name=}, {org=}, and {submission_path=}",
87
  )
88
 
89
  # Double-checking.
90
+ for val in [system_name, org]:
91
  assert is_valid(val)
92
  assert is_submission_file_valid(submission_path, is_warmup_dataset=is_warmup_dataset)
93
 
 
 
94
  try:
95
  submission_df = pd.read_json(submission_path, lines=True)
96
  if ensure_all_present:
 
112
  **row,
113
  "system_name": system_name,
114
  "organization": org,
 
115
  "submission_id": submission_id,
116
  "submission_ts": submission_ts,
117
  "evaluation_id": "", # This will be set later when the evaluation is launched in the backend
 
173
  logger.exception("Cannot get user info")
174
  return None
175
 
176
+
177
  def _validate_all_submissions_present(
178
  lbdb: F1Data,
179
  pd_ds: pd.DataFrame,