yuchenlin commited on
Commit
01ebdd6
·
verified ·
1 Parent(s): 01d80b4

Update ZeroEval-main/result_dirs/zebra-grid.summary.json

Browse files
ZeroEval-main/result_dirs/zebra-grid.summary.json CHANGED
@@ -1,4 +1,21 @@
1
  [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  {
3
  "Model": "o3-mini-2025-01-31-high",
4
  "Mode": "greedy",
@@ -50,6 +67,23 @@
50
  "N_Mode": "single",
51
  "N_Size": 1
52
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  {
54
  "Model": "deepseek-R1",
55
  "Mode": "greedy",
 
1
  [
2
+ {
3
+ "Model": "grok-3-mini-fast-beta-high",
4
+ "Mode": "greedy",
5
+ "Puzzle Acc": "92.60",
6
+ "Cell Acc": "94.63",
7
+ "No answer": "1.00",
8
+ "Easy Puzzle Acc": "98.93",
9
+ "Hard Puzzle Acc": "90.14",
10
+ "Small Puzzle Acc": "98.75",
11
+ "Medium Puzzle Acc": "96.43",
12
+ "Large Puzzle Acc": "93.50",
13
+ "XL Puzzle Acc": "76.50",
14
+ "Total Puzzles": 1000,
15
+ "Reason Lens": "782.25",
16
+ "N_Mode": "single",
17
+ "N_Size": 1
18
+ },
19
  {
20
  "Model": "o3-mini-2025-01-31-high",
21
  "Mode": "greedy",
 
67
  "N_Mode": "single",
68
  "N_Size": 1
69
  },
70
+ {
71
+ "Model": "grok-3-mini-fast-beta-low",
72
+ "Mode": "greedy",
73
+ "Puzzle Acc": "80.70",
74
+ "Cell Acc": "84.22",
75
+ "No answer": "0.00",
76
+ "Easy Puzzle Acc": "98.57",
77
+ "Hard Puzzle Acc": "73.75",
78
+ "Small Puzzle Acc": "98.75",
79
+ "Medium Puzzle Acc": "96.43",
80
+ "Large Puzzle Acc": "77.00",
81
+ "XL Puzzle Acc": "33.50",
82
+ "Total Puzzles": 1000,
83
+ "Reason Lens": "874.09",
84
+ "N_Mode": "single",
85
+ "N_Size": 1
86
+ },
87
  {
88
  "Model": "deepseek-R1",
89
  "Mode": "greedy",