SondosMB committed
Commit 13f9f4d · verified · 1 Parent(s): 05bf560

Update small.json

Files changed (1)
  1. small.json +93 -92
small.json CHANGED
Previous version of small.json as shown in the diff (removed lines marked "-", unchanged context unmarked):

@@ -1,85 +1,94 @@
 [
-  {
-    "model": "OPT (1.3B)",
-    "Average": 7.84,
-    "MMLU": 7.4,
-    "WinoGrande": 12.47,
-    "PiQA": 4.45,
-    "CommonsenseQA": 7.61,
-    "Race": 13.61,
-    "MedMCQA": 1.25,
-    "OpenkookQA": 4.48
   },
   {
-    "model": "SlimPajama",
-    "Average": 9.54,
     "MMLU": 9.22,
     "WinoGrande": 14.76,
     "PiQA": 5.32,
     "CommonsenseQA": 9.01,
     "Race": 16.19,
     "MedMCQA": 1.68,
-    "OpenkookQA": 5.7
-  },
-  {
-    "model": "OLMo (1B)",
-    "Average": 8.8,
-    "MMLU": 8.54,
-    "WinoGrande": 6.16,
-    "PiQA": 8.05,
-    "CommonsenseQA": 13.1,
-    "Race": 13.61,
-    "MedMCQA": 2.1,
-    "OpenkookQA": 6.11
-  },
-  {
-    "model": "GPT-Neo (1.3B)",
-    "Average": 7.38,
-    "MMLU": 6.94,
-    "WinoGrande": 10.81,
-    "PiQA": 4.31,
-    "CommonsenseQA": 6.34,
-    "Race": 13.75,
-    "MedMCQA": 2.63,
-    "OpenkookQA": 4.89
-  },
-  {
-    "model": "Cerebras-GPT (1.3B)",
-    "Average": 4.84,
-    "MMLU": 5.37,
-    "WinoGrande": 9.31,
-    "PiQA": 2.16,
-    "CommonsenseQA": 6.2,
-    "Race": 6.9,
-    "MedMCQA": 1.04,
-    "OpenkookQA": 3.46
   },
-  {
     "model": "RedPajama (1B)",
-    "Average": 9.01,
     "MMLU": 9.21,
     "WinoGrande": 16.97,
-    "PiQA": 1.39,
     "CommonsenseQA": 11.41,
     "Race": 14.35,
     "MedMCQA": 1.86,
     "OpenkookQA": 3.87
   },
   {
     "model": "Pythia (1.4B)",
-    "Average": 8.73,
     "MMLU": 9.66,
     "WinoGrande": 11.52,
     "PiQA": 4.17,
     "CommonsenseQA": 9.01,
     "Race": 12.76,
     "MedMCQA": 3.19,
-    "OpenkookQA": 5.3
   },
-  {
     "model": "TinyLLama (1.1B)",
-    "Average": 8.39,
     "MMLU": 8.94,
     "WinoGrande": 12.23,
     "PiQA": 3.59,
     "CommonsenseQA": 6.06,
@@ -87,48 +96,40 @@
     "MedMCQA": 2.07,
     "OpenkookQA": 4.68
   },
-  {
-    "model": "OELM (1B)",
-    "Average": 8.99,
-    "MMLU": 9.03,
-    "WinoGrande": 10.18,
-    "PiQA": 9.05,
-    "CommonsenseQA": 7.75,
-    "Race": 12.78,
-    "MedMCQA": 2.5,
-    "OpenkookQA": 6.31
-  },
-  {
-    "model": "Phi-3-mini-128k-instruct (3.8B)",
-    "Average": 39.73,
-    "MMLU": 36.97,
-    "WinoGrande": 46.88,
-    "PiQA": 32.04,
-    "CommonsenseQA": 49.15,
-    "Race": 37.81,
-    "MedMCQA": 22.61,
-    "OpenkookQA": 33.6
   },
   {
-    "model": "Gemma (2B)",
-    "Average": 17.37,
-    "MMLU": 17.52,
-    "WinoGrande": 22.68,
-    "PiQA": 15.09,
-    "CommonsenseQA": 27.46,
-    "Race": 14.32,
-    "MedMCQA": 4.57,
-    "OpenkookQA": 14.26
   },
   {
-    "model": "Qwen (1.8B)",
-    "Average": 21.61,
-    "MMLU": 10.0,
-    "WinoGrande": 40.97,
-    "PiQA": 15.52,
-    "CommonsenseQA": 31.13,
-    "Race": 34.91,
-    "MedMCQA": 4.7,
-    "OpenkookQA": 20.37
   }
 ]
 
Updated version of small.json as shown in the diff (added lines marked "+", unchanged context unmarked):

 [
+  {
+    "model": "Phi-3-mini-128k-instruct (3.8B)",
+    "Average": 40.00,
+    "MMLU": 36.97,
+    "ARC": 60.94,
+    "WinoGrande": 46.88,
+    "PiQA": 32.04,
+    "CommonsenseQA": 49.15,
+    "Race": 37.81,
+    "MedMCQA": 22.61,
+    "OpenkookQA": 33.60
+  },
+  {
+    "model": "Qwen1.5 (1.8B)",
+    "Average": 21.68,
+    "MMLU": 9.99,
+    "ARC": 15.84,
+    "WinoGrande": 40.96,
+    "PiQA": 15.52,
+    "CommonsenseQA": 31.13,
+    "Race": 34.91,
+    "MedMCQA": 4.7,
+    "OpenkookQA": 20.37
+  },
+  {
+    "model": "Gemma (2B)",
+    "Average": 16.66,
+    "MMLU": 17.52,
+    "ARC": 23.93,
+    "WinoGrande": 16.10,
+    "PiQA": 15.09,
+    "CommonsenseQA": 27.46,
+    "Race": 14.32,
+    "MedMCQA": 4.57,
+    "OpenkookQA": 14.26
   },
+
   {
+    "model": "SlimPajama-DC (1.3B)",
+    "Average": 9.60,
     "MMLU": 9.22,
+    "ARC": 14.95,
     "WinoGrande": 14.76,
     "PiQA": 5.32,
     "CommonsenseQA": 9.01,
     "Race": 16.19,
     "MedMCQA": 1.68,
+    "OpenkookQA": 5.70
   },
+  {
     "model": "RedPajama (1B)",
+    "Average": 9.00,
     "MMLU": 9.21,
+    "ARC": 13.5,
     "WinoGrande": 16.97,
+    "PiQA": 0.86,
     "CommonsenseQA": 11.41,
     "Race": 14.35,
     "MedMCQA": 1.86,
     "OpenkookQA": 3.87
   },
   {
+    "model": "OLMo (1.2B)",
+    "Average": 8.85,
+    "MMLU": 8.54,
+    "ARC": 13.18,
+    "WinoGrande": 6.16,
+    "PiQA": 8.05,
+    "CommonsenseQA": 13.10,
+    "Race": 13.61,
+    "MedMCQA": 2.07,
+    "OpenkookQA": 6.11
+  },
+  {
     "model": "Pythia (1.4B)",
+    "Average": 8.79,
     "MMLU": 9.66,
+    "ARC": 14.69,
     "WinoGrande": 11.52,
     "PiQA": 4.17,
     "CommonsenseQA": 9.01,
     "Race": 12.76,
     "MedMCQA": 3.19,
+    "OpenkookQA": 5.30
   },
+  {
     "model": "TinyLLama (1.1B)",
+    "Average": 8.45,
     "MMLU": 8.94,
+    "ARC": 13.31,
     "WinoGrande": 12.23,
     "PiQA": 3.59,
     "CommonsenseQA": 6.06,
     "MedMCQA": 2.07,
     "OpenkookQA": 4.68
   },
+  {
+    "model": "OPT (1.3B)",
+    "Average": 7.89,
+    "MMLU": 7.40,
+    "ARC": 11.83,
+    "WinoGrande": 12.47,
+    "PiQA": 4.48,
+    "CommonsenseQA": 7.61,
+    "Race": 13.61,
+    "MedMCQA": 1.25,
+    "OpenkookQA": 4.48
   },
   {
+    "model": "GPT-Neo (1.3B)",
+    "Average": 7.42,
+    "MMLU": 6.94,
+    "ARC": 6.69,
+    "WinoGrande": 10.81,
+    "PiQA": 4.31,
+    "CommonsenseQA": 6.34,
+    "Race": 13.75,
+    "MedMCQA": 2.63,
+    "OpenkookQA": 4.89
   },
   {
+    "model": "Cerebras-GPT (1.3B)",
+    "Average": 4.86,
+    "MMLU": 5.37,
+    "ARC": 4.43,
+    "WinoGrande": 9.31,
+    "PiQA": 2.16,
+    "CommonsenseQA": 6.2,
+    "Race": 6.9,
+    "MedMCQA": 1.04,
+    "OpenkookQA": 3.46
   }
 ]
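In the updated file, the "Average" field matches the arithmetic mean of the eight benchmark scores in each record; for example, Phi-3: (36.97 + 60.94 + 46.88 + 32.04 + 49.15 + 37.81 + 22.61 + 33.60) / 8 = 40.00. The snippet below is a minimal sketch, not part of this repository, for recomputing and checking those averages; it assumes small.json is available locally, and the names TASKS and check_averages are placeholders chosen here.

```python
import json

# Benchmark columns stored per model in small.json after this commit.
TASKS = ["MMLU", "ARC", "WinoGrande", "PiQA",
         "CommonsenseQA", "Race", "MedMCQA", "OpenkookQA"]

def check_averages(path="small.json", tol=0.01):
    """Recompute each model's Average as the mean of its task scores
    and flag entries whose stored value disagrees beyond `tol`."""
    with open(path) as f:
        records = json.load(f)
    # List models from highest to lowest stored Average, as in the file.
    for rec in sorted(records, key=lambda r: r["Average"], reverse=True):
        scores = [rec[t] for t in TASKS if t in rec]  # tolerate missing columns
        mean = sum(scores) / len(scores)
        flag = "" if abs(mean - rec["Average"]) <= tol else "  <-- mismatch"
        print(f'{rec["model"]:35s} stored={rec["Average"]:6.2f} '
              f'recomputed={mean:6.2f}{flag}')

if __name__ == "__main__":
    check_averages()
```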