sequelbox committed on
Commit
6041847
1 Parent(s): f77a408

updated evals

Browse files
Files changed (1) hide show
  1. README.md +22 -22
README.md CHANGED
@@ -46,20 +46,8 @@ model-index:
46
  num_few_shot: 5
47
  metrics:
48
  - type: acc
49
- value: 69.85
50
  name: acc
51
- - task:
52
- type: text-generation
53
- name: Text Generation
54
- dataset:
55
- name: ARC Challenge (25-Shot)
56
- type: arc_challenge
57
- args:
58
- num_few_shot: 25
59
- metrics:
60
- - type: acc_norm
61
- value: 46.25
62
- name: normalized accuracy
63
  - task:
64
  type: text-generation
65
  name: Text Generation
@@ -70,7 +58,7 @@ model-index:
70
  num_few_shot: 5
71
  metrics:
72
  - type: acc
73
- value: 56.25
74
  name: acc
75
  - task:
76
  type: text-generation
@@ -82,7 +70,7 @@ model-index:
82
  num_few_shot: 5
83
  metrics:
84
  - type: acc
85
- value: 63.55
86
  name: acc
87
  - task:
88
  type: text-generation
@@ -94,7 +82,7 @@ model-index:
94
  num_few_shot: 5
95
  metrics:
96
  - type: acc
97
- value: 41.0
98
  name: acc
99
  - task:
100
  type: text-generation
@@ -106,7 +94,7 @@ model-index:
106
  num_few_shot: 5
107
  metrics:
108
  - type: acc
109
- value: 41.38
110
  name: acc
111
  - task:
112
  type: text-generation
@@ -118,7 +106,7 @@ model-index:
118
  num_few_shot: 5
119
  metrics:
120
  - type: acc
121
- value: 34.31
122
  name: acc
123
  - task:
124
  type: text-generation
@@ -132,6 +120,18 @@ model-index:
132
  - type: acc
133
  value: 35.76
134
  name: acc
 
 
 
 
 
 
 
 
 
 
 
 
135
  - task:
136
  type: text-generation
137
  name: Text Generation
@@ -142,7 +142,7 @@ model-index:
142
  num_few_shot: 5
143
  metrics:
144
  - type: acc
145
- value: 48.0
146
  name: acc
147
  - task:
148
  type: text-generation
@@ -154,19 +154,19 @@ model-index:
154
  num_few_shot: 5
155
  metrics:
156
  - type: acc
157
- value: 58.0
158
  name: acc
159
  - task:
160
  type: text-generation
161
  name: Text Generation
162
  dataset:
163
- name: MMLU STEM (5-shot)
164
  type: mmlu
165
  args:
166
  num_few_shot: 5
167
  metrics:
168
  - type: acc
169
- value: 45.54
170
  name: acc
171
  - task:
172
  type: text-generation
 
46
  num_few_shot: 5
47
  metrics:
48
  - type: acc
49
+ value: 69.14
50
  name: acc
 
 
 
 
 
 
 
 
 
 
 
 
51
  - task:
52
  type: text-generation
53
  name: Text Generation
 
58
  num_few_shot: 5
59
  metrics:
60
  - type: acc
61
+ value: 64.58
62
  name: acc
63
  - task:
64
  type: text-generation
 
70
  num_few_shot: 5
71
  metrics:
72
  - type: acc
73
+ value: 70.32
74
  name: acc
75
  - task:
76
  type: text-generation
 
82
  num_few_shot: 5
83
  metrics:
84
  - type: acc
85
+ value: 44.00
86
  name: acc
87
  - task:
88
  type: text-generation
 
94
  num_few_shot: 5
95
  metrics:
96
  - type: acc
97
+ value: 50.25
98
  name: acc
99
  - task:
100
  type: text-generation
 
106
  num_few_shot: 5
107
  metrics:
108
  - type: acc
109
+ value: 42.16
110
  name: acc
111
  - task:
112
  type: text-generation
 
120
  - type: acc
121
  value: 35.76
122
  name: acc
123
+ - task:
124
+ type: text-generation
125
+ name: Text Generation
126
+ dataset:
127
+ name: MMLU Conceptual Physics (5-shot)
128
+ type: mmlu
129
+ args:
130
+ num_few_shot: 5
131
+ metrics:
132
+ - type: acc
133
+ value: 53.19
134
+ name: acc
135
  - task:
136
  type: text-generation
137
  name: Text Generation
 
142
  num_few_shot: 5
143
  metrics:
144
  - type: acc
145
+ value: 53.00
146
  name: acc
147
  - task:
148
  type: text-generation
 
154
  num_few_shot: 5
155
  metrics:
156
  - type: acc
157
+ value: 61.00
158
  name: acc
159
  - task:
160
  type: text-generation
161
  name: Text Generation
162
  dataset:
163
+ name: MMLU Astronomy (5-shot)
164
  type: mmlu
165
  args:
166
  num_few_shot: 5
167
  metrics:
168
  - type: acc
169
+ value: 60.53
170
  name: acc
171
  - task:
172
  type: text-generation