|
{ |
|
"humaneval": { |
|
"pass@1": [ |
|
[ |
|
0, |
|
1.0 |
|
], |
|
[ |
|
1, |
|
0.15000000000000002 |
|
], |
|
[ |
|
2, |
|
1.0 |
|
], |
|
[ |
|
3, |
|
1.0 |
|
], |
|
[ |
|
4, |
|
1.0 |
|
], |
|
[ |
|
5, |
|
0.7999999999999999 |
|
], |
|
[ |
|
6, |
|
0.0 |
|
], |
|
[ |
|
7, |
|
1.0 |
|
], |
|
[ |
|
8, |
|
0.050000000000000044 |
|
], |
|
[ |
|
9, |
|
0.8999999999999999 |
|
], |
|
[ |
|
10, |
|
0.95 |
|
], |
|
[ |
|
11, |
|
1.0 |
|
], |
|
[ |
|
12, |
|
1.0 |
|
], |
|
[ |
|
13, |
|
1.0 |
|
], |
|
[ |
|
14, |
|
0.15000000000000002 |
|
], |
|
[ |
|
15, |
|
1.0 |
|
], |
|
[ |
|
16, |
|
1.0 |
|
], |
|
[ |
|
17, |
|
0.0 |
|
], |
|
[ |
|
18, |
|
0.3500000000000001 |
|
], |
|
[ |
|
19, |
|
0.20000000000000007 |
|
], |
|
[ |
|
20, |
|
0.7999999999999999 |
|
], |
|
[ |
|
21, |
|
1.0 |
|
], |
|
[ |
|
22, |
|
1.0 |
|
], |
|
[ |
|
23, |
|
1.0 |
|
], |
|
[ |
|
24, |
|
1.0 |
|
], |
|
[ |
|
25, |
|
1.0 |
|
], |
|
[ |
|
26, |
|
0.30000000000000004 |
|
], |
|
[ |
|
27, |
|
1.0 |
|
], |
|
[ |
|
28, |
|
1.0 |
|
], |
|
[ |
|
29, |
|
1.0 |
|
], |
|
[ |
|
30, |
|
1.0 |
|
], |
|
[ |
|
31, |
|
1.0 |
|
], |
|
[ |
|
32, |
|
0.0 |
|
], |
|
[ |
|
33, |
|
0.0 |
|
], |
|
[ |
|
34, |
|
1.0 |
|
], |
|
[ |
|
35, |
|
1.0 |
|
], |
|
[ |
|
36, |
|
0.5499999999999998 |
|
], |
|
[ |
|
37, |
|
0.6000000000000001 |
|
], |
|
[ |
|
38, |
|
1.0 |
|
], |
|
[ |
|
39, |
|
0.0 |
|
], |
|
[ |
|
40, |
|
1.0 |
|
], |
|
[ |
|
41, |
|
0.0 |
|
], |
|
[ |
|
42, |
|
1.0 |
|
], |
|
[ |
|
43, |
|
1.0 |
|
], |
|
[ |
|
44, |
|
1.0 |
|
], |
|
[ |
|
45, |
|
1.0 |
|
], |
|
[ |
|
46, |
|
0.95 |
|
], |
|
[ |
|
47, |
|
1.0 |
|
], |
|
[ |
|
48, |
|
1.0 |
|
], |
|
[ |
|
49, |
|
1.0 |
|
], |
|
[ |
|
50, |
|
1.0 |
|
], |
|
[ |
|
51, |
|
1.0 |
|
], |
|
[ |
|
52, |
|
0.6000000000000001 |
|
], |
|
[ |
|
53, |
|
1.0 |
|
], |
|
[ |
|
54, |
|
0.0 |
|
], |
|
[ |
|
55, |
|
1.0 |
|
], |
|
[ |
|
56, |
|
1.0 |
|
], |
|
[ |
|
57, |
|
1.0 |
|
], |
|
[ |
|
58, |
|
1.0 |
|
], |
|
[ |
|
59, |
|
0.7999999999999999 |
|
], |
|
[ |
|
60, |
|
1.0 |
|
], |
|
[ |
|
61, |
|
0.85 |
|
], |
|
[ |
|
62, |
|
1.0 |
|
], |
|
[ |
|
63, |
|
0.5499999999999998 |
|
], |
|
[ |
|
64, |
|
0.0 |
|
], |
|
[ |
|
65, |
|
1.0 |
|
], |
|
[ |
|
66, |
|
1.0 |
|
], |
|
[ |
|
67, |
|
0.0 |
|
], |
|
[ |
|
68, |
|
1.0 |
|
], |
|
[ |
|
69, |
|
0.0 |
|
], |
|
[ |
|
70, |
|
1.0 |
|
], |
|
[ |
|
71, |
|
0.95 |
|
], |
|
[ |
|
72, |
|
0.95 |
|
], |
|
[ |
|
73, |
|
0.0 |
|
], |
|
[ |
|
74, |
|
0.20000000000000007 |
|
], |
|
[ |
|
75, |
|
0.0 |
|
], |
|
[ |
|
76, |
|
0.95 |
|
], |
|
[ |
|
77, |
|
0.050000000000000044 |
|
], |
|
[ |
|
78, |
|
0.3500000000000001 |
|
], |
|
[ |
|
79, |
|
0.85 |
|
], |
|
[ |
|
80, |
|
1.0 |
|
], |
|
[ |
|
81, |
|
0.0 |
|
], |
|
[ |
|
82, |
|
0.3500000000000001 |
|
], |
|
[ |
|
83, |
|
0.0 |
|
], |
|
[ |
|
84, |
|
0.050000000000000044 |
|
], |
|
[ |
|
85, |
|
0.95 |
|
], |
|
[ |
|
86, |
|
0.85 |
|
], |
|
[ |
|
87, |
|
0.95 |
|
], |
|
[ |
|
88, |
|
0.85 |
|
], |
|
[ |
|
89, |
|
0.6000000000000001 |
|
], |
|
[ |
|
90, |
|
0.0 |
|
], |
|
[ |
|
91, |
|
0.0 |
|
], |
|
[ |
|
92, |
|
0.95 |
|
], |
|
[ |
|
93, |
|
0.0 |
|
], |
|
[ |
|
94, |
|
0.3500000000000001 |
|
], |
|
[ |
|
95, |
|
0.7999999999999999 |
|
], |
|
[ |
|
96, |
|
0.4999999999999999 |
|
], |
|
[ |
|
97, |
|
1.0 |
|
], |
|
[ |
|
98, |
|
0.7 |
|
], |
|
[ |
|
99, |
|
0.0 |
|
], |
|
[ |
|
100, |
|
0.0 |
|
], |
|
[ |
|
101, |
|
0.95 |
|
], |
|
[ |
|
102, |
|
0.0 |
|
], |
|
[ |
|
103, |
|
0.4999999999999999 |
|
], |
|
[ |
|
104, |
|
0.75 |
|
], |
|
[ |
|
105, |
|
0.20000000000000007 |
|
], |
|
[ |
|
106, |
|
0.0 |
|
], |
|
[ |
|
107, |
|
0.44999999999999984 |
|
], |
|
[ |
|
108, |
|
0.0 |
|
], |
|
[ |
|
109, |
|
0.15000000000000002 |
|
], |
|
[ |
|
110, |
|
0.0 |
|
], |
|
[ |
|
111, |
|
0.30000000000000004 |
|
], |
|
[ |
|
112, |
|
1.0 |
|
], |
|
[ |
|
113, |
|
0.0 |
|
], |
|
[ |
|
114, |
|
0.85 |
|
], |
|
[ |
|
115, |
|
0.050000000000000044 |
|
], |
|
[ |
|
116, |
|
1.0 |
|
], |
|
[ |
|
117, |
|
0.3500000000000001 |
|
], |
|
[ |
|
118, |
|
0.0 |
|
], |
|
[ |
|
119, |
|
0.0 |
|
], |
|
[ |
|
120, |
|
0.0 |
|
], |
|
[ |
|
121, |
|
0.20000000000000007 |
|
], |
|
[ |
|
122, |
|
0.8999999999999999 |
|
], |
|
[ |
|
123, |
|
0.0 |
|
], |
|
[ |
|
124, |
|
0.0 |
|
], |
|
[ |
|
125, |
|
0.25 |
|
], |
|
[ |
|
126, |
|
0.0 |
|
], |
|
[ |
|
127, |
|
0.050000000000000044 |
|
], |
|
[ |
|
128, |
|
0.4999999999999999 |
|
], |
|
[ |
|
129, |
|
0.0 |
|
], |
|
[ |
|
130, |
|
0.0 |
|
], |
|
[ |
|
131, |
|
0.44999999999999984 |
|
], |
|
[ |
|
132, |
|
0.0 |
|
], |
|
[ |
|
133, |
|
0.09999999999999998 |
|
], |
|
[ |
|
134, |
|
0.0 |
|
], |
|
[ |
|
135, |
|
0.0 |
|
], |
|
[ |
|
136, |
|
0.6000000000000001 |
|
], |
|
[ |
|
137, |
|
0.0 |
|
], |
|
[ |
|
138, |
|
0.3500000000000001 |
|
], |
|
[ |
|
139, |
|
0.0 |
|
], |
|
[ |
|
140, |
|
0.0 |
|
], |
|
[ |
|
141, |
|
0.30000000000000004 |
|
], |
|
[ |
|
142, |
|
0.6000000000000001 |
|
], |
|
[ |
|
143, |
|
0.65 |
|
], |
|
[ |
|
144, |
|
0.0 |
|
], |
|
[ |
|
145, |
|
0.0 |
|
], |
|
[ |
|
146, |
|
0.95 |
|
], |
|
[ |
|
147, |
|
0.7999999999999999 |
|
], |
|
[ |
|
148, |
|
0.6000000000000001 |
|
], |
|
[ |
|
149, |
|
0.15000000000000002 |
|
], |
|
[ |
|
150, |
|
0.95 |
|
], |
|
[ |
|
151, |
|
0.20000000000000007 |
|
], |
|
[ |
|
152, |
|
1.0 |
|
], |
|
[ |
|
153, |
|
0.09999999999999998 |
|
], |
|
[ |
|
154, |
|
0.20000000000000007 |
|
], |
|
[ |
|
155, |
|
0.44999999999999984 |
|
], |
|
[ |
|
156, |
|
0.15000000000000002 |
|
], |
|
[ |
|
157, |
|
0.15000000000000002 |
|
], |
|
[ |
|
158, |
|
1.0 |
|
], |
|
[ |
|
159, |
|
0.0 |
|
], |
|
[ |
|
160, |
|
0.0 |
|
], |
|
[ |
|
161, |
|
0.5499999999999998 |
|
], |
|
[ |
|
162, |
|
0.0 |
|
], |
|
[ |
|
163, |
|
0.0 |
|
] |
|
], |
|
"pass@10": [ |
|
[ |
|
0, |
|
1.0 |
|
], |
|
[ |
|
1, |
|
0.8947368421052632 |
|
], |
|
[ |
|
2, |
|
1.0 |
|
], |
|
[ |
|
3, |
|
1.0 |
|
], |
|
[ |
|
4, |
|
1.0 |
|
], |
|
[ |
|
5, |
|
1.0 |
|
], |
|
[ |
|
6, |
|
0.0 |
|
], |
|
[ |
|
7, |
|
1.0 |
|
], |
|
[ |
|
8, |
|
0.5 |
|
], |
|
[ |
|
9, |
|
1.0 |
|
], |
|
[ |
|
10, |
|
1.0 |
|
], |
|
[ |
|
11, |
|
1.0 |
|
], |
|
[ |
|
12, |
|
1.0 |
|
], |
|
[ |
|
13, |
|
1.0 |
|
], |
|
[ |
|
14, |
|
0.8947368421052632 |
|
], |
|
[ |
|
15, |
|
1.0 |
|
], |
|
[ |
|
16, |
|
1.0 |
|
], |
|
[ |
|
17, |
|
0.0 |
|
], |
|
[ |
|
18, |
|
0.9984520123839009 |
|
], |
|
[ |
|
19, |
|
0.956656346749226 |
|
], |
|
[ |
|
20, |
|
1.0 |
|
], |
|
[ |
|
21, |
|
1.0 |
|
], |
|
[ |
|
22, |
|
1.0 |
|
], |
|
[ |
|
23, |
|
1.0 |
|
], |
|
[ |
|
24, |
|
1.0 |
|
], |
|
[ |
|
25, |
|
1.0 |
|
], |
|
[ |
|
26, |
|
0.9945820433436533 |
|
], |
|
[ |
|
27, |
|
1.0 |
|
], |
|
[ |
|
28, |
|
1.0 |
|
], |
|
[ |
|
29, |
|
1.0 |
|
], |
|
[ |
|
30, |
|
1.0 |
|
], |
|
[ |
|
31, |
|
1.0 |
|
], |
|
[ |
|
32, |
|
0.0 |
|
], |
|
[ |
|
33, |
|
0.0 |
|
], |
|
[ |
|
34, |
|
1.0 |
|
], |
|
[ |
|
35, |
|
1.0 |
|
], |
|
[ |
|
36, |
|
1.0 |
|
], |
|
[ |
|
37, |
|
1.0 |
|
], |
|
[ |
|
38, |
|
1.0 |
|
], |
|
[ |
|
39, |
|
0.0 |
|
], |
|
[ |
|
40, |
|
1.0 |
|
], |
|
[ |
|
41, |
|
0.0 |
|
], |
|
[ |
|
42, |
|
1.0 |
|
], |
|
[ |
|
43, |
|
1.0 |
|
], |
|
[ |
|
44, |
|
1.0 |
|
], |
|
[ |
|
45, |
|
1.0 |
|
], |
|
[ |
|
46, |
|
1.0 |
|
], |
|
[ |
|
47, |
|
1.0 |
|
], |
|
[ |
|
48, |
|
1.0 |
|
], |
|
[ |
|
49, |
|
1.0 |
|
], |
|
[ |
|
50, |
|
1.0 |
|
], |
|
[ |
|
51, |
|
1.0 |
|
], |
|
[ |
|
52, |
|
1.0 |
|
], |
|
[ |
|
53, |
|
1.0 |
|
], |
|
[ |
|
54, |
|
0.0 |
|
], |
|
[ |
|
55, |
|
1.0 |
|
], |
|
[ |
|
56, |
|
1.0 |
|
], |
|
[ |
|
57, |
|
1.0 |
|
], |
|
[ |
|
58, |
|
1.0 |
|
], |
|
[ |
|
59, |
|
1.0 |
|
], |
|
[ |
|
60, |
|
1.0 |
|
], |
|
[ |
|
61, |
|
1.0 |
|
], |
|
[ |
|
62, |
|
1.0 |
|
], |
|
[ |
|
63, |
|
1.0 |
|
], |
|
[ |
|
64, |
|
0.0 |
|
], |
|
[ |
|
65, |
|
1.0 |
|
], |
|
[ |
|
66, |
|
1.0 |
|
], |
|
[ |
|
67, |
|
0.0 |
|
], |
|
[ |
|
68, |
|
1.0 |
|
], |
|
[ |
|
69, |
|
0.0 |
|
], |
|
[ |
|
70, |
|
1.0 |
|
], |
|
[ |
|
71, |
|
1.0 |
|
], |
|
[ |
|
72, |
|
1.0 |
|
], |
|
[ |
|
73, |
|
0.0 |
|
], |
|
[ |
|
74, |
|
0.956656346749226 |
|
], |
|
[ |
|
75, |
|
0.0 |
|
], |
|
[ |
|
76, |
|
1.0 |
|
], |
|
[ |
|
77, |
|
0.5 |
|
], |
|
[ |
|
78, |
|
0.9984520123839009 |
|
], |
|
[ |
|
79, |
|
1.0 |
|
], |
|
[ |
|
80, |
|
1.0 |
|
], |
|
[ |
|
81, |
|
0.0 |
|
], |
|
[ |
|
82, |
|
0.9984520123839009 |
|
], |
|
[ |
|
83, |
|
0.0 |
|
], |
|
[ |
|
84, |
|
0.5 |
|
], |
|
[ |
|
85, |
|
1.0 |
|
], |
|
[ |
|
86, |
|
1.0 |
|
], |
|
[ |
|
87, |
|
1.0 |
|
], |
|
[ |
|
88, |
|
1.0 |
|
], |
|
[ |
|
89, |
|
1.0 |
|
], |
|
[ |
|
90, |
|
0.0 |
|
], |
|
[ |
|
91, |
|
0.0 |
|
], |
|
[ |
|
92, |
|
1.0 |
|
], |
|
[ |
|
93, |
|
0.0 |
|
], |
|
[ |
|
94, |
|
0.9984520123839009 |
|
], |
|
[ |
|
95, |
|
1.0 |
|
], |
|
[ |
|
96, |
|
0.9999945874558878 |
|
], |
|
[ |
|
97, |
|
1.0 |
|
], |
|
[ |
|
98, |
|
1.0 |
|
], |
|
[ |
|
99, |
|
0.0 |
|
], |
|
[ |
|
100, |
|
0.0 |
|
], |
|
[ |
|
101, |
|
1.0 |
|
], |
|
[ |
|
102, |
|
0.0 |
|
], |
|
[ |
|
103, |
|
0.9999945874558878 |
|
], |
|
[ |
|
104, |
|
1.0 |
|
], |
|
[ |
|
105, |
|
0.956656346749226 |
|
], |
|
[ |
|
106, |
|
0.0 |
|
], |
|
[ |
|
107, |
|
0.9999404620147654 |
|
], |
|
[ |
|
108, |
|
0.0 |
|
], |
|
[ |
|
109, |
|
0.8947368421052632 |
|
], |
|
[ |
|
110, |
|
0.0 |
|
], |
|
[ |
|
111, |
|
0.9945820433436533 |
|
], |
|
[ |
|
112, |
|
1.0 |
|
], |
|
[ |
|
113, |
|
0.0 |
|
], |
|
[ |
|
114, |
|
1.0 |
|
], |
|
[ |
|
115, |
|
0.5 |
|
], |
|
[ |
|
116, |
|
1.0 |
|
], |
|
[ |
|
117, |
|
0.9984520123839009 |
|
], |
|
[ |
|
118, |
|
0.0 |
|
], |
|
[ |
|
119, |
|
0.0 |
|
], |
|
[ |
|
120, |
|
0.0 |
|
], |
|
[ |
|
121, |
|
0.956656346749226 |
|
], |
|
[ |
|
122, |
|
1.0 |
|
], |
|
[ |
|
123, |
|
0.0 |
|
], |
|
[ |
|
124, |
|
0.0 |
|
], |
|
[ |
|
125, |
|
0.9837461300309598 |
|
], |
|
[ |
|
126, |
|
0.0 |
|
], |
|
[ |
|
127, |
|
0.5 |
|
], |
|
[ |
|
128, |
|
0.9999945874558878 |
|
], |
|
[ |
|
129, |
|
0.0 |
|
], |
|
[ |
|
130, |
|
0.0 |
|
], |
|
[ |
|
131, |
|
0.9999404620147654 |
|
], |
|
[ |
|
132, |
|
0.0 |
|
], |
|
[ |
|
133, |
|
0.763157894736842 |
|
], |
|
[ |
|
134, |
|
0.0 |
|
], |
|
[ |
|
135, |
|
0.0 |
|
], |
|
[ |
|
136, |
|
1.0 |
|
], |
|
[ |
|
137, |
|
0.0 |
|
], |
|
[ |
|
138, |
|
0.9984520123839009 |
|
], |
|
[ |
|
139, |
|
0.0 |
|
], |
|
[ |
|
140, |
|
0.0 |
|
], |
|
[ |
|
141, |
|
0.9945820433436533 |
|
], |
|
[ |
|
142, |
|
1.0 |
|
], |
|
[ |
|
143, |
|
1.0 |
|
], |
|
[ |
|
144, |
|
0.0 |
|
], |
|
[ |
|
145, |
|
0.0 |
|
], |
|
[ |
|
146, |
|
1.0 |
|
], |
|
[ |
|
147, |
|
1.0 |
|
], |
|
[ |
|
148, |
|
1.0 |
|
], |
|
[ |
|
149, |
|
0.8947368421052632 |
|
], |
|
[ |
|
150, |
|
1.0 |
|
], |
|
[ |
|
151, |
|
0.956656346749226 |
|
], |
|
[ |
|
152, |
|
1.0 |
|
], |
|
[ |
|
153, |
|
0.763157894736842 |
|
], |
|
[ |
|
154, |
|
0.956656346749226 |
|
], |
|
[ |
|
155, |
|
0.9999404620147654 |
|
], |
|
[ |
|
156, |
|
0.8947368421052632 |
|
], |
|
[ |
|
157, |
|
0.8947368421052632 |
|
], |
|
[ |
|
158, |
|
1.0 |
|
], |
|
[ |
|
159, |
|
0.0 |
|
], |
|
[ |
|
160, |
|
0.0 |
|
], |
|
[ |
|
161, |
|
1.0 |
|
], |
|
[ |
|
162, |
|
0.0 |
|
], |
|
[ |
|
163, |
|
0.0 |
|
] |
|
] |
|
}, |
|
"config": { |
|
"prefix": "", |
|
"do_sample": true, |
|
"temperature": 0.2, |
|
"top_k": 0, |
|
"top_p": 0.95, |
|
"n_samples": 20, |
|
"eos": "<|endoftext|>", |
|
"seed": 0, |
|
"model": "deepseek-coder-33b-base", |
|
"modeltype": "causal", |
|
"peft_model": null, |
|
"revision": null, |
|
"use_auth_token": false, |
|
"trust_remote_code": false, |
|
"tasks": "humaneval", |
|
"instruction_tokens": null, |
|
"batch_size": 1, |
|
"max_length_generation": 512, |
|
"precision": "fp32", |
|
"load_in_8bit": false, |
|
"load_in_4bit": false, |
|
"left_padding": false, |
|
"limit": null, |
|
"limit_start": 0, |
|
"save_every_k_tasks": -1, |
|
"postprocess": true, |
|
"allow_code_execution": true, |
|
"generation_only": false, |
|
"load_generations_path": "generations_humaneval_deepseek-coder-33b-base.json", |
|
"load_data_path": null, |
|
"metric_output_path": "deepseek_coder_33b-base_evaluation_results.json", |
|
"save_generations": false, |
|
"load_generations_intermediate_paths": null, |
|
"save_generations_path": "generations.json", |
|
"save_references": false, |
|
"save_references_path": "references.json", |
|
"prompt": "prompt", |
|
"max_memory_per_gpu": null, |
|
"check_references": false |
|
} |
|
} |