test / evaluate_result /7_deepseek_coder_33b-base-results.json
Mengyuan Liu
Upload 71 files
dfe37be verified
{
"humaneval": {
"pass@1": [
[
0,
1.0
],
[
1,
0.15000000000000002
],
[
2,
1.0
],
[
3,
1.0
],
[
4,
1.0
],
[
5,
0.7999999999999999
],
[
6,
0.0
],
[
7,
1.0
],
[
8,
0.050000000000000044
],
[
9,
0.8999999999999999
],
[
10,
0.95
],
[
11,
1.0
],
[
12,
1.0
],
[
13,
1.0
],
[
14,
0.15000000000000002
],
[
15,
1.0
],
[
16,
1.0
],
[
17,
0.0
],
[
18,
0.3500000000000001
],
[
19,
0.20000000000000007
],
[
20,
0.7999999999999999
],
[
21,
1.0
],
[
22,
1.0
],
[
23,
1.0
],
[
24,
1.0
],
[
25,
1.0
],
[
26,
0.30000000000000004
],
[
27,
1.0
],
[
28,
1.0
],
[
29,
1.0
],
[
30,
1.0
],
[
31,
1.0
],
[
32,
0.0
],
[
33,
0.0
],
[
34,
1.0
],
[
35,
1.0
],
[
36,
0.5499999999999998
],
[
37,
0.6000000000000001
],
[
38,
1.0
],
[
39,
0.0
],
[
40,
1.0
],
[
41,
0.0
],
[
42,
1.0
],
[
43,
1.0
],
[
44,
1.0
],
[
45,
1.0
],
[
46,
0.95
],
[
47,
1.0
],
[
48,
1.0
],
[
49,
1.0
],
[
50,
1.0
],
[
51,
1.0
],
[
52,
0.6000000000000001
],
[
53,
1.0
],
[
54,
0.0
],
[
55,
1.0
],
[
56,
1.0
],
[
57,
1.0
],
[
58,
1.0
],
[
59,
0.7999999999999999
],
[
60,
1.0
],
[
61,
0.85
],
[
62,
1.0
],
[
63,
0.5499999999999998
],
[
64,
0.0
],
[
65,
1.0
],
[
66,
1.0
],
[
67,
0.0
],
[
68,
1.0
],
[
69,
0.0
],
[
70,
1.0
],
[
71,
0.95
],
[
72,
0.95
],
[
73,
0.0
],
[
74,
0.20000000000000007
],
[
75,
0.0
],
[
76,
0.95
],
[
77,
0.050000000000000044
],
[
78,
0.3500000000000001
],
[
79,
0.85
],
[
80,
1.0
],
[
81,
0.0
],
[
82,
0.3500000000000001
],
[
83,
0.0
],
[
84,
0.050000000000000044
],
[
85,
0.95
],
[
86,
0.85
],
[
87,
0.95
],
[
88,
0.85
],
[
89,
0.6000000000000001
],
[
90,
0.0
],
[
91,
0.0
],
[
92,
0.95
],
[
93,
0.0
],
[
94,
0.3500000000000001
],
[
95,
0.7999999999999999
],
[
96,
0.4999999999999999
],
[
97,
1.0
],
[
98,
0.7
],
[
99,
0.0
],
[
100,
0.0
],
[
101,
0.95
],
[
102,
0.0
],
[
103,
0.4999999999999999
],
[
104,
0.75
],
[
105,
0.20000000000000007
],
[
106,
0.0
],
[
107,
0.44999999999999984
],
[
108,
0.0
],
[
109,
0.15000000000000002
],
[
110,
0.0
],
[
111,
0.30000000000000004
],
[
112,
1.0
],
[
113,
0.0
],
[
114,
0.85
],
[
115,
0.050000000000000044
],
[
116,
1.0
],
[
117,
0.3500000000000001
],
[
118,
0.0
],
[
119,
0.0
],
[
120,
0.0
],
[
121,
0.20000000000000007
],
[
122,
0.8999999999999999
],
[
123,
0.0
],
[
124,
0.0
],
[
125,
0.25
],
[
126,
0.0
],
[
127,
0.050000000000000044
],
[
128,
0.4999999999999999
],
[
129,
0.0
],
[
130,
0.0
],
[
131,
0.44999999999999984
],
[
132,
0.0
],
[
133,
0.09999999999999998
],
[
134,
0.0
],
[
135,
0.0
],
[
136,
0.6000000000000001
],
[
137,
0.0
],
[
138,
0.3500000000000001
],
[
139,
0.0
],
[
140,
0.0
],
[
141,
0.30000000000000004
],
[
142,
0.6000000000000001
],
[
143,
0.65
],
[
144,
0.0
],
[
145,
0.0
],
[
146,
0.95
],
[
147,
0.7999999999999999
],
[
148,
0.6000000000000001
],
[
149,
0.15000000000000002
],
[
150,
0.95
],
[
151,
0.20000000000000007
],
[
152,
1.0
],
[
153,
0.09999999999999998
],
[
154,
0.20000000000000007
],
[
155,
0.44999999999999984
],
[
156,
0.15000000000000002
],
[
157,
0.15000000000000002
],
[
158,
1.0
],
[
159,
0.0
],
[
160,
0.0
],
[
161,
0.5499999999999998
],
[
162,
0.0
],
[
163,
0.0
]
],
"pass@10": [
[
0,
1.0
],
[
1,
0.8947368421052632
],
[
2,
1.0
],
[
3,
1.0
],
[
4,
1.0
],
[
5,
1.0
],
[
6,
0.0
],
[
7,
1.0
],
[
8,
0.5
],
[
9,
1.0
],
[
10,
1.0
],
[
11,
1.0
],
[
12,
1.0
],
[
13,
1.0
],
[
14,
0.8947368421052632
],
[
15,
1.0
],
[
16,
1.0
],
[
17,
0.0
],
[
18,
0.9984520123839009
],
[
19,
0.956656346749226
],
[
20,
1.0
],
[
21,
1.0
],
[
22,
1.0
],
[
23,
1.0
],
[
24,
1.0
],
[
25,
1.0
],
[
26,
0.9945820433436533
],
[
27,
1.0
],
[
28,
1.0
],
[
29,
1.0
],
[
30,
1.0
],
[
31,
1.0
],
[
32,
0.0
],
[
33,
0.0
],
[
34,
1.0
],
[
35,
1.0
],
[
36,
1.0
],
[
37,
1.0
],
[
38,
1.0
],
[
39,
0.0
],
[
40,
1.0
],
[
41,
0.0
],
[
42,
1.0
],
[
43,
1.0
],
[
44,
1.0
],
[
45,
1.0
],
[
46,
1.0
],
[
47,
1.0
],
[
48,
1.0
],
[
49,
1.0
],
[
50,
1.0
],
[
51,
1.0
],
[
52,
1.0
],
[
53,
1.0
],
[
54,
0.0
],
[
55,
1.0
],
[
56,
1.0
],
[
57,
1.0
],
[
58,
1.0
],
[
59,
1.0
],
[
60,
1.0
],
[
61,
1.0
],
[
62,
1.0
],
[
63,
1.0
],
[
64,
0.0
],
[
65,
1.0
],
[
66,
1.0
],
[
67,
0.0
],
[
68,
1.0
],
[
69,
0.0
],
[
70,
1.0
],
[
71,
1.0
],
[
72,
1.0
],
[
73,
0.0
],
[
74,
0.956656346749226
],
[
75,
0.0
],
[
76,
1.0
],
[
77,
0.5
],
[
78,
0.9984520123839009
],
[
79,
1.0
],
[
80,
1.0
],
[
81,
0.0
],
[
82,
0.9984520123839009
],
[
83,
0.0
],
[
84,
0.5
],
[
85,
1.0
],
[
86,
1.0
],
[
87,
1.0
],
[
88,
1.0
],
[
89,
1.0
],
[
90,
0.0
],
[
91,
0.0
],
[
92,
1.0
],
[
93,
0.0
],
[
94,
0.9984520123839009
],
[
95,
1.0
],
[
96,
0.9999945874558878
],
[
97,
1.0
],
[
98,
1.0
],
[
99,
0.0
],
[
100,
0.0
],
[
101,
1.0
],
[
102,
0.0
],
[
103,
0.9999945874558878
],
[
104,
1.0
],
[
105,
0.956656346749226
],
[
106,
0.0
],
[
107,
0.9999404620147654
],
[
108,
0.0
],
[
109,
0.8947368421052632
],
[
110,
0.0
],
[
111,
0.9945820433436533
],
[
112,
1.0
],
[
113,
0.0
],
[
114,
1.0
],
[
115,
0.5
],
[
116,
1.0
],
[
117,
0.9984520123839009
],
[
118,
0.0
],
[
119,
0.0
],
[
120,
0.0
],
[
121,
0.956656346749226
],
[
122,
1.0
],
[
123,
0.0
],
[
124,
0.0
],
[
125,
0.9837461300309598
],
[
126,
0.0
],
[
127,
0.5
],
[
128,
0.9999945874558878
],
[
129,
0.0
],
[
130,
0.0
],
[
131,
0.9999404620147654
],
[
132,
0.0
],
[
133,
0.763157894736842
],
[
134,
0.0
],
[
135,
0.0
],
[
136,
1.0
],
[
137,
0.0
],
[
138,
0.9984520123839009
],
[
139,
0.0
],
[
140,
0.0
],
[
141,
0.9945820433436533
],
[
142,
1.0
],
[
143,
1.0
],
[
144,
0.0
],
[
145,
0.0
],
[
146,
1.0
],
[
147,
1.0
],
[
148,
1.0
],
[
149,
0.8947368421052632
],
[
150,
1.0
],
[
151,
0.956656346749226
],
[
152,
1.0
],
[
153,
0.763157894736842
],
[
154,
0.956656346749226
],
[
155,
0.9999404620147654
],
[
156,
0.8947368421052632
],
[
157,
0.8947368421052632
],
[
158,
1.0
],
[
159,
0.0
],
[
160,
0.0
],
[
161,
1.0
],
[
162,
0.0
],
[
163,
0.0
]
]
},
"config": {
"prefix": "",
"do_sample": true,
"temperature": 0.2,
"top_k": 0,
"top_p": 0.95,
"n_samples": 20,
"eos": "<|endoftext|>",
"seed": 0,
"model": "deepseek-coder-33b-base",
"modeltype": "causal",
"peft_model": null,
"revision": null,
"use_auth_token": false,
"trust_remote_code": false,
"tasks": "humaneval",
"instruction_tokens": null,
"batch_size": 1,
"max_length_generation": 512,
"precision": "fp32",
"load_in_8bit": false,
"load_in_4bit": false,
"left_padding": false,
"limit": null,
"limit_start": 0,
"save_every_k_tasks": -1,
"postprocess": true,
"allow_code_execution": true,
"generation_only": false,
"load_generations_path": "generations_humaneval_deepseek-coder-33b-base.json",
"load_data_path": null,
"metric_output_path": "deepseek_coder_33b-base_evaluation_results.json",
"save_generations": false,
"load_generations_intermediate_paths": null,
"save_generations_path": "generations.json",
"save_references": false,
"save_references_path": "references.json",
"prompt": "prompt",
"max_memory_per_gpu": null,
"check_references": false
}
}