ryanhoangt committed on
Commit
013500c
·
1 Parent(s): 10afdaa

add CoAct v1.0 trajectories

Browse files
outputs/swe_bench_lite/CoActPlannerAgent/claude-3-5-sonnet@20240620_maxiter_40_N_v0.1-no-hint/README.md ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SWE-bench Report
2
+ This folder contains the SWE-bench evaluation results, produced with the [official Docker-based evaluation harness](https://github.com/princeton-nlp/SWE-bench/blob/main/docs/20240627_docker/README.md#choosing-the-right-cache_level).
3
+
4
+ ## Summary
5
+ - total instances: 300
6
+ - submitted instances: 93
7
+ - completed instances: 93
8
+ - empty patch instances: 0
9
+ - resolved instances: 25
10
+ - unresolved instances: 68
11
+ - error instances: 0
12
+ - unstopped instances: 0
13
+
14
+ ## Resolved Instances
15
+ - [django__django-11099](./eval_outputs/django__django-11099/run_instance.log)
16
+ - [django__django-11815](./eval_outputs/django__django-11815/run_instance.log)
17
+ - [django__django-11964](./eval_outputs/django__django-11964/run_instance.log)
18
+ - [django__django-13028](./eval_outputs/django__django-13028/run_instance.log)
19
+ - [django__django-13590](./eval_outputs/django__django-13590/run_instance.log)
20
+ - [django__django-13658](./eval_outputs/django__django-13658/run_instance.log)
21
+ - [django__django-14238](./eval_outputs/django__django-14238/run_instance.log)
22
+ - [django__django-14752](./eval_outputs/django__django-14752/run_instance.log)
23
+ - [django__django-14915](./eval_outputs/django__django-14915/run_instance.log)
24
+ - [django__django-15814](./eval_outputs/django__django-15814/run_instance.log)
25
+ - [django__django-15851](./eval_outputs/django__django-15851/run_instance.log)
26
+ - [django__django-16255](./eval_outputs/django__django-16255/run_instance.log)
27
+ - [django__django-16527](./eval_outputs/django__django-16527/run_instance.log)
28
+ - [matplotlib__matplotlib-25311](./eval_outputs/matplotlib__matplotlib-25311/run_instance.log)
29
+ - [psf__requests-2317](./eval_outputs/psf__requests-2317/run_instance.log)
30
+ - [scikit-learn__scikit-learn-10297](./eval_outputs/scikit-learn__scikit-learn-10297/run_instance.log)
31
+ - [scikit-learn__scikit-learn-13439](./eval_outputs/scikit-learn__scikit-learn-13439/run_instance.log)
32
+ - [scikit-learn__scikit-learn-13496](./eval_outputs/scikit-learn__scikit-learn-13496/run_instance.log)
33
+ - [scikit-learn__scikit-learn-14894](./eval_outputs/scikit-learn__scikit-learn-14894/run_instance.log)
34
+ - [sphinx-doc__sphinx-8595](./eval_outputs/sphinx-doc__sphinx-8595/run_instance.log)
35
+ - [sympy__sympy-17139](./eval_outputs/sympy__sympy-17139/run_instance.log)
36
+ - [sympy__sympy-17655](./eval_outputs/sympy__sympy-17655/run_instance.log)
37
+ - [sympy__sympy-20590](./eval_outputs/sympy__sympy-20590/run_instance.log)
38
+ - [sympy__sympy-22714](./eval_outputs/sympy__sympy-22714/run_instance.log)
39
+ - [sympy__sympy-24213](./eval_outputs/sympy__sympy-24213/run_instance.log)
40
+
41
+ ## Unresolved Instances
42
+ - [astropy__astropy-12907](./eval_outputs/astropy__astropy-12907/run_instance.log)
43
+ - [astropy__astropy-14182](./eval_outputs/astropy__astropy-14182/run_instance.log)
44
+ - [astropy__astropy-14365](./eval_outputs/astropy__astropy-14365/run_instance.log)
45
+ - [astropy__astropy-14995](./eval_outputs/astropy__astropy-14995/run_instance.log)
46
+ - [django__django-10914](./eval_outputs/django__django-10914/run_instance.log)
47
+ - [django__django-11133](./eval_outputs/django__django-11133/run_instance.log)
48
+ - [django__django-11179](./eval_outputs/django__django-11179/run_instance.log)
49
+ - [django__django-11848](./eval_outputs/django__django-11848/run_instance.log)
50
+ - [django__django-11999](./eval_outputs/django__django-11999/run_instance.log)
51
+ - [django__django-12125](./eval_outputs/django__django-12125/run_instance.log)
52
+ - [django__django-12308](./eval_outputs/django__django-12308/run_instance.log)
53
+ - [django__django-12708](./eval_outputs/django__django-12708/run_instance.log)
54
+ - [django__django-13033](./eval_outputs/django__django-13033/run_instance.log)
55
+ - [django__django-13158](./eval_outputs/django__django-13158/run_instance.log)
56
+ - [django__django-13315](./eval_outputs/django__django-13315/run_instance.log)
57
+ - [django__django-13401](./eval_outputs/django__django-13401/run_instance.log)
58
+ - [django__django-13551](./eval_outputs/django__django-13551/run_instance.log)
59
+ - [django__django-13925](./eval_outputs/django__django-13925/run_instance.log)
60
+ - [django__django-13933](./eval_outputs/django__django-13933/run_instance.log)
61
+ - [django__django-13964](./eval_outputs/django__django-13964/run_instance.log)
62
+ - [django__django-14017](./eval_outputs/django__django-14017/run_instance.log)
63
+ - [django__django-14155](./eval_outputs/django__django-14155/run_instance.log)
64
+ - [django__django-14534](./eval_outputs/django__django-14534/run_instance.log)
65
+ - [django__django-14580](./eval_outputs/django__django-14580/run_instance.log)
66
+ - [django__django-14608](./eval_outputs/django__django-14608/run_instance.log)
67
+ - [django__django-14672](./eval_outputs/django__django-14672/run_instance.log)
68
+ - [django__django-14787](./eval_outputs/django__django-14787/run_instance.log)
69
+ - [django__django-14855](./eval_outputs/django__django-14855/run_instance.log)
70
+ - [django__django-14999](./eval_outputs/django__django-14999/run_instance.log)
71
+ - [django__django-15252](./eval_outputs/django__django-15252/run_instance.log)
72
+ - [django__django-15695](./eval_outputs/django__django-15695/run_instance.log)
73
+ - [django__django-16139](./eval_outputs/django__django-16139/run_instance.log)
74
+ - [django__django-16595](./eval_outputs/django__django-16595/run_instance.log)
75
+ - [django__django-17087](./eval_outputs/django__django-17087/run_instance.log)
76
+ - [matplotlib__matplotlib-23299](./eval_outputs/matplotlib__matplotlib-23299/run_instance.log)
77
+ - [matplotlib__matplotlib-23314](./eval_outputs/matplotlib__matplotlib-23314/run_instance.log)
78
+ - [matplotlib__matplotlib-23476](./eval_outputs/matplotlib__matplotlib-23476/run_instance.log)
79
+ - [matplotlib__matplotlib-24149](./eval_outputs/matplotlib__matplotlib-24149/run_instance.log)
80
+ - [matplotlib__matplotlib-24970](./eval_outputs/matplotlib__matplotlib-24970/run_instance.log)
81
+ - [matplotlib__matplotlib-25332](./eval_outputs/matplotlib__matplotlib-25332/run_instance.log)
82
+ - [pydata__xarray-4094](./eval_outputs/pydata__xarray-4094/run_instance.log)
83
+ - [pylint-dev__pylint-7080](./eval_outputs/pylint-dev__pylint-7080/run_instance.log)
84
+ - [pytest-dev__pytest-7432](./eval_outputs/pytest-dev__pytest-7432/run_instance.log)
85
+ - [pytest-dev__pytest-7490](./eval_outputs/pytest-dev__pytest-7490/run_instance.log)
86
+ - [scikit-learn__scikit-learn-13142](./eval_outputs/scikit-learn__scikit-learn-13142/run_instance.log)
87
+ - [scikit-learn__scikit-learn-13779](./eval_outputs/scikit-learn__scikit-learn-13779/run_instance.log)
88
+ - [scikit-learn__scikit-learn-14087](./eval_outputs/scikit-learn__scikit-learn-14087/run_instance.log)
89
+ - [scikit-learn__scikit-learn-14983](./eval_outputs/scikit-learn__scikit-learn-14983/run_instance.log)
90
+ - [scikit-learn__scikit-learn-25747](./eval_outputs/scikit-learn__scikit-learn-25747/run_instance.log)
91
+ - [sphinx-doc__sphinx-11445](./eval_outputs/sphinx-doc__sphinx-11445/run_instance.log)
92
+ - [sphinx-doc__sphinx-8721](./eval_outputs/sphinx-doc__sphinx-8721/run_instance.log)
93
+ - [sympy__sympy-12419](./eval_outputs/sympy__sympy-12419/run_instance.log)
94
+ - [sympy__sympy-12481](./eval_outputs/sympy__sympy-12481/run_instance.log)
95
+ - [sympy__sympy-13031](./eval_outputs/sympy__sympy-13031/run_instance.log)
96
+ - [sympy__sympy-13480](./eval_outputs/sympy__sympy-13480/run_instance.log)
97
+ - [sympy__sympy-13647](./eval_outputs/sympy__sympy-13647/run_instance.log)
98
+ - [sympy__sympy-15345](./eval_outputs/sympy__sympy-15345/run_instance.log)
99
+ - [sympy__sympy-16792](./eval_outputs/sympy__sympy-16792/run_instance.log)
100
+ - [sympy__sympy-17630](./eval_outputs/sympy__sympy-17630/run_instance.log)
101
+ - [sympy__sympy-18189](./eval_outputs/sympy__sympy-18189/run_instance.log)
102
+ - [sympy__sympy-18199](./eval_outputs/sympy__sympy-18199/run_instance.log)
103
+ - [sympy__sympy-18698](./eval_outputs/sympy__sympy-18698/run_instance.log)
104
+ - [sympy__sympy-20154](./eval_outputs/sympy__sympy-20154/run_instance.log)
105
+ - [sympy__sympy-21379](./eval_outputs/sympy__sympy-21379/run_instance.log)
106
+ - [sympy__sympy-21612](./eval_outputs/sympy__sympy-21612/run_instance.log)
107
+ - [sympy__sympy-21847](./eval_outputs/sympy__sympy-21847/run_instance.log)
108
+ - [sympy__sympy-23262](./eval_outputs/sympy__sympy-23262/run_instance.log)
109
+ - [sympy__sympy-24066](./eval_outputs/sympy__sympy-24066/run_instance.log)
110
+
111
+ ## Error Instances
112
+
113
+ ## Empty Patch Instances
114
+
115
+ ## Incomplete Instances
116
+ - [astropy__astropy-6938](./eval_outputs/astropy__astropy-6938/run_instance.log)
117
+ - [astropy__astropy-7746](./eval_outputs/astropy__astropy-7746/run_instance.log)
118
+ - [django__django-10924](./eval_outputs/django__django-10924/run_instance.log)
119
+ - [django__django-11001](./eval_outputs/django__django-11001/run_instance.log)
120
+ - [django__django-11019](./eval_outputs/django__django-11019/run_instance.log)
121
+ - [django__django-11039](./eval_outputs/django__django-11039/run_instance.log)
122
+ - [django__django-11049](./eval_outputs/django__django-11049/run_instance.log)
123
+ - [django__django-11283](./eval_outputs/django__django-11283/run_instance.log)
124
+ - [django__django-11422](./eval_outputs/django__django-11422/run_instance.log)
125
+ - [django__django-11564](./eval_outputs/django__django-11564/run_instance.log)
126
+ - [django__django-11583](./eval_outputs/django__django-11583/run_instance.log)
127
+ - [django__django-11620](./eval_outputs/django__django-11620/run_instance.log)
128
+ - [django__django-11630](./eval_outputs/django__django-11630/run_instance.log)
129
+ - [django__django-11742](./eval_outputs/django__django-11742/run_instance.log)
130
+ - [django__django-11797](./eval_outputs/django__django-11797/run_instance.log)
131
+ - [django__django-11905](./eval_outputs/django__django-11905/run_instance.log)
132
+ - [django__django-11910](./eval_outputs/django__django-11910/run_instance.log)
133
+ - [django__django-12113](./eval_outputs/django__django-12113/run_instance.log)
134
+ - [django__django-12184](./eval_outputs/django__django-12184/run_instance.log)
135
+ - [django__django-12284](./eval_outputs/django__django-12284/run_instance.log)
136
+ - [django__django-12286](./eval_outputs/django__django-12286/run_instance.log)
137
+ - [django__django-12453](./eval_outputs/django__django-12453/run_instance.log)
138
+ - [django__django-12470](./eval_outputs/django__django-12470/run_instance.log)
139
+ - [django__django-12497](./eval_outputs/django__django-12497/run_instance.log)
140
+ - [django__django-12589](./eval_outputs/django__django-12589/run_instance.log)
141
+ - [django__django-12700](./eval_outputs/django__django-12700/run_instance.log)
142
+ - [django__django-12747](./eval_outputs/django__django-12747/run_instance.log)
143
+ - [django__django-12856](./eval_outputs/django__django-12856/run_instance.log)
144
+ - [django__django-12908](./eval_outputs/django__django-12908/run_instance.log)
145
+ - [django__django-12915](./eval_outputs/django__django-12915/run_instance.log)
146
+ - [django__django-12983](./eval_outputs/django__django-12983/run_instance.log)
147
+ - [django__django-13220](./eval_outputs/django__django-13220/run_instance.log)
148
+ - [django__django-13230](./eval_outputs/django__django-13230/run_instance.log)
149
+ - [django__django-13265](./eval_outputs/django__django-13265/run_instance.log)
150
+ - [django__django-13321](./eval_outputs/django__django-13321/run_instance.log)
151
+ - [django__django-13447](./eval_outputs/django__django-13447/run_instance.log)
152
+ - [django__django-13448](./eval_outputs/django__django-13448/run_instance.log)
153
+ - [django__django-13660](./eval_outputs/django__django-13660/run_instance.log)
154
+ - [django__django-13710](./eval_outputs/django__django-13710/run_instance.log)
155
+ - [django__django-13757](./eval_outputs/django__django-13757/run_instance.log)
156
+ - [django__django-13768](./eval_outputs/django__django-13768/run_instance.log)
157
+ - [django__django-14016](./eval_outputs/django__django-14016/run_instance.log)
158
+ - [django__django-14382](./eval_outputs/django__django-14382/run_instance.log)
159
+ - [django__django-14411](./eval_outputs/django__django-14411/run_instance.log)
160
+ - [django__django-14667](./eval_outputs/django__django-14667/run_instance.log)
161
+ - [django__django-14730](./eval_outputs/django__django-14730/run_instance.log)
162
+ - [django__django-14997](./eval_outputs/django__django-14997/run_instance.log)
163
+ - [django__django-15061](./eval_outputs/django__django-15061/run_instance.log)
164
+ - [django__django-15202](./eval_outputs/django__django-15202/run_instance.log)
165
+ - [django__django-15213](./eval_outputs/django__django-15213/run_instance.log)
166
+ - [django__django-15320](./eval_outputs/django__django-15320/run_instance.log)
167
+ - [django__django-15347](./eval_outputs/django__django-15347/run_instance.log)
168
+ - [django__django-15388](./eval_outputs/django__django-15388/run_instance.log)
169
+ - [django__django-15400](./eval_outputs/django__django-15400/run_instance.log)
170
+ - [django__django-15498](./eval_outputs/django__django-15498/run_instance.log)
171
+ - [django__django-15738](./eval_outputs/django__django-15738/run_instance.log)
172
+ - [django__django-15781](./eval_outputs/django__django-15781/run_instance.log)
173
+ - [django__django-15789](./eval_outputs/django__django-15789/run_instance.log)
174
+ - [django__django-15790](./eval_outputs/django__django-15790/run_instance.log)
175
+ - [django__django-15819](./eval_outputs/django__django-15819/run_instance.log)
176
+ - [django__django-15902](./eval_outputs/django__django-15902/run_instance.log)
177
+ - [django__django-15996](./eval_outputs/django__django-15996/run_instance.log)
178
+ - [django__django-16041](./eval_outputs/django__django-16041/run_instance.log)
179
+ - [django__django-16046](./eval_outputs/django__django-16046/run_instance.log)
180
+ - [django__django-16229](./eval_outputs/django__django-16229/run_instance.log)
181
+ - [django__django-16379](./eval_outputs/django__django-16379/run_instance.log)
182
+ - [django__django-16400](./eval_outputs/django__django-16400/run_instance.log)
183
+ - [django__django-16408](./eval_outputs/django__django-16408/run_instance.log)
184
+ - [django__django-16816](./eval_outputs/django__django-16816/run_instance.log)
185
+ - [django__django-16820](./eval_outputs/django__django-16820/run_instance.log)
186
+ - [django__django-16873](./eval_outputs/django__django-16873/run_instance.log)
187
+ - [django__django-16910](./eval_outputs/django__django-16910/run_instance.log)
188
+ - [django__django-17051](./eval_outputs/django__django-17051/run_instance.log)
189
+ - [matplotlib__matplotlib-18869](./eval_outputs/matplotlib__matplotlib-18869/run_instance.log)
190
+ - [matplotlib__matplotlib-22711](./eval_outputs/matplotlib__matplotlib-22711/run_instance.log)
191
+ - [matplotlib__matplotlib-22835](./eval_outputs/matplotlib__matplotlib-22835/run_instance.log)
192
+ - [matplotlib__matplotlib-23562](./eval_outputs/matplotlib__matplotlib-23562/run_instance.log)
193
+ - [matplotlib__matplotlib-23563](./eval_outputs/matplotlib__matplotlib-23563/run_instance.log)
194
+ - [matplotlib__matplotlib-23913](./eval_outputs/matplotlib__matplotlib-23913/run_instance.log)
195
+ - [matplotlib__matplotlib-23964](./eval_outputs/matplotlib__matplotlib-23964/run_instance.log)
196
+ - [matplotlib__matplotlib-23987](./eval_outputs/matplotlib__matplotlib-23987/run_instance.log)
197
+ - [matplotlib__matplotlib-24265](./eval_outputs/matplotlib__matplotlib-24265/run_instance.log)
198
+ - [matplotlib__matplotlib-24334](./eval_outputs/matplotlib__matplotlib-24334/run_instance.log)
199
+ - [matplotlib__matplotlib-25079](./eval_outputs/matplotlib__matplotlib-25079/run_instance.log)
200
+ - [matplotlib__matplotlib-25433](./eval_outputs/matplotlib__matplotlib-25433/run_instance.log)
201
+ - [matplotlib__matplotlib-25442](./eval_outputs/matplotlib__matplotlib-25442/run_instance.log)
202
+ - [matplotlib__matplotlib-25498](./eval_outputs/matplotlib__matplotlib-25498/run_instance.log)
203
+ - [matplotlib__matplotlib-26011](./eval_outputs/matplotlib__matplotlib-26011/run_instance.log)
204
+ - [matplotlib__matplotlib-26020](./eval_outputs/matplotlib__matplotlib-26020/run_instance.log)
205
+ - [mwaskom__seaborn-2848](./eval_outputs/mwaskom__seaborn-2848/run_instance.log)
206
+ - [mwaskom__seaborn-3010](./eval_outputs/mwaskom__seaborn-3010/run_instance.log)
207
+ - [mwaskom__seaborn-3190](./eval_outputs/mwaskom__seaborn-3190/run_instance.log)
208
+ - [mwaskom__seaborn-3407](./eval_outputs/mwaskom__seaborn-3407/run_instance.log)
209
+ - [pallets__flask-4045](./eval_outputs/pallets__flask-4045/run_instance.log)
210
+ - [pallets__flask-4992](./eval_outputs/pallets__flask-4992/run_instance.log)
211
+ - [pallets__flask-5063](./eval_outputs/pallets__flask-5063/run_instance.log)
212
+ - [psf__requests-1963](./eval_outputs/psf__requests-1963/run_instance.log)
213
+ - [psf__requests-2148](./eval_outputs/psf__requests-2148/run_instance.log)
214
+ - [psf__requests-2674](./eval_outputs/psf__requests-2674/run_instance.log)
215
+ - [psf__requests-3362](./eval_outputs/psf__requests-3362/run_instance.log)
216
+ - [psf__requests-863](./eval_outputs/psf__requests-863/run_instance.log)
217
+ - [pydata__xarray-3364](./eval_outputs/pydata__xarray-3364/run_instance.log)
218
+ - [pydata__xarray-4248](./eval_outputs/pydata__xarray-4248/run_instance.log)
219
+ - [pydata__xarray-4493](./eval_outputs/pydata__xarray-4493/run_instance.log)
220
+ - [pydata__xarray-5131](./eval_outputs/pydata__xarray-5131/run_instance.log)
221
+ - [pylint-dev__pylint-5859](./eval_outputs/pylint-dev__pylint-5859/run_instance.log)
222
+ - [pylint-dev__pylint-6506](./eval_outputs/pylint-dev__pylint-6506/run_instance.log)
223
+ - [pylint-dev__pylint-7114](./eval_outputs/pylint-dev__pylint-7114/run_instance.log)
224
+ - [pylint-dev__pylint-7228](./eval_outputs/pylint-dev__pylint-7228/run_instance.log)
225
+ - [pylint-dev__pylint-7993](./eval_outputs/pylint-dev__pylint-7993/run_instance.log)
226
+ - [pytest-dev__pytest-11143](./eval_outputs/pytest-dev__pytest-11143/run_instance.log)
227
+ - [pytest-dev__pytest-11148](./eval_outputs/pytest-dev__pytest-11148/run_instance.log)
228
+ - [pytest-dev__pytest-5103](./eval_outputs/pytest-dev__pytest-5103/run_instance.log)
229
+ - [pytest-dev__pytest-5221](./eval_outputs/pytest-dev__pytest-5221/run_instance.log)
230
+ - [pytest-dev__pytest-5227](./eval_outputs/pytest-dev__pytest-5227/run_instance.log)
231
+ - [pytest-dev__pytest-5413](./eval_outputs/pytest-dev__pytest-5413/run_instance.log)
232
+ - [pytest-dev__pytest-5495](./eval_outputs/pytest-dev__pytest-5495/run_instance.log)
233
+ - [pytest-dev__pytest-5692](./eval_outputs/pytest-dev__pytest-5692/run_instance.log)
234
+ - [pytest-dev__pytest-6116](./eval_outputs/pytest-dev__pytest-6116/run_instance.log)
235
+ - [pytest-dev__pytest-7168](./eval_outputs/pytest-dev__pytest-7168/run_instance.log)
236
+ - [pytest-dev__pytest-7220](./eval_outputs/pytest-dev__pytest-7220/run_instance.log)
237
+ - [pytest-dev__pytest-7373](./eval_outputs/pytest-dev__pytest-7373/run_instance.log)
238
+ - [pytest-dev__pytest-8365](./eval_outputs/pytest-dev__pytest-8365/run_instance.log)
239
+ - [pytest-dev__pytest-8906](./eval_outputs/pytest-dev__pytest-8906/run_instance.log)
240
+ - [pytest-dev__pytest-9359](./eval_outputs/pytest-dev__pytest-9359/run_instance.log)
241
+ - [scikit-learn__scikit-learn-10508](./eval_outputs/scikit-learn__scikit-learn-10508/run_instance.log)
242
+ - [scikit-learn__scikit-learn-10949](./eval_outputs/scikit-learn__scikit-learn-10949/run_instance.log)
243
+ - [scikit-learn__scikit-learn-11040](./eval_outputs/scikit-learn__scikit-learn-11040/run_instance.log)
244
+ - [scikit-learn__scikit-learn-11281](./eval_outputs/scikit-learn__scikit-learn-11281/run_instance.log)
245
+ - [scikit-learn__scikit-learn-12471](./eval_outputs/scikit-learn__scikit-learn-12471/run_instance.log)
246
+ - [scikit-learn__scikit-learn-13241](./eval_outputs/scikit-learn__scikit-learn-13241/run_instance.log)
247
+ - [scikit-learn__scikit-learn-13497](./eval_outputs/scikit-learn__scikit-learn-13497/run_instance.log)
248
+ - [scikit-learn__scikit-learn-13584](./eval_outputs/scikit-learn__scikit-learn-13584/run_instance.log)
249
+ - [scikit-learn__scikit-learn-14092](./eval_outputs/scikit-learn__scikit-learn-14092/run_instance.log)
250
+ - [scikit-learn__scikit-learn-15512](./eval_outputs/scikit-learn__scikit-learn-15512/run_instance.log)
251
+ - [scikit-learn__scikit-learn-15535](./eval_outputs/scikit-learn__scikit-learn-15535/run_instance.log)
252
+ - [scikit-learn__scikit-learn-25500](./eval_outputs/scikit-learn__scikit-learn-25500/run_instance.log)
253
+ - [scikit-learn__scikit-learn-25570](./eval_outputs/scikit-learn__scikit-learn-25570/run_instance.log)
254
+ - [scikit-learn__scikit-learn-25638](./eval_outputs/scikit-learn__scikit-learn-25638/run_instance.log)
255
+ - [sphinx-doc__sphinx-10325](./eval_outputs/sphinx-doc__sphinx-10325/run_instance.log)
256
+ - [sphinx-doc__sphinx-10451](./eval_outputs/sphinx-doc__sphinx-10451/run_instance.log)
257
+ - [sphinx-doc__sphinx-7686](./eval_outputs/sphinx-doc__sphinx-7686/run_instance.log)
258
+ - [sphinx-doc__sphinx-7738](./eval_outputs/sphinx-doc__sphinx-7738/run_instance.log)
259
+ - [sphinx-doc__sphinx-7975](./eval_outputs/sphinx-doc__sphinx-7975/run_instance.log)
260
+ - [sphinx-doc__sphinx-8273](./eval_outputs/sphinx-doc__sphinx-8273/run_instance.log)
261
+ - [sphinx-doc__sphinx-8282](./eval_outputs/sphinx-doc__sphinx-8282/run_instance.log)
262
+ - [sphinx-doc__sphinx-8435](./eval_outputs/sphinx-doc__sphinx-8435/run_instance.log)
263
+ - [sphinx-doc__sphinx-8474](./eval_outputs/sphinx-doc__sphinx-8474/run_instance.log)
264
+ - [sphinx-doc__sphinx-8506](./eval_outputs/sphinx-doc__sphinx-8506/run_instance.log)
265
+ - [sphinx-doc__sphinx-8627](./eval_outputs/sphinx-doc__sphinx-8627/run_instance.log)
266
+ - [sphinx-doc__sphinx-8713](./eval_outputs/sphinx-doc__sphinx-8713/run_instance.log)
267
+ - [sphinx-doc__sphinx-8801](./eval_outputs/sphinx-doc__sphinx-8801/run_instance.log)
268
+ - [sympy__sympy-11400](./eval_outputs/sympy__sympy-11400/run_instance.log)
269
+ - [sympy__sympy-11870](./eval_outputs/sympy__sympy-11870/run_instance.log)
270
+ - [sympy__sympy-11897](./eval_outputs/sympy__sympy-11897/run_instance.log)
271
+ - [sympy__sympy-12171](./eval_outputs/sympy__sympy-12171/run_instance.log)
272
+ - [sympy__sympy-12236](./eval_outputs/sympy__sympy-12236/run_instance.log)
273
+ - [sympy__sympy-12454](./eval_outputs/sympy__sympy-12454/run_instance.log)
274
+ - [sympy__sympy-13043](./eval_outputs/sympy__sympy-13043/run_instance.log)
275
+ - [sympy__sympy-13146](./eval_outputs/sympy__sympy-13146/run_instance.log)
276
+ - [sympy__sympy-13177](./eval_outputs/sympy__sympy-13177/run_instance.log)
277
+ - [sympy__sympy-13437](./eval_outputs/sympy__sympy-13437/run_instance.log)
278
+ - [sympy__sympy-13471](./eval_outputs/sympy__sympy-13471/run_instance.log)
279
+ - [sympy__sympy-13773](./eval_outputs/sympy__sympy-13773/run_instance.log)
280
+ - [sympy__sympy-13895](./eval_outputs/sympy__sympy-13895/run_instance.log)
281
+ - [sympy__sympy-13915](./eval_outputs/sympy__sympy-13915/run_instance.log)
282
+ - [sympy__sympy-13971](./eval_outputs/sympy__sympy-13971/run_instance.log)
283
+ - [sympy__sympy-14024](./eval_outputs/sympy__sympy-14024/run_instance.log)
284
+ - [sympy__sympy-14308](./eval_outputs/sympy__sympy-14308/run_instance.log)
285
+ - [sympy__sympy-14317](./eval_outputs/sympy__sympy-14317/run_instance.log)
286
+ - [sympy__sympy-14396](./eval_outputs/sympy__sympy-14396/run_instance.log)
287
+ - [sympy__sympy-14774](./eval_outputs/sympy__sympy-14774/run_instance.log)
288
+ - [sympy__sympy-14817](./eval_outputs/sympy__sympy-14817/run_instance.log)
289
+ - [sympy__sympy-15011](./eval_outputs/sympy__sympy-15011/run_instance.log)
290
+ - [sympy__sympy-15308](./eval_outputs/sympy__sympy-15308/run_instance.log)
291
+ - [sympy__sympy-15346](./eval_outputs/sympy__sympy-15346/run_instance.log)
292
+ - [sympy__sympy-15609](./eval_outputs/sympy__sympy-15609/run_instance.log)
293
+ - [sympy__sympy-15678](./eval_outputs/sympy__sympy-15678/run_instance.log)
294
+ - [sympy__sympy-16106](./eval_outputs/sympy__sympy-16106/run_instance.log)
295
+ - [sympy__sympy-16281](./eval_outputs/sympy__sympy-16281/run_instance.log)
296
+ - [sympy__sympy-16503](./eval_outputs/sympy__sympy-16503/run_instance.log)
297
+ - [sympy__sympy-16988](./eval_outputs/sympy__sympy-16988/run_instance.log)
298
+ - [sympy__sympy-17022](./eval_outputs/sympy__sympy-17022/run_instance.log)
299
+ - [sympy__sympy-18057](./eval_outputs/sympy__sympy-18057/run_instance.log)
300
+ - [sympy__sympy-18087](./eval_outputs/sympy__sympy-18087/run_instance.log)
301
+ - [sympy__sympy-18532](./eval_outputs/sympy__sympy-18532/run_instance.log)
302
+ - [sympy__sympy-18621](./eval_outputs/sympy__sympy-18621/run_instance.log)
303
+ - [sympy__sympy-18835](./eval_outputs/sympy__sympy-18835/run_instance.log)
304
+ - [sympy__sympy-19007](./eval_outputs/sympy__sympy-19007/run_instance.log)
305
+ - [sympy__sympy-19254](./eval_outputs/sympy__sympy-19254/run_instance.log)
306
+ - [sympy__sympy-19487](./eval_outputs/sympy__sympy-19487/run_instance.log)
307
+ - [sympy__sympy-20049](./eval_outputs/sympy__sympy-20049/run_instance.log)
308
+ - [sympy__sympy-20212](./eval_outputs/sympy__sympy-20212/run_instance.log)
309
+ - [sympy__sympy-20322](./eval_outputs/sympy__sympy-20322/run_instance.log)
310
+ - [sympy__sympy-20442](./eval_outputs/sympy__sympy-20442/run_instance.log)
311
+ - [sympy__sympy-20639](./eval_outputs/sympy__sympy-20639/run_instance.log)
312
+ - [sympy__sympy-21055](./eval_outputs/sympy__sympy-21055/run_instance.log)
313
+ - [sympy__sympy-21171](./eval_outputs/sympy__sympy-21171/run_instance.log)
314
+ - [sympy__sympy-21614](./eval_outputs/sympy__sympy-21614/run_instance.log)
315
+ - [sympy__sympy-21627](./eval_outputs/sympy__sympy-21627/run_instance.log)
316
+ - [sympy__sympy-22005](./eval_outputs/sympy__sympy-22005/run_instance.log)
317
+ - [sympy__sympy-22840](./eval_outputs/sympy__sympy-22840/run_instance.log)
318
+ - [sympy__sympy-23117](./eval_outputs/sympy__sympy-23117/run_instance.log)
319
+ - [sympy__sympy-23191](./eval_outputs/sympy__sympy-23191/run_instance.log)
320
+ - [sympy__sympy-24102](./eval_outputs/sympy__sympy-24102/run_instance.log)
321
+ - [sympy__sympy-24152](./eval_outputs/sympy__sympy-24152/run_instance.log)
322
+ - [sympy__sympy-24909](./eval_outputs/sympy__sympy-24909/run_instance.log)
outputs/swe_bench_lite/CoActPlannerAgent/claude-3-5-sonnet@20240620_maxiter_40_N_v0.1-no-hint/metadata.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "agent_class": "CoActPlannerAgent",
3
+ "llm_config": {
4
+ "model": "openai/claude-3-5-sonnet@20240620",
5
+ "api_key": "******",
6
+ "base_url": "https://llm-proxy.all-hands.dev/",
7
+ "api_version": null,
8
+ "embedding_model": "",
9
+ "embedding_base_url": null,
10
+ "embedding_deployment_name": null,
11
+ "aws_access_key_id": null,
12
+ "aws_secret_access_key": null,
13
+ "aws_region_name": null,
14
+ "num_retries": 8,
15
+ "retry_multiplier": 2,
16
+ "retry_min_wait": 15,
17
+ "retry_max_wait": 120,
18
+ "timeout": null,
19
+ "max_message_chars": 10000,
20
+ "temperature": 0,
21
+ "top_p": 0.5,
22
+ "custom_llm_provider": null,
23
+ "max_input_tokens": null,
24
+ "max_output_tokens": null,
25
+ "input_cost_per_token": null,
26
+ "output_cost_per_token": null,
27
+ "ollama_base_url": null,
28
+ "drop_params": null,
29
+ "disable_vision": null,
30
+ "caching_prompt": false
31
+ },
32
+ "max_iterations": 40,
33
+ "eval_output_dir": "evaluation/evaluation_outputs/outputs/swe-bench-lite/CoActPlannerAgent/claude-3-5-sonnet@20240620_maxiter_40_N_v0.1-no-hint",
34
+ "start_time": "2024-09-15 00:54:27",
35
+ "git_commit": "1764205043583212eb003f69464caea940db0ac0",
36
+ "dataset": "swe-bench-lite",
37
+ "data_split": null,
38
+ "details": {}
39
+ }
outputs/swe_bench_lite/CoActPlannerAgent/claude-3-5-sonnet@20240620_maxiter_40_N_v0.1-no-hint/output.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d6287417d5ee851f1fa21b3df267bbd475ca8943ea36871d29d0b43d4da99d8
3
+ size 21006551
outputs/swe_bench_lite/CoActPlannerAgent/{claude-3-5-sonnet@20240620_maxiter_40_N_v1.0-no-hint → claude-3-5-sonnet@20240620_maxiter_40_N_v0.1-no-hint}/report.json RENAMED
File without changes
outputs/swe_bench_lite/CoActPlannerAgent/claude-3-5-sonnet@20240620_maxiter_40_N_v0.1-no-hint/run_id.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ RUN_ID: 20240915_045214
outputs/swe_bench_lite/CoActPlannerAgent/claude-3-5-sonnet@20240620_maxiter_40_N_v1.0-no-hint/README.md CHANGED
@@ -1,322 +0,0 @@
1
- # SWE-bench Report
2
- This folder contains the evaluation results of the SWE-bench using the [official evaluation docker containerization](https://github.com/princeton-nlp/SWE-bench/blob/main/docs/20240627_docker/README.md#choosing-the-right-cache_level).
3
-
4
- ## Summary
5
- - total instances: 300
6
- - submitted instances: 93
7
- - completed instances: 93
8
- - empty patch instances: 0
9
- - resolved instances: 25
10
- - unresolved instances: 68
11
- - error instances: 0
12
- - unstopped instances: 0
13
-
14
- ## Resolved Instances
15
- - [django__django-11099](./eval_outputs/django__django-11099/run_instance.log)
16
- - [django__django-11815](./eval_outputs/django__django-11815/run_instance.log)
17
- - [django__django-11964](./eval_outputs/django__django-11964/run_instance.log)
18
- - [django__django-13028](./eval_outputs/django__django-13028/run_instance.log)
19
- - [django__django-13590](./eval_outputs/django__django-13590/run_instance.log)
20
- - [django__django-13658](./eval_outputs/django__django-13658/run_instance.log)
21
- - [django__django-14238](./eval_outputs/django__django-14238/run_instance.log)
22
- - [django__django-14752](./eval_outputs/django__django-14752/run_instance.log)
23
- - [django__django-14915](./eval_outputs/django__django-14915/run_instance.log)
24
- - [django__django-15814](./eval_outputs/django__django-15814/run_instance.log)
25
- - [django__django-15851](./eval_outputs/django__django-15851/run_instance.log)
26
- - [django__django-16255](./eval_outputs/django__django-16255/run_instance.log)
27
- - [django__django-16527](./eval_outputs/django__django-16527/run_instance.log)
28
- - [matplotlib__matplotlib-25311](./eval_outputs/matplotlib__matplotlib-25311/run_instance.log)
29
- - [psf__requests-2317](./eval_outputs/psf__requests-2317/run_instance.log)
30
- - [scikit-learn__scikit-learn-10297](./eval_outputs/scikit-learn__scikit-learn-10297/run_instance.log)
31
- - [scikit-learn__scikit-learn-13439](./eval_outputs/scikit-learn__scikit-learn-13439/run_instance.log)
32
- - [scikit-learn__scikit-learn-13496](./eval_outputs/scikit-learn__scikit-learn-13496/run_instance.log)
33
- - [scikit-learn__scikit-learn-14894](./eval_outputs/scikit-learn__scikit-learn-14894/run_instance.log)
34
- - [sphinx-doc__sphinx-8595](./eval_outputs/sphinx-doc__sphinx-8595/run_instance.log)
35
- - [sympy__sympy-17139](./eval_outputs/sympy__sympy-17139/run_instance.log)
36
- - [sympy__sympy-17655](./eval_outputs/sympy__sympy-17655/run_instance.log)
37
- - [sympy__sympy-20590](./eval_outputs/sympy__sympy-20590/run_instance.log)
38
- - [sympy__sympy-22714](./eval_outputs/sympy__sympy-22714/run_instance.log)
39
- - [sympy__sympy-24213](./eval_outputs/sympy__sympy-24213/run_instance.log)
40
-
41
- ## Unresolved Instances
42
- - [astropy__astropy-12907](./eval_outputs/astropy__astropy-12907/run_instance.log)
43
- - [astropy__astropy-14182](./eval_outputs/astropy__astropy-14182/run_instance.log)
44
- - [astropy__astropy-14365](./eval_outputs/astropy__astropy-14365/run_instance.log)
45
- - [astropy__astropy-14995](./eval_outputs/astropy__astropy-14995/run_instance.log)
46
- - [django__django-10914](./eval_outputs/django__django-10914/run_instance.log)
47
- - [django__django-11133](./eval_outputs/django__django-11133/run_instance.log)
48
- - [django__django-11179](./eval_outputs/django__django-11179/run_instance.log)
49
- - [django__django-11848](./eval_outputs/django__django-11848/run_instance.log)
50
- - [django__django-11999](./eval_outputs/django__django-11999/run_instance.log)
51
- - [django__django-12125](./eval_outputs/django__django-12125/run_instance.log)
52
- - [django__django-12308](./eval_outputs/django__django-12308/run_instance.log)
53
- - [django__django-12708](./eval_outputs/django__django-12708/run_instance.log)
54
- - [django__django-13033](./eval_outputs/django__django-13033/run_instance.log)
55
- - [django__django-13158](./eval_outputs/django__django-13158/run_instance.log)
56
- - [django__django-13315](./eval_outputs/django__django-13315/run_instance.log)
57
- - [django__django-13401](./eval_outputs/django__django-13401/run_instance.log)
58
- - [django__django-13551](./eval_outputs/django__django-13551/run_instance.log)
59
- - [django__django-13925](./eval_outputs/django__django-13925/run_instance.log)
60
- - [django__django-13933](./eval_outputs/django__django-13933/run_instance.log)
61
- - [django__django-13964](./eval_outputs/django__django-13964/run_instance.log)
62
- - [django__django-14017](./eval_outputs/django__django-14017/run_instance.log)
63
- - [django__django-14155](./eval_outputs/django__django-14155/run_instance.log)
64
- - [django__django-14534](./eval_outputs/django__django-14534/run_instance.log)
65
- - [django__django-14580](./eval_outputs/django__django-14580/run_instance.log)
66
- - [django__django-14608](./eval_outputs/django__django-14608/run_instance.log)
67
- - [django__django-14672](./eval_outputs/django__django-14672/run_instance.log)
68
- - [django__django-14787](./eval_outputs/django__django-14787/run_instance.log)
69
- - [django__django-14855](./eval_outputs/django__django-14855/run_instance.log)
70
- - [django__django-14999](./eval_outputs/django__django-14999/run_instance.log)
71
- - [django__django-15252](./eval_outputs/django__django-15252/run_instance.log)
72
- - [django__django-15695](./eval_outputs/django__django-15695/run_instance.log)
73
- - [django__django-16139](./eval_outputs/django__django-16139/run_instance.log)
74
- - [django__django-16595](./eval_outputs/django__django-16595/run_instance.log)
75
- - [django__django-17087](./eval_outputs/django__django-17087/run_instance.log)
76
- - [matplotlib__matplotlib-23299](./eval_outputs/matplotlib__matplotlib-23299/run_instance.log)
77
- - [matplotlib__matplotlib-23314](./eval_outputs/matplotlib__matplotlib-23314/run_instance.log)
78
- - [matplotlib__matplotlib-23476](./eval_outputs/matplotlib__matplotlib-23476/run_instance.log)
79
- - [matplotlib__matplotlib-24149](./eval_outputs/matplotlib__matplotlib-24149/run_instance.log)
80
- - [matplotlib__matplotlib-24970](./eval_outputs/matplotlib__matplotlib-24970/run_instance.log)
81
- - [matplotlib__matplotlib-25332](./eval_outputs/matplotlib__matplotlib-25332/run_instance.log)
82
- - [pydata__xarray-4094](./eval_outputs/pydata__xarray-4094/run_instance.log)
83
- - [pylint-dev__pylint-7080](./eval_outputs/pylint-dev__pylint-7080/run_instance.log)
84
- - [pytest-dev__pytest-7432](./eval_outputs/pytest-dev__pytest-7432/run_instance.log)
85
- - [pytest-dev__pytest-7490](./eval_outputs/pytest-dev__pytest-7490/run_instance.log)
86
- - [scikit-learn__scikit-learn-13142](./eval_outputs/scikit-learn__scikit-learn-13142/run_instance.log)
87
- - [scikit-learn__scikit-learn-13779](./eval_outputs/scikit-learn__scikit-learn-13779/run_instance.log)
88
- - [scikit-learn__scikit-learn-14087](./eval_outputs/scikit-learn__scikit-learn-14087/run_instance.log)
89
- - [scikit-learn__scikit-learn-14983](./eval_outputs/scikit-learn__scikit-learn-14983/run_instance.log)
90
- - [scikit-learn__scikit-learn-25747](./eval_outputs/scikit-learn__scikit-learn-25747/run_instance.log)
91
- - [sphinx-doc__sphinx-11445](./eval_outputs/sphinx-doc__sphinx-11445/run_instance.log)
92
- - [sphinx-doc__sphinx-8721](./eval_outputs/sphinx-doc__sphinx-8721/run_instance.log)
93
- - [sympy__sympy-12419](./eval_outputs/sympy__sympy-12419/run_instance.log)
94
- - [sympy__sympy-12481](./eval_outputs/sympy__sympy-12481/run_instance.log)
95
- - [sympy__sympy-13031](./eval_outputs/sympy__sympy-13031/run_instance.log)
96
- - [sympy__sympy-13480](./eval_outputs/sympy__sympy-13480/run_instance.log)
97
- - [sympy__sympy-13647](./eval_outputs/sympy__sympy-13647/run_instance.log)
98
- - [sympy__sympy-15345](./eval_outputs/sympy__sympy-15345/run_instance.log)
99
- - [sympy__sympy-16792](./eval_outputs/sympy__sympy-16792/run_instance.log)
100
- - [sympy__sympy-17630](./eval_outputs/sympy__sympy-17630/run_instance.log)
101
- - [sympy__sympy-18189](./eval_outputs/sympy__sympy-18189/run_instance.log)
102
- - [sympy__sympy-18199](./eval_outputs/sympy__sympy-18199/run_instance.log)
103
- - [sympy__sympy-18698](./eval_outputs/sympy__sympy-18698/run_instance.log)
104
- - [sympy__sympy-20154](./eval_outputs/sympy__sympy-20154/run_instance.log)
105
- - [sympy__sympy-21379](./eval_outputs/sympy__sympy-21379/run_instance.log)
106
- - [sympy__sympy-21612](./eval_outputs/sympy__sympy-21612/run_instance.log)
107
- - [sympy__sympy-21847](./eval_outputs/sympy__sympy-21847/run_instance.log)
108
- - [sympy__sympy-23262](./eval_outputs/sympy__sympy-23262/run_instance.log)
109
- - [sympy__sympy-24066](./eval_outputs/sympy__sympy-24066/run_instance.log)
110
-
111
- ## Error Instances
112
-
113
- ## Empty Patch Instances
114
-
115
- ## Incomplete Instances
116
- - [astropy__astropy-6938](./eval_outputs/astropy__astropy-6938/run_instance.log)
117
- - [astropy__astropy-7746](./eval_outputs/astropy__astropy-7746/run_instance.log)
118
- - [django__django-10924](./eval_outputs/django__django-10924/run_instance.log)
119
- - [django__django-11001](./eval_outputs/django__django-11001/run_instance.log)
120
- - [django__django-11019](./eval_outputs/django__django-11019/run_instance.log)
121
- - [django__django-11039](./eval_outputs/django__django-11039/run_instance.log)
122
- - [django__django-11049](./eval_outputs/django__django-11049/run_instance.log)
123
- - [django__django-11283](./eval_outputs/django__django-11283/run_instance.log)
124
- - [django__django-11422](./eval_outputs/django__django-11422/run_instance.log)
125
- - [django__django-11564](./eval_outputs/django__django-11564/run_instance.log)
126
- - [django__django-11583](./eval_outputs/django__django-11583/run_instance.log)
127
- - [django__django-11620](./eval_outputs/django__django-11620/run_instance.log)
128
- - [django__django-11630](./eval_outputs/django__django-11630/run_instance.log)
129
- - [django__django-11742](./eval_outputs/django__django-11742/run_instance.log)
130
- - [django__django-11797](./eval_outputs/django__django-11797/run_instance.log)
131
- - [django__django-11905](./eval_outputs/django__django-11905/run_instance.log)
132
- - [django__django-11910](./eval_outputs/django__django-11910/run_instance.log)
133
- - [django__django-12113](./eval_outputs/django__django-12113/run_instance.log)
134
- - [django__django-12184](./eval_outputs/django__django-12184/run_instance.log)
135
- - [django__django-12284](./eval_outputs/django__django-12284/run_instance.log)
136
- - [django__django-12286](./eval_outputs/django__django-12286/run_instance.log)
137
- - [django__django-12453](./eval_outputs/django__django-12453/run_instance.log)
138
- - [django__django-12470](./eval_outputs/django__django-12470/run_instance.log)
139
- - [django__django-12497](./eval_outputs/django__django-12497/run_instance.log)
140
- - [django__django-12589](./eval_outputs/django__django-12589/run_instance.log)
141
- - [django__django-12700](./eval_outputs/django__django-12700/run_instance.log)
142
- - [django__django-12747](./eval_outputs/django__django-12747/run_instance.log)
143
- - [django__django-12856](./eval_outputs/django__django-12856/run_instance.log)
144
- - [django__django-12908](./eval_outputs/django__django-12908/run_instance.log)
145
- - [django__django-12915](./eval_outputs/django__django-12915/run_instance.log)
146
- - [django__django-12983](./eval_outputs/django__django-12983/run_instance.log)
147
- - [django__django-13220](./eval_outputs/django__django-13220/run_instance.log)
148
- - [django__django-13230](./eval_outputs/django__django-13230/run_instance.log)
149
- - [django__django-13265](./eval_outputs/django__django-13265/run_instance.log)
150
- - [django__django-13321](./eval_outputs/django__django-13321/run_instance.log)
151
- - [django__django-13447](./eval_outputs/django__django-13447/run_instance.log)
152
- - [django__django-13448](./eval_outputs/django__django-13448/run_instance.log)
153
- - [django__django-13660](./eval_outputs/django__django-13660/run_instance.log)
154
- - [django__django-13710](./eval_outputs/django__django-13710/run_instance.log)
155
- - [django__django-13757](./eval_outputs/django__django-13757/run_instance.log)
156
- - [django__django-13768](./eval_outputs/django__django-13768/run_instance.log)
157
- - [django__django-14016](./eval_outputs/django__django-14016/run_instance.log)
158
- - [django__django-14382](./eval_outputs/django__django-14382/run_instance.log)
159
- - [django__django-14411](./eval_outputs/django__django-14411/run_instance.log)
160
- - [django__django-14667](./eval_outputs/django__django-14667/run_instance.log)
161
- - [django__django-14730](./eval_outputs/django__django-14730/run_instance.log)
162
- - [django__django-14997](./eval_outputs/django__django-14997/run_instance.log)
163
- - [django__django-15061](./eval_outputs/django__django-15061/run_instance.log)
164
- - [django__django-15202](./eval_outputs/django__django-15202/run_instance.log)
165
- - [django__django-15213](./eval_outputs/django__django-15213/run_instance.log)
166
- - [django__django-15320](./eval_outputs/django__django-15320/run_instance.log)
167
- - [django__django-15347](./eval_outputs/django__django-15347/run_instance.log)
168
- - [django__django-15388](./eval_outputs/django__django-15388/run_instance.log)
169
- - [django__django-15400](./eval_outputs/django__django-15400/run_instance.log)
170
- - [django__django-15498](./eval_outputs/django__django-15498/run_instance.log)
171
- - [django__django-15738](./eval_outputs/django__django-15738/run_instance.log)
172
- - [django__django-15781](./eval_outputs/django__django-15781/run_instance.log)
173
- - [django__django-15789](./eval_outputs/django__django-15789/run_instance.log)
174
- - [django__django-15790](./eval_outputs/django__django-15790/run_instance.log)
175
- - [django__django-15819](./eval_outputs/django__django-15819/run_instance.log)
176
- - [django__django-15902](./eval_outputs/django__django-15902/run_instance.log)
177
- - [django__django-15996](./eval_outputs/django__django-15996/run_instance.log)
178
- - [django__django-16041](./eval_outputs/django__django-16041/run_instance.log)
179
- - [django__django-16046](./eval_outputs/django__django-16046/run_instance.log)
180
- - [django__django-16229](./eval_outputs/django__django-16229/run_instance.log)
181
- - [django__django-16379](./eval_outputs/django__django-16379/run_instance.log)
182
- - [django__django-16400](./eval_outputs/django__django-16400/run_instance.log)
183
- - [django__django-16408](./eval_outputs/django__django-16408/run_instance.log)
184
- - [django__django-16816](./eval_outputs/django__django-16816/run_instance.log)
185
- - [django__django-16820](./eval_outputs/django__django-16820/run_instance.log)
186
- - [django__django-16873](./eval_outputs/django__django-16873/run_instance.log)
187
- - [django__django-16910](./eval_outputs/django__django-16910/run_instance.log)
188
- - [django__django-17051](./eval_outputs/django__django-17051/run_instance.log)
189
- - [matplotlib__matplotlib-18869](./eval_outputs/matplotlib__matplotlib-18869/run_instance.log)
190
- - [matplotlib__matplotlib-22711](./eval_outputs/matplotlib__matplotlib-22711/run_instance.log)
191
- - [matplotlib__matplotlib-22835](./eval_outputs/matplotlib__matplotlib-22835/run_instance.log)
192
- - [matplotlib__matplotlib-23562](./eval_outputs/matplotlib__matplotlib-23562/run_instance.log)
193
- - [matplotlib__matplotlib-23563](./eval_outputs/matplotlib__matplotlib-23563/run_instance.log)
194
- - [matplotlib__matplotlib-23913](./eval_outputs/matplotlib__matplotlib-23913/run_instance.log)
195
- - [matplotlib__matplotlib-23964](./eval_outputs/matplotlib__matplotlib-23964/run_instance.log)
196
- - [matplotlib__matplotlib-23987](./eval_outputs/matplotlib__matplotlib-23987/run_instance.log)
197
- - [matplotlib__matplotlib-24265](./eval_outputs/matplotlib__matplotlib-24265/run_instance.log)
198
- - [matplotlib__matplotlib-24334](./eval_outputs/matplotlib__matplotlib-24334/run_instance.log)
199
- - [matplotlib__matplotlib-25079](./eval_outputs/matplotlib__matplotlib-25079/run_instance.log)
200
- - [matplotlib__matplotlib-25433](./eval_outputs/matplotlib__matplotlib-25433/run_instance.log)
201
- - [matplotlib__matplotlib-25442](./eval_outputs/matplotlib__matplotlib-25442/run_instance.log)
202
- - [matplotlib__matplotlib-25498](./eval_outputs/matplotlib__matplotlib-25498/run_instance.log)
203
- - [matplotlib__matplotlib-26011](./eval_outputs/matplotlib__matplotlib-26011/run_instance.log)
204
- - [matplotlib__matplotlib-26020](./eval_outputs/matplotlib__matplotlib-26020/run_instance.log)
205
- - [mwaskom__seaborn-2848](./eval_outputs/mwaskom__seaborn-2848/run_instance.log)
206
- - [mwaskom__seaborn-3010](./eval_outputs/mwaskom__seaborn-3010/run_instance.log)
207
- - [mwaskom__seaborn-3190](./eval_outputs/mwaskom__seaborn-3190/run_instance.log)
208
- - [mwaskom__seaborn-3407](./eval_outputs/mwaskom__seaborn-3407/run_instance.log)
209
- - [pallets__flask-4045](./eval_outputs/pallets__flask-4045/run_instance.log)
210
- - [pallets__flask-4992](./eval_outputs/pallets__flask-4992/run_instance.log)
211
- - [pallets__flask-5063](./eval_outputs/pallets__flask-5063/run_instance.log)
212
- - [psf__requests-1963](./eval_outputs/psf__requests-1963/run_instance.log)
213
- - [psf__requests-2148](./eval_outputs/psf__requests-2148/run_instance.log)
214
- - [psf__requests-2674](./eval_outputs/psf__requests-2674/run_instance.log)
215
- - [psf__requests-3362](./eval_outputs/psf__requests-3362/run_instance.log)
216
- - [psf__requests-863](./eval_outputs/psf__requests-863/run_instance.log)
217
- - [pydata__xarray-3364](./eval_outputs/pydata__xarray-3364/run_instance.log)
218
- - [pydata__xarray-4248](./eval_outputs/pydata__xarray-4248/run_instance.log)
219
- - [pydata__xarray-4493](./eval_outputs/pydata__xarray-4493/run_instance.log)
220
- - [pydata__xarray-5131](./eval_outputs/pydata__xarray-5131/run_instance.log)
221
- - [pylint-dev__pylint-5859](./eval_outputs/pylint-dev__pylint-5859/run_instance.log)
222
- - [pylint-dev__pylint-6506](./eval_outputs/pylint-dev__pylint-6506/run_instance.log)
223
- - [pylint-dev__pylint-7114](./eval_outputs/pylint-dev__pylint-7114/run_instance.log)
224
- - [pylint-dev__pylint-7228](./eval_outputs/pylint-dev__pylint-7228/run_instance.log)
225
- - [pylint-dev__pylint-7993](./eval_outputs/pylint-dev__pylint-7993/run_instance.log)
226
- - [pytest-dev__pytest-11143](./eval_outputs/pytest-dev__pytest-11143/run_instance.log)
227
- - [pytest-dev__pytest-11148](./eval_outputs/pytest-dev__pytest-11148/run_instance.log)
228
- - [pytest-dev__pytest-5103](./eval_outputs/pytest-dev__pytest-5103/run_instance.log)
229
- - [pytest-dev__pytest-5221](./eval_outputs/pytest-dev__pytest-5221/run_instance.log)
230
- - [pytest-dev__pytest-5227](./eval_outputs/pytest-dev__pytest-5227/run_instance.log)
231
- - [pytest-dev__pytest-5413](./eval_outputs/pytest-dev__pytest-5413/run_instance.log)
232
- - [pytest-dev__pytest-5495](./eval_outputs/pytest-dev__pytest-5495/run_instance.log)
233
- - [pytest-dev__pytest-5692](./eval_outputs/pytest-dev__pytest-5692/run_instance.log)
234
- - [pytest-dev__pytest-6116](./eval_outputs/pytest-dev__pytest-6116/run_instance.log)
235
- - [pytest-dev__pytest-7168](./eval_outputs/pytest-dev__pytest-7168/run_instance.log)
236
- - [pytest-dev__pytest-7220](./eval_outputs/pytest-dev__pytest-7220/run_instance.log)
237
- - [pytest-dev__pytest-7373](./eval_outputs/pytest-dev__pytest-7373/run_instance.log)
238
- - [pytest-dev__pytest-8365](./eval_outputs/pytest-dev__pytest-8365/run_instance.log)
239
- - [pytest-dev__pytest-8906](./eval_outputs/pytest-dev__pytest-8906/run_instance.log)
240
- - [pytest-dev__pytest-9359](./eval_outputs/pytest-dev__pytest-9359/run_instance.log)
241
- - [scikit-learn__scikit-learn-10508](./eval_outputs/scikit-learn__scikit-learn-10508/run_instance.log)
242
- - [scikit-learn__scikit-learn-10949](./eval_outputs/scikit-learn__scikit-learn-10949/run_instance.log)
243
- - [scikit-learn__scikit-learn-11040](./eval_outputs/scikit-learn__scikit-learn-11040/run_instance.log)
244
- - [scikit-learn__scikit-learn-11281](./eval_outputs/scikit-learn__scikit-learn-11281/run_instance.log)
245
- - [scikit-learn__scikit-learn-12471](./eval_outputs/scikit-learn__scikit-learn-12471/run_instance.log)
246
- - [scikit-learn__scikit-learn-13241](./eval_outputs/scikit-learn__scikit-learn-13241/run_instance.log)
247
- - [scikit-learn__scikit-learn-13497](./eval_outputs/scikit-learn__scikit-learn-13497/run_instance.log)
248
- - [scikit-learn__scikit-learn-13584](./eval_outputs/scikit-learn__scikit-learn-13584/run_instance.log)
249
- - [scikit-learn__scikit-learn-14092](./eval_outputs/scikit-learn__scikit-learn-14092/run_instance.log)
250
- - [scikit-learn__scikit-learn-15512](./eval_outputs/scikit-learn__scikit-learn-15512/run_instance.log)
251
- - [scikit-learn__scikit-learn-15535](./eval_outputs/scikit-learn__scikit-learn-15535/run_instance.log)
252
- - [scikit-learn__scikit-learn-25500](./eval_outputs/scikit-learn__scikit-learn-25500/run_instance.log)
253
- - [scikit-learn__scikit-learn-25570](./eval_outputs/scikit-learn__scikit-learn-25570/run_instance.log)
254
- - [scikit-learn__scikit-learn-25638](./eval_outputs/scikit-learn__scikit-learn-25638/run_instance.log)
255
- - [sphinx-doc__sphinx-10325](./eval_outputs/sphinx-doc__sphinx-10325/run_instance.log)
256
- - [sphinx-doc__sphinx-10451](./eval_outputs/sphinx-doc__sphinx-10451/run_instance.log)
257
- - [sphinx-doc__sphinx-7686](./eval_outputs/sphinx-doc__sphinx-7686/run_instance.log)
258
- - [sphinx-doc__sphinx-7738](./eval_outputs/sphinx-doc__sphinx-7738/run_instance.log)
259
- - [sphinx-doc__sphinx-7975](./eval_outputs/sphinx-doc__sphinx-7975/run_instance.log)
260
- - [sphinx-doc__sphinx-8273](./eval_outputs/sphinx-doc__sphinx-8273/run_instance.log)
261
- - [sphinx-doc__sphinx-8282](./eval_outputs/sphinx-doc__sphinx-8282/run_instance.log)
262
- - [sphinx-doc__sphinx-8435](./eval_outputs/sphinx-doc__sphinx-8435/run_instance.log)
263
- - [sphinx-doc__sphinx-8474](./eval_outputs/sphinx-doc__sphinx-8474/run_instance.log)
264
- - [sphinx-doc__sphinx-8506](./eval_outputs/sphinx-doc__sphinx-8506/run_instance.log)
265
- - [sphinx-doc__sphinx-8627](./eval_outputs/sphinx-doc__sphinx-8627/run_instance.log)
266
- - [sphinx-doc__sphinx-8713](./eval_outputs/sphinx-doc__sphinx-8713/run_instance.log)
267
- - [sphinx-doc__sphinx-8801](./eval_outputs/sphinx-doc__sphinx-8801/run_instance.log)
268
- - [sympy__sympy-11400](./eval_outputs/sympy__sympy-11400/run_instance.log)
269
- - [sympy__sympy-11870](./eval_outputs/sympy__sympy-11870/run_instance.log)
270
- - [sympy__sympy-11897](./eval_outputs/sympy__sympy-11897/run_instance.log)
271
- - [sympy__sympy-12171](./eval_outputs/sympy__sympy-12171/run_instance.log)
272
- - [sympy__sympy-12236](./eval_outputs/sympy__sympy-12236/run_instance.log)
273
- - [sympy__sympy-12454](./eval_outputs/sympy__sympy-12454/run_instance.log)
274
- - [sympy__sympy-13043](./eval_outputs/sympy__sympy-13043/run_instance.log)
275
- - [sympy__sympy-13146](./eval_outputs/sympy__sympy-13146/run_instance.log)
276
- - [sympy__sympy-13177](./eval_outputs/sympy__sympy-13177/run_instance.log)
277
- - [sympy__sympy-13437](./eval_outputs/sympy__sympy-13437/run_instance.log)
278
- - [sympy__sympy-13471](./eval_outputs/sympy__sympy-13471/run_instance.log)
279
- - [sympy__sympy-13773](./eval_outputs/sympy__sympy-13773/run_instance.log)
280
- - [sympy__sympy-13895](./eval_outputs/sympy__sympy-13895/run_instance.log)
281
- - [sympy__sympy-13915](./eval_outputs/sympy__sympy-13915/run_instance.log)
282
- - [sympy__sympy-13971](./eval_outputs/sympy__sympy-13971/run_instance.log)
283
- - [sympy__sympy-14024](./eval_outputs/sympy__sympy-14024/run_instance.log)
284
- - [sympy__sympy-14308](./eval_outputs/sympy__sympy-14308/run_instance.log)
285
- - [sympy__sympy-14317](./eval_outputs/sympy__sympy-14317/run_instance.log)
286
- - [sympy__sympy-14396](./eval_outputs/sympy__sympy-14396/run_instance.log)
287
- - [sympy__sympy-14774](./eval_outputs/sympy__sympy-14774/run_instance.log)
288
- - [sympy__sympy-14817](./eval_outputs/sympy__sympy-14817/run_instance.log)
289
- - [sympy__sympy-15011](./eval_outputs/sympy__sympy-15011/run_instance.log)
290
- - [sympy__sympy-15308](./eval_outputs/sympy__sympy-15308/run_instance.log)
291
- - [sympy__sympy-15346](./eval_outputs/sympy__sympy-15346/run_instance.log)
292
- - [sympy__sympy-15609](./eval_outputs/sympy__sympy-15609/run_instance.log)
293
- - [sympy__sympy-15678](./eval_outputs/sympy__sympy-15678/run_instance.log)
294
- - [sympy__sympy-16106](./eval_outputs/sympy__sympy-16106/run_instance.log)
295
- - [sympy__sympy-16281](./eval_outputs/sympy__sympy-16281/run_instance.log)
296
- - [sympy__sympy-16503](./eval_outputs/sympy__sympy-16503/run_instance.log)
297
- - [sympy__sympy-16988](./eval_outputs/sympy__sympy-16988/run_instance.log)
298
- - [sympy__sympy-17022](./eval_outputs/sympy__sympy-17022/run_instance.log)
299
- - [sympy__sympy-18057](./eval_outputs/sympy__sympy-18057/run_instance.log)
300
- - [sympy__sympy-18087](./eval_outputs/sympy__sympy-18087/run_instance.log)
301
- - [sympy__sympy-18532](./eval_outputs/sympy__sympy-18532/run_instance.log)
302
- - [sympy__sympy-18621](./eval_outputs/sympy__sympy-18621/run_instance.log)
303
- - [sympy__sympy-18835](./eval_outputs/sympy__sympy-18835/run_instance.log)
304
- - [sympy__sympy-19007](./eval_outputs/sympy__sympy-19007/run_instance.log)
305
- - [sympy__sympy-19254](./eval_outputs/sympy__sympy-19254/run_instance.log)
306
- - [sympy__sympy-19487](./eval_outputs/sympy__sympy-19487/run_instance.log)
307
- - [sympy__sympy-20049](./eval_outputs/sympy__sympy-20049/run_instance.log)
308
- - [sympy__sympy-20212](./eval_outputs/sympy__sympy-20212/run_instance.log)
309
- - [sympy__sympy-20322](./eval_outputs/sympy__sympy-20322/run_instance.log)
310
- - [sympy__sympy-20442](./eval_outputs/sympy__sympy-20442/run_instance.log)
311
- - [sympy__sympy-20639](./eval_outputs/sympy__sympy-20639/run_instance.log)
312
- - [sympy__sympy-21055](./eval_outputs/sympy__sympy-21055/run_instance.log)
313
- - [sympy__sympy-21171](./eval_outputs/sympy__sympy-21171/run_instance.log)
314
- - [sympy__sympy-21614](./eval_outputs/sympy__sympy-21614/run_instance.log)
315
- - [sympy__sympy-21627](./eval_outputs/sympy__sympy-21627/run_instance.log)
316
- - [sympy__sympy-22005](./eval_outputs/sympy__sympy-22005/run_instance.log)
317
- - [sympy__sympy-22840](./eval_outputs/sympy__sympy-22840/run_instance.log)
318
- - [sympy__sympy-23117](./eval_outputs/sympy__sympy-23117/run_instance.log)
319
- - [sympy__sympy-23191](./eval_outputs/sympy__sympy-23191/run_instance.log)
320
- - [sympy__sympy-24102](./eval_outputs/sympy__sympy-24102/run_instance.log)
321
- - [sympy__sympy-24152](./eval_outputs/sympy__sympy-24152/run_instance.log)
322
- - [sympy__sympy-24909](./eval_outputs/sympy__sympy-24909/run_instance.log)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
outputs/swe_bench_lite/CoActPlannerAgent/claude-3-5-sonnet@20240620_maxiter_40_N_v1.0-no-hint/metadata.json CHANGED
@@ -1 +1 @@
1
- {"agent_class": "CoActPlannerAgent", "llm_config": {"model": "openai/claude-3-5-sonnet@20240620", "api_key": "******", "base_url": "https://llm-proxy.all-hands.dev/", "api_version": null, "embedding_model": "", "embedding_base_url": null, "embedding_deployment_name": null, "aws_access_key_id": null, "aws_secret_access_key": null, "aws_region_name": null, "num_retries": 8, "retry_multiplier": 2, "retry_min_wait": 15, "retry_max_wait": 120, "timeout": null, "max_message_chars": 10000, "temperature": 0, "top_p": 0.5, "custom_llm_provider": null, "max_input_tokens": null, "max_output_tokens": null, "input_cost_per_token": null, "output_cost_per_token": null, "ollama_base_url": null, "drop_params": null, "disable_vision": null, "caching_prompt": false}, "max_iterations": 40, "eval_output_dir": "evaluation/evaluation_outputs/outputs/swe-bench-lite/CoActPlannerAgent/claude-3-5-sonnet@20240620_maxiter_40_N_v1.0-no-hint", "start_time": "2024-09-15 00:54:27", "git_commit": "1764205043583212eb003f69464caea940db0ac0", "dataset": "swe-bench-lite", "data_split": null, "details": {}}
 
1
+ {"agent_class": "CoActPlannerAgent", "llm_config": {"model": "openai/claude-3-5-sonnet@20240620", "api_key": "******", "base_url": "https://llm-proxy.all-hands.dev/", "api_version": null, "embedding_model": "", "embedding_base_url": null, "embedding_deployment_name": null, "aws_access_key_id": null, "aws_secret_access_key": null, "aws_region_name": null, "num_retries": 8, "retry_multiplier": 2, "retry_min_wait": 15, "retry_max_wait": 120, "timeout": null, "max_message_chars": 10000, "temperature": 0, "top_p": 0.5, "custom_llm_provider": null, "max_input_tokens": null, "max_output_tokens": null, "input_cost_per_token": null, "output_cost_per_token": null, "ollama_base_url": null, "drop_params": null, "disable_vision": null, "caching_prompt": false}, "max_iterations": 40, "eval_output_dir": "evaluation/evaluation_outputs/outputs/swe-bench-lite/CoActPlannerAgent/claude-3-5-sonnet@20240620_maxiter_40_N_v1.0-no-hint", "start_time": "2024-09-26 00:08:57", "git_commit": "c5303f2be72ca966df3cd2164d1f25d9cebf1e12", "dataset": "swe-bench-lite", "data_split": null, "details": {}}
outputs/swe_bench_lite/CoActPlannerAgent/claude-3-5-sonnet@20240620_maxiter_40_N_v1.0-no-hint/output.jsonl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d6287417d5ee851f1fa21b3df267bbd475ca8943ea36871d29d0b43d4da99d8
3
- size 21006551
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:303d3c6f60b3b6deae31bedda3903077c1aa2a48a13694c65627c73a9b009caa
3
+ size 31604881
outputs/swe_bench_lite/CoActPlannerAgent/claude-3-5-sonnet@20240620_maxiter_40_N_v1.0-no-hint/run_id.txt CHANGED
@@ -1 +1 @@
1
- RUN_ID: 20240915_045214
 
1
+ RUN_ID: 20240926_005524