File size: 1,832 Bytes
246d201
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
"""This script compares gold patches with OpenHands-generated patches and check whether

OpenHands found the right (set of) files to modify.

"""

import argparse
import json
import re


# Matches the header line git emits for every file in a patch, capturing the
# "a/" side path. MULTILINE anchors `^` at the start of each line, so a single
# scan over the whole patch replaces a manual per-line loop.
_DIFF_HEADER_RE = re.compile(r'^diff --git a/(.*?) b/', re.MULTILINE)


def extract_modified_files(patch):
    """Return the set of file paths touched by a git-format patch.

    Args:
        patch: Patch text as produced by ``git diff``. ``None`` or an empty
            string is accepted and treated as a patch that touches no files.

    Returns:
        A set containing the "a/" path of every ``diff --git a/<path> b/...``
        header line found in the patch.
    """
    # A missing or empty patch modifies nothing; returning early also avoids
    # calling string methods on None.
    if not patch:
        return set()
    return set(_DIFF_HEADER_RE.findall(patch))


def process_report(oh_output_file):
    """Report how often the generated patch touches the gold patch's file.

    Reads ``oh_output_file`` as JSON Lines. Each record must provide
    ``instance_id``, ``swe_instance.patch`` (the gold patch) and ``git_patch``
    (the generated patch). An instance counts as a success when every file
    modified by the gold patch is also modified by the generated patch.
    Mismatches are printed per instance, followed by a summary line.

    Args:
        oh_output_file: Path to the JSONL output file to evaluate.

    Raises:
        AssertionError: If a gold patch does not modify exactly one file.
    """
    succ = 0
    fail = 0
    # The context manager guarantees the file is closed even if a record
    # fails to parse or trips the assertion below.
    with open(oh_output_file, encoding='utf-8') as report:
        for raw_line in report:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not raw_line.strip():
                continue
            record = json.loads(raw_line)
            instance_id = record['instance_id']
            gold_patch = record['swe_instance']['patch']
            # Treat a null generated patch as "no files modified" rather than
            # crashing while parsing it.
            generated_patch = record['git_patch'] or ''
            gold_modified_files = extract_modified_files(gold_patch)
            # swe-bench lite only: a gold patch always contains exactly one file
            assert len(gold_modified_files) == 1
            generated_modified_files = extract_modified_files(generated_patch)

            # Check if all files in gold_patch are also in generated_patch
            if gold_modified_files.issubset(generated_modified_files):
                succ += 1
            else:
                fail += 1
                print(
                    f'{instance_id}: file mismatch, gold = {gold_modified_files}, generated = {generated_modified_files}'
                )

    total = succ + fail
    if total == 0:
        # Nothing was evaluated; a success rate would be a division by zero.
        print('\nSUMMARY: no instances found in the output file, nothing to evaluate')
        return
    print(
        f'\nSUMMARY: {succ} out of {total} instances found correct files to edit, success rate = {succ / total}'
    )


if __name__ == '__main__':
    # Command-line entry point: evaluate one OpenHands output file.
    parser = argparse.ArgumentParser(
        description='Check whether generated patches modify the same files as the gold patches.'
    )
    # The path is mandatory: without it process_report would be handed None
    # and fail inside open() with an unhelpful TypeError.
    parser.add_argument(
        '--oh_output_file', required=True, help='Path to the OH output file'
    )
    args = parser.parse_args()

    process_report(args.oh_output_file)