"""This script compares gold patches with OpenHands-generated patches and checks whether
OpenHands found the right (set of) files to modify.
"""
import argparse | |
import json | |
import re | |
# Matches the "diff --git a/<path> b/<path>" header that starts each file
# section of a git-format patch and captures the "a/" side path.
# MULTILINE lets "^" anchor at the start of every line of the patch text.
_DIFF_HEADER_RE = re.compile(r'^diff --git a/(.*?) b/', re.MULTILINE)


def extract_modified_files(patch):
    """Return the set of file paths named in a patch's ``diff --git`` headers.

    Args:
        patch: Text of a git-format patch. ``None`` or an empty string is
            treated as a patch that touches no files.

    Returns:
        A set with the ``a/`` side path of every ``diff --git`` header line
        found in ``patch``; empty when there are none.
    """
    # A missing patch (e.g. a JSON null) modifies nothing; do not crash on it.
    if not patch:
        return set()
    return set(_DIFF_HEADER_RE.findall(patch))
def process_report(oh_output_file):
    """Report how often the generated patch touches the gold patch's file.

    Reads ``oh_output_file`` as JSON Lines. For each record it compares the
    files named in ``record['swe_instance']['patch']`` (gold) with those named
    in ``record['git_patch']`` (generated). A record counts as a success when
    every gold file also appears in the generated patch. Mismatches are
    printed as they are found, followed by a summary line.

    Args:
        oh_output_file: Path to the output file, one JSON object per line.

    Raises:
        AssertionError: If a gold patch does not modify exactly one file.
    """
    succ = 0
    fail = 0
    # The context manager closes the file even if a record fails to parse.
    with open(oh_output_file) as report:
        for raw_line in report:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not raw_line.strip():
                continue
            record = json.loads(raw_line)
            instance_id = record['instance_id']
            gold_modified_files = extract_modified_files(
                record['swe_instance']['patch']
            )
            # swe-bench lite only: a gold patch always contains exactly one file
            assert len(gold_modified_files) == 1, (
                f'{instance_id}: expected exactly one file in the gold patch, '
                f'got {gold_modified_files}'
            )
            generated_modified_files = extract_modified_files(record['git_patch'])

            # Success means all gold files are also in the generated patch;
            # extra files in the generated patch are allowed.
            if gold_modified_files.issubset(generated_modified_files):
                succ += 1
            else:
                fail += 1
                print(
                    f'{instance_id}: file mismatch, gold = {gold_modified_files}, generated = {generated_modified_files}'
                )

    total = succ + fail
    # An empty report has no instances; avoid dividing by zero.
    success_rate = succ / total if total else 0.0
    print(
        f'\nSUMMARY: {succ} out of {total} instances found correct files to edit, success rate = {success_rate}'
    )
if __name__ == '__main__':
    # Command-line entry point: read the report path and print the comparison.
    parser = argparse.ArgumentParser()
    # Required: without a path, process_report would be handed None and fail
    # with an unhelpful TypeError instead of a usage message.
    parser.add_argument(
        '--oh_output_file', required=True, help='Path to the OH output file'
    )
    args = parser.parse_args()
    process_report(args.oh_output_file)