File size: 4,399 Bytes
51ff9e5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import argparse
import os
import subprocess

from datasets import load_dataset


# Run a shell command, reporting (not raising) failures.
def run_command(command):
    """Execute *command* via the shell.

    A non-zero exit status is printed rather than propagated, so callers
    never see an exception from a failed command and processing continues.
    """
    completed = subprocess.run(command, shell=True)
    if completed.returncode != 0:
        failure = subprocess.CalledProcessError(completed.returncode, command)
        print(f'An error occurred: {failure}')


# Authenticate to Docker Hub so subsequent pushes are authorized.
def docker_login():
    """Run an interactive `docker login` (credentials prompted by docker)."""
    print('Logging into Docker Hub...')
    run_command('docker login')


# Render the Dockerfile text used to layer patches and test deps onto a base image.
def generate_dockerfile_content(
    base_image, dependencies, datum, patch_path, test_patch_path
):
    """Build Dockerfile text that extends *base_image* for TestGenEval.

    The generated image: installs *dependencies* into the `testbed` conda
    env, applies the gold patch then the test patch from *patch_path* /
    *test_patch_path*, sets a git identity, deletes the instance's test
    file (`datum['test_file']`), and commits the result.

    Returns the complete Dockerfile content as a string.
    """
    pip_packages = ' '.join(dependencies)
    header = f"""
FROM {base_image}
SHELL ["/bin/bash", "-c"]
RUN source /opt/miniconda3/bin/activate && conda activate testbed && pip install {pip_packages}
COPY {patch_path} /app/patch.diff
RUN git apply /app/patch.diff
RUN rm /app/patch.diff
COPY {test_patch_path} /app/patch.diff
RUN git apply /app/patch.diff
RUN git config --global user.email ""
RUN git config --global user.name "TestGenEval"
RUN rm /app/patch.diff
RUN rm {datum['test_file']}
"""
    # Commit the applied changes so the image carries a clean working tree.
    return header + 'RUN git add .\nRUN git commit -m "Testing fixes"'


# Build an image from generated Dockerfile text, push it, then clean up.
def build_and_push_image(dockerfile_content, image_name):
    """Build *image_name* from an ephemeral Dockerfile, push it, and remove
    both the local image and the temporary Dockerfile afterwards."""
    temp_dockerfile = 'Dockerfile.temp'
    with open(temp_dockerfile, 'w') as handle:
        handle.write(dockerfile_content)
    for command in (
        f'docker build -f Dockerfile.temp -t {image_name} .',
        f'docker push {image_name}',
        f'docker rmi {image_name}',
    ):
        run_command(command)
    os.remove(temp_dockerfile)


# Function to process images with .eval in the name
def process_images(dataset, original_namespace, new_namespace, start_instance_id):
    """Mirror each dataset instance's eval image into *new_namespace*.

    For every instance (optionally starting at *start_instance_id*), pulls
    `{original_namespace}/sweb.eval.x86_64.<id>`, layers on the coverage /
    mutation-testing dependencies plus the gold and test patches, pushes the
    result to *new_namespace*, and cleans up local files and images.

    Args:
        dataset: iterable of dicts with 'instance_id', 'patch',
            'test_patch', and 'test_file' keys.
        original_namespace: Docker Hub namespace to pull from.
        new_namespace: Docker Hub namespace to push to.
        start_instance_id: instance_id to start processing from; empty
            string means process everything.
    """
    dependencies = ['coverage', 'cosmic-ray']

    found_start = len(start_instance_id) == 0
    for datum in dataset:
        if not found_start:
            if datum['instance_id'] != start_instance_id:
                continue
            # BUG FIX: the original set found_start on the match but then
            # skipped it via `elif`, so processing only resumed at the NEXT
            # instance. --start_instance_id is documented as "the
            # instance_id to start processing from", so include the match.
            found_start = True

        full_image_name = f'{original_namespace}/sweb.eval.x86_64.{datum["instance_id"].replace("__", "_s_")}:latest'
        print(f'Processing image: {full_image_name}')
        run_command(f'docker pull {full_image_name}')

        # Save patches and preds_context to regular files
        patch_file_path = 'patch.diff'
        test_patch_file_path = 'test_patch.diff'

        with (
            open(patch_file_path, 'w') as patch_file,
            open(test_patch_file_path, 'w') as test_patch_file,
        ):
            patch_file.write(datum['patch'])
            test_patch_file.write(datum['test_patch'])

        # Define image types and corresponding tags
        new_image_name = f'{new_namespace}/sweb.eval.x86_64.{datum["instance_id"].replace("__", "_s_")}:latest'
        dockerfile_content = generate_dockerfile_content(
            full_image_name,
            dependencies,
            datum,
            patch_file_path,
            test_patch_file_path,
        )
        build_and_push_image(dockerfile_content, new_image_name)

        # Cleanup regular files and images
        os.remove(patch_file_path)
        os.remove(test_patch_file_path)
        run_command(f'docker rmi {full_image_name}')
        run_command('docker system prune -f')  # Clean up dangling resources


def _main():
    """Parse CLI flags, log into Docker Hub, and mirror the dataset images."""
    parser = argparse.ArgumentParser(
        description='Process Docker images with .eval in the name.'
    )
    parser.add_argument('--dataset', type=str, default='kjain14/testgeneval')
    parser.add_argument('--split', type=str, default='test')
    parser.add_argument(
        '--new_namespace',
        type=str,
        default='kdjain',
        help='The new Docker Hub namespace to push the images',
    )
    parser.add_argument(
        '--original_namespace',
        type=str,
        default='xingyaoww',
        help='The original Docker Hub namespace',
    )
    parser.add_argument(
        '--start_instance_id',
        type=str,
        default='',
        help='The instance_id to start processing from',
    )
    args = parser.parse_args()

    # Load the requested split of the benchmark dataset, then republish
    # each instance's eval image under the new namespace.
    dataset = load_dataset(args.dataset)[args.split]

    docker_login()
    process_images(
        dataset, args.original_namespace, args.new_namespace, args.start_instance_id
    )


if __name__ == '__main__':
    _main()