Backup-bdg's picture
Upload 964 files
51ff9e5 verified
raw
history blame
4.4 kB
import argparse
import os
import subprocess
from datasets import load_dataset
# Function to run shell commands
def run_command(command):
try:
subprocess.run(command, check=True, shell=True)
except subprocess.CalledProcessError as e:
print(f'An error occurred: {e}')
# Function to log in to Docker Hub
def docker_login():
print('Logging into Docker Hub...')
run_command('docker login')
# Function to generate Dockerfile content based on image type
def generate_dockerfile_content(
base_image, dependencies, datum, patch_path, test_patch_path
):
dockerfile_content = f"""
FROM {base_image}
SHELL ["/bin/bash", "-c"]
RUN source /opt/miniconda3/bin/activate && conda activate testbed && pip install {' '.join(dependencies)}
COPY {patch_path} /app/patch.diff
RUN git apply /app/patch.diff
RUN rm /app/patch.diff
COPY {test_patch_path} /app/patch.diff
RUN git apply /app/patch.diff
RUN git config --global user.email ""
RUN git config --global user.name "TestGenEval"
RUN rm /app/patch.diff
RUN rm {datum['test_file']}
"""
# Add specific content based on image type
dockerfile_content += 'RUN git add .\nRUN git commit -m "Testing fixes"'
return dockerfile_content
# Function to build, push, and clean up Docker images
def build_and_push_image(dockerfile_content, image_name):
with open('Dockerfile.temp', 'w') as dockerfile:
dockerfile.write(dockerfile_content)
run_command(f'docker build -f Dockerfile.temp -t {image_name} .')
run_command(f'docker push {image_name}')
run_command(f'docker rmi {image_name}')
os.remove('Dockerfile.temp')
# Function to process images with .eval in the name
def process_images(dataset, original_namespace, new_namespace, start_instance_id):
dependencies = ['coverage', 'cosmic-ray']
found_start = len(start_instance_id) == 0
for datum in dataset:
if not found_start and datum['instance_id'] == start_instance_id:
found_start = True
elif found_start:
full_image_name = f'{original_namespace}/sweb.eval.x86_64.{datum["instance_id"].replace("__", "_s_")}:latest'
print(f'Processing image: {full_image_name}')
run_command(f'docker pull {full_image_name}')
# Save patches and preds_context to regular files
patch_file_path = 'patch.diff'
test_patch_file_path = 'test_patch.diff'
with (
open(patch_file_path, 'w') as patch_file,
open(test_patch_file_path, 'w') as test_patch_file,
):
patch_file.write(datum['patch'])
test_patch_file.write(datum['test_patch'])
# Define image types and corresponding tags
new_image_name = f'{new_namespace}/sweb.eval.x86_64.{datum["instance_id"].replace("__", "_s_")}:latest'
dockerfile_content = generate_dockerfile_content(
full_image_name,
dependencies,
datum,
patch_file_path,
test_patch_file_path,
)
build_and_push_image(dockerfile_content, new_image_name)
# Cleanup regular files and images
os.remove(patch_file_path)
os.remove(test_patch_file_path)
run_command(f'docker rmi {full_image_name}')
run_command('docker system prune -f') # Clean up dangling resources
if __name__ == '__main__':
parser = argparse.ArgumentParser(
description='Process Docker images with .eval in the name.'
)
parser.add_argument('--dataset', type=str, default='kjain14/testgeneval')
parser.add_argument('--split', type=str, default='test')
parser.add_argument(
'--new_namespace',
type=str,
default='kdjain',
help='The new Docker Hub namespace to push the images',
)
parser.add_argument(
'--original_namespace',
type=str,
default='xingyaoww',
help='The original Docker Hub namespace',
)
parser.add_argument(
'--start_instance_id',
type=str,
default='',
help='The instance_id to start processing from',
)
args = parser.parse_args()
dataset = load_dataset(args.dataset)[args.split]
docker_login()
process_images(
dataset, args.original_namespace, args.new_namespace, args.start_instance_id
)