"""Copy generated images into the target image folder under index-based names.

For each of the first ``MAX_ANNOTATIONS`` entries of the annotation file's
``"annotations"`` list, the generated image named
``{image_id:012d}_{id}_gen.jpg`` in ``source_image_folder`` is copied, when it
exists, to ``{index}.jpg`` in ``target_image_folder``.
"""
import os
import shutil
import json
import random

# path to the generated images
source_image_folder = "/mnt/petrelfs/zhuchenglin/diffusion/images_large"
# path to the target folder
target_image_folder = (
    "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/images"
)
# path to the COCO annotations file
annotations_coco_path = (
    "/mnt/petrelfs/zhuchenglin/diffusion/coco/annotations/captions_train2017.json"
)
# number of leading annotation entries to process
MAX_ANNOTATIONS = 200000

# JSON is UTF-8 by specification; do not depend on the platform default encoding.
with open(annotations_coco_path, "r", encoding="utf-8") as f:
    annotations = json.load(f)

# shutil.copy does not create missing parent directories, so make sure the
# destination folder exists before the first copy.
os.makedirs(target_image_folder, exist_ok=True)

# NOTE(review): nothing in the visible part of the script appends to this list;
# presumably it is filled further down - confirm.
new_annotations = []
for index, annotation in enumerate(annotations["annotations"][:MAX_ANNOTATIONS]):
    # Progress output: one line per processed annotation.
    print(index)
    image_id, pid = annotation["image_id"], annotation["id"]
    source_image_path = os.path.join(
        source_image_folder, f"{image_id:012d}_{pid}_gen.jpg"
    )
    # The target name uses the annotation's position in the list, so entries
    # whose source image is missing leave gaps in the numbering.
    target_image_path = os.path.join(target_image_folder, f"{index}.jpg")
    # Annotations without a generated image are skipped silently.
    if os.path.exists(source_image_path):
        shutil.copy(source_image_path, target_image_path)