import json
import os
import random
import shutil
# Folder holding the generated images that will be copied.
source_image_folder = "/mnt/petrelfs/zhuchenglin/diffusion/images_large"
# Folder the images are copied into.
target_image_folder = (
    "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/images"
)
# COCO captions annotation file (JSON) that drives the copy.
annotations_coco_path = (
    "/mnt/petrelfs/zhuchenglin/diffusion/coco/annotations/captions_train2017.json"
)
# Load the whole annotations file into memory. JSON is read as UTF-8
# explicitly so the result does not depend on the platform's default encoding.
with open(annotations_coco_path, "r", encoding="utf-8") as f:
    annotations = json.load(f)

# Accumulator for rewritten annotation records.
# NOTE(review): not used in the lines visible here — presumably filled later
# in the script; confirm.
new_annotations = []
# Make sure the destination exists: shutil.copy does not create missing
# parent directories and would otherwise fail on the first copy.
os.makedirs(target_image_folder, exist_ok=True)

# Only the first 200000 entries of annotations["annotations"] are processed;
# `index` is the position inside that slice and becomes the target file name.
for index, annotation in enumerate(annotations["annotations"][:200000]):
    print(index)
    image_id, pid = annotation["image_id"], annotation["id"]
    # Source files are named "<image_id zero-padded to 12 digits>_<id>_gen.jpg".
    source_image_path = os.path.join(
        source_image_folder, f"{image_id:012d}_{pid}_gen.jpg"
    )
    target_image_path = os.path.join(target_image_folder, f"{index}.jpg")
    # Entries whose source image is missing are skipped without copying.
    if os.path.exists(source_image_path):
        shutil.copy(source_image_path, target_image_path)