"""Copy the images referenced by a COCO caption annotation file into a folder.

The script reads the caption annotation JSON, takes the first 200000
annotation entries, and copies the image each entry refers to from
``source_folder`` into ``target_folder``.
"""
import json
import os
import shutil

source_folder = '/mnt/petrelfs/zhuchenglin/diffusion/coco/images/train2017'
target_folder = '/mnt/petrelfs/zhuchenglin/diffusion/images_large'
anno_json_path = "/mnt/petrelfs/zhuchenglin/diffusion/coco/annotations/captions_train2017.json"


def unique_image_ids(annotations):
    """Return the ``image_id`` of each annotation, de-duplicated.

    Args:
        annotations: iterable of mappings that each have an ``"image_id"`` key.

    Returns:
        A list of image ids in first-seen order, each appearing once.
    """
    # dict.fromkeys keeps insertion order, so the copy order still follows
    # the order of the annotation list.
    return list(dict.fromkeys(entry["image_id"] for entry in annotations))


def copy_images(annotations, src_dir, dst_dir):
    """Copy every image referenced by ``annotations`` from src_dir to dst_dir.

    Image files are named after the zero-padded 12-digit image id
    (``{image_id:012}.jpg``). Several annotations may point at the same image
    (NOTE(review): caption files presumably list multiple captions per image
    -- confirm against the dataset); each image is copied only once.

    Args:
        annotations: iterable of mappings that each have an ``"image_id"`` key.
        src_dir: directory containing the source ``.jpg`` files.
        dst_dir: directory to copy into; created if it does not exist.

    Returns:
        The number of distinct images copied.

    Raises:
        FileNotFoundError: if a referenced image is missing from ``src_dir``
            (propagated from ``shutil.copy``).
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(dst_dir, exist_ok=True)

    count = 0
    for image_id in unique_image_ids(annotations):
        file_name = f'{image_id:012}.jpg'
        source_path = os.path.join(src_dir, file_name)
        target_path = os.path.join(dst_dir, file_name)
        count += 1
        # Progress output: path about to be copied and running image count.
        print(source_path, count)
        shutil.copy(source_path, target_path)
    return count


def main():
    """Load the annotation file and copy the images it references."""
    with open(anno_json_path, 'r', encoding="utf-8") as f:
        annotation_data = json.load(f)
    # Only the first 200000 annotation entries are considered.
    annotations = annotation_data["annotations"][:200000]
    copy_images(annotations, source_folder, target_folder)


if __name__ == "__main__":
    main()