"""Copy the COCO train2017 images referenced by the caption annotations.

Reads the captions annotation file, takes the first ``max_annotations``
entries, and copies every image those entries reference from
``source_folder`` into ``target_folder``.
"""
import os
import shutil
import json

source_folder = '/mnt/petrelfs/zhuchenglin/diffusion/coco/images/train2017'
target_folder = '/mnt/petrelfs/zhuchenglin/diffusion/images_large'
anno_json_path = "/mnt/petrelfs/zhuchenglin/diffusion/coco/annotations/captions_train2017.json"
# Only this many leading annotation entries are considered.
max_annotations = 200000


def unique_image_ids(annotations):
    """Return the distinct ``image_id`` values of *annotations*, in first-seen order.

    Several annotation entries can point at the same image, so collapsing the
    ids first avoids copying one file over and over.
    """
    # dict preserves insertion order, which gives an ordered de-duplication.
    return list(dict.fromkeys(entry["image_id"] for entry in annotations))


def copy_images(image_ids, src_dir, dst_dir):
    """Copy ``<image_id, zero-padded to 12 digits>.jpg`` from *src_dir* to *dst_dir*.

    *dst_dir* is created when it does not exist. Each source path is printed
    together with a running 1-based counter before it is copied.

    Returns the number of images processed.
    Raises ``FileNotFoundError`` (from ``shutil.copy``) if a source image is
    missing, so an incomplete dataset is reported instead of silently skipped.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(dst_dir, exist_ok=True)
    count = 0
    for count, image_id in enumerate(image_ids, start=1):
        file_name = f'{image_id:012}.jpg'
        source_path = os.path.join(src_dir, file_name)
        target_path = os.path.join(dst_dir, file_name)
        print(source_path, count)
        shutil.copy(source_path, target_path)
    return count


def main():
    """Load the annotation file and copy the images it references."""
    with open(anno_json_path, 'r', encoding='utf-8') as f:
        annotation_data = json.load(f)
    annotations = annotation_data["annotations"][:max_annotations]
    copy_images(unique_image_ids(annotations), source_folder, target_folder)


if __name__ == "__main__":
    main()