"""Split one annotation JSON file into a fixed number of part files.

The input file is loaded as a single JSON array, cut into ``num_parts``
consecutive chunks of ``ceil(total / num_parts)`` items each, and every chunk
is written to ``annotations_part_<k>.json`` (k starting at 1) inside
``anno_output_dir``.
"""

import json
import os
import math

anno_json_path = (
    "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/llava_gen_558k.json"
)
num_parts = 8
anno_output_dir = "../annotations/"


def split_into_parts(items: list, num_parts: int) -> list:
    """Return ``num_parts`` consecutive slices of ``items``.

    Every slice holds ``ceil(len(items) / num_parts)`` items except the tail:
    the last non-empty slice may be shorter, and when there are too few items
    the trailing slices are empty. The result always has exactly
    ``num_parts`` entries.

    Raises:
        ValueError: if ``num_parts`` is not a positive integer.
    """
    if num_parts <= 0:
        raise ValueError(f"num_parts must be positive, got {num_parts}")
    per_part = math.ceil(len(items) / num_parts)
    # Slicing clamps at the end of the list, so no explicit min() is needed.
    return [
        items[index * per_part:(index + 1) * per_part]
        for index in range(num_parts)
    ]


def write_parts(parts: list, output_dir: str) -> list:
    """Write each part to ``annotations_part_<k>.json`` in ``output_dir``.

    The directory is created if missing. The size of each part is printed
    right after its file is written. Returns the written paths in order.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)
    written_paths = []
    for part_number, subset in enumerate(parts, start=1):
        part_path = os.path.join(
            output_dir, f"annotations_part_{part_number}.json"
        )
        # Explicit encoding keeps the output independent of the locale.
        with open(part_path, "w", encoding="utf-8") as f:
            json.dump(subset, f)
        print(len(subset))
        written_paths.append(part_path)
    return written_paths


def main() -> None:
    """Load the configured annotation file, split it, and save the parts."""
    # JSON text is UTF-8 by specification; do not rely on the locale default.
    with open(anno_json_path, "r", encoding="utf-8") as f:
        annotation_data = json.load(f)

    write_parts(split_into_parts(annotation_data, num_parts), anno_output_dir)
    # Report the configured part count instead of a hard-coded number.
    print(f"标注已成功分成{num_parts}份,并保存到文件夹中。")


if __name__ == "__main__":
    main()