"""Split one annotation JSON file into several contiguous, near-equal shards.

The input file is expected to hold a JSON array (it is sliced like a list).
Each shard is written to ``anno_output_dir`` as ``annotations_part_<k>.json``
with ``k`` starting at 1, and the size of every shard is printed.
"""

import json
import math  # not used below; retained in case code outside this section needs it
import os

# Source annotation file to be sharded.
anno_json_path = (
    "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/llava_gen_558k.json"
)

# Number of shards to produce.
num_parts = 8

# Directory that receives the shard files (created if missing).
anno_output_dir = "../annotations/"


def split_annotations(annotations: list, parts: int) -> list:
    """Return *annotations* cut into *parts* contiguous slices of near-equal size.

    Order is preserved, so concatenating the returned slices reproduces the
    input. Slice lengths differ by at most one: the first
    ``len(annotations) % parts`` slices receive one extra item. This avoids
    the lopsided result of ceil-based chunking, which can leave the trailing
    slices empty (e.g. 10 items in 8 parts -> 2,2,2,2,2,0,0,0).

    Args:
        annotations: Sliceable sequence of annotation records.
        parts: Number of slices to produce; must be at least 1.

    Returns:
        A list of exactly *parts* slices (some may be empty only when there
        are fewer items than parts).

    Raises:
        ValueError: If *parts* is smaller than 1.
    """
    if parts < 1:
        raise ValueError(f"parts must be a positive integer, got {parts}")

    base_size, remainder = divmod(len(annotations), parts)
    subsets = []
    start = 0
    for index in range(parts):
        # The first `remainder` slices absorb the leftover items, one each.
        end = start + base_size + (1 if index < remainder else 0)
        subsets.append(annotations[start:end])
        start = end
    return subsets


def main() -> None:
    """Load the annotation file, shard it, and write one JSON file per shard."""
    # Explicit UTF-8: JSON text is UTF-8 and must not depend on the locale.
    with open(anno_json_path, "r", encoding="utf-8") as f:
        annotation_data = json.load(f)

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(anno_output_dir, exist_ok=True)

    shards = split_annotations(annotation_data, num_parts)
    for part_number, annotations_subset in enumerate(shards, start=1):
        part_anno_json_path = os.path.join(
            anno_output_dir, f"annotations_part_{part_number}.json"
        )
        with open(part_anno_json_path, "w", encoding="utf-8") as f:
            json.dump(annotations_subset, f)
        print(len(annotations_subset))

    # Report the configured shard count instead of a hard-coded "8".
    print(f"标注已成功分成{num_parts}份,并保存到文件夹中。")


if __name__ == "__main__":
    main()