File size: 937 Bytes
a501a0c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import json
import os
import math

# Source annotation file: expected to hold a JSON array (the script slices it).
anno_json_path = (
    "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/llava_gen_558k.json"
)
# Number of output files to produce.
num_parts = 8
# Directory that receives the annotations_part_<k>.json files.
anno_output_dir = "../annotations/"


def split_into_parts(items: list, num_parts: int) -> list:
    """Split *items* into exactly *num_parts* contiguous, balanced slices.

    Order is preserved and concatenating the returned slices reproduces
    *items*. Slice lengths differ by at most one: the first
    ``len(items) % num_parts`` slices receive one extra element. When
    ``len(items)`` is divisible by *num_parts* every slice has the same
    length.

    Args:
        items: Sequence to split; must support ``len()`` and slicing.
        num_parts: Number of slices to produce; must be at least 1.

    Returns:
        A list with *num_parts* slices of *items* (some may be empty when
        there are fewer items than parts).

    Raises:
        ValueError: If *num_parts* is smaller than 1.
    """
    if num_parts < 1:
        raise ValueError(f"num_parts must be at least 1, got {num_parts}")

    base_size, remainder = divmod(len(items), num_parts)
    parts = []
    start_idx = 0
    for part_index in range(num_parts):
        # Spread the remainder over the leading parts so that no part ends
        # up empty while another part holds two or more extra elements.
        end_idx = start_idx + base_size + (1 if part_index < remainder else 0)
        parts.append(items[start_idx:end_idx])
        start_idx = end_idx
    return parts


def main() -> None:
    """Load the annotation list, split it, and write one JSON file per part.

    Reads ``anno_json_path``, creates ``anno_output_dir`` if needed, writes
    ``annotations_part_1.json`` .. ``annotations_part_<num_parts>.json`` into
    it, and prints the number of annotations stored in each part.
    """
    # Explicit UTF-8 so decoding does not depend on the platform locale.
    with open(anno_json_path, "r", encoding="utf-8") as f:
        annotation_data = json.load(f)

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(anno_output_dir, exist_ok=True)

    subsets = split_into_parts(annotation_data, num_parts)
    for part_number, annotations_subset in enumerate(subsets, start=1):
        part_anno_json_path = os.path.join(
            anno_output_dir, f"annotations_part_{part_number}.json"
        )
        with open(part_anno_json_path, "w", encoding="utf-8") as f:
            json.dump(annotations_subset, f)
        print(len(annotations_subset))

    # Completion message (Chinese): "The annotations were successfully split
    # into <num_parts> parts and saved to the folder." The count is taken
    # from num_parts so the message stays correct if the constant changes.
    print(f"标注已成功分成{num_parts}份,并保存到文件夹中。")


if __name__ == "__main__":
    main()