File size: 1,136 Bytes
a501a0c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import json
import random

# Source annotation files to combine and the destination of the merged result.
json1_path = "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/blip_laion_cc_sbu_558k.json"
json2_path = "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/mscoco_raw_200k.json"
output_path = "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/llava_coco_raw_758k.json"


def merge_data(data1, data2):
    """Combine two decoded JSON documents of the same container type.

    Args:
        data1: First document, either a list or a dict.
        data2: Second document, of the same container type as ``data1``.

    Returns:
        A new list (``data1`` followed by ``data2``) or a new dict in which
        entries of ``data2`` override entries of ``data1`` on key collisions.

    Raises:
        TypeError: If the two documents are not both lists or both dicts.
    """
    if isinstance(data1, dict) and isinstance(data2, dict):
        return {**data1, **data2}
    if isinstance(data1, list) and isinstance(data2, list):
        return data1 + data2
    raise TypeError("data1 and data2 should be of the same type, either list or dict")


def shuffle_data(merged_data):
    """Return ``merged_data`` with its entries in random order.

    Lists are shuffled in place and returned. Dicts cannot be passed to
    ``random.shuffle`` directly (it swaps elements by integer index, which
    raises ``KeyError`` on a string-keyed mapping), so their items are
    shuffled and a new dict with the randomized insertion order is returned.
    """
    if isinstance(merged_data, dict):
        items = list(merged_data.items())
        random.shuffle(items)
        return dict(items)
    random.shuffle(merged_data)
    return merged_data


def main(path1=json1_path, path2=json2_path, out_path=output_path):
    """Merge two JSON files, shuffle the result and write it to ``out_path``.

    After writing, the output file is read back and the number of top-level
    entries is printed as a sanity check.

    Args:
        path1: Path of the first JSON file.
        path2: Path of the second JSON file.
        out_path: Path the merged, shuffled JSON is written to.

    Raises:
        TypeError: If the two files do not hold the same container type.
    """
    # Load the first JSON file. JSON text is UTF-8, so do not rely on the locale.
    with open(path1, encoding="utf-8") as f:
        data1 = json.load(f)

    # Load the second JSON file.
    with open(path2, encoding="utf-8") as f:
        data2 = json.load(f)

    merged_data = shuffle_data(merge_data(data1, data2))

    # Write the merged JSON to the new file.
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(merged_data, f)

    # Re-read what was just written and report how many entries it contains.
    with open(out_path, encoding="utf-8") as f:
        written = json.load(f)
    print(len(written))


if __name__ == "__main__":
    main()