"""Merge two JSON annotation files into one shuffled JSON file.

Both inputs are loaded, combined (list concatenation or dict union),
shuffled into a random order, written to ``json_path``, and the written
file is read back so the number of stored entries can be printed.
"""
import json
import random

# First and second input JSON files.
json1_path = "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/blip_laion_cc_sbu_558k.json"
json2_path = "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/mscoco_raw_200k.json"
# Destination of the merged, shuffled data (also re-read for the final count).
json_path = "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/llava_coco_raw_758k.json"


def merge_datasets(data1, data2):
    """Return the two datasets merged into a new, randomly ordered container.

    Args:
        data1: The first dataset, either a ``list`` or a ``dict``.
        data2: The second dataset, of the same kind as ``data1``.

    Returns:
        For two lists, a new list holding the elements of both in random
        order. For two dicts, a new dict whose entries are the union of both
        (values from ``data2`` win on duplicate keys) inserted in random
        order. The inputs are not modified.

    Raises:
        TypeError: If the inputs are not both lists or both dicts.
    """
    if isinstance(data1, dict) and isinstance(data2, dict):
        # random.shuffle() swaps items by integer index, which fails on a
        # dict; shuffle the (key, value) pairs and rebuild the dict instead.
        entries = list({**data1, **data2}.items())
        random.shuffle(entries)
        return dict(entries)
    if isinstance(data1, list) and isinstance(data2, list):
        merged = data1 + data2
        random.shuffle(merged)
        return merged
    raise TypeError("data1 and data2 should be of the same type, either list or dict")


def main():
    """Load both inputs, write the merged file, and print its entry count."""
    # Load the first JSON file.
    with open(json1_path, encoding="utf-8") as f:
        data1 = json.load(f)
    # Load the second JSON file.
    with open(json2_path, encoding="utf-8") as f:
        data2 = json.load(f)

    merged_data = merge_datasets(data1, data2)

    # Write the merged JSON to the new file.
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(merged_data, f)

    # Read the file back so the printed count reflects what is on disk.
    with open(json_path, encoding="utf-8") as f:
        data1 = json.load(f)
    print(len(data1))


if __name__ == "__main__":
    main()