"""Merge two JSON annotation files, shuffle the result, and write it out.

Both inputs must hold the same top-level JSON type: two lists are
concatenated, two dicts are merged (keys from the second file win on
conflict). The merged data is shuffled, written to the output path, then
read back so the number of top-level entries can be printed.
"""
import json
import random

json1_path = "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/blip_laion_cc_sbu_558k.json"
json2_path = "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/mscoco_raw_200k.json"
json_path = "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/llava_coco_raw_758k.json"


def _load_json(path):
    """Return the parsed content of the JSON file at *path*."""
    # JSON text is UTF-8; do not depend on the locale's default encoding.
    with open(path, encoding="utf-8") as f:
        return json.load(f)


def _merge(data1, data2):
    """Combine two lists or two dicts; raise TypeError for any other pairing."""
    if isinstance(data1, dict) and isinstance(data2, dict):
        # Later keys override earlier ones.
        return {**data1, **data2}
    if isinstance(data1, list) and isinstance(data2, list):
        return data1 + data2
    raise TypeError("data1 and data2 should be of the same type, either list or dict")


def _shuffle(data):
    """Return *data* in random order (lists in place, dicts rebuilt)."""
    if isinstance(data, list):
        random.shuffle(data)
        return data
    # random.shuffle() only works on mutable sequences; calling it on a dict
    # fails, so shuffle the (key, value) pairs and rebuild the mapping.
    items = list(data.items())
    random.shuffle(items)
    return dict(items)


def main(first_path=json1_path, second_path=json2_path, output_path=json_path):
    """Merge, shuffle and save the two JSON files; print and return the count.

    Args:
        first_path: Path of the first JSON file.
        second_path: Path of the second JSON file.
        output_path: Path the merged, shuffled JSON is written to.

    Returns:
        Number of top-level entries found when reading the output file back.

    Raises:
        TypeError: If the two files are not both lists or both dicts.
    """
    # Load both inputs.
    data1 = _load_json(first_path)
    data2 = _load_json(second_path)

    # Merge according to the shared top-level type, then randomize the order.
    merged_data = _shuffle(_merge(data1, data2))

    # Write the merged JSON to the new file.
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(merged_data, f)

    # Read the file back to confirm it parses and report its size.
    count = len(_load_json(output_path))
    print(count)
    return count


if __name__ == "__main__":
    main()