# diffusion/utils/merge_json.py
# Uploaded by starriver030515 -- commit "Upload folder using huggingface_hub" (a501a0c, verified)
import json
import random
# Input annotation files to merge, and the destination of the merged result.
json1_path = "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/blip_laion_cc_sbu_558k.json"
json2_path = "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/mscoco_raw_200k.json"
json_path = "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/llava_coco_raw_758k.json"


def load_json(path):
    """Return the parsed contents of the JSON file at *path*."""
    with open(path, encoding="utf-8") as f:
        return json.load(f)


def merge_datasets(data1, data2):
    """Merge two parsed JSON documents and randomize the order of the result.

    Args:
        data1: First document; a list or a dict.
        data2: Second document; must be the same kind of container as data1.

    Returns:
        For two lists, a new shuffled list holding the elements of both.
        For two dicts, a new dict holding the entries of both (values from
        data2 win on duplicate keys) with the key order shuffled.

    Raises:
        TypeError: If the inputs are not both lists or both dicts.
    """
    if isinstance(data1, dict) and isinstance(data2, dict):
        # random.shuffle only works on mutable sequences, so shuffle the
        # (key, value) pairs and rebuild the dict in the shuffled order.
        merged_items = list({**data1, **data2}.items())
        random.shuffle(merged_items)
        return dict(merged_items)
    if isinstance(data1, list) and isinstance(data2, list):
        merged_data = data1 + data2
        random.shuffle(merged_data)
        return merged_data
    raise TypeError("data1 and data2 should be of the same type, either list or dict")


def main():
    """Merge the two input files, write the result, and print its size."""
    merged_data = merge_datasets(load_json(json1_path), load_json(json2_path))

    # Write the merged JSON to the new file.
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(merged_data, f)

    # Read the file back so the printed count reflects what is on disk.
    print(len(load_json(json_path)))


if __name__ == "__main__":
    main()