File size: 1,354 Bytes
5d58b52 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
import os.path as osp
import numpy as np
import random
import torch
import argparse
import pdb
import json
'''
Merge the data sources together,
and at the same time extract the portion of the data that is needed.
'''
def _load_json(path):
    """Read and return the JSON document stored at *path* (decoded as UTF-8)."""
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


def _dump_json(obj, path):
    """Write *obj* to *path* as JSON, leaving non-ASCII characters unescaped."""
    # ensure_ascii=False emits raw non-ASCII characters, so the file encoding
    # is pinned to UTF-8 rather than left to the platform's locale default.
    with open(path, "w", encoding="utf-8") as fp:
        json.dump(obj, fp, ensure_ascii=False)


# Resolve <this file's dir>/../../data/huawei as the working data directory.
this_dir = osp.dirname(__file__)
data_root = osp.abspath(osp.join(this_dir, '..', '..', 'data', ''))
data_path = osp.join(data_root, "huawei")

# Each input must hold a top-level JSON array: the contents are shuffled in
# place and concatenated with `+` below.
data_doc = _load_json(osp.join(data_path, 'product_corpus.json'))
data_alarm = _load_json(osp.join(data_path, '831_alarm_serialize.json'))
# Original note here read "kpi_info.json" -- presumably an earlier name for
# the KPI input; TODO confirm.
data_kpi = _load_json(osp.join(data_path, '917_kpi_serialize_50_mn.json'))
# Serialized entities.
data_entity = _load_json(
    osp.join(data_path, '5GC_KB/database_entity_serialize.json'))

# Shuffle each source on its own, then shuffle the merged list once more.
random.shuffle(data_kpi)
random.shuffle(data_doc)
random.shuffle(data_alarm)
random.shuffle(data_entity)
data = data_alarm + data_kpi + data_entity + data_doc
random.shuffle(data)
# Original note here read "241527" -- presumably len(data) observed on the
# author's dataset; TODO confirm.
# NOTE: a leftover pdb.set_trace() breakpoint was removed at this point so the
# script runs to completion unattended.
_dump_json(data, osp.join(data_path, 'Seq_data_large.json'))

# Triples: loaded, shuffled, and written back out under a new name.
data = _load_json(osp.join(data_path, '5GC_KB/database_triples.json'))
random.shuffle(data)
_dump_json(data, osp.join(data_path, 'KG_data_base.json'))
|