Keras
legal
flowise / KTeleBERT /data_trans.py
kevin110211's picture
Upload 51 files
5d58b52
import os.path as osp
import numpy as np
import random
import torch
import argparse
import pdb
import json
'''
把数据合并
同时抽取一部分需要的数据出来
'''
this_dir = osp.dirname(__file__)
data_root = osp.abspath(osp.join(this_dir, '..', '..', 'data', ''))
data_path = "huawei"
data_path = osp.join(data_root, data_path)
with open(osp.join(data_path, 'product_corpus.json'), "r") as f:
data_doc = json.load(f)
with open(osp.join(data_path, '831_alarm_serialize.json'), "r") as f:
data_alarm = json.load(f)
# kpi_info.json
with open(osp.join(data_path, '917_kpi_serialize_50_mn.json'), "r") as f:
data_kpi = json.load(f)
# 实体的序列化
with open(osp.join(data_path, '5GC_KB/database_entity_serialize.json'), "r") as f:
data_entity = json.load(f)
random.shuffle(data_kpi)
random.shuffle(data_doc)
random.shuffle(data_alarm)
random.shuffle(data_entity)
data = data_alarm + data_kpi + data_entity + data_doc
random.shuffle(data)
# 241527
pdb.set_trace()
with open(osp.join(data_path, 'Seq_data_large.json'), "w") as fp:
json.dump(data, fp, ensure_ascii=False)
# 三元组
with open(osp.join(data_path, '5GC_KB/database_triples.json'), "r") as f:
data = json.load(f)
random.shuffle(data)
with open(osp.join(data_path, 'KG_data_base.json'), "w") as fp:
json.dump(data, fp, ensure_ascii=False)