import json
from collections import defaultdict

# Adjacency lists of the knowledge graph:
#   head entity id -> [(relation id, tail entity id), ...]
kg = defaultdict(list)

# Vocabularies that hand out the next free integer id the first time a key is
# looked up (the default factory returns the current size of the mapping).
entity2id = defaultdict(lambda: len(entity2id))
relation2id = defaultdict(lambda: len(relation2id))

with open('id2info.json', encoding='utf-8') as info_file:
    id2info = json.load(info_file)

# Each info dict describes one entity: its 'name' is the head of every triple,
# every other key is a relation, and the value(s) under that key are the tails.
for info in id2info.values():
    head = info['name']
    for relation, raw_value in info.items():
        if relation == 'name':
            continue
        # A scalar value is handled as a one-element list of tails.
        tails = raw_value if isinstance(raw_value, list) else [raw_value]
        for tail in tails:
            # Ids are looked up here, per triple, in head -> relation -> tail
            # order, so an entity without attributes (or an attribute with an
            # empty list) never allocates an id.
            triples = kg[entity2id[head]]
            triples.append((relation2id[relation], entity2id[tail]))

print(len(kg), len(entity2id), len(relation2id))

# Persist the graph and both vocabularies. The json module writes integer keys
# as strings and tuples as arrays.
with open('kg.json', 'w', encoding='utf-8') as out_file:
    json.dump(kg, out_file, ensure_ascii=False)

with open('entity2id.json', 'w', encoding='utf-8') as out_file:
    json.dump(entity2id, out_file, ensure_ascii=False)

with open('relation2id.json', 'w', encoding='utf-8') as out_file:
    json.dump(relation2id, out_file, ensure_ascii=False)

# Collect the entity ids of every item mentioned in a dialog turn, keeping only
# mentions that already have an id. The `in` test does not trigger the default
# factory, so unknown mentions are skipped without growing the vocabulary.
item_ids = set()
with open('data.jsonl', encoding='utf-8') as dialog_file:
    for raw_line in dialog_file:
        record = json.loads(raw_line)
        for turn in record['dialog']:
            item_ids.update(
                entity2id[mention]
                for mention in turn['item']
                if mention in entity2id
            )

print(len(item_ids))

# Sort so the written list has a deterministic order.
item_ids = sorted(item_ids)
with open('item_ids.json', 'w', encoding='utf-8') as out_file:
    json.dump(item_ids, out_file, ensure_ascii=False)