# PatentSolver / App / bin / ParamProcessor.py
# xin's picture
# initial commit
# 22738ca
# -*- coding: utf-8 -*-
import json
import os
import re
import matplotlib.pyplot as plt
import numpy as np
from io import StringIO
from App4api.bin import constants
from collections import OrderedDict
from App4api.bin.InformationExtractor import InformationExtractor
from App4api.bin.ParameterExtractor import ParameterExtractor
from App4api.bin.TechnologyFinder import TechnologyFinder
class ParamProcessor(object):
    """Extract parameters from a corpus of patents and export them as JSON.

    Each patent is a JSON document (given as a string) that is read for the
    keys ``number``, ``abstract``, ``claims``, ``description`` and ``source``.
    Every line of the patent text is handed to ``ParameterExtractor``; the
    parameters found are de-duplicated, numbered, and written to
    ``parameters-graph.json`` in the project's graph folder.
    """

    def __init__(self, patents, input_folder, file_extension):
        """Store the corpus and the settings used by ``process_corpus``.

        Args:
            patents: Iterable of JSON documents, one string per patent.
            input_folder: Path of the input folder; only its last component
                is used, to name the output sub-folder.
            file_extension: Stored as given; no method of this class reads it.
        """
        self.patents = patents
        self.input_folder = input_folder
        self.file_extension = file_extension
        print("Processing started")

    def change_keys(self, dictionnary, number):
        """Return a copy of *dictionnary* with every key prefixed by *number*.

        A key ``k`` becomes ``"<number>-<k>"``. Nested dictionaries are
        rewritten recursively; the prefix passed down already ends with the
        separator, so each nesting level gains one more ``-``.

        Args:
            dictionnary: Mapping to rewrite. Any non-dict value is returned
                unchanged, which is what ends the recursion.
            number: String prefix (the patent number in ``process_corpus``).

        Returns:
            A new plain ``dict`` with prefixed string keys, or *dictionnary*
            itself when it is not a dict.
        """
        if not isinstance(dictionnary, dict):
            return dictionnary
        prefix = number + '-'
        return {
            prefix + str(key): self.change_keys(value, prefix)
            for key, value in dictionnary.items()
        }

    def _build_concept(self, patent):
        """Build the ``concept`` entry (source + numbered parameters) of one patent."""
        number = patent['number']
        abstract = patent['abstract']
        claims = patent['claims']
        description = patent['description']
        source = patent['source']

        # NOTE(review): the three sections are joined without a separator, so
        # the last line of one section and the first line of the next reach
        # the extractor as a single line -- confirm this is intended.
        patent_content = abstract + claims + description

        # The accumulator is local to this patent: sharing one list across
        # the whole corpus leaked the parameters of earlier patents into the
        # entries of later ones.
        found = []
        for line in patent_content.splitlines():
            parameters = ParameterExtractor(line).extract_parameters()
            if parameters:
                found.extend(parameters)

        # Drop duplicates while keeping first-seen order. A set would make
        # the numbering depend on string-hash randomisation, i.e. differ
        # from one run to the next.
        unique = list(OrderedDict.fromkeys(found))

        numbered = self.change_keys(dict(enumerate(unique, 1)), number.lower())
        return {"concept": {"source": source, "valeurs": numbered}}

    def process_corpus(self):
        """Extract the parameters of every patent and write the graph file.

        The result is written to ``parameters-graph.json`` inside
        ``constants.GRAPH_FOLDER + <last component of input_folder> + "/"``;
        the folder is created when missing.

        Returns:
            The written structure as parsed JSON:
            ``{"parameters": [{"concept": {"source": ..., "valeurs": {...}}}, ...]}``
            with one entry per patent, in corpus order.

        Raises:
            KeyError: If a patent lacks one of the expected keys.
            ValueError: If a patent is not valid JSON.
        """
        project_folder = os.path.basename(os.path.normpath(self.input_folder))
        graph_folder = constants.GRAPH_FOLDER + project_folder + "/"

        concepts = [
            self._build_concept(json.loads(patent_file))
            for patent_file in self.patents
        ]

        # Keys are sorted, as before, so the file layout is unchanged.
        serialized = json.dumps(
            {"parameters": concepts},
            sort_keys=True,
            indent=4,
            separators=(',', ': '),
        )

        os.makedirs(graph_folder, exist_ok=True)
        with open(graph_folder + "parameters-graph.json", 'w', encoding="utf-8") as json_graph:
            json_graph.write(serialized)

        # Round-trip through JSON so the returned value is exactly what a
        # reader of the file would get (string keys, JSON-native types).
        return json.loads(serialized)