Spaces:
Build error
Build error
# -*- coding: utf-8 -*- | |
import json | |
import os | |
import re | |
import matplotlib.pyplot as plt | |
import numpy as np | |
from io import StringIO | |
from App4api.bin import constants | |
from collections import OrderedDict | |
from App4api.bin.InformationExtractor import InformationExtractor | |
from App4api.bin.ParameterExtractor import ParameterExtractor | |
from App4api.bin.TechnologyFinder import TechnologyFinder | |
class ParamProcessor(object):
    """Extract parameters from a corpus of patents and export them as JSON.

    Each patent is supplied as a JSON document held in a string; the
    document must provide the ``number``, ``abstract``, ``claims``,
    ``description`` and ``source`` keys.
    """

    def __init__(self, patents, input_folder, file_extension):
        """Store the corpus and where it comes from.

        Args:
            patents: Iterable of JSON strings, one per patent.
            input_folder: Path of the corpus folder; its last component is
                used as the name of the output sub-folder.
            file_extension: Kept on the instance for callers; the methods
                defined here do not read it.
        """
        self.patents = patents
        self.input_folder = input_folder
        self.file_extension = file_extension
        print("Processing started")

    def change_keys(self, dictionnary, number):
        """Return a copy of *dictionnary* whose keys are prefixed by *number*.

        Every key ``k`` becomes ``number + '-' + str(k)``. Nested
        dictionaries are rewritten recursively; the prefix handed down
        already ends with the dash, so each nesting level adds one more dash
        (``{'a': {'b': 1}}`` with ``'p'`` gives ``{'p-a': {'p--b': 1}}``).

        Args:
            dictionnary: Dictionary to rewrite. Any value that is not a
                dictionary is returned unchanged.
            number: String used as key prefix.

        Returns:
            A new ``dict`` with prefixed keys, or the input itself when it is
            not a dictionary.
        """
        if not isinstance(dictionnary, dict):
            return dictionnary
        prefix = number + '-'
        return {
            prefix + str(key): self.change_keys(value, prefix)
            for key, value in dictionnary.items()
        }

    def _extract_parameters(self, text):
        """Return the distinct parameters found in *text*, first seen first.

        The text is fed line by line to ``ParameterExtractor``. Duplicates
        are dropped while keeping the order of first appearance, so the
        numbering derived from this list is stable from one run to the next.
        """
        # NOTE(review): extract_parameters() is assumed to return an iterable
        # of hashable items (or a falsy value when nothing is found).
        seen = set()
        ordered = []
        for line in text.splitlines():
            found = ParameterExtractor(line).extract_parameters()
            if not found:
                continue
            for parameter in found:
                if parameter not in seen:
                    seen.add(parameter)
                    ordered.append(parameter)
        return ordered

    def _build_concept(self, number, source, parameters):
        """Return the graph entry of one patent.

        Parameters are numbered from 1 and keyed as
        ``<lower-cased patent number>-<index>``.
        """
        numbered = dict(enumerate(parameters, 1))
        return {
            "concept": {
                "source": source,
                "valeurs": self.change_keys(numbered, number.lower()),
            }
        }

    def process_corpus(self):
        """Build the parameters graph of the corpus and write it to disk.

        For each patent, the abstract, claims and description are
        concatenated, the parameters of every line are extracted, and one
        ``concept`` entry is produced that lists the patent's own parameters.
        The result is written to ``parameters-graph.json`` in
        ``constants.GRAPH_FOLDER + <last component of input_folder> + "/"``.

        Returns:
            dict: ``{"parameters": [{"concept": {"source": ...,
            "valeurs": {...}}}, ...]}`` as read back from the serialized
            JSON.

        Raises:
            ValueError: If a patent is not valid JSON.
            KeyError: If a patent lacks one of the expected keys.
        """
        project_folder = os.path.basename(os.path.normpath(self.input_folder))
        graph_folder = constants.GRAPH_FOLDER + project_folder + "/"

        concepts = []
        for patent_file in self.patents:
            patent = json.loads(patent_file)
            number = patent['number']
            abstract = patent['abstract']
            claims = patent['claims']
            description = patent['description']
            source = patent['source']

            # Parameters are collected per patent: sharing one list across
            # the loop would attribute earlier patents' parameters to every
            # later patent.
            parameters = self._extract_parameters(
                abstract + claims + description
            )
            concepts.append(self._build_concept(number, source, parameters))

        json_write_to_file = json.dumps(
            {"parameters": concepts},
            sort_keys=False,
            indent=4,
            separators=(',', ': '),
        )

        # The project sub-folder may not exist yet on a first run.
        os.makedirs(graph_folder, exist_ok=True)
        with open(graph_folder + "parameters-graph.json", 'w',
                  encoding="utf-8") as json_graph:
            json_graph.write(json_write_to_file)

        # Return the JSON-normalized structure, i.e. exactly what a reader of
        # the file would get.
        return json.loads(json_write_to_file)