File size: 3,395 Bytes
22738ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# -*- coding: utf-8 -*-

import json
import os
import re
import matplotlib.pyplot as plt
import numpy as np
from io import StringIO
from App4api.bin import constants
from collections import OrderedDict
from App4api.bin.InformationExtractor import InformationExtractor
from App4api.bin.ParameterExtractor import ParameterExtractor
from App4api.bin.TechnologyFinder import TechnologyFinder

class ParamProcessor(object):
    """Extract parameters from a set of patents and export them as JSON.

    Every entry of ``patents`` is a JSON document held in a string. Each
    document must provide the keys ``number``, ``abstract``, ``claims``,
    ``description`` and ``source``.
    """

    def __init__(self, patents, input_folder, file_extension):
        """Store the corpus and where it comes from.

        Args:
            patents: iterable of JSON strings, one per patent.
            input_folder: path of the corpus folder; its last path component
                names the sub-folder of ``constants.GRAPH_FOLDER`` that
                receives the output file.
            file_extension: kept on the instance; not read by this class.
        """
        self.patents = patents
        self.input_folder = input_folder
        self.file_extension = file_extension
        print("Processing started")

    def change_keys(self, dictionnary, number):
        """Return a copy of ``dictionnary`` with every key prefixed.

        A key ``k`` becomes ``number + '-' + str(k)``. Nested dictionaries
        are rewritten recursively using the already suffixed prefix, so keys
        one level down start with ``number + '--'``. Anything that is not a
        dictionary is returned unchanged.

        Args:
            dictionnary: the mapping to rewrite (or any other value).
            number: string prefix, e.g. a lower-cased patent number.

        Returns:
            A new ``dict`` with prefixed keys, or ``dictionnary`` itself when
            it is not a dictionary.
        """
        prefix = number + '-'
        if not isinstance(dictionnary, dict):
            return dictionnary
        return {
            prefix + str(key): self.change_keys(value, prefix)
            for key, value in dictionnary.items()
        }

    def _build_concept(self, patent):
        """Build the ``{"concept": ...}`` entry describing one parsed patent."""
        number = patent['number']
        abstract = patent['abstract']
        claims = patent['claims']
        description = patent['description']
        source = patent['source']

        # NOTE(review): the three sections are joined without a separator, so
        # the last line of one section and the first line of the next are
        # handed to the extractor as a single line.
        lines = (abstract + claims + description).splitlines()

        # Collected per patent: a list shared across patents would leak the
        # parameters of earlier patents into every later one.
        found = []
        for line in lines:
            extracted = ParameterExtractor(line).extract_parameters()
            if extracted:
                found.extend(extracted)

        # De-duplicate while keeping first-seen order so the number given to
        # each parameter is reproducible from one run to the next.
        unique_parameters = list(OrderedDict.fromkeys(found))

        numbered = dict(enumerate(unique_parameters, 1))
        return {
            "concept": {
                "source": source,
                "valeurs": self.change_keys(numbered, number.lower()),
            }
        }

    def process_corpus(self):
        """Extract the parameters of every patent and write the result file.

        For each patent, every line of abstract + claims + description is
        passed to ``ParameterExtractor``; the distinct parameters found are
        numbered from 1 and keyed as ``<patent number, lower-cased>-<index>``.

        The result is written to ``parameters-graph.json`` inside
        ``constants.GRAPH_FOLDER + <last component of input_folder> + "/"``.

        Returns:
            dict: ``{"parameters": [{"concept": {"source": ...,
            "valeurs": {...}}}, ...]}`` with one entry per patent, as parsed
            back from the JSON text that was written.

        Raises:
            KeyError: if a patent lacks one of the required keys.
            ValueError: if a patent is not valid JSON.
            OSError: if the output file cannot be opened for writing.
        """
        project_folder = os.path.basename(os.path.normpath(self.input_folder))
        graph_folder = constants.GRAPH_FOLDER + project_folder + "/"

        concepts = [
            self._build_concept(json.loads(patent_file))
            for patent_file in self.patents
        ]

        # sort_keys=True gives the file a stable, lexicographic key order
        # (e.g. "x-10" sorts before "x-2").
        serialized = json.dumps(
            {"parameters": concepts},
            sort_keys=True,
            indent=4,
            separators=(',', ': '),
        )

        with open(graph_folder + "parameters-graph.json", 'w') as json_graph:
            json_graph.write(serialized)

        # Parse the text back so the caller receives exactly what was stored.
        return json.loads(serialized)