File size: 4,342 Bytes
22738ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# -*- coding: utf-8 -*-

import json
import os
import re
import matplotlib.pyplot as plt
import numpy as np
from io import StringIO
from App4api.bin import constants
from collections import OrderedDict
from App4api.bin.InformationExtractor import InformationExtractor
from App4api.bin.ParameterExtractor import ParameterExtractor
from App4api.bin.TechnologyFinder import TechnologyFinder

class PGProcessor(object):
    """Build a problem graph from a corpus of patents.

    Every patent is a JSON document given as a string. The description of
    each patent is handed to ``InformationExtractor``; the JSON fragments it
    returns are assembled into one ``{"problem_graph": [...]}`` document that
    is written to ``graph.json`` and returned to the caller.
    """

    def __init__(self, patents,input_folder, file_extension):
        """Store the corpus and the settings forwarded to the extractor.

        Args:
            patents: iterable of JSON strings, one per patent.
            input_folder: folder of the project; its last path component
                names the sub-folder of ``constants.GRAPH_FOLDER`` that
                receives ``graph.json``.
            file_extension: passed through unchanged to
                ``InformationExtractor``.
        """
        self.patents = patents
        self.input_folder = input_folder
        self.file_extension = file_extension
        print("Processing started")

    @staticmethod
    def _count_concept_types(concepts_json):
        """Return ``(problems, partial_solutions)`` found in the graph.

        The graph maps each top-level key to a list of concepts; each concept
        is a dict whose values are attribute dicts carrying a ``"type"`` of
        either ``"problem"`` or ``"partialSolution"``.
        """
        problems = 0
        partial_solutions = 0
        for concepts in concepts_json.values():
            for concept in concepts:
                for attributes in concept.values():
                    concept_type = attributes.get("type")
                    if concept_type == "partialSolution":
                        partial_solutions += 1
                    elif concept_type == "problem":
                        problems += 1
        return problems, partial_solutions

    def process_corpus(self):
        """Extract concepts from every patent and write the problem graph.

        Each patent must provide the keys ``number``, ``claims``,
        ``description`` and ``source``. Only descriptions are analysed;
        patents that have claims only, or neither, are merely counted.

        Returns:
            dict: the parsed graph, ``{"problem_graph": [concept, ...]}``.

        Raises:
            KeyError: if a patent lacks one of the required keys.
            ValueError: if a patent or the assembled graph is not valid JSON
                (``json.JSONDecodeError`` is a ``ValueError``).
        """
        count_abstract = 0
        count_claims = 0
        count_description = 0
        count_patent = 0
        total_sentences_number = 0
        extracted_concepts = []

        input_folder = self.input_folder
        file_extension = self.file_extension
        project_folder = os.path.basename(os.path.normpath(input_folder))
        graph_folder = constants.GRAPH_FOLDER + project_folder + "/"

        for patent_file in self.patents:
            patent = json.loads(patent_file)
            number = patent['number']
            claims = patent['claims']
            description = patent['description']
            source = patent['source']

            if description != "":
                count_description += 1
                extractor = InformationExtractor(
                    description, input_folder, file_extension, number, source)
                # Keep the per-patent count in its own variable: unpacking it
                # straight into the running total would discard what was
                # accumulated for the previous patents.
                output_json, sentences_number = extractor.get_from_description()
                if output_json != "":
                    extracted_concepts.append(output_json)
                total_sentences_number += sentences_number
            elif claims != "":
                count_claims += 1
                print('Processing claims')
            else:
                count_abstract += 1
                print("processing abstract")
            count_patent += 1

        # The fragments are spliced together as text on purpose: a fragment is
        # inserted verbatim between the brackets, so it is not required to be
        # a single standalone JSON value.
        output_result = '{"problem_graph": [%s]}' % ','.join(extracted_concepts)
        concepts_json = json.loads(output_result)
        count_concepts = len(concepts_json['problem_graph'])
        count_concepts_problem, count_concepts_solupart = (
            self._count_concept_types(concepts_json))

        json_write_to_file = json.dumps(
            concepts_json, sort_keys=False, indent=4, separators=(',', ': '))
        # Make sure the target folder exists so the write below cannot fail
        # on a project processed for the first time.
        os.makedirs(graph_folder, exist_ok=True)
        with open(graph_folder + "graph.json", 'w', encoding="utf-8") as json_graph:
            json_graph.write(json_write_to_file)

        print("Le corpus contenait %s brevets dont %s abstract, %s revendications et %s descriptions" % (count_patent, count_abstract, count_claims, count_description))
        print("%s phrases ont été analysée(s)" % (total_sentences_number))
        print("%s concepts ont été trouvé(s) dont %s problèmes et %s solutions partielles" % (count_concepts, count_concepts_problem, count_concepts_solupart))

        return concepts_json