File size: 2,042 Bytes
d0de866
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
from src.subject import Subject
from src.algorithms.textrank import TextRank


class WikiContext(Subject):
    """Collect and summarise a subject's content and its prerequisite topics.

    The main content comes from the subject's own summary; prerequisites are
    hyperlinks of the page whose title occurs inside one of the subject's top
    keyphrases. Both can be condensed with the configured summarisation
    algorithm.

    Attributes are plain instance fields:
        algorithm: name of the summarisation algorithm (see ``mapper``).
        params: keyword arguments forwarded to the algorithm's constructor.
        max_prereq: upper bound on prerequisites collected / summarised.
        content: main content, ``None`` until ``get_main_content`` is called.
        prereq: mapping of prerequisite title -> fetched content.
    """

    def __init__(self, subject, algorithm, params, max_prereqs=5):
        """Initialise the context.

        Args:
            subject: subject identifier, forwarded to ``Subject.__init__``.
            algorithm: name of the summarisation algorithm to use.
            params: dict of keyword arguments for the algorithm's constructor.
            max_prereqs: maximum number of prerequisites to keep.
        """
        super().__init__(subject=subject)
        self.algorithm = algorithm
        self.params = params
        self.max_prereq = max_prereqs
        self.content = None
        self.prereq = {}

    def get_main_content(self):
        """Fetch the subject's summary and store it in ``self.content``."""
        self.content = self._get_summary()

    def get_prereqs_content(self):
        """Populate ``self.prereq`` with content for up to ``max_prereq`` topics.

        A hyperlink qualifies as a prerequisite candidate when its title,
        compared case-insensitively, is a substring of one of the keyphrases.
        Candidates are tried in first-seen order (keyphrase order, then link
        order) until ``max_prereq`` of them yielded non-empty content.
        """
        keyphrases = self.get_top_keywords_from_rake()
        # Lowercase every link once instead of once per keyphrase; ``or ()``
        # tolerates a missing link list.
        links = [(link, link.lower()) for link in (self._get_links() or ())]

        # A dict keeps insertion order, so de-duplication stays deterministic
        # (a set would shuffle which candidates are tried first).
        candidates = {}
        for phrase in keyphrases:
            phrase_lower = phrase.lower()
            for link, link_lower in links:
                if link_lower in phrase_lower:
                    candidates.setdefault(link, None)

        collected = 0
        for title in candidates:
            # Stop once enough prerequisites were stored; candidates that do
            # not exist or have no content do not count against the limit.
            if collected >= self.max_prereq:
                break

            candidate = Subject(title)
            # NOTE(review): ``Subject`` is defined elsewhere. Testing the bare
            # attribute is always truthy if it is a method, so call it when it
            # is callable; a property/flag is used as-is. Confirm the method
            # takes no arguments.
            exists = candidate._check_page_exists
            if callable(exists):
                exists = exists()
            if not exists:
                continue

            content = candidate._get_content()
            if content:
                self.prereq[title] = content
                collected += 1

    def mapper(self):
        """Return the model class for ``self.algorithm``.

        Returns:
            ``TextRank`` when the algorithm is ``'TextRank'``; ``None`` for any
            other value.
        """
        if self.algorithm == 'TextRank':
            return TextRank
        return None

    def get_main_summary(self):
        """Summarise ``self.content`` with the configured algorithm.

        Returns:
            The generated summary, or ``self.content`` unchanged when the
            model returns a falsy summary.
        """
        model_class = self.mapper()
        model = model_class(text=self.content, **self.params)
        return model.get_summary(self.algorithm) or self.content

    def get_prereqs_summary(self):
        """Summarise the stored prerequisite contents.

        Prerequisites whose summarisation raises ``ValueError`` are skipped
        and do not count towards the ``max_prereq`` limit.

        Returns:
            Dict mapping prerequisite title -> summary, with at most
            ``max_prereq`` entries.
        """
        model_class = self.mapper()
        summaries = {}
        for title, text in self.prereq.items():
            # Keys are unique, so the dict size equals the success count.
            if len(summaries) >= self.max_prereq:
                break
            try:
                model = model_class(text, **self.params)
                summaries[title] = model.get_summary(self.algorithm)
            except ValueError:
                continue

        return summaries