File size: 1,394 Bytes
d6504ae
 
 
8b513d0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
from .EntityCandidates import EntityCandidates
from .EntityElement import EntityElement
from .DatabaseConnection import get_wikidata_instance


class TermCandidate:
    """A group of span variations that all refer to the same candidate term.

    Every variation must expose a ``text`` attribute and be iterable over
    tokens exposing ``text``, ``tag_`` and ``lemma_``.
    NOTE(review): this matches the spaCy ``Span``/``Token`` API -- confirm
    against the callers that build these candidates.
    """

    def __init__(self, span):
        """Create the candidate with ``span`` as its first variation."""
        self.variations = [span]

    def pretty_print(self):
        """Print the comma-separated texts of all variations to stdout."""
        print("Term Candidates are [{}]".format(self))

    def append(self, span):
        """Add ``span`` as a further variation of this term."""
        self.variations.append(span)

    def has_plural(self, variation):
        """Return True if at least one token of ``variation`` is tagged "NNS"."""
        return any(t.tag_ == "NNS" for t in variation)

    def get_singular(self, variation):
        """Return the text of ``variation`` with "NNS" tokens lemmatised.

        Tokens tagged "NNS" contribute their ``lemma_``; all other tokens
        contribute their ``text``. Tokens are joined with single spaces, so
        the original whitespace of the variation is not preserved.
        """
        return ' '.join(t.lemma_ if t.tag_ == "NNS" else t.text for t in variation)

    def __str__(self):
        """Return the variation texts joined by ", "."""
        return ', '.join(variation.text for variation in self.variations)

    def get_entity_candidates(self):
        """Look up entities for every variation and wrap them as candidates.

        Each variation is resolved through ``get_entities_from_alias`` using
        its text. When the variation contains an "NNS" token, the singular
        form (see ``get_singular``) is looked up as well and its entities are
        appended after the ones found for the original text.

        Returns:
            An ``EntityCandidates`` built from one ``EntityElement`` per
            (entity, variation) pair, in variation order.
        """
        wikidata_instance = get_wikidata_instance()
        entities_by_variation = {}
        for variation in self.variations:
            # Copy the lookup result before extending it: the returned list
            # belongs to the database layer, and growing it in place would
            # leak the singular-form entities into whatever else holds it.
            entities = list(wikidata_instance.get_entities_from_alias(variation.text))
            if self.has_plural(variation):
                entities.extend(
                    wikidata_instance.get_entities_from_alias(self.get_singular(variation)))
            entities_by_variation[variation] = entities

        entity_elements = [
            EntityElement(entity, variation)
            for variation, entities in entities_by_variation.items()
            for entity in entities
        ]
        return EntityCandidates(entity_elements)