Spaces:
Build error
Build error
Commit
·
8b1561c
1
Parent(s):
8014fee
Remove plural names
Browse files- entity_extraction.py +10 -0
entity_extraction.py
CHANGED
@@ -3,6 +3,15 @@ import spacy
|
|
3 |
nlp = spacy.load("en_core_web_md")
|
4 |
nlp.add_pipe("entityfishing")
|
5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
def extract_entities(article):
|
8 |
'''Find wikidata refs for article entities'''
|
@@ -11,6 +20,7 @@ def extract_entities(article):
|
|
11 |
seen_surnames = []
|
12 |
seen_qids = []
|
13 |
|
|
|
14 |
doc = nlp(article)
|
15 |
for ent in doc.ents:
|
16 |
if ent._.kb_qid is None or ent.label_ not in ["ORG", "PERSON", "GPE"] or ent.text in seen_entities:
|
|
|
3 |
nlp = spacy.load("en_core_web_md")
|
4 |
nlp.add_pipe("entityfishing")
|
5 |
|
6 |
+
def remove_plural_names(article):
    """Return *article* with possessive `'s` / `’s` suffixes removed.

    Despite the name, this strips possessive markers (straight or curly
    apostrophe followed by ``s``), not plurals, so that e.g. "Biden's"
    reaches the NER pipeline as "Biden".

    The marker is only removed when it directly follows a word character
    and ends the word. An opening quote before a word starting with "s"
    ('so-called') or an apostrophe inside a name (D'souza) is left
    untouched; a plain substring replace would mangle those.

    Args:
        article: The article text.

    Returns:
        The text with whitespace runs collapsed to single spaces and
        possessive suffixes stripped.
    """
    import re  # local import keeps this block self-contained

    # split()/join() collapses any run of whitespace (including newlines)
    # to one space, exactly as the original word-by-word loop did.
    normalized = " ".join(article.split())

    # (?<=\w) -> must follow a letter/digit, so leading quotes are skipped.
    # \b      -> the "s" must end the word, so "D'souza" is preserved.
    return re.sub(r"(?<=\w)['’]s\b", "", normalized)
|
14 |
+
|
15 |
|
16 |
def extract_entities(article):
|
17 |
'''Find wikidata refs for article entities'''
|
|
|
20 |
seen_surnames = []
|
21 |
seen_qids = []
|
22 |
|
23 |
+
article = remove_plural_names(article)
|
24 |
doc = nlp(article)
|
25 |
for ent in doc.ents:
|
26 |
if ent._.kb_qid is None or ent.label_ not in ["ORG", "PERSON", "GPE"] or ent.text in seen_entities:
|