# Page-scrape residue (not part of the program): "Spaces: Build error"
import spacy

# Shared pipeline used by extract_entities(): the medium English model with
# the "entityfishing" component appended, which supplies the `ent._.kb_qid`,
# `ent._.nerd_score` and `ent._.url_wikidata` attributes read further down.
nlp = spacy.load("en_core_web_md")
nlp.add_pipe("entityfishing")
def extract_entities(article, min_score=0.5):
    """Find Wikidata references for the entities mentioned in an article.

    Runs the module-level ``nlp`` pipeline over ``article`` and keeps, in
    document order, each entity that passes all of these filters:

    * it has a linked QID (``ent._.kb_qid`` is not ``None``);
    * its label is ``ORG``, ``PERSON`` or ``GPE``;
    * the same text has not already passed the filters below;
    * its ``ent._.nerd_score`` is not below ``min_score``;
    * if it is a single word, that word is not the last word of an earlier
      multi-word ``PERSON`` entity (e.g. a bare surname after a full name);
    * its QID has not already been returned.

    Args:
        article: Text to analyse.
        min_score: Entities whose ``nerd_score`` is below this value are
            dropped. Defaults to 0.5.

    Returns:
        A list of the retained entity objects from ``doc.ents``.
    """
    wanted_labels = ("ORG", "PERSON", "GPE")
    ents = []
    # Sets rather than lists: these are only used for membership checks.
    seen_entities = set()
    seen_surnames = set()
    seen_qids = set()

    for ent in nlp(article).ents:
        qid = ent._.kb_qid
        if qid is None or ent.label_ not in wanted_labels or ent.text in seen_entities:
            continue
        if ent._.nerd_score < min_score:
            continue

        words = ent.text.split()
        if len(words) == 1:
            # Single name: skip it when it repeats a surname already seen.
            if ent.text in seen_surnames:
                continue
        elif ent.label_ == "PERSON":
            # Multipart name: remember the last word as the surname.
            seen_surnames.add(words[-1])

        # The text is recorded before the QID check, so a later entity with
        # the same text is skipped even when this one turns out to be a
        # duplicate QID.
        seen_entities.add(ent.text)
        if qid in seen_qids:
            continue
        seen_qids.add(qid)
        ents.append(ent)
    return ents
if __name__ == "__main__":
    # Interactive use: read one article from standard input, then list every
    # retained entity with its label and Wikidata URL.
    ents = extract_entities(input("article: "))
    print()
    print("ENTITIES:")
    for ent in ents:
        print(ent.text, "\t", ent.label_, "\t", ent._.url_wikidata)