Spaces:
Running
Running
File size: 1,044 Bytes
91eaff6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
`spacyfishing` entity linking to Wikidata
<https://github.com/Lucaterre/spacyfishing>
"""
from icecream import ic # pylint: disable=E0401
import spacy # pylint: disable=E0401
# Sample passage used as input for the pipeline. The value begins and ends
# with a newline character, exactly as a triple-quoted literal on its own
# lines would produce.
SRC_TEXT: str = (
    "\n"
    "Werner Herzog is a remarkable filmmaker and an intellectual originally "
    "from Germany, the son of Dietrich Herzog, although they never spoke "
    "after the war."
    "\n"
)
# Build the processing pipeline. The stock "ner" component of the small
# English model is excluded at load time; the pipes below are appended in
# this order: span_marker -> entityfishing -> merge_entities.
nlp = spacy.load("en_core_web_sm", exclude=["ner"])

# NOTE(review): presumably this pipe supplies the entity spans in place of
# the excluded "ner" component -- confirm against the SpanMarker docs.
nlp.add_pipe(
    "span_marker",
    config={"model": "tomaarsen/span-marker-roberta-large-ontonotes5"},
)

# Entity linking pipe, pointed at a hosted service endpoint. No statement
# filters are configured (empty list) and extra info is switched on.
nlp.add_pipe(
    "entityfishing",
    config={
        "api_ef_base": "https://cloud.science-miner.com/nerd/service",
        "extra_info": True,
        "filter_statements": [],
    },
)

# Added last, with its default configuration.
nlp.add_pipe("merge_entities")
# Run the pipeline on the sample text with its surrounding whitespace removed.
doc = nlp(SRC_TEXT.strip())

# Print every entity found in the document, one `ic` call per entity.
# The `ent._.*` values are custom extension attributes.
# NOTE(review): presumably these are populated by the "entityfishing" pipe,
# with `description` and `other_ids` depending on `extra_info` being
# enabled -- confirm against the spacyfishing documentation.
for ent in doc.ents:
    # The call must be indented under the `for`; at column 0 the loop has
    # no body and the script fails with an IndentationError.
    ic(
        ent.text,
        ent.label_,
        ent._.nerd_score,
        ent._.url_wikidata,
        ent._.description,
        ent._.other_ids,
    )
|