# Spaces:
# Build error
# Build error
import hashlib
from urllib.parse import quote

import requests
import spacy
import streamlit as st
import streamlit.components.v1 as components
# Streamlit demo: run entity linking over an article and show one round
# thumbnail per linked entity that has a Wikidata image (property P18).

WIKIDATA_API_URL = "https://www.wikidata.org/w/api.php"
IMAGE_PROPERTY = "P18"
# Upper bound for each Wikidata call so a stalled request cannot hang the app.
REQUEST_TIMEOUT_SECONDS = 10
# Only entities carrying one of these labels are displayed.
LINKABLE_LABELS = {"ORG", "PERSON", "GPE"}


def _fetch_image_name(qid):
    """Return the file name stored in the first P18 claim of *qid*.

    Args:
        qid: Wikidata entity identifier, passed as the ``entity`` parameter
            of the ``wbgetclaims`` API action.

    Returns:
        The claim's ``mainsnak.datavalue.value`` string, or ``None`` when the
        request fails, the response is not the expected JSON, or the entity
        has no usable P18 value.
    """
    try:
        response = requests.get(
            WIKIDATA_API_URL,
            params={
                "action": "wbgetclaims",
                "format": "json",
                "property": IMAGE_PROPERTY,
                "entity": qid,
            },
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()
        payload = response.json()
    except (requests.RequestException, ValueError):
        # Best effort: an entity whose lookup fails is simply not displayed.
        return None

    try:
        value = payload["claims"][IMAGE_PROPERTY][0]["mainsnak"]["datavalue"]["value"]
    except (KeyError, IndexError, TypeError):
        # No "claims" (API error payload), no P18 claim, or a snak that
        # carries no datavalue.
        return None
    return value if isinstance(value, str) else None


def _commons_image_url(img_name):
    """Build the upload.wikimedia.org URL for the file name *img_name*.

    Spaces are replaced by underscores; the first one and two hex digits of
    the MD5 of that normalized name form the two directory levels. The file
    name is percent-encoded so characters such as quotes, ``?`` or ``#``
    cannot break the URL or the HTML attribute it is embedded in.
    """
    normalized = img_name.replace(" ", "_")
    digest = hashlib.md5(normalized.encode("utf-8")).hexdigest()
    return (
        "https://upload.wikimedia.org/wikipedia/commons/"
        f"{digest[0]}/{digest[:2]}/{quote(normalized, safe='')}"
    )


# NOTE(review): this runs at script top level, so the model is presumably
# reloaded on every Streamlit rerun; consider caching the loader if the
# installed Streamlit version offers a resource cache.
nlp = spacy.load("en_core_web_md")
# add pipeline (declared through entry_points in setup.py)
nlp.add_pipe("entityfishing")

st.title('Entity Linking Demo')

# Read the default article through a context manager so the handle is closed.
with open("example.txt", encoding="utf-8") as example_file:
    default_article = example_file.read()
article = st.text_area('Article to analyze:', value=default_article)

# Only used for membership checks, hence sets.
seen_entities = set()
seen_surnames = set()

if st.button('Submit'):
    good_ents = []

    with st.spinner(text="Analysing..."):
        doc = nlp(article)
        for ent in doc.ents:
            # ent._.kb_qid / url_wikidata / nerd_score are custom extension
            # attributes, presumably set by the "entityfishing" pipe.
            if (
                ent._.kb_qid is None
                or ent.label_ not in LINKABLE_LABELS
                or ent.text in seen_entities
            ):
                continue

            if ent.label_ == "PERSON":
                name_parts = ent.text.split()
                if len(name_parts) == 1:
                    # Single name: skip it when it repeats the last word of
                    # a multi-part name seen earlier.
                    if ent.text in seen_surnames:
                        continue
                elif name_parts:
                    # Multipart name: remember its last word.
                    seen_surnames.add(name_parts[-1])

            seen_entities.add(ent.text)
            print((ent.text, ent.label_, ent._.kb_qid, ent._.url_wikidata, ent._.nerd_score))

            img_name = _fetch_image_name(ent._.kb_qid)
            if img_name is None:
                continue
            good_ents.append(
                (
                    ent.text,
                    ent.label_,
                    ent._.kb_qid,
                    ent._.url_wikidata,
                    ent._.nerd_score,
                    _commons_image_url(img_name),
                )
            )

    if good_ents:
        cols = st.columns(len(good_ents))
        for col, entity in zip(cols, good_ents):
            with col:
                components.html(
                    f"<img style='border-radius: 50%;object-fit:cover;width:100px;height:100px' src='{entity[-1]}'/>",
                    height=110,
                    width=110,
                )
                st.caption(entity[0])
    else:
        # st.columns() needs a positive column count, so an empty result
        # must not reach it.
        st.info("No linked entities with a Wikidata image were found.")