#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import requests
import spacy_udpipe
import streamlit as st
from spacy import displacy
# model = span_marker.SpanMarkerModel.from_pretrained("iahlt/iahlt-span-marker-alephbert-small-nemo-mt-he")
# Module-level setup: these statements run, in this order, every time the
# script is executed.
# Fetch the UDPipe model registered under the "he" language code.
spacy_udpipe.download("he")
# Load that model; the resulting `nlp` object is the one called on the user's
# text in the `__main__` section of this script.
nlp = spacy_udpipe.load("he")
# Append the component registered as "span_marker" to the pipeline, configured
# with the pretrained model named in `config`.
nlp.add_pipe("span_marker",
config={"model": "iahlt/span-marker-alephbert-small-nemo-mt-he"})
def get_html(html: str):
    """Prepare a rendered HTML snippet for display.

    Every newline inside *html* is replaced with a single space, and the
    flattened markup is then placed on a line of its own (one newline before
    it and one after it).
    """
    # Line breaks inside the markup appear to disturb rendering, so flatten
    # them before wrapping.
    flattened = " ".join(html.split("\n"))
    prefix = ""
    return prefix + "\n{}\n".format(flattened)
def page_init():
    """Write the demo's header at the top of the Streamlit page."""
    title = "Named Entity Recognition Demo"
    st.header(title)
@st.cache_data
def get_html_from_server(text):
    """Annotate *text* with the remote NER service and return displayable HTML.

    The text is sent to the NER endpoint on ``ne-api.iahlt.org``, the entity
    spans in the answer are rendered with displaCy's manual ``ent`` mode, and
    the markup is passed through ``get_html``. The function is wrapped in
    Streamlit's ``st.cache_data`` decorator.

    Args:
        text: Raw user text; surrounding whitespace is stripped before the
            request is made.

    Returns:
        The string produced by ``get_html`` for the rendered entities.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            service answers with an HTTP error status.
    """
    api_url = "https://ne-api.iahlt.org/api/hebrew/ner/"
    timeout_seconds = 30

    def get_entities(text):
        """Fetch the service's answer for *text*, dropping "O" spans."""
        text = text.strip()
        if not text:
            # Nothing to annotate; skip the network round trip.
            return []
        # Hand the text to requests via ``params`` so it is percent-encoded;
        # formatting it straight into the URL let characters such as "&",
        # "#" or "%" truncate or corrupt the query.
        response = requests.get(
            api_url,
            params={"text": text},
            timeout=timeout_seconds,
        )
        # Fail with a clear HTTP error instead of a confusing JSON/KeyError.
        response.raise_for_status()
        answer = response.json()
        # Keep only real entities and rename "entity_group" to the "label"
        # key used in the dicts handed to displaCy.
        answer["ents"] = [
            {
                "start": ent["start"],
                "end": ent["end"],
                "label": ent["entity_group"],
            }
            for ent in answer["ents"]
            if ent["entity_group"] != "O"
        ]
        # The rest of the answer is passed on untouched; presumably it already
        # carries the "text" key displaCy needs -- confirm against the API.
        return answer

    def render_entities(text):
        """Render the entities found in *text* as displaCy ``ent`` markup."""
        entities = get_entities(text)
        html = displacy.render(
            entities,
            style="ent",
            options={"direction": "rtl"},
            manual=True,
        )
        # NOTE(review): this blanket swap suggests the markup still says "ltr"
        # despite the option above. It also rewrites any "ltr" occurring in
        # the annotated text itself -- confirm whether the direction can be
        # supplied through the manual-mode data instead.
        return html.replace("ltr", "rtl")

    return get_html(render_entities(text))
if __name__ == '__main__':
page_init()
sample_text = "יו\"ר ועדת הנוער נתן סלובטיק אמר שהשחקנים של אנחנו לא משתלבים באירופה."
text = st.text_area("Text", sample_text, height=200, max_chars=1000)
btn = st.button("Annotate")
style = """
"""
st.write(style, unsafe_allow_html=True)
if text and btn:
doc = nlp(text)
html = displacy.render(
doc,
style="ent",
options={"direction": "rtl"},
manual=False,
)
nemo_html = get_html(html)
iahlt_html = get_html_from_server(text)
html = f"""