#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Streamlit demo that shows Hebrew named-entity annotations from two sources.

The input text is annotated twice: once by a local spaCy pipeline (UDPipe
Hebrew model plus a ``span_marker`` component) and once by the remote IAHLT
NER HTTP API. Both results are rendered with displaCy and displayed one
below the other.
"""
import requests
import spacy_udpipe
import streamlit as st
from spacy import displacy

# model = span_marker.SpanMarkerModel.from_pretrained("iahlt/iahlt-span-marker-alephbert-small-nemo-mt-he")

# Endpoint of the remote NER service; the text is sent as the ``text`` query
# parameter.
NER_API_URL = "https://ne-api.iahlt.org/api/hebrew/ner/"

# Seconds to wait for the remote service before giving up, so a stalled
# request cannot block the app forever.
NER_API_TIMEOUT = 30

# displaCy reads writing direction and language from the ``settings`` entry
# of a manually supplied document.
RTL_SETTINGS = {"lang": "he", "direction": "rtl"}

# Template that wraps rendered markup before it is written to the page.
WRAPPER = """
{}
"""


@st.cache_resource
def load_pipeline():
    """Download the Hebrew UDPipe model and build the local NER pipeline.

    Streamlit re-executes the script on every interaction; caching the
    pipeline as a resource keeps the download and model load from being
    repeated on each rerun.

    Returns:
        The spaCy pipeline with the ``span_marker`` component added.
    """
    spacy_udpipe.download("he")
    pipeline = spacy_udpipe.load("he")
    pipeline.add_pipe(
        "span_marker",
        config={"model": "iahlt/span-marker-alephbert-small-nemo-mt-he"},
    )
    return pipeline


nlp = load_pipeline()


def get_html(html: str) -> str:
    """Convert HTML so it can be rendered.

    Args:
        html: Markup produced by displaCy.

    Returns:
        The markup with newlines replaced by spaces, placed inside
        ``WRAPPER``.
    """
    # Newlines seem to mess with the rendering
    return WRAPPER.format(html.replace("\n", " "))


def page_init() -> None:
    """Write the page header."""
    st.header("Named Entity Recognition Demo")


def _fetch_entities(text: str) -> dict:
    """Query the remote NER service and return a displaCy-ready document.

    Args:
        text: Non-empty text to annotate.

    Returns:
        The service's JSON answer with its ``ents`` list reduced to
        ``start``/``end``/``label`` entries (groups labelled ``"O"`` are
        dropped) and right-to-left ``settings`` attached.

    Raises:
        requests.RequestException: On network failure, timeout, or a
            non-success HTTP status.
        KeyError: If the answer has no ``ents`` entry.
    """
    # Passing the text through ``params`` lets requests URL-encode it;
    # characters such as "&", "#", "+" or "%" would otherwise corrupt the
    # query string.
    response = requests.get(
        NER_API_URL,
        params={"text": text},
        timeout=NER_API_TIMEOUT,
    )
    response.raise_for_status()
    answer = response.json()
    ents = [
        {"start": ent["start"], "end": ent["end"], "label": ent["entity_group"]}
        for ent in answer["ents"]
        if ent["entity_group"] != "O"
    ]
    # NOTE(review): the answer is assumed to carry the "text" key displaCy
    # needs in manual mode; fall back to the submitted text if it does not.
    return {"text": text, **answer, "ents": ents, "settings": RTL_SETTINGS}


def _render_entities(text: str) -> str:
    """Render the remote service's entities for ``text`` with displaCy."""
    # Direction is supplied via the document's ``settings`` instead of
    # rewriting every "ltr" substring of the markup afterwards, which would
    # also alter any "ltr" occurring inside the user's own text.
    return displacy.render(_fetch_entities(text), style="ent", manual=True)


@st.cache_data
def get_html_from_server(text):
    """Return wrapped entity markup for ``text`` from the remote NER service.

    Results are cached by Streamlit per distinct ``text``.

    Args:
        text: Text to annotate; surrounding whitespace is ignored.

    Returns:
        Wrapped HTML ready for ``st.write(..., unsafe_allow_html=True)``.
        Blank input yields an empty wrapper without contacting the service.

    Raises:
        requests.RequestException: If the service request fails.
        KeyError: If the service answer lacks the expected keys.
    """
    text = text.strip()
    if not text:
        return get_html("")
    return get_html(_render_entities(text))


def main() -> None:
    """Build the page: input box, button, and both annotation results."""
    page_init()

    sample_text = "יו\"ר ועדת הנוער נתן סלובטיק אמר שהשחקנים של אנחנו לא משתלבים באירופה."

    text = st.text_area("Text", sample_text, height=200, max_chars=1000)
    btn = st.button("Annotate")
    style = """ """
    st.write(style, unsafe_allow_html=True)

    if not (text and btn):
        st.write("")
        return

    doc = nlp(text)
    html = displacy.render(
        doc,
        style="ent",
        options={"direction": "rtl"},
        manual=False,
    )
    nemo_html = get_html(html)

    try:
        iahlt_html = get_html_from_server(text)
    except (requests.RequestException, ValueError, KeyError) as err:
        # A failing or misbehaving remote service should not hide the local
        # model's result; report the problem and leave that section empty.
        st.error(f"IAHLT NER service request failed: {err}")
        iahlt_html = ""

    html = (
        "\n\nNemo model results\n\n"
        f"{nemo_html}\n\n"
        "IAHLT results\n\n"
        f"{iahlt_html}\n"
    )
    st.write(html, unsafe_allow_html=True)


if __name__ == '__main__':
    main()