Spaces:
Runtime error
Runtime error
File size: 3,545 Bytes
6d26f77 c6c7469 6d26f77 44ad8f1 6d26f77 44ad8f1 6d26f77 44ad8f1 6d26f77 44ad8f1 6d26f77 44ad8f1 6d26f77 44ad8f1 6d26f77 44ad8f1 6d26f77 44ad8f1 6d26f77 44ad8f1 6d26f77 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 |
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import requests
import spacy_udpipe
import streamlit as st
from spacy import displacy
@st.cache_resource
def _load_pipeline():
    """Build the Hebrew spaCy-UDPipe pipeline with the SpanMarker component.

    Streamlit re-executes the script on every widget interaction, so the
    loader is wrapped in ``st.cache_resource``: the model download, load and
    ``span_marker`` setup run once per server process and the same pipeline
    object is reused on later reruns.

    Returns:
        The pipeline returned by ``spacy_udpipe.load("he")`` with a
        ``span_marker`` pipe added.
    """
    spacy_udpipe.download("he")
    pipeline = spacy_udpipe.load("he")
    pipeline.add_pipe(
        "span_marker",
        config={"model": "iahlt/span-marker-alephbert-small-nemo-mt-he"},
    )
    return pipeline


# Module-level handle used by the script section below.
nlp = _load_pipeline()
def get_html(html: str) -> str:
    """Wrap rendered markup in a bordered, scrollable, right-to-left box.

    Args:
        html: Markup to embed. Newlines are replaced by spaces before it is
            wrapped.

    Returns:
        A ``<style>`` rule for ``mark.entity`` followed by the wrapped markup.
    """
    # ``direction: rtl`` has to live inside the quoted style attribute; a
    # declaration placed after the closing quote is not part of the style.
    wrapper = (
        '<div style="overflow-x: auto; border: 1px solid #e6e9ef; '
        "border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem; "
        'direction: rtl;">{}</div>'
    )
    # Newlines seem to mess with the rendering
    flattened = html.replace("\n", " ")
    style = "<style>mark.entity { display: inline-block }</style>"
    return f"{style}{wrapper.format(flattened)}"
def page_init():
    """Write the demo's page header."""
    title = "Named Entity Recognition Demo"
    st.header(title)
@st.cache_data
def get_html_from_server(text):
    """Fetch NER annotations for ``text`` from the IAHLT API and render them.

    Args:
        text: Input text to annotate.

    Returns:
        The displaCy entity markup for the API answer, wrapped by
        ``get_html``.

    Raises:
        requests.RequestException: If the request times out, cannot connect,
            or the server answers with an HTTP error status.
    """
    api_url = "https://ne-api.iahlt.org/api/hebrew/ner/"
    timeout_seconds = 30

    def get_entities(text):
        """Return the API answer with "O" spans dropped, or [] for blank text."""
        text = text.strip()
        if not text:
            return []
        # Send the text through ``params`` so it is percent-encoded; pasting
        # it into the URL lets characters such as '&' or '#' cut the query.
        response = requests.get(
            api_url,
            params={"text": text},
            timeout=timeout_seconds,
        )
        response.raise_for_status()
        answer = response.json()
        # Keep only real entities and rename the fields to the
        # start/end/label keys passed on to displaCy.
        answer["ents"] = [
            {
                "start": ent["start"],
                "end": ent["end"],
                "label": ent["entity_group"],
            }
            for ent in answer["ents"]
            if ent["entity_group"] != "O"
        ]
        return answer

    def render_entities(text):
        """Render the fetched entities with displaCy in manual mode."""
        entities = get_entities(text)
        html = displacy.render(
            entities,
            style="ent",
            options={"direction": "rtl"},
            manual=True,
        )
        # Flip any "ltr" left in the markup to "rtl".
        # NOTE(review): this is a plain substring replace over the whole
        # markup, so an "ltr" inside the annotated text is rewritten too.
        return html.replace("ltr", "rtl")

    return get_html(render_entities(text))
if __name__ == '__main__':
    # Script entry point: draw the input widgets, then show the local
    # pipeline's entities and the IAHLT API's entities for the same text.
    page_init()
    # Default Hebrew sample sentence (restored from a mis-decoded literal).
    sample_text = "יו\"ר ועדת הנוער נתן סלובטיק אמר שהשחקנים של אנחנו לא משתלבים באירופה."
    text = st.text_area("Text", sample_text, height=200, max_chars=1000)
    btn = st.button("Annotate")
    # Page-wide CSS: right-to-left text in the David Libre font, with the
    # Streamlit menu and footer hidden. The markup lines stay flush-left so
    # the emitted string carries no extra leading whitespace.
    style = """
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=David+Libre">
<style>
.stTextArea textarea {
font-size: 20px;
color: black;
font-family: 'David Libre';
direction: rtl;
}
.entities {
font-size: 16px;
font-family: 'David Libre';
direction: rtl;
}
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
    st.write(style, unsafe_allow_html=True)
    if text and btn:
        # Entities from the locally loaded pipeline.
        doc = nlp(text)
        html = displacy.render(
            doc,
            style="ent",
            options={"direction": "rtl"},
            manual=False,
        )
        nemo_html = get_html(html)
        # Entities from the remote IAHLT API.
        iahlt_html = get_html_from_server(text)
        html = f"""
<div style="display: flex; flex-direction: row; justify-content: space-between; direction: rtl">
<div>
<h3>Nemo model results</h3>
{nemo_html}
</div>
</div>
<div style="display: flex; flex-direction: row; justify-content: space-between; direction: rtl">
<div>
<h3>IAHLT results</h3>
{iahlt_html}
</div>
</div>
"""
        st.write(html, unsafe_allow_html=True)
    else:
        st.write("")
|