File size: 3,545 Bytes
6d26f77
 
 
 
 
 
c6c7469
6d26f77
 
 
 
 
 
 
 
 
44ad8f1
6d26f77
 
 
 
 
 
 
 
 
 
 
44ad8f1
6d26f77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44ad8f1
 
 
 
 
6d26f77
44ad8f1
6d26f77
 
 
 
 
 
 
 
 
 
 
 
44ad8f1
 
 
6d26f77
 
 
 
 
 
 
 
44ad8f1
 
6d26f77
 
 
 
 
 
 
 
 
44ad8f1
6d26f77
 
 
 
 
 
 
 
44ad8f1
6d26f77
 
 
44ad8f1
 
 
6d26f77
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import requests
import spacy_udpipe
import streamlit as st
from spacy import displacy

# model = span_marker.SpanMarkerModel.from_pretrained("iahlt/iahlt-span-marker-alephbert-small-nemo-mt-he")
# Module-level pipeline setup: fetch and load the "he" UDPipe model through
# spacy_udpipe, then append the "span_marker" component configured with the
# IAHLT SpanMarker checkpoint. The resulting ``nlp`` object is what the
# script's main section calls on the user's text.
_UDPIPE_LANG = "he"
_SPAN_MARKER_CONFIG = {"model": "iahlt/span-marker-alephbert-small-nemo-mt-he"}

spacy_udpipe.download(_UDPIPE_LANG)
nlp = spacy_udpipe.load(_UDPIPE_LANG)
nlp.add_pipe("span_marker", config=_SPAN_MARKER_CONFIG)


def get_html(html: str) -> str:
    """Wrap rendered markup in a scrollable, right-to-left container.

    Args:
        html: Markup to embed (in this file, displaCy entity output).

    Returns:
        A ``<style>`` rule for ``mark.entity`` followed by a bordered
        ``<div>`` containing ``html`` with all newlines replaced by spaces.
    """
    # ``direction: rtl`` must live inside the style attribute; previously a
    # stray quote closed the attribute early, so the declaration was dropped.
    WRAPPER = (
        '<div style="overflow-x: auto; border: 1px solid #e6e9ef; '
        'border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem; '
        'direction: rtl">{}</div>'
    )
    # Newlines seem to mess with the rendering
    html = html.replace("\n", " ")
    style = "<style>mark.entity { display: inline-block }</style>"
    html = WRAPPER.format(html)
    return f"{style}{html}"


def page_init() -> None:
    """Render the page header of the demo."""
    title = "Named Entity Recognition Demo"
    st.header(title)


@st.cache_data
def get_html_from_server(text):
    """Annotate ``text`` via the remote IAHLT NER API and return display HTML.

    The text is sent to the API, entities whose ``entity_group`` is ``"O"``
    are dropped, the rest are rendered with displaCy in manual mode, and the
    markup is wrapped by ``get_html``.

    Args:
        text: Raw user text to annotate.

    Returns:
        HTML string ready to be written to the page.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-2xx HTTP response from the API.
    """
    api_url = "https://ne-api.iahlt.org/api/hebrew/ner/"
    request_timeout = 30  # seconds; avoids hanging the app on a stalled API

    def get_entities(text):
        """Query the API; return its JSON with ``ents`` in displaCy's format."""
        text = text.strip()
        if not text:
            return []
        # Pass the text through ``params`` so it is URL-encoded; formatting it
        # into the URL breaks on characters such as "&", "#", "+" or "%".
        response = requests.get(api_url,
                                params={"text": text},
                                timeout=request_timeout)
        # Fail with a clear HTTP error instead of a later JSON/key error.
        response.raise_for_status()
        answer = response.json()
        # Keep only real entities and rename ``entity_group`` to ``label``.
        answer["ents"] = [
            {
                "start": ent["start"],
                "end": ent["end"],
                "label": ent["entity_group"],
            }
            for ent in answer["ents"]
            if ent["entity_group"] != "O"
        ]
        return answer

    def render_entities(text):
        """Render the API entities for ``text`` as displaCy entity markup."""
        entities = get_entities(text)
        html = displacy.render(entities,
                               style="ent",
                               options={"direction": "rtl"},
                               manual=True)
        # NOTE(review): this flips every "ltr" substring in the markup,
        # including any occurrence inside the user's own text -- confirm
        # whether a narrower replacement is wanted.
        return html.replace("ltr", "rtl")

    return get_html(render_entities(text))


if __name__ == '__main__':
    # Script entry point: draw the page, collect text, and on "Annotate"
    # show the local pipeline's entities next to the remote API's entities.
    page_init()

    # Hebrew sample sentence (restored from a mis-decoded, GBK-looking form).
    sample_text = "יו\"ר ועדת הנוער נתן סלובטיק אמר שהשחקנים של אנחנו לא משתלבים באירופה."

    text = st.text_area("Text", sample_text, height=200, max_chars=1000)
    btn = st.button("Annotate")
    # The Google Fonts URL encodes the space as "+", but the CSS family name
    # is "David Libre"; the text colour property is ``color`` (``font-color``
    # is not a CSS property).
    style = """
    <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=David+Libre">
    <style>
    .stTextArea textarea {
        font-size: 20px;
        color: black;
        font-family: 'David Libre';
        direction: rtl;
    }
    .entities  {
        font-size: 16px;
        font-family: 'David Libre';
        direction: rtl;
    }
    #MainMenu {visibility: hidden;}
    footer {visibility: hidden;}
   </style>
    """
    st.write(style, unsafe_allow_html=True)

    if text and btn:
        # Local pipeline: run ``nlp`` and render the Doc's entities.
        doc = nlp(text)
        html = displacy.render(
            doc,
            style="ent",
            options={"direction": "rtl"},
            manual=False,
        )

        nemo_html = get_html(html)
        # Remote pipeline: entities come from the IAHLT HTTP API.
        iahlt_html = get_html_from_server(text)

        html = f"""
        <div style="display: flex; flex-direction: row; justify-content: space-between; direction: rtl">
            <div>
            <h3>Nemo model results</h3>
                {nemo_html}
            </div>
        </div>
        <div style="display: flex; flex-direction: row; justify-content: space-between; direction: rtl">
            <div>
            <h3>IAHLT results</h3>
                {iahlt_html}
            </div>
        </div>
        """
        st.write(html, unsafe_allow_html=True)

    else:
        # Nothing to annotate yet; write an empty element.
        st.write("")