File size: 5,506 Bytes
2ad1bab
a282012
292db1c
 
f7a2a3f
 
 
 
 
 
 
292db1c
83a7938
292db1c
f7a2a3f
 
b0a5997
f7a2a3f
 
 
 
 
48eb165
f7a2a3f
 
 
 
 
 
 
 
 
 
 
 
 
 
a282012
f7a2a3f
 
 
83a7938
 
18b52ea
f7a2a3f
 
 
49cb756
f7a2a3f
 
 
 
 
 
 
c1792e8
f7a2a3f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a984ff2
 
 
83a7938
a984ff2
 
717c5f8
 
 
 
 
 
83a7938
717c5f8
acd2214
83a7938
 
 
 
 
 
 
 
 
a984ff2
 
 
 
 
 
 
f7a2a3f
 
 
002659d
 
f7a2a3f
 
230ff40
f7a2a3f
 
 
 
 
 
 
 
 
002659d
 
f7a2a3f
 
 
 
 
002659d
 
f7a2a3f
 
 
230ff40
5f1db5b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
import gradio as gr
from AinaTheme import theme
from transformers import pipeline
import gradio as gr
from gradio.components import Textbox, Button, HighlightedText, Markdown

import os
from dotenv import load_dotenv
load_dotenv()

# Maximum number of input characters accepted by the demo. Read from the
# environment (a .env file is loaded above via load_dotenv); falls back to 1000.
MAX_INPUT_CHARACTERS = int(os.environ.get("MAX_INPUT_CHARACTERS", 1000))

# Token-classification pipeline, built once when the module is imported and
# shared by every request.
ner_pipeline = pipeline(
    task="token-classification",
    model="projecte-aina/deberta_multiner",
    aggregation_strategy="simple",
)

def submit_input(text):
    """Run named-entity recognition on ``text`` for the output component.

    Args:
        text: Raw user input. ``None`` (which an API client may send) is
            treated the same as an empty string.

    Returns:
        A dict ``{"text": text, "entities": model_output}`` holding the
        original text and the pipeline result, or ``None`` when the input is
        rejected or the model returns nothing. A ``gr.Warning`` is issued in
        every ``None`` case.
    """
    # Check for None before stripping: calling .strip() on None would raise.
    if text is None or not text.strip():
        gr.Warning('Not possible to process an empty input.')
        return None

    # The UI only disables the submit button past the limit; this function is
    # also registered as a named API endpoint, so apply the same length rule
    # (stripped length) here before running the model.
    if len(text.strip()) > MAX_INPUT_CHARACTERS:
        gr.Warning(f'Max length {MAX_INPUT_CHARACTERS} characters.')
        return None

    model_output = ner_pipeline(text)

    if model_output is None:
        gr.Warning('An error occurred. Please try again later.')
        # Stop here so the output is not populated with a null entity list.
        return None

    return {"text": text, "entities": model_output}

def check_max_characters(text, max_char):
    """Build interactivity updates for two components based on input length.

    Args:
        text: Current input text; its stripped length is what gets measured.
        max_char: Character limit, converted with ``int()`` before comparing.

    Returns:
        A pair of ``gr.update`` values. The first always sets
        ``interactive=True``; the second sets ``interactive=True`` only while
        the stripped length does not exceed the limit.
    """
    within_limit = len(text.strip()) <= int(max_char)
    return gr.update(interactive=True), gr.update(interactive=within_limit)

def clear():
    """Return a two-item tuple of ``None``, one value per component to reset."""
    cleared_input = None
    cleared_output = None
    return cleared_input, cleared_output

# Build the demo UI: a description row, an input/output row, example inputs,
# and the event listeners that connect the components to the functions above.
with gr.Blocks(theme=theme) as demo:
    # Description row: Markdown text introducing the model.
    with gr.Row():
        with gr.Column():
            gr.Markdown(
                """ **deberta_multiner** is a Named Entity Recognition (NER) model for the Catalan language (but  with multilingual capabilities) fine-tuned from a [DeBERTa](https://huggingface.co/microsoft/deberta-v3-base) model pre-trained on a large-size multilingual corpus collected from publicly available corpora and crawlers, with a high proportion of Spanish and Catalan texts.
                It has been fine-tuned with a dataset (CEIL:  Catalan Entity Identification and Linking )  that contains 9 main types and 52 subtypes on all kinds of short texts, with almost 59K documents.
                This result has been driven and funded by the Government of Catalonia through the  [Aina](https://projecteaina.cat/) project.
                """
            )
            
    # Main row: first column holds the input and its counter, second column
    # holds the highlighted output and the action buttons.
    with gr.Row( equal_height=True):
        with gr.Column(variant="panel"):
            # Hidden, non-interactive textbox carrying MAX_INPUT_CHARACTERS so
            # the change listeners below can receive the limit as an input.
            placeholder_max_characters = Textbox(
                visible=False,
                interactive=False,
                value= MAX_INPUT_CHARACTERS
            )
            input_ = Textbox(
                lines=3,
                label="Input",
                placeholder="e.g. Enter sentence here"
            )
            # Two HTML spans addressed by id from the JS listener below:
            # 'countertext' (limit message) and 'inputlenght' (current length).
            with gr.Row(variant="panel", equal_height=True):
                gr.HTML("""<span id="countertext" style="display: flex; justify-content: start; color:#ef4444; font-weight: bold;"></span>""")
                gr.HTML(f"""<span id="counter" style="display: flex; justify-content: end;"> <span id="inputlenght">0</span>&nbsp;/&nbsp;{MAX_INPUT_CHARACTERS}</span>""")


        with gr.Column(variant="panel"):
            # Receives the {"text", "entities"} dict returned by submit_input.
            output = HighlightedText(
                container=True,
                label="Output",
            )
            with gr.Row(variant="panel"):
                clear_btn = Button(
                    "Clear", 
                )
                submit_btn = Button(
                    "Submit", 
                    variant="primary",
                )

   
    # Example inputs, one gr.Examples group per language; each is bound to
    # input_ / output with submit_input as its function.
    with gr.Row():
        # NOTE(review): Gradio documents `scale` as an integer; confirm that
        # the float 0.5 renders as intended.
        with gr.Column(scale=0.5):
            gr.Examples(
                label="Catalan example:",
                examples=[
                    ["""El raper nord-americà Travis Scott ha gravat el videoclip de la seva canço 'Circus Maximus' amb els Castellers de Vilafranca. Segons ha publicat la 'Revista Castells' i ha confirmat l'Agència Catalana de Notícies (ACN), el rodatge es va fer el 2 de juliol a la Tarraco Arena Plaça (TAP) de Tarragona."""],
                ],
                inputs=[input_],
                outputs=output,
                fn=submit_input,
            )
            gr.Examples(
                label="Spanish example:",
                examples=[
                    ["""Durante la Segunda Guerra Mundial, España se mantuvo neutral, aunque Franco simpatizaba con Hitler y su Partido Nacionalsocialista."""],
                ],
                inputs=[input_],
                outputs=output,
                fn=submit_input,
            )
            gr.Examples(
                label="English example:",
                examples=[
                    ["""The shirt Lionel Messi wore during Argentina’s 2022 Fifa World Cup final victory over France is expected to sell for a record-breaking $10m."""],
                ],
                inputs=[input_],
                outputs=output,
                fn=submit_input,
            )


    # On every input change, recompute button interactivity: the returned
    # updates are applied to clear_btn and submit_btn, in that order.
    input_.change(
        fn=check_max_characters, 
        inputs=[input_, placeholder_max_characters],
        outputs=[clear_btn, submit_btn],
        api_name=False
    )

    # Second change listener, JS only (fn=None): writes the current length into
    # 'inputlenght', shows the limit message in 'countertext' and colors the
    # length red when it exceeds the limit.
    # NOTE(review): this counts i.length untrimmed, while check_max_characters
    # compares the stripped length; the two can disagree on padded input.
    input_.change(fn=None, inputs=[input_, placeholder_max_characters], js="""(i, m) => {
        document.getElementById('countertext').textContent =  i.length > m && 'Max length ' + m + ' characters. ' || ''
        document.getElementById('inputlenght').textContent = i.length + '  '
        document.getElementById('inputlenght').style.color =  (i.length > m) ? "#ef4444" : "";
    }""")

    # Clear resets both the input and the output; registered with queue=False
    # and without an API name.
    clear_btn.click(
        fn=clear, 
        inputs=[], 
        outputs=[input_, output],
        queue=False,
        api_name=False,
    )
    
    # Submit runs the NER function; this is the only listener given an API
    # name ("get-results").
    submit_btn.click(
        fn=submit_input, 
        inputs=[input_],
        outputs=[output],
        api_name="get-results"
    )

if __name__ == "__main__":
    # Configure the queue first, then start the server with the launch options.
    queue_options = {"api_open": False}
    launch_options = {"max_threads": 10, "show_api": True}
    demo.queue(**queue_options)
    demo.launch(**launch_options)