File size: 5,018 Bytes
2ad1bab
e25282d
292db1c
 
f7a2a3f
 
 
 
 
 
 
292db1c
1652c9d
292db1c
f7a2a3f
 
b0a5997
f7a2a3f
 
 
 
 
48eb165
f7a2a3f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17b0fdc
f7a2a3f
a984ff2
f7a2a3f
 
 
49cb756
f7a2a3f
 
 
 
 
 
 
c1792e8
f7a2a3f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a984ff2
 
 
 
 
 
717c5f8
 
 
 
 
 
 
 
 
a984ff2
 
 
 
 
 
 
f7a2a3f
 
 
9a4f043
f7a2a3f
 
53b77b7
f7a2a3f
 
 
 
 
 
 
 
 
9a4f043
f7a2a3f
 
 
 
 
9a4f043
f7a2a3f
 
 
9a4f043
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import gradio as gr
from AinaTheme import AinaGradioTheme
from transformers import pipeline
import gradio as gr
from gradio.components import Textbox, Button, HighlightedText, Markdown

import os
from dotenv import load_dotenv
load_dotenv()

# Upper bound on the input length (in characters) used by the UI. Read from
# the MAX_INPUT_CHARACTERS environment variable (load_dotenv() above runs
# first), falling back to 1000 when it is unset.
MAX_INPUT_CHARACTERS= int(os.environ.get("MAX_INPUT_CHARACTERS", default=1000))

# Token-classification pipeline, created once at import time and reused by
# submit_input() for every request.
ner_pipeline = pipeline("token-classification", model="projecte-aina/multiner_ceil",aggregation_strategy="simple")

def submit_input(text):
    """Run the NER pipeline on ``text`` and shape the result for the output.

    Args:
        text: Contents of the input textbox. ``None`` and whitespace-only
            values are treated as empty input.

    Returns:
        A dict ``{"text": text, "entities": model_output}`` combining the
        original text with the pipeline output, or ``None`` when there is
        nothing to display (empty input, or the pipeline returned ``None``).
    """
    # Guard against None as well as blank strings: clear() writes None into
    # the textbox, and calling .strip() on it would raise AttributeError.
    if not text or not text.strip():
        gr.Warning('Not possible to process an empty input.')
        return None

    model_output = ner_pipeline(text)

    if model_output is None:
        # Stop here instead of falling through: the original code went on to
        # return a payload whose "entities" value was None.
        gr.Warning('An error occurred. Please try again later.')
        return None

    return {"text": text, "entities": model_output}

def check_max_characters(text, max_char):
    """Compute the interactive state of the two buttons from the input length.

    Args:
        text: Current contents of the input textbox. ``None`` is treated as
            an empty string.
        max_char: Maximum allowed number of characters; converted with
            ``int()``, so an int or a numeric string are both accepted.

    Returns:
        A pair of ``gr.update`` objects. The first always sets
        ``interactive=True``; the second sets ``interactive=True`` only while
        the stripped text is no longer than ``max_char``.
    """
    # clear() writes None into the textbox, which triggers this handler;
    # fall back to "" so that .strip() cannot raise AttributeError.
    stripped_length = len((text or "").strip())
    within_limit = stripped_length <= int(max_char)
    return gr.update(interactive=True), gr.update(interactive=within_limit)

def clear():
    """Return the reset values used by the Clear button.

    Returns:
        A 2-tuple of ``None`` values, one per output component of the click
        handler this function is registered on.
    """
    return None, None

# Build the demo UI: a description, an input panel with a character counter,
# an output panel with Clear/Submit buttons, example sentences, and the event
# wiring between them. Keyword arguments for gr.Blocks come from the Aina theme.
with gr.Blocks(**AinaGradioTheme().get_kwargs()) as demo:
    # Model description shown at the top of the page.
    with gr.Row():
        with gr.Column():
            gr.Markdown(
                """ **Multiner** is a Named Entity Recognition (NER) model for the Catalan language fine-tuned from the [BERTa] model, a RoBERTa base model pre-trained on a medium-size corpus collected from publicly available corpora and crawlers (check the BERTa model card for more details).
                It has been trained with a dataset (CEIL:  Catalan Entity Identification and Linking )  that contains 9 main types and 52 subtypes on all kinds of short texts, with almost 59K documents.
                This result has been driven and funded by the Government of Catalonia through the  [Aina](https://projecteaina.cat/).
                """
            )
            
    # Main row: input panel on the left, output panel on the right.
    with gr.Row( equal_height=True):
        with gr.Column(variant="panel"):
            # Hidden, read-only textbox that carries MAX_INPUT_CHARACTERS so
            # the limit can be passed as an input to the change handlers below.
            placeholder_max_characters = Textbox(
                visible=False,
                interactive=False,
                value= MAX_INPUT_CHARACTERS
            )
            input_ = Textbox(
                lines=3,
                label="Input",
                placeholder="e.g. Enter sentence here"
            )
            # Two HTML spans updated by the JavaScript change handler below:
            # "countertext" holds the over-limit message and "inputlenght"
            # the current character count.
            # NOTE(review): the id "inputlenght" is misspelled but matches the
            # JavaScript lookup; rename both together if it is ever corrected.
            with gr.Row(variant="panel", equal_height=True):
                gr.HTML("""<span id="countertext" style="display: flex; justify-content: start; color:#ef4444; font-weight: bold;"></span>""")
                gr.HTML(f"""<span id="counter" style="display: flex; justify-content: end;"> <span id="inputlenght">0</span>&nbsp;/&nbsp;{MAX_INPUT_CHARACTERS}</span>""")


        with gr.Column(variant="panel"):
            # Receives the {"text", "entities"} dict returned by submit_input.
            output = HighlightedText(
                container=True,
                label="Output",
            )
            with gr.Row(variant="panel"):
                clear_btn = Button(
                    "Clear", 
                )
                submit_btn = Button(
                    "Submit", 
                    variant="primary",
                )

   
    # Example sentences; each gr.Examples block targets the input textbox and
    # is configured with submit_input / output.
    with gr.Row():
        # NOTE(review): scale=0.5 is a float; Gradio documents `scale` as an
        # integer — confirm this is accepted by the pinned Gradio version.
        with gr.Column(scale=0.5):
            gr.Examples(
                label="Example:",
                examples=[
                    ["""El raper nord-americà Travis Scott ha gravat el videoclip de la seva canço 'Circus Maximus' amb els Castellers de Vilafranca. Segons ha publicat la 'Revista Castells' i ha confirmat l'Agència Catalana de Notícies (ACN), el rodatge es va fer el 2 de juliol a la Tarraco Arena Plaça (TAP) de Tarragona."""],
                ],
                inputs=[input_],
                outputs=output,
                fn=submit_input,
            )
            gr.Examples(
                label="Example:",
                examples=[
                    ["""Un jove Marc Guiu es dona a conèixer davant l'Athletic Club i dona una victòria importantíssima al Barça (1-0). D'aquesta manera, el conjunt blaugrana arriba al Clàssic a només un punt de Reial Madrid."""],
                ],
                inputs=[input_],
                outputs=output,
                fn=submit_input,
            )


    # On every input change, recompute the interactive state of the buttons;
    # the outputs order matches the pair returned by check_max_characters.
    input_.change(
        fn=check_max_characters, 
        inputs=[input_, placeholder_max_characters],
        outputs=[clear_btn, submit_btn]
    )

    # Second change handler with no Python function: the JavaScript snippet
    # updates the counter span, shows the "Max length" message when the input
    # is longer than the limit, and colours the counter red in that case.
    input_.change(fn=None, inputs=[input_, placeholder_max_characters], _js="""(i, m) => {
        document.getElementById('countertext').textContent =  i.length > m && 'Max length ' + m + ' characters. ' || ''
        document.getElementById('inputlenght').textContent = i.length + '  '
        document.getElementById('inputlenght').style.color =  (i.length > m) ? "#ef4444" : "";
    }""")

    # Clear resets both the input textbox and the highlighted output; this
    # handler is registered with queue=False.
    clear_btn.click(
        fn=clear, 
        inputs=[], 
        outputs=[input_, output],
        queue=False
    )
    
    # Submit sends the input text to submit_input and shows its result.
    submit_btn.click(
        fn=submit_input, 
        inputs=[input_],
        outputs=[output]
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    # Configure the request queue before launching.
    # NOTE(review): `concurrency_count` appears to be a Gradio 3.x-era
    # argument — confirm it is still accepted by the pinned Gradio version.
    demo.queue(concurrency_count=1, api_open=False)
    demo.launch(show_api=False)