"""Gradio demo for the projecte-aina/deberta_multiner token-classification model.

The page has a text input with a live character counter, a highlighted-entity
output panel, three clickable example sentences and Clear / Submit buttons.
"""

import os
from typing import Optional

import gradio as gr
from dotenv import load_dotenv
from gradio.components import Button, HighlightedText, Markdown, Textbox
from transformers import pipeline

from AinaTheme import theme

# Load variables from a local .env file before reading the configuration below.
load_dotenv()

# Maximum number of (stripped) characters accepted as input.
MAX_INPUT_CHARACTERS = int(os.environ.get("MAX_INPUT_CHARACTERS", default=1000))

# Loaded once at import time and shared by every request.
ner_pipeline = pipeline(
    "token-classification",
    model="projecte-aina/deberta_multiner",
    aggregation_strategy="simple",
)


def submit_input(text: Optional[str]) -> Optional[dict]:
    """Run the NER pipeline on *text* and return a HighlightedText payload.

    Args:
        text: Raw content of the input textbox. ``None`` is treated like an
            empty string.

    Returns:
        ``{"text": text, "entities": <pipeline output>}`` on success, or
        ``None`` (after raising a UI warning) when the input is empty, longer
        than ``MAX_INPUT_CHARACTERS``, or the pipeline produced no result.
    """
    stripped = (text or "").strip()
    if not stripped:
        gr.Warning('Not possible to process an empty input.')
        return None

    # The Submit button is disabled client-side for over-long input, but this
    # handler is also exposed as the "get-results" API endpoint, so the limit
    # has to be checked here as well.
    if len(stripped) > MAX_INPUT_CHARACTERS:
        gr.Warning(f'Max length {MAX_INPUT_CHARACTERS} characters.')
        return None

    model_output = ner_pipeline(text)
    if model_output is None:
        gr.Warning('An error occurred. Please try again later.')
        # Do not hand a payload with ``entities=None`` to the output component.
        return None

    return {"text": text, "entities": model_output}


def check_max_characters(text: Optional[str], max_char):
    """Enable or disable the buttons depending on the input length.

    Args:
        text: Current content of the input textbox (may be ``None`` right
            after the Clear button resets it).
        max_char: Maximum allowed length; anything ``int()`` accepts.

    Returns:
        A pair of ``gr.update`` objects for ``(clear_btn, submit_btn)``.
        The clear button is always interactive; the submit button is
        interactive only while the stripped text fits within ``max_char``.
    """
    within_limit = len((text or "").strip()) <= int(max_char)
    return gr.update(interactive=True), gr.update(interactive=within_limit)


def clear():
    """Return the reset values for ``(input_, output)``."""
    return (
        None,
        None,
    )


with gr.Blocks(theme=theme) as demo:
    with gr.Row():
        with gr.Column():
            gr.Markdown(
                " **deberta_multiner** is a Named Entity Recognition (NER) model for the Catalan language"
                " (but with multilingual capabilities) fine-tuned from a"
                " [DeBERTa](https://huggingface.co/microsoft/deberta-v3-base) model pre-trained on a"
                " large-size multilingual corpus collected from publicly available corpora and crawlers,"
                " with a high proportion of Spanish and Catalan texts."
                " It has been fine-tuned with a dataset (CEIL: Catalan Entity Identification and Linking )"
                " that contains 9 main types and 52 subtypes on all kinds of short texts, with almost 59K"
                " documents."
                " This result has been driven and funded by the Government of Catalonia through the"
                " [Aina](https://projecteaina.cat/) project. "
            )

    with gr.Row(equal_height=True):
        with gr.Column(variant="panel"):
            # Hidden textbox used only to pass the limit to the event handlers
            # (Python and JS) as a regular component input.
            placeholder_max_characters = Textbox(
                visible=False,
                interactive=False,
                value=MAX_INPUT_CHARACTERS,
            )
            input_ = Textbox(
                lines=3,
                label="Input",
                placeholder="e.g. Enter sentence here",
            )
            with gr.Row(variant="panel", equal_height=True):
                # The ids "countertext" and "inputlenght" are looked up by the
                # JS counter attached to input_.change below; keep them in sync.
                gr.HTML("""<span id="countertext" style="color: #ef4444;"></span>""")
                gr.HTML(f"""<span id="counter"> <span id="inputlenght">0</span> / {MAX_INPUT_CHARACTERS}</span>""")

        with gr.Column(variant="panel"):
            output = HighlightedText(
                container=True,
                label="Output",
            )
            with gr.Row(variant="panel"):
                clear_btn = Button(
                    "Clear",
                )
                submit_btn = Button(
                    "Submit",
                    variant="primary",
                )

    with gr.Row():
        with gr.Column(scale=0.5):
            gr.Examples(
                label="Catalan example:",
                examples=[
                    ["""El raper nord-americà Travis Scott ha gravat el videoclip de la seva canço 'Circus Maximus' amb els Castellers de Vilafranca. Segons ha publicat la 'Revista Castells' i ha confirmat l'Agència Catalana de Notícies (ACN), el rodatge es va fer el 2 de juliol a la Tarraco Arena Plaça (TAP) de Tarragona."""],
                ],
                inputs=[input_],
                outputs=output,
                fn=submit_input,
            )
            gr.Examples(
                label="Spanish example:",
                examples=[
                    ["""Durante la Segunda Guerra Mundial, España se mantuvo neutral, aunque Franco simpatizaba con Hitler y su Partido Nacionalsocialista."""],
                ],
                inputs=[input_],
                outputs=output,
                fn=submit_input,
            )
            gr.Examples(
                label="English example:",
                examples=[
                    ["""The shirt Lionel Messi wore during Argentina’s 2022 Fifa World Cup final victory over France is expected to sell for a record-breaking $10m."""],
                ],
                inputs=[input_],
                outputs=output,
                fn=submit_input,
            )

    # Server-side: toggle the Submit button whenever the input changes.
    input_.change(
        fn=check_max_characters,
        inputs=[input_, placeholder_max_characters],
        outputs=[clear_btn, submit_btn],
        api_name=False,
    )

    # Client-side: refresh the "<n> / <max>" counter and the over-limit message.
    # `i` may be null after the Clear button resets the textbox, hence the guard.
    input_.change(
        fn=None,
        inputs=[input_, placeholder_max_characters],
        js="""(i, m) => {
            const length = (i || '').length;
            document.getElementById('countertext').textContent = length > m && 'Max length ' + m + ' characters. ' || '';
            document.getElementById('inputlenght').textContent = length + ' ';
            document.getElementById('inputlenght').style.color = (length > m) ? "#ef4444" : "";
        }""",
    )

    clear_btn.click(
        fn=clear,
        inputs=[],
        outputs=[input_, output],
        queue=False,
        api_name=False,
    )

    submit_btn.click(
        fn=submit_input,
        inputs=[input_],
        outputs=[output],
        api_name="get-results",
    )


if __name__ == "__main__":
    demo.queue(api_open=False)
    demo.launch(max_threads=10, show_api=True)