"""Gradio demo for the projecte-aina Catalan multi-type NER model.

Builds a two-column UI (free-text input on the left, highlighted entities on
the right), enforces a configurable maximum input length and runs a Hugging
Face token-classification pipeline on submit.
"""

import os

import gradio as gr
from dotenv import load_dotenv
from gradio.components import Button, HighlightedText, Markdown, Textbox
from transformers import pipeline

from AinaTheme import AinaGradioTheme

load_dotenv()

# Upper bound on the number of input characters; overridable via environment.
MAX_INPUT_CHARACTERS = int(os.environ.get("MAX_INPUT_CHARACTERS", default=1000))

# Loaded once at import time so every request reuses the same model instance.
ner_pipeline = pipeline(
    "token-classification",
    model="projecte-aina/multiner_ceil",
    aggregation_strategy="simple",
)

# Intro text shown above the demo. Built from adjacent literals so the value
# stays a single line; the leading/trailing spaces of the original are kept.
_DESCRIPTION_MD = (
    " **Multiner** is a Named Entity Recognition (NER) model for the Catalan "
    "language fine-tuned from the [BERTa] model, a RoBERTa base model "
    "pre-trained on a medium-size corpus collected from publicly available "
    "corpora and crawlers (check the BERTa model card for more details). "
    "It has been trained with a dataset (CEIL: Catalan Entity Identification "
    "and Linking ) that contains 9 main types and 52 subtypes on all kinds of "
    "short texts, with almost 59K documents. This result has been driven and "
    "funded by the Government of Catalonia through the "
    "[Aina](https://projecteaina.cat/) "
)

# Client-side character counter. It writes into the elements with ids
# 'countertext' and 'inputlenght' (sic), which the gr.HTML components below
# must therefore provide. Statements are ';'-terminated so the snippet stays
# valid regardless of how its whitespace is laid out.
_COUNTER_JS = """(i, m) => {
    document.getElementById('countertext').textContent = i.length > m && 'Max length ' + m + ' characters. ' || '';
    document.getElementById('inputlenght').textContent = i.length + ' ';
    document.getElementById('inputlenght').style.color = (i.length > m) ? "#ef4444" : "";
}"""


def submit_input(text):
    """Run NER on *text* and return the payload for ``HighlightedText``.

    Args:
        text: Raw user input; ``None`` is treated like an empty string.

    Returns:
        ``{"text": text, "entities": [...]}`` on success, or ``None`` (after
        showing a warning) when the input is blank or the pipeline produced
        no result.
    """
    if not text or not text.strip():
        gr.Warning('Not possible to inference an empty input')
        return None

    model_output = ner_pipeline(text)
    if model_output is None:
        gr.Warning('Inference endpoint is not available right now. Please try again later.')
        # Do not hand a payload with ``entities=None`` to the output widget.
        return None

    return {"text": text, "entities": model_output}


def check_max_characters(text, max_char):
    """Enable or disable the submit button based on the input length.

    Args:
        text: Current content of the input box; ``None`` counts as empty.
        max_char: Maximum allowed length (anything accepted by ``int()``).

    Returns:
        A pair of ``gr.update`` objects for ``(clear_btn, submit_btn)``. The
        clear button is always interactive; the submit button only while the
        stripped text is within the limit.
    """
    within_limit = len((text or "").strip()) <= int(max_char)
    return gr.update(interactive=True), gr.update(interactive=within_limit)


def clear():
    """Return the reset values for ``(input_, output)``."""
    return (
        None,
        None,
    )


# NOTE(review): the original indentation was lost, so the nesting below is
# reconstructed from statement order -- confirm the button row belongs to the
# output column.
with gr.Blocks(**AinaGradioTheme().get_kwargs()) as demo:
    with gr.Row():
        with gr.Column():
            gr.Markdown(_DESCRIPTION_MD)

    with gr.Row(equal_height=False):
        with gr.Column(variant="panel"):
            # Hidden component that carries the limit to the Python and JS
            # callbacks as a regular event input.
            placeholder_max_characters = Textbox(
                visible=False,
                interactive=False,
                value=MAX_INPUT_CHARACTERS,
            )
            input_ = Textbox(
                lines=8,
                label="Input",
                placeholder="e.g. Enter sentence here",
            )
            with gr.Row(variant="panel", equal_height=True):
                # Targets for _COUNTER_JS: over-limit message and live count.
                gr.HTML("""<span id="countertext"></span>""")
                gr.HTML(f"""<span id="inputlenght">0</span> / {MAX_INPUT_CHARACTERS}""")

        with gr.Column(variant="panel"):
            output = HighlightedText(
                container=True,
                label="Output",
            )
            with gr.Row(variant="panel"):
                clear_btn = Button(
                    "Clear",
                )
                submit_btn = Button(
                    "Submit",
                    variant="primary",
                )

    # Server-side guard: toggles the submit button when the limit is exceeded.
    input_.change(
        fn=check_max_characters,
        inputs=[input_, placeholder_max_characters],
        outputs=[clear_btn, submit_btn],
    )

    # Client-side counter update; runs purely in the browser (fn=None).
    input_.change(
        fn=None,
        inputs=[input_, placeholder_max_characters],
        _js=_COUNTER_JS,
    )

    clear_btn.click(
        fn=clear,
        inputs=[],
        outputs=[input_, output],
        queue=False,
    )

    submit_btn.click(
        fn=submit_input,
        inputs=[input_],
        outputs=[output],
    )


if __name__ == "__main__":
    demo.queue(concurrency_count=1, api_open=False)
    demo.launch(show_api=False)