import gradio as gr
from AinaTheme import theme
from transformers import pipeline
import gradio as gr
from gradio.components import Textbox, Button, HighlightedText, Markdown
import os
from dotenv import load_dotenv
# Pull settings from a local .env file (if any) before reading the environment.
load_dotenv()

# Upper bound on the number of input characters; overridable through the
# MAX_INPUT_CHARACTERS environment variable, 1000 when unset.
MAX_INPUT_CHARACTERS = int(os.environ.get("MAX_INPUT_CHARACTERS", 1000))

# Token-classification pipeline shared by every request.
ner_pipeline = pipeline(
    "token-classification",
    model="projecte-aina/deberta_multiner",
    aggregation_strategy="simple",
)
def submit_input(text):
    """Run the NER pipeline on ``text`` and build the highlighted-text payload.

    Args:
        text: Raw content of the input textbox. ``None`` is treated the same
            as an empty string.

    Returns:
        A dict ``{"text": text, "entities": model_output}`` on success, or
        ``None`` (after showing a UI warning) when the input is empty, longer
        than ``MAX_INPUT_CHARACTERS``, or the pipeline returned ``None``.
    """
    # Guard against None as well as blank input so .strip() cannot fail.
    if text is None or text.strip() == "":
        gr.Warning('Not possible to process an empty input.')
        return None

    # The UI only disables the submit button client-side; enforce the same
    # limit here (measured like check_max_characters) because this handler is
    # also exposed as an API endpoint.
    if len(text.strip()) > MAX_INPUT_CHARACTERS:
        gr.Warning(f'Max length {MAX_INPUT_CHARACTERS} characters.')
        return None

    model_output = ner_pipeline(text)
    if model_output is None:
        gr.Warning('An error occurred. Please try again later.')
        # Stop here instead of forwarding a payload whose entities are None.
        return None

    return {"text": text, "entities": model_output}
def check_max_characters(text, max_char):
    """Decide whether the buttons stay clickable for the current input length.

    Args:
        text: Current content of the input textbox; ``None`` counts as empty.
        max_char: Maximum allowed number of characters (anything ``int()``
            accepts, e.g. the string held by the hidden textbox).

    Returns:
        A pair of ``gr.update`` objects. The first always sets
        ``interactive=True``; the second sets ``interactive`` to ``False``
        only when the stripped text is longer than ``max_char``.
    """
    # `text or ""` keeps a None value from crashing on .strip().
    within_limit = len((text or "").strip()) <= int(max_char)
    return gr.update(interactive=True), gr.update(interactive=within_limit)
def clear():
    """Return a ``(None, None)`` tuple: one reset value per wired output."""
    cleared_input = None
    cleared_output = None
    return cleared_input, cleared_output
with gr.Blocks(theme=theme) as demo:
    # --- Header: model description -------------------------------------
    with gr.Row():
        with gr.Column():
            gr.Markdown(
                """ **deberta_multiner** is a Named Entity Recognition (NER) model for the Catalan language (but with multilingual capabilities) fine-tuned from a [DeBERTa](https://huggingface.co/microsoft/deberta-v3-base) model pre-trained on a large-size multilingual corpus collected from publicly available corpora and crawlers, with a high proportion of Spanish and Catalan texts.
It has been fine-tuned with a dataset (CEIL: Catalan Entity Identification and Linking ) that contains 9 main types and 52 subtypes on all kinds of short texts, with almost 59K documents.
This result has been driven and funded by the Government of Catalonia through the [Aina](https://projecteaina.cat/) project.
"""
            )

    # --- Input / output panels -----------------------------------------
    with gr.Row(equal_height=True):
        with gr.Column(variant="panel"):
            # Hidden textbox that carries the character limit to the
            # Python and JavaScript change handlers below.
            placeholder_max_characters = Textbox(
                visible=False,
                interactive=False,
                value=MAX_INPUT_CHARACTERS,
            )
            input_ = Textbox(
                lines=3,
                label="Input",
                placeholder="e.g. Enter sentence here",
            )
            with gr.Row(variant="panel", equal_height=True):
                # The counter script below looks these elements up by id
                # ("countertext" and "inputlenght"), so they must exist in
                # the rendered HTML.
                gr.HTML("""<span id="countertext"></span>""")
                gr.HTML(f"""<span id="inputlenght">0 </span>/ {MAX_INPUT_CHARACTERS}""")

        with gr.Column(variant="panel"):
            output = HighlightedText(
                container=True,
                label="Output",
            )
            with gr.Row(variant="panel"):
                clear_btn = Button(
                    "Clear",
                )
                submit_btn = Button(
                    "Submit",
                    variant="primary",
                )

    # --- Example inputs, one widget per language -----------------------
    with gr.Row():
        # NOTE(review): Gradio documents `scale` as an integer; confirm that
        # 0.5 produces the intended layout.
        with gr.Column(scale=0.5):
            for example_label, example_text in (
                (
                    "Catalan example:",
                    """El raper nord-americà Travis Scott ha gravat el videoclip de la seva canço 'Circus Maximus' amb els Castellers de Vilafranca. Segons ha publicat la 'Revista Castells' i ha confirmat l'Agència Catalana de Notícies (ACN), el rodatge es va fer el 2 de juliol a la Tarraco Arena Plaça (TAP) de Tarragona.""",
                ),
                (
                    "Spanish example:",
                    """Durante la Segunda Guerra Mundial, España se mantuvo neutral, aunque Franco simpatizaba con Hitler y su Partido Nacionalsocialista.""",
                ),
                (
                    "English example:",
                    """The shirt Lionel Messi wore during Argentina’s 2022 Fifa World Cup final victory over France is expected to sell for a record-breaking $10m.""",
                ),
            ):
                gr.Examples(
                    label=example_label,
                    examples=[[example_text]],
                    inputs=[input_],
                    outputs=output,
                    fn=submit_input,
                )

    # --- Event wiring ---------------------------------------------------
    # Server side: toggle the buttons according to the input length.
    input_.change(
        fn=check_max_characters,
        inputs=[input_, placeholder_max_characters],
        outputs=[clear_btn, submit_btn],
        api_name=False,
    )

    # Client side: refresh the character counter and the over-limit message.
    # `(i || '')` keeps the script from failing when the value is null.
    # NOTE(review): this counts the untrimmed length, whereas
    # check_max_characters strips the text before measuring it.
    input_.change(fn=None, inputs=[input_, placeholder_max_characters], js="""(i, m) => {
        const len = (i || '').length;
        document.getElementById('countertext').textContent = len > m && 'Max length ' + m + ' characters. ' || ''
        document.getElementById('inputlenght').textContent = len + ' '
        document.getElementById('inputlenght').style.color = (len > m) ? "#ef4444" : "";
    }""")

    # Reset both the input box and the highlighted output.
    clear_btn.click(
        fn=clear,
        inputs=[],
        outputs=[input_, output],
        queue=False,
        api_name=False,
    )

    # Run the model; this is the only handler given an API name.
    submit_btn.click(
        fn=submit_input,
        inputs=[input_],
        outputs=[output],
        api_name="get-results",
    )
if __name__ == "__main__":
    # Configure the queue first, then start the app with the launch options.
    launch_options = {"max_threads": 10, "show_api": True}
    demo.queue(api_open=False)
    demo.launch(**launch_options)