Spaces:
Runtime error
Runtime error
File size: 5,506 Bytes
2ad1bab a282012 292db1c f7a2a3f 292db1c 83a7938 292db1c f7a2a3f b0a5997 f7a2a3f 48eb165 f7a2a3f a282012 f7a2a3f 83a7938 18b52ea f7a2a3f 49cb756 f7a2a3f c1792e8 f7a2a3f a984ff2 83a7938 a984ff2 717c5f8 83a7938 717c5f8 acd2214 83a7938 a984ff2 f7a2a3f 002659d f7a2a3f 230ff40 f7a2a3f 002659d f7a2a3f 002659d f7a2a3f 230ff40 5f1db5b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
import gradio as gr
from AinaTheme import theme
from transformers import pipeline
import gradio as gr
from gradio.components import Textbox, Button, HighlightedText, Markdown
import os
from dotenv import load_dotenv
# Load environment overrides (python-dotenv) before any setting is read.
load_dotenv()

# Maximum number of characters accepted in the input box; falls back to 1000
# when the MAX_INPUT_CHARACTERS environment variable is not set.
MAX_INPUT_CHARACTERS = int(os.getenv("MAX_INPUT_CHARACTERS", 1000))

# Token-classification pipeline built once at import time and shared by every
# request handled by ``submit_input``.
ner_pipeline = pipeline(
    "token-classification",
    model="projecte-aina/deberta_multiner",
    aggregation_strategy="simple",
)
def submit_input(text):
    """Run the NER pipeline on ``text`` and shape the result for the output widget.

    Args:
        text: Raw content of the input textbox. May be ``None`` (``clear``
            writes ``None`` into the textbox) or blank.

    Returns:
        A dict ``{"text": text, "entities": model_output}`` on success, or
        ``None`` (leaving the output empty) when the input is missing/blank
        or the pipeline produced no result.
    """
    # Guard against None as well as whitespace-only input: calling .strip()
    # on None would raise AttributeError instead of showing the warning.
    if text is None or text.strip() == "":
        gr.Warning('Not possible to process an empty input.')
        return None

    model_output = ner_pipeline(text)
    if model_output is None:
        gr.Warning('An error occurred. Please try again later.')
        # Do not hand the output component a dict whose entities are None.
        return None

    return {"text": text, "entities": model_output}
def check_max_characters(text, max_char):
    """Decide whether the Clear and Submit buttons should be clickable.

    Args:
        text: Current content of the input textbox; ``None`` is treated as
            empty (``clear`` writes ``None`` into the textbox).
        max_char: Maximum allowed length. Converted with ``int()``, so a
            numeric string is accepted.

    Returns:
        A pair of ``gr.update`` objects ``(clear_button, submit_button)``.
        The first is always interactive; the second is interactive only
        while the stripped input length does not exceed ``max_char``.
    """
    # Treat a missing value as an empty string so .strip() cannot fail.
    stripped_length = len((text or "").strip())
    within_limit = stripped_length <= int(max_char)
    return gr.update(interactive=True), gr.update(interactive=within_limit)
def clear():
    """Return the reset values for the two components wired to the Clear button.

    Returns:
        A 2-tuple ``(None, None)``: one value per output component.
    """
    cleared_input = None
    cleared_output = None
    return cleared_input, cleared_output
# NOTE(review): the original indentation of this layout was lost; the nesting
# below (in particular where the button row sits) is reconstructed -- confirm
# against the deployed UI. Multi-line string literals are kept flush-left so
# their runtime content is unchanged.
with gr.Blocks(theme=theme) as demo:
    # --- Header: model description -------------------------------------
    with gr.Row():
        with gr.Column():
            gr.Markdown(
                """ **deberta_multiner** is a Named Entity Recognition (NER) model for the Catalan language (but with multilingual capabilities) fine-tuned from a [DeBERTa](https://huggingface.co/microsoft/deberta-v3-base) model pre-trained on a large-size multilingual corpus collected from publicly available corpora and crawlers, with a high proportion of Spanish and Catalan texts.
It has been fine-tuned with a dataset (CEIL: Catalan Entity Identification and Linking ) that contains 9 main types and 52 subtypes on all kinds of short texts, with almost 59K documents.
This result has been driven and funded by the Government of Catalonia through the [Aina](https://projecteaina.cat/) project.
"""
            )

    # --- Main area: input on one side, highlighted entities on the other
    with gr.Row( equal_height=True):
        with gr.Column(variant="panel"):
            # Hidden textbox carrying the character limit so that it can be
            # passed as an input to the change handlers below.
            placeholder_max_characters = Textbox(
                visible=False,
                interactive=False,
                value= MAX_INPUT_CHARACTERS
            )
            input_ = Textbox(
                lines=3,
                label="Input",
                placeholder="e.g. Enter sentence here"
            )
            # Character counter; both spans are filled in by the JS handler.
            with gr.Row(variant="panel", equal_height=True):
                gr.HTML("""<span id="countertext" style="display: flex; justify-content: start; color:#ef4444; font-weight: bold;"></span>""")
                gr.HTML(f"""<span id="counter" style="display: flex; justify-content: end;"> <span id="inputlenght">0</span> / {MAX_INPUT_CHARACTERS}</span>""")

        with gr.Column(variant="panel"):
            output = HighlightedText(
                container=True,
                label="Output",
            )
            with gr.Row(variant="panel"):
                clear_btn = Button(
                    "Clear",
                )
                submit_btn = Button(
                    "Submit",
                    variant="primary",
                )

    # --- Examples: one gr.Examples widget per language, in this order ----
    _example_sets = (
        (
            "Catalan example:",
            """El raper nord-americà Travis Scott ha gravat el videoclip de la seva canço 'Circus Maximus' amb els Castellers de Vilafranca. Segons ha publicat la 'Revista Castells' i ha confirmat l'Agència Catalana de Notícies (ACN), el rodatge es va fer el 2 de juliol a la Tarraco Arena Plaça (TAP) de Tarragona.""",
        ),
        (
            "Spanish example:",
            """Durante la Segunda Guerra Mundial, España se mantuvo neutral, aunque Franco simpatizaba con Hitler y su Partido Nacionalsocialista.""",
        ),
        (
            "English example:",
            """The shirt Lionel Messi wore during Argentina’s 2022 Fifa World Cup final victory over France is expected to sell for a record-breaking $10m.""",
        ),
    )
    with gr.Row():
        with gr.Column(scale=0.5):
            for _example_label, _example_text in _example_sets:
                gr.Examples(
                    label=_example_label,
                    examples=[
                        [_example_text],
                    ],
                    inputs=[input_],
                    outputs=output,
                    fn=submit_input,
                )

    # --- Event wiring ----------------------------------------------------
    # Python-side handler: toggles button interactivity on every edit.
    input_.change(
        fn=check_max_characters,
        inputs=[input_, placeholder_max_characters],
        outputs=[clear_btn, submit_btn],
        api_name=False
    )

    # Browser-side handler: updates the counter and the over-limit message.
    input_.change(
        fn=None,
        inputs=[input_, placeholder_max_characters],
        js="""(i, m) => {
document.getElementById('countertext').textContent = i.length > m && 'Max length ' + m + ' characters. ' || ''
document.getElementById('inputlenght').textContent = i.length + ' '
document.getElementById('inputlenght').style.color = (i.length > m) ? "#ef4444" : "";
}""",
    )

    # Clear resets both the input textbox and the highlighted output.
    clear_btn.click(
        fn=clear,
        inputs=[],
        outputs=[input_, output],
        queue=False,
        api_name=False,
    )

    # Submit runs the model; this is the only handler given an API name.
    submit_btn.click(
        fn=submit_input,
        inputs=[input_],
        outputs=[output],
        api_name="get-results"
    )
# Script entry point: configure the request queue, then start the app.
if __name__ == "__main__":
    demo.queue(api_open=False)
    demo.launch(max_threads=10, show_api=True)