File size: 1,409 Bytes
2ad1bab
e25282d
292db1c
2ad1bab
292db1c
0ca76da
 
292db1c
 
 
 
 
 
 
 
 
 
 
 
eee28ed
 
 
 
 
 
 
 
 
 
 
292db1c
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import gradio as gr
from AinaTheme import AinaGradioTheme
from transformers import pipeline

# gr.load("projecte-aina/multiner_ceil",src="models",aggregation_strategy="first", **AinaGradioTheme().get_kwargs()).launch()


import gradio as gr

# Build the token-classification pipeline once at import time so every
# request handled by `ner` reuses the same loaded model.
ner_pipeline = pipeline(
    task="token-classification",
    model="projecte-aina/multiner_ceil",
)

# examples = [
#     "Does Chicago have any stores and does Joe live here?",
# ]

def ner(text):
    """Tag *text* with the module-level NER pipeline.

    Returns a dict holding the unmodified input under ``"text"`` and the
    raw pipeline output under ``"entities"``; this is the value handed to
    the interface's output component.
    """
    return dict(text=text, entities=ner_pipeline(text))

# Web demo: a single text box feeds `ner`, and its result is rendered by a
# highlighted-text component. Theme-related keyword arguments come from
# AinaGradioTheme; the article is the descriptive text passed to the interface.
demo = gr.Interface(
    fn=ner,
    inputs=gr.Textbox(placeholder="Enter sentence here..."),
    outputs=gr.HighlightedText(),
    **AinaGradioTheme().get_kwargs(),
    flagging_options=None,
    article="""
            Multiner is a Named Entity Recognition (NER) model for the Catalan language fine-tuned from the [BERTa] model, a RoBERTa base model pre-trained on a medium-size corpus collected from publicly available corpora and crawlers (check the BERTa model card for more details).
            It has been trained with a dataset (CEIL:  Catalan Entity Identification and Linking )  that contains 9 main types and 52 subtypes on all kinds of short texts, with almost 59K documents.
            Aquest resultat ha estat impulsat i finançat per la Generalitat de Catalunya mitjançant el projecte Aina (https://projecteaina.cat/).
            """,
)

# Start serving the interface as soon as the script runs.
demo.launch()