Spaces:

impresso-project
/

multilingual-named-entity-recognition

Running

File size: 2,299 Bytes

d5d2a07
c619232
d5d2a07
 
 
 
 
 
 
5436b2b
 
 
 
 
 
 
 
d5d2a07
49dd9a6
 
 
 
 
c619232
 
c5b3453
c619232
 
49dd9a6
ac886a9
 
 
c5b3453
ac886a9
49dd9a6
 
 
 
 
d5d2a07
 
 
5436b2b
464c568
49dd9a6
5436b2b
d5d2a07
 
 
5436b2b
 
 
d20062f
d5d2a07
 
 
 
 
 
 
5436b2b
464c568
 
 
 
 
ac886a9
d5d2a07
5436b2b
ac886a9
d5d2a07
5436b2b
d5d2a07

import gradio as gr
from transformers import pipeline, AutoTokenizer

# Hugging Face Hub identifier shared by the tokenizer and the pipeline
MODEL_NAME = "impresso-project/ner-stacked-bert-multilingual"

# The tokenizer is loaded on its own and handed to the pipeline explicitly
ner_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Build the NER pipeline once at import time; it is pinned to the CPU.
# NOTE(review): "generic-ner" is presumably a custom task shipped with the
# model repository, which would explain trust_remote_code=True - confirm.
ner_pipeline = pipeline(
    task="generic-ner",
    tokenizer=ner_tokenizer,
    model=MODEL_NAME,
    device="cpu",
    trust_remote_code=True,
)


# Helper function to flatten entities and prepare them for HighlightedText
def prepare_entities_for_highlight(text, results):
    """Flatten grouped NER results into a highlight payload.

    Args:
        text: The input string; returned unchanged under the "text" key.
        results: Mapping whose values are iterables of entity dicts. Each
            entity dict is read for its "word", "start", "end" and "entity"
            keys. The mapping's own keys are not used.

    Returns:
        A dict ``{"text": text, "entities": [...]}`` where every entry of
        "entities" has the keys "start", "end" and "label".
    """
    highlights = []
    for entity_list in results.values():
        for entity in entity_list:
            # Debug trace of each entity's surface form, offsets and type
            print(
                f"Entity: {entity['word']}, Start: {entity['start']}, End: {entity['end']}, Type: {entity['entity']}"
            )
            # The label carries the entity type only; the score is not shown
            label = f"{entity['entity']}"
            span = dict(start=entity["start"], end=entity["end"], label=label)
            highlights.append(span)

    return {"text": text, "entities": highlights}


# Function to process the sentence and extract entities
def extract_entities(sentence):
    """Run the NER pipeline on ``sentence`` and format the result for display.

    Args:
        sentence: Text entered by the user.

    Returns:
        A dict with "text" and "entities" keys, as produced by
        ``prepare_entities_for_highlight``. Blank input (``None``, empty or
        whitespace-only) yields an empty entity list without invoking the
        model.
    """
    # Nothing to tag: short-circuit so blank submissions never reach the model
    if sentence is None or not sentence.strip():
        return {"text": sentence or "", "entities": []}

    # Format the raw pipeline output for HighlightedText
    return prepare_entities_for_highlight(sentence, ner_pipeline(sentence))


# Create Gradio interface
def ner_app_interface():
    """Build the Gradio NER demo and launch it.

    The UI has a five-line textbox as input and a HighlightedText component
    as output, wired to ``extract_entities``. The function returns nothing;
    it ends by calling ``launch(share=True)`` on the interface.
    """
    example_sentence = "In the year 1789, King Louis XVI, ruler of France, convened the Estates-General at the Palace of Versailles."

    demo = gr.Interface(
        fn=extract_entities,
        inputs=gr.Textbox(
            lines=5,
            label="Input Sentence",
            placeholder="Enter a sentence for NER...",
        ),
        outputs=gr.HighlightedText(label="Extracted Entities"),
        title="Named Entity Recognition",
        description="Enter a sentence to extract named entities using the NER model from the Impresso project.",
        # One example row holding a single input value
        examples=[[example_sentence]],
        live=False,
    )

    demo.launch(share=True)


# Run the app only when this file is executed as a script, not when imported
if __name__ == "__main__":
    ner_app_interface()