File size: 2,676 Bytes
80d3ffc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
from transformers import pipeline
import gradio as gr

# Load the NER pipelines once at import time so every request reuses them.
# `aggregation_strategy='simple'` replaces the deprecated `grouped_entities=True`
# flag; it likewise merges word pieces into whole entities, and each result
# carries an `entity_group` label.
ner_pipeline_en = pipeline('ner', aggregation_strategy='simple')  # English: library default NER model
ner_pipeline_ar = pipeline(
    'ner',
    model='CAMeL-Lab/bert-base-arabic-camelbert-msa-ner',
    aggregation_strategy='simple',
)  # Arabic model

def get_ner_pipeline(language='English'):
    """Pick the module-level NER pipeline that matches *language*.

    ``'Arabic'`` selects the Arabic pipeline; any other value falls back to
    the English pipeline.
    """
    return ner_pipeline_ar if language == 'Arabic' else ner_pipeline_en

def _locate_entity(text, entity, search_from):
    """Return the ``(start, end)`` span of *entity* in *text*, or ``(None, None)``.

    Character offsets reported by the pipeline (``start``/``end`` keys) are
    preferred. If they are missing, the entity's ``word`` is searched for in
    *text* at or after *search_from*.
    """
    start = entity.get('start')
    end = entity.get('end')
    if start is None or end is None:
        # No offsets available: fall back to a substring search that does not
        # raise when the reconstructed word is absent from the input.
        word = entity['word']
        start = text.find(word, search_from)
        if start == -1:
            return None, None
        end = start + len(word)

    # Never step backwards over text that has already been emitted.
    start = max(start, search_from)
    if end <= start:
        return None, None
    return start, end


def highlight_entities(text, language='English'):
    """Extract named entities and return *text* split into labelled segments.

    Args:
        text: The input string to analyse.
        language: ``'Arabic'`` or ``'English'``; forwarded to
            ``get_ner_pipeline`` to choose the model.

    Returns:
        A list of ``(segment, label)`` tuples covering the input in order.
        ``label`` is the entity type for entity segments and ``None`` for the
        plain text between and after them.
    """
    if not text:
        # Nothing to analyse; skip the model call.
        return [("", None)]

    ner_pipeline = get_ner_pipeline(language)  # Get the appropriate NER model
    entities = ner_pipeline(text)  # Process the input text

    highlighted_text_data = []
    last_index = 0

    for entity in entities:
        # The pipeline's `word` is rebuilt from tokens and can differ from the
        # input (casing, spacing), so `text.index(word)` could raise ValueError
        # or match an earlier identical substring. Use offsets instead.
        start, end = _locate_entity(text, entity, last_index)
        if start is None:
            continue  # Entity cannot be placed in the text; leave it unlabelled.

        # Add text before the entity
        highlighted_text_data.append((text[last_index:start], None))
        # Add the entity exactly as it appears in the input, with its type
        highlighted_text_data.append((text[start:end], entity['entity_group']))
        last_index = end

    # Add any remaining text after the last entity
    highlighted_text_data.append((text[last_index:], None))

    return highlighted_text_data

# Custom CSS for right-to-left (RTL) text alignment.
# The `#output` selector matches the `elem_id="output"` given to the
# HighlightedText component below; the stylesheet is handed to gr.Interface
# through its `css=` argument.
# NOTE(review): the rule is static, so it is applied to English results as
# well as Arabic ones — confirm this is intended.
custom_css = """
#output {
    direction: rtl;  /* Right-to-left for Arabic */
    text-align: right; /* Align right for Arabic */
}
"""

# Build the Gradio UI: the input widgets first, then the output widget.
input_components = [
    # Free-form text to analyse
    gr.Textbox(label="Input Text", lines=5, placeholder="Enter your text here..."),
    # Which NER model to run
    gr.Radio(label="Select Language", choices=["English", "Arabic"], value="English"),
]

# elem_id ties this component to the `#output` rule in custom_css
output_component = gr.HighlightedText(label="Highlighted NER Results", elem_id="output")

# One sample row per supported language: [text, language]
example_rows = [
    ["Hugging Face Inc. is a company based in New York City.", "English"],
    ["أحمد هو عالم في مجال الذكاء الاصطناعي", "Arabic"],
]

interface = gr.Interface(
    fn=highlight_entities,
    inputs=input_components,
    outputs=output_component,
    title="Named Entity Recognition",
    description="Select a language and enter text to extract and highlight named entities.",
    examples=example_rows,
    css=custom_css,
)

# Start serving the app
interface.launch()