File size: 2,749 Bytes
67aa33f
a9ff64c
 
 
 
67aa33f
 
 
 
 
 
c2564fc
67aa33f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e21f4c1
67aa33f
 
 
 
 
 
 
 
 
 
 
 
e21f4c1
 
67aa33f
e21f4c1
 
 
 
 
67aa33f
 
 
 
 
 
136db36
67aa33f
 
 
 
 
 
 
 
 
4008dee
 
 
136db36
 
67aa33f
 
 
4008dee
67aa33f
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import pandas as pd
import random
from gliner import GLiNER
import gradio as gr
from datasets import load_dataset

# Load the catalogue CSV once at import time; get_new_snippet() samples rows from this frame.
df = pd.read_csv("1921_catalogue_SMG.csv")  # Replace with your actual CSV file path
text_column = "Description"  # Replace with the actual column name containing the text data

# Load the fine-tuned GLiNER model once at import time; ner() calls it on every invocation.
# NOTE(review): trust_remote_code=True is forwarded to from_pretrained — confirm the model repo is trusted.
model = GLiNER.from_pretrained("max-long/textile_machines_ner_5_oct", trust_remote_code=True)

def get_new_snippet():
    """Return one randomly chosen text snippet from the loaded catalogue.

    Reads the module-level ``df`` DataFrame and picks a random value from
    the column named by ``text_column``.

    Returns:
        The selected snippet as a plain ``str``, or the message
        "No more snippets available." when the column holds no usable
        (non-missing) values.
    """
    # pandas reads empty CSV cells as NaN (a float). Drop them so the caller
    # always receives real text rather than a NaN value.
    candidates = df[text_column].dropna()
    if candidates.empty:
        return "No more snippets available."  # Nothing usable in the CSV

    # str() converts numpy scalar types (e.g. numpy.str_) to a built-in str.
    return str(candidates.sample(n=1).iloc[0])

def ner(text: str):
    """Find "Textile Machinery" mentions in *text* using the GLiNER model.

    Args:
        text: Free text to analyse. Blank, whitespace-only or ``None`` input
            is accepted and produces an empty result without running the
            model.

    Returns:
        A 2-tuple ``(highlighted, entities)``:

        * ``highlighted`` -- dict with keys ``"text"`` and ``"entities"``
          (each entity has ``start``, ``end`` and ``entity``), the dictionary
          format used for the ``gr.HighlightedText`` output.
        * ``entities`` -- list of dicts with ``entity``, ``word``, ``start``,
          ``end`` and ``score`` for the JSON output.
    """
    labels = ["Textile Machinery"]
    threshold = 0.5

    # Nothing to analyse: skip inference instead of handing the model an
    # empty string, and still return the same two-output shape.
    if not text or not text.strip():
        return {"text": text or "", "entities": []}, []

    # Predict entities using the fine-tuned GLiNER model
    entities = model.predict_entities(text, labels, flat_ner=True, threshold=threshold)

    # Keep only entities carrying one of the requested labels
    textile_entities = [
        {
            "entity": ent["label"],
            "word": ent["text"],
            "start": ent["start"],
            "end": ent["end"],
            "score": ent.get("score", 0),
        }
        for ent in entities
        if ent["label"] in labels
    ]

    # Span-only view of the same entities for the color-coded display
    highlights = [
        {"start": ent["start"], "end": ent["end"], "entity": ent["entity"]}
        for ent in textile_entities
    ]

    # Two outputs: one for the highlighted text and one for the JSON panel
    return {"text": text, "entities": highlights}, textile_entities

# Gradio Interface
# Components are declared in this order: intro markdown, text box,
# "Get New Snippet" button, highlighted-text output, JSON output, and the
# "Find Textile Machinery!" button. Keep the order when editing.
with gr.Blocks(title="Textile Machinery NER Demo") as demo:
    gr.Markdown(
        """
        # Textile Machinery Entity Recognition Demo
        This demo selects a random text snippet from the Science Museum's 1921 catalogue and identifies "Textile Machinery" entities using a fine-tuned GLiNER model developed by the Congruence Engine project.
        """
    )

    # NOTE(review): the instruction sentence is passed as the box's initial
    # `value`, so it is the text ner() receives if the user submits without
    # editing or refreshing — confirm this is intended rather than relying on
    # `placeholder` alone.
    input_text = gr.Textbox(
        value="Enter or refresh to get text from CSV",
        label="Text input",
        placeholder="Enter your text here",
        lines=5
    )
    
    refresh_btn = gr.Button("Get New Snippet")
    
    # Use HighlightedText to show the entities
    output_highlighted = gr.HighlightedText(label="Predicted Entities")
    # Same entities as structured data (label, word, offsets, score)
    output_entities = gr.JSON(label="Entities")

    submit_btn = gr.Button("Find Textile Machinery!")
    

    # Refresh: write the return value of get_new_snippet() into the text box
    refresh_btn.click(fn=get_new_snippet, outputs=input_text)

    # Submit: run ner() on the current text; its two return values feed the
    # highlighted view and the JSON view, in that order
    submit_btn.click(
        fn=ner,
        inputs=[input_text],
        outputs=[output_highlighted, output_entities]
    )

# Runs at module level: enable the request queue, then start the app.
# NOTE(review): debug=True is passed to launch() — confirm it is wanted outside local development.
demo.queue()
demo.launch(debug=True)