Spaces:

max-long
/

1921_SMG_CAT_GLiNER_finetune

Sleeping

1921_SMG_CAT_GLiNER_finetune

File size: 2,768 Bytes

67aa33f
a9ff64c
 
 
 
67aa33f
 
 
 
 
 
a834d53
67aa33f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e21f4c1
67aa33f
 
 
 
 
 
 
 
 
 
 
 
e21f4c1
 
67aa33f
e21f4c1
 
 
 
 
67aa33f
 
 
 
 
 
136db36
67aa33f
 
 
 
 
 
 
 
 
4008dee
 
 
136db36
 
67aa33f
 
 
4008dee
67aa33f

import pandas as pd
import random
from gliner import GLiNER
import gradio as gr
from datasets import load_dataset

# Name of the CSV column that holds the free-text descriptions.
text_column = "Description"

# Catalogue table the demo draws its random snippets from.
df = pd.read_csv("1921_catalogue_SMG.csv")

# Fine-tuned GLiNER checkpoint used for every prediction in this app.
# NOTE(review): trust_remote_code=True is forwarded to the loader -- confirm
# this checkpoint actually requires it.
model = GLiNER.from_pretrained(
    "congruence-engine/gliner_2.5_textile_industry_historic",
    trust_remote_code=True,
)

def get_new_snippet(frame=None, column=None) -> str:
    """Return one randomly chosen, non-missing text snippet.

    Args:
        frame: DataFrame to sample from. Defaults to the module-level ``df``.
        column: Name of the text column in *frame*. Defaults to the
            module-level ``text_column``.

    Returns:
        The sampled cell converted to ``str``, or the fixed message
        ``"No more snippets available."`` when the column has no usable
        (non-missing) values.
    """
    # Resolve the defaults at call time so the zero-argument call used by the
    # UI keeps reading the module-level table and column name.
    if frame is None:
        frame = df
    if column is None:
        column = text_column

    # Missing cells would otherwise be returned as NaN/None instead of text.
    candidates = frame[column].dropna()
    if candidates.empty:
        return "No more snippets available."

    return str(candidates.sample(n=1).iloc[0])

def ner(text: str):
    """Find "Textile Machinery" entities in *text* with the GLiNER model.

    Args:
        text: Free text to analyse. ``None``, empty, or whitespace-only input
            is answered with an empty result without calling the model.

    Returns:
        A 2-tuple ``(highlighted, entities)``:

        * ``highlighted`` -- ``{"text": ..., "entities": [...]}`` where each
          entry has ``start``, ``end`` and ``entity`` keys (the value wired
          to the ``gr.HighlightedText`` output).
        * ``entities`` -- list of dicts with ``entity``, ``word``, ``start``,
          ``end`` and ``score`` keys (the value wired to the ``gr.JSON``
          output).
    """
    labels = ["Textile Machinery"]
    threshold = 0.5

    # Blank input cannot contain entities, so skip inference altogether.
    if text is None or not text.strip():
        return {"text": text or "", "entities": []}, []

    # Predict entities using the fine-tuned GLiNER model
    predictions = model.predict_entities(
        text, labels, flat_ner=True, threshold=threshold
    )

    # Keep only the requested labels and rename the fields for the UI.
    textile_entities = [
        {
            "entity": ent["label"],
            "word": ent["text"],
            "start": ent["start"],
            "end": ent["end"],
            "score": ent.get("score", 0),
        }
        for ent in predictions
        if ent["label"] in labels
    ]

    # HighlightedText only needs the character span and the label.
    highlights = [
        {"start": ent["start"], "end": ent["end"], "entity": ent["entity"]}
        for ent in textile_entities
    ]

    return {"text": text, "entities": highlights}, textile_entities

# Gradio UI: build the page, wire the two buttons, then start the server.
with gr.Blocks(title="Textile Machinery NER Demo") as demo:
    # Page heading and a short description of the demo.
    gr.Markdown(
        """
        # Textile Machinery Entity Recognition Demo
        This demo selects a random text snippet from the Science Museum's 1921 catalogue and identifies "Textile Machinery" entities using a fine-tuned GLiNER model developed by the Congruence Engine project.
        """
    )

    # Editable text area; the refresh button overwrites its content.
    input_text = gr.Textbox(
        lines=5,
        label="Text input",
        placeholder="Enter your text here",
        value="Enter or refresh to get text from CSV",
    )
    refresh_btn = gr.Button("Get New Snippet")

    # Result panes: highlighted text plus the entity list as JSON.
    output_highlighted = gr.HighlightedText(label="Predicted Entities")
    output_entities = gr.JSON(label="Entities")

    submit_btn = gr.Button("Find Textile Machinery!")

    # "Get New Snippet" replaces the textbox content with a sampled snippet.
    refresh_btn.click(fn=get_new_snippet, outputs=input_text)

    # "Find Textile Machinery!" runs NER on the textbox and fills both panes.
    submit_btn.click(
        fn=ner,
        inputs=[input_text],
        outputs=[output_highlighted, output_entities],
    )

# Turn on the queue, then launch the app in debug mode.
demo.queue().launch(debug=True)