|
import pandas as pd |
|
import random |
|
from gliner import GLiNER |
|
import gradio as gr |
|
from datasets import load_dataset |
|
|
|
|
|
# Name of the CSV column that holds the free-text snippets shown in the demo.
text_column = "Description"

# Catalogue rows are read once at import time and sampled from on demand.
df = pd.read_csv("1921_catalogue_SMG.csv")

# Load the fine-tuned GLiNER checkpoint once at start-up.
# NOTE(review): trust_remote_code=True is forwarded to the loader; confirm
# that this checkpoint is meant to be trusted this way.
model = GLiNER.from_pretrained(
    "congruence-engine/gliner_2.5_textile_industry_historic",
    trust_remote_code=True,
)
|
|
|
def get_new_snippet():
    """Return one randomly chosen snippet from the catalogue's text column.

    Rows whose text is missing (NaN after CSV parsing) are excluded before
    sampling, so the caller always receives real text rather than a float
    NaN that would be rendered as "nan".

    Returns:
        The sampled text as a plain ``str``, or the message
        "No more snippets available." when no usable text remains.
    """
    candidates = df[text_column].dropna()
    if candidates.empty:
        return "No more snippets available."

    # str() normalises NumPy scalars / non-string cells to plain text.
    return str(candidates.sample(n=1).iloc[0])
|
|
|
# The single label requested from the model; also used to filter its output.
_TEXTILE_LABEL = "Textile Machinery"

# Score threshold passed to the model's entity prediction call.
_NER_THRESHOLD = 0.5


def ner(text: str):
    """Find "Textile Machinery" entities in *text*.

    Args:
        text: Free text to analyse. ``None``, empty, or whitespace-only
            input is answered with an empty result and the model is not
            called.

    Returns:
        A 2-tuple ``(highlighted, entities)``:

        * ``highlighted`` -- ``{"text": ..., "entities": [...]}`` where each
          entity has ``start``, ``end`` and ``entity`` keys.
        * ``entities`` -- list of dicts with ``entity``, ``word``, ``start``,
          ``end`` and ``score`` keys, one per prediction.
    """
    # Guard: nothing to analyse, so skip the model call entirely.
    if text is None or not text.strip():
        return {"text": text or "", "entities": []}, []

    predictions = model.predict_entities(
        text,
        [_TEXTILE_LABEL],
        flat_ner=True,
        threshold=_NER_THRESHOLD,
    )

    textile_entities = [
        {
            "entity": ent["label"],
            "word": ent["text"],
            "start": ent["start"],
            "end": ent["end"],
            "score": ent.get("score", 0),
        }
        for ent in predictions
        # Defensive: keep only the label this demo is about.
        if ent["label"] == _TEXTILE_LABEL
    ]

    # The highlighted view only needs the character span and label name.
    highlights = [
        {"start": ent["start"], "end": ent["end"], "entity": ent["entity"]}
        for ent in textile_entities
    ]

    return {"text": text, "entities": highlights}, textile_entities
|
|
|
|
|
# Assemble the demo page: intro text, an editable snippet box, a button that
# loads a random snippet, and a button that runs entity recognition on it.
with gr.Blocks(title="Textile Machinery NER Demo") as demo:
    gr.Markdown(
        """
        # Textile Machinery Entity Recognition Demo
        This demo selects a random text snippet from the Science Museum's 1921 catalogue and identifies "Textile Machinery" entities using a fine-tuned GLiNER model developed by the Congruence Engine project.
        """
    )

    # Text to analyse; users may type their own or load one from the CSV.
    input_text = gr.Textbox(
        label="Text input",
        value="Enter or refresh to get text from CSV",
        placeholder="Enter your text here",
        lines=5,
    )
    refresh_btn = gr.Button("Get New Snippet")

    # Results: highlighted spans plus the raw entity records.
    output_highlighted = gr.HighlightedText(label="Predicted Entities")
    output_entities = gr.JSON(label="Entities")
    submit_btn = gr.Button("Find Textile Machinery!")

    # Event wiring.
    refresh_btn.click(fn=get_new_snippet, outputs=[input_text])
    submit_btn.click(
        fn=ner,
        inputs=input_text,
        outputs=[output_highlighted, output_entities],
    )

# Enable request queuing, then start the server in debug mode.
demo.queue().launch(debug=True)