# Source: Hugging Face Space (status when captured: Sleeping)
import random | |
from gliner import GLiNER | |
import gradio as gr | |
from datasets import load_dataset | |
# Hugging Face Hub identifiers for the text corpus and the NER checkpoint.
DATASET_ID = "TheBritishLibrary/blbooks"
MODEL_ID = "max-long/textile_machines_3_oct"

# Open the "train" split in streaming mode (streaming=True).
subset_dataset = load_dataset(
    DATASET_ID,
    split="train",
    streaming=True,
    trust_remote_code=True,
)

# GLiNER model used by the app for entity prediction.
model = GLiNER.from_pretrained(MODEL_ID, trust_remote_code=True)
# Define the NER function | |
def ner(text: str, labels: str, threshold: float, nested_ner: bool):
    """Predict entities in *text* and keep only the "textile machinery" ones.

    Args:
        text: Passage to analyse.
        labels: Comma-separated entity labels handed to the model. Blank
            fragments (e.g. from a trailing comma) are ignored.
        threshold: Score threshold forwarded to ``model.predict_entities``.
        nested_ner: When true, ``flat_ner`` is switched off in the model call.

    Returns:
        A 2-tuple ``(highlight, entities)``:

        * ``highlight`` is ``{"text": text, "entities": entities}``, the
          dictionary form accepted by ``gr.HighlightedText``.
        * ``entities`` is a list of dicts with the keys ``entity``, ``word``,
          ``start``, ``end`` and ``score``.
    """
    label_list = [label.strip() for label in labels.split(",") if label.strip()]

    # Nothing to predict: skip the model call and return an empty result.
    if not text.strip() or not label_list:
        return {"text": text, "entities": []}, []

    entities = model.predict_entities(
        text, label_list, flat_ner=not nested_ner, threshold=threshold
    )

    # Only "textile machinery" predictions are reported, whatever other
    # labels were requested.
    textile_entities = [
        {
            "entity": ent["label"],
            "word": ent["text"],
            "start": ent["start"],
            "end": ent["end"],
            "score": ent.get("score", 0),
        }
        for ent in entities
        if ent["label"].lower() == "textile machinery"
    ]

    # Return the span offsets and let the HighlightedText component do the
    # rendering, rather than splicing unescaped user text into HTML.
    return {"text": text, "entities": textile_entities}, textile_entities
# Number of streamed records buffered before one is drawn at random.
SNIPPET_SHUFFLE_BUFFER = 100


def get_new_snippet():
    """Return the ``text`` field of a randomly drawn record from the dataset.

    Each call reshuffles the streaming dataset with a fresh random seed, so
    repeated clicks yield different snippets instead of always the first
    record of the stream.

    Returns:
        The snippet text, or a fallback message when the stream is empty.
    """
    # WARNING: Streaming datasets may have performance implications; the
    # shuffle buffer has to be filled before the first record is yielded.
    reshuffled = subset_dataset.shuffle(
        seed=random.randrange(2**32),
        buffer_size=SNIPPET_SHUFFLE_BUFFER,
    )
    try:
        return next(iter(reshuffled))["text"]
    except StopIteration:
        return "No more snippets available."


# Build Gradio interface
with gr.Blocks(title="Textile Machinery NER Demo") as demo:
    gr.Markdown(
        """
# Textile Machinery Entity Recognition Demo
This demo selects a random text snippet from the British Library's books dataset and identifies "textile machinery" entities using a fine-tuned GLiNER model.
"""
    )
    with gr.Accordion("How to run this model locally", open=False):
        gr.Markdown(
            """
## Installation
To use this model, you must install the GLiNER Python library:
```
!pip install gliner
```
## Usage
Once you've downloaded the GLiNER library, you can import the GLiNER class. You can then load this model using `GLiNER.from_pretrained` and predict entities with `predict_entities`.
"""
        )
        gr.Code(
            '''
from gliner import GLiNER
model = GLiNER.from_pretrained("max-long/textile_machines_3_oct")
text = "Your sample text here."
labels = ["textile machinery"]
entities = model.predict_entities(text, labels)
for entity in entities:
    print(entity["text"], "=>", entity["label"])
''',
            language="python",
        )
        gr.Code(
            """
Textile Machine 1 => textile machinery
Textile Machine 2 => textile machinery
"""
        )

    input_text = gr.Textbox(
        value="Amelia Earhart flew her single engine Lockheed Vega 5B across the Atlantic to Paris.",
        label="Text input",
        placeholder="Enter your text here",
        lines=5,
    )
    with gr.Row():
        labels = gr.Textbox(
            value="textile machinery",
            label="Labels",
            placeholder="Enter your labels here (comma separated)",
            scale=2,
        )
        threshold = gr.Slider(
            0,
            1,
            value=0.3,
            step=0.01,
            label="Threshold",
            info="Lower the threshold to increase how many entities get predicted.",
            scale=1,
        )
        nested_ner = gr.Checkbox(
            value=False,
            label="Nested NER",
            info="Allow for nested NER?",
            scale=0,
        )

    output = gr.HighlightedText(label="Predicted Entities")
    submit_btn = gr.Button("Analyze Random Snippet")
    refresh_btn = gr.Button("Get New Snippet")
    # One shared JSON panel: both the submit button and the examples write to
    # it, instead of each creating (and rendering) its own component.
    entities_json = gr.JSON(label="Entities")

    ner_inputs = [input_text, labels, threshold, nested_ner]
    ner_outputs = [output, entities_json]

    refresh_btn.click(fn=get_new_snippet, outputs=input_text)
    submit_btn.click(fn=ner, inputs=ner_inputs, outputs=ner_outputs)

    examples = [
        [
            "However, both models lack other frequent DM symptoms including the fibre-type dependent atrophy, myotonia, cataract and male-infertility.",
            "textile machinery",
            0.3,
            False,
        ],
        # Add more examples as needed
    ]
    gr.Examples(
        examples=examples,
        inputs=ner_inputs,
        outputs=ner_outputs,
        fn=ner,
        label="Examples",
        cache_examples=True,
    )
# Turn on request queuing, then start the app with debug output enabled.
demo.queue().launch(debug=True)