Spaces:
Sleeping
Sleeping
File size: 4,987 Bytes
1dc581a 999a2cb 1dc581a 999a2cb 1dc581a 999a2cb 1dc581a 999a2cb 1dc581a 999a2cb 1dc581a 999a2cb 1dc581a 999a2cb 1dc581a 999a2cb 1dc581a 999a2cb 1dc581a 999a2cb 1dc581a 999a2cb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
import random
from gliner import GLiNER
import gradio as gr
from datasets import load_dataset
# Open the British Library books corpus from the Hugging Face Hub in
# streaming mode, so the "train" split is iterated rather than loaded whole.
subset_dataset = load_dataset(
    "TheBritishLibrary/blbooks",
    split="train",
    streaming=True,
    trust_remote_code=True,
)

# Fetch the GLiNER checkpoint that `ner` uses for entity prediction.
model = GLiNER.from_pretrained(
    "max-long/textile_machines_3_oct",
    trust_remote_code=True,
)
# Define the NER function
def ner(text: str, labels: str, threshold: float, nested_ner: bool):
    """Predict entities in ``text`` and keep only the "textile machinery" ones.

    Args:
        text: The passage to analyse.
        labels: Comma-separated label names handed to the model; surrounding
            whitespace is stripped and empty items are dropped.
        threshold: Score threshold forwarded to ``model.predict_entities``.
        nested_ner: When True the model is called with ``flat_ner=False``.

    Returns:
        A 2-tuple ``(highlight, entities)``:

        * ``highlight`` is ``{"text": text, "entities": entities}``, the
          dict payload accepted by ``gr.HighlightedText`` (the component the
          first output is wired to).  Letting the component render the
          spans also avoids splicing raw, unescaped text into HTML.
        * ``entities`` is a list of dicts with the keys ``entity``,
          ``word``, ``start``, ``end`` and ``score``.
    """
    target_label = "textile machinery"

    label_list = [part.strip() for part in labels.split(",") if part.strip()]

    # Nothing to analyse: skip the model call instead of feeding it an empty
    # string or an empty label list.
    if not text or not text.strip() or not label_list:
        return {"text": text or "", "entities": []}, []

    predictions = model.predict_entities(
        text,
        label_list,
        flat_ner=not nested_ner,
        threshold=threshold,
    )

    # Filter for "textile machinery" entities (case-insensitive on the label).
    textile_entities = [
        {
            "entity": ent["label"],
            "word": ent["text"],
            "start": ent["start"],
            "end": ent["end"],
            "score": ent.get("score", 0),
        }
        for ent in predictions
        if ent["label"].lower() == target_label
    ]

    return {"text": text, "entities": textile_entities}, textile_entities
# Build Gradio interface
with gr.Blocks(title="Textile Machinery NER Demo") as demo:
    gr.Markdown(
        """
# Textile Machinery Entity Recognition Demo
This demo selects a random text snippet from the British Library's books dataset and identifies "textile machinery" entities using a fine-tuned GLiNER model.
"""
    )

    # Collapsed help panel: installation note, sample code and sample output.
    with gr.Accordion("How to run this model locally", open=False):
        gr.Markdown(
            """
## Installation
To use this model, you must install the GLiNER Python library:
```
!pip install gliner
```
## Usage
Once you've downloaded the GLiNER library, you can import the GLiNER class. You can then load this model using `GLiNER.from_pretrained` and predict entities with `predict_entities`.
"""
        )
        # The loop body is indented so the displayed sample is valid Python.
        gr.Code(
            '''
from gliner import GLiNER
model = GLiNER.from_pretrained("max-long/textile_machines_3_oct")
text = "Your sample text here."
labels = ["textile machinery"]
entities = model.predict_entities(text, labels)
for entity in entities:
    print(entity["text"], "=>", entity["label"])
''',
            language="python",
        )
        gr.Code(
            """
Textile Machine 1 => textile machinery
Textile Machine 2 => textile machinery
"""
        )

    # --- Inputs ---------------------------------------------------------
    input_text = gr.Textbox(
        value="Amelia Earhart flew her single engine Lockheed Vega 5B across the Atlantic to Paris.",
        label="Text input",
        placeholder="Enter your text here",
        lines=5,
    )
    with gr.Row():
        labels = gr.Textbox(
            value="textile machinery",
            label="Labels",
            placeholder="Enter your labels here (comma separated)",
            scale=2,
        )
        threshold = gr.Slider(
            0,
            1,
            value=0.3,
            step=0.01,
            label="Threshold",
            info="Lower the threshold to increase how many entities get predicted.",
            scale=1,
        )
        nested_ner = gr.Checkbox(
            value=False,
            label="Nested NER",
            info="Allow for nested NER?",
            scale=0,
        )

    # --- Outputs and actions --------------------------------------------
    output = gr.HighlightedText(label="Predicted Entities")
    submit_btn = gr.Button("Analyze Random Snippet")
    refresh_btn = gr.Button("Get New Snippet")
    # One shared JSON panel: creating a new gr.JSON inline for each listener
    # would render two panels, each updated by only one of the triggers.
    entities_json = gr.JSON(label="Entities")

    # A single iterator over the streaming dataset, kept for the lifetime of
    # the app.  Calling iter() on the dataset inside the handler would restart
    # from the first record on every click.
    snippet_stream = iter(subset_dataset)
    # Upper bound on how many records are skipped per click, so consecutive
    # snippets are not simply adjacent records.
    MAX_SNIPPET_SKIP = 20

    # Function to fetch a new random snippet
    def get_new_snippet() -> str:
        """Return the ``text`` field of a later record from the dataset stream.

        Skips a random number of records (0 to ``MAX_SNIPPET_SKIP``) and
        returns the next one.  Returns a fixed notice once the stream is
        exhausted.
        """
        # WARNING: Streaming datasets may have performance implications
        try:
            for _ in range(random.randint(0, MAX_SNIPPET_SKIP)):
                next(snippet_stream)
            record = next(snippet_stream)
        except StopIteration:
            return "No more snippets available."
        return record["text"]

    refresh_btn.click(fn=get_new_snippet, outputs=input_text)
    submit_btn.click(
        fn=ner,
        inputs=[input_text, labels, threshold, nested_ner],
        outputs=[output, entities_json],
    )

    examples = [
        [
            "However, both models lack other frequent DM symptoms including the fibre-type dependent atrophy, myotonia, cataract and male-infertility.",
            "textile machinery",
            0.3,
            False,
        ],
        # Add more examples as needed
    ]
    gr.Examples(
        examples=examples,
        inputs=[input_text, labels, threshold, nested_ner],
        outputs=[output, entities_json],
        fn=ner,
        label="Examples",
        cache_examples=True,
    )
# Enable the request queue, then start the app server with debug output.
demo.queue()
demo.launch(debug=True)