Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -3,15 +3,17 @@ from gliner import GLiNER
|
|
3 |
import gradio as gr
|
4 |
from datasets import load_dataset
|
5 |
|
6 |
-
# Load the
|
7 |
-
|
8 |
|
9 |
# Load the GLiNER model
|
10 |
model = GLiNER.from_pretrained("max-long/textile_machines_3_oct", trust_remote_code=True)
|
11 |
|
12 |
-
# Define the NER function
|
13 |
def ner(text: str, labels: str, threshold: float, nested_ner: bool):
|
|
|
14 |
labels = [label.strip() for label in labels.split(",")]
|
|
|
|
|
15 |
entities = model.predict_entities(text, labels, flat_ner=not nested_ner, threshold=threshold)
|
16 |
|
17 |
# Filter for "textile machinery" entities
|
@@ -27,18 +29,17 @@ def ner(text: str, labels: str, threshold: float, nested_ner: bool):
|
|
27 |
if ent["label"].lower() == "textile machinery"
|
28 |
]
|
29 |
|
30 |
-
#
|
31 |
highlighted_text = text
|
32 |
for ent in sorted(textile_entities, key=lambda x: x['start'], reverse=True):
|
33 |
highlighted_text = (
|
34 |
highlighted_text[:ent['start']] +
|
35 |
-
f"<span style='background-color: yellow'>{highlighted_text[ent['start']:ent['end']]}</span>" +
|
36 |
highlighted_text[ent['end']:]
|
37 |
)
|
38 |
|
39 |
-
return
|
40 |
|
41 |
-
# Build Gradio interface
|
42 |
with gr.Blocks(title="Textile Machinery NER Demo") as demo:
|
43 |
gr.Markdown(
|
44 |
"""
|
@@ -79,6 +80,7 @@ Textile Machine 2 => textile machinery
|
|
79 |
"""
|
80 |
)
|
81 |
|
|
|
82 |
input_text = gr.Textbox(
|
83 |
value="Amelia Earhart flew her single engine Lockheed Vega 5B across the Atlantic to Paris.",
|
84 |
label="Text input",
|
@@ -109,28 +111,32 @@ Textile Machine 2 => textile machinery
|
|
109 |
scale=0,
|
110 |
)
|
111 |
|
112 |
-
|
|
|
|
|
113 |
|
114 |
submit_btn = gr.Button("Analyze Random Snippet")
|
115 |
refresh_btn = gr.Button("Get New Snippet")
|
116 |
|
117 |
# Function to fetch a new random snippet
|
118 |
def get_new_snippet():
|
119 |
-
# WARNING: Streaming datasets may have performance implications
|
120 |
try:
|
121 |
-
sample = next(
|
122 |
return sample
|
123 |
except StopIteration:
|
124 |
return "No more snippets available."
|
125 |
|
|
|
126 |
refresh_btn.click(fn=get_new_snippet, outputs=input_text)
|
127 |
|
|
|
128 |
submit_btn.click(
|
129 |
fn=ner,
|
130 |
inputs=[input_text, labels, threshold, nested_ner],
|
131 |
-
outputs=[
|
132 |
)
|
133 |
|
|
|
134 |
examples = [
|
135 |
[
|
136 |
"However, both models lack other frequent DM symptoms including the fibre-type dependent atrophy, myotonia, cataract and male-infertility.",
|
@@ -138,13 +144,25 @@ Textile Machine 2 => textile machinery
|
|
138 |
0.3,
|
139 |
False,
|
140 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
141 |
# Add more examples as needed
|
142 |
]
|
143 |
|
144 |
gr.Examples(
|
145 |
examples=examples,
|
146 |
inputs=[input_text, labels, threshold, nested_ner],
|
147 |
-
outputs=[
|
148 |
fn=ner,
|
149 |
label="Examples",
|
150 |
cache_examples=True,
|
|
|
3 |
import gradio as gr
|
4 |
from datasets import load_dataset
|
5 |
|
6 |
+
# Load the large dataset with streaming
|
7 |
+
dataset_iter = iter(load_dataset("TheBritishLibrary/blbooks", split="train", streaming=True, trust_remote_code=True))
|
8 |
|
9 |
# Load the GLiNER model
|
10 |
model = GLiNER.from_pretrained("max-long/textile_machines_3_oct", trust_remote_code=True)
|
11 |
|
|
|
12 |
def ner(text: str, labels: str, threshold: float, nested_ner: bool):
|
13 |
+
# Split and clean labels
|
14 |
labels = [label.strip() for label in labels.split(",")]
|
15 |
+
|
16 |
+
# Predict entities using GLiNER
|
17 |
entities = model.predict_entities(text, labels, flat_ner=not nested_ner, threshold=threshold)
|
18 |
|
19 |
# Filter for "textile machinery" entities
|
|
|
29 |
if ent["label"].lower() == "textile machinery"
|
30 |
]
|
31 |
|
32 |
+
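# Build an HTML string: wrap each textile-machinery entity in a highlighted <span>, working from the last entity to the first so earlier offsets stay valid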
|
33 |
highlighted_text = text
|
34 |
for ent in sorted(textile_entities, key=lambda x: x['start'], reverse=True):
|
35 |
highlighted_text = (
|
36 |
highlighted_text[:ent['start']] +
|
37 |
+
f"<span style='background-color: yellow; font-weight: bold;'>{highlighted_text[ent['start']:ent['end']]}</span>" +
|
38 |
highlighted_text[ent['end']:]
|
39 |
)
|
40 |
|
41 |
+
return highlighted_text, textile_entities
|
42 |
|
|
|
43 |
with gr.Blocks(title="Textile Machinery NER Demo") as demo:
|
44 |
gr.Markdown(
|
45 |
"""
|
|
|
80 |
"""
|
81 |
)
|
82 |
|
83 |
+
# Text input, pre-filled with a fixed example sentence
|
84 |
input_text = gr.Textbox(
|
85 |
value="Amelia Earhart flew her single engine Lockheed Vega 5B across the Atlantic to Paris.",
|
86 |
label="Text input",
|
|
|
111 |
scale=0,
|
112 |
)
|
113 |
|
114 |
+
# Define output components
|
115 |
+
output_highlighted = gr.HTML(label="Predicted Entities")
|
116 |
+
output_entities = gr.JSON(label="Entities")
|
117 |
|
118 |
submit_btn = gr.Button("Analyze Random Snippet")
|
119 |
refresh_btn = gr.Button("Get New Snippet")
|
120 |
|
121 |
# Function to fetch the next snippet from the streamed dataset (sequential, not randomly sampled)
|
122 |
def get_new_snippet():
    """Return the text of the next usable record from the streamed dataset.

    Records are pulled from the module-level ``dataset_iter`` iterator, so
    snippets come back in stream order; each call resumes where the previous
    one stopped. Records whose ``text`` field is missing, empty, or
    whitespace-only are skipped so the input box is never filled with a
    blank snippet.

    Returns:
        str: The snippet text, or ``"No more snippets available."`` once the
        iterator is exhausted.
    """
    # Looping over the iterator consumes it one record at a time and ends
    # cleanly on exhaustion, so no explicit StopIteration handling is needed.
    for record in dataset_iter:
        text = record.get("text")
        if text and text.strip():
            return text
    return "No more snippets available."
|
128 |
|
129 |
+
# Connect refresh button
|
130 |
refresh_btn.click(fn=get_new_snippet, outputs=input_text)
|
131 |
|
132 |
+
# Connect submit button
|
133 |
submit_btn.click(
|
134 |
fn=ner,
|
135 |
inputs=[input_text, labels, threshold, nested_ner],
|
136 |
+
outputs=[output_highlighted, output_entities]
|
137 |
)
|
138 |
|
139 |
+
# Define examples
|
140 |
examples = [
|
141 |
[
|
142 |
"However, both models lack other frequent DM symptoms including the fibre-type dependent atrophy, myotonia, cataract and male-infertility.",
|
|
|
144 |
0.3,
|
145 |
False,
|
146 |
],
|
147 |
+
[
|
148 |
+
"Amelia Earhart flew her single engine Lockheed Vega 5B across the Atlantic to Paris.",
|
149 |
+
"textile machinery",
|
150 |
+
0.3,
|
151 |
+
False,
|
152 |
+
],
|
153 |
+
[
|
154 |
+
"The Shore Line route of the CNS & M until 1955 served, from south to north, the Illinois communities of Chicago, Evanston, Wilmette...",
|
155 |
+
"textile machinery",
|
156 |
+
0.3,
|
157 |
+
False,
|
158 |
+
],
|
159 |
# Add more examples as needed
|
160 |
]
|
161 |
|
162 |
gr.Examples(
|
163 |
examples=examples,
|
164 |
inputs=[input_text, labels, threshold, nested_ner],
|
165 |
+
outputs=[output_highlighted, output_entities],
|
166 |
fn=ner,
|
167 |
label="Examples",
|
168 |
cache_examples=True,
|