max-long committed on
Commit
4fd99d4
·
verified ·
1 Parent(s): fe6bb8e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -12
app.py CHANGED
@@ -3,15 +3,17 @@ from gliner import GLiNER
3
  import gradio as gr
4
  from datasets import load_dataset
5
 
6
- # Load the subset dataset from Hugging Face Hub
7
- subset_dataset = load_dataset("TheBritishLibrary/blbooks", split="train", streaming=True, trust_remote_code=True)
8
 
9
  # Load the GLiNER model
10
  model = GLiNER.from_pretrained("max-long/textile_machines_3_oct", trust_remote_code=True)
11
 
12
- # Define the NER function
13
  def ner(text: str, labels: str, threshold: float, nested_ner: bool):
 
14
  labels = [label.strip() for label in labels.split(",")]
 
 
15
  entities = model.predict_entities(text, labels, flat_ner=not nested_ner, threshold=threshold)
16
 
17
  # Filter for "textile machinery" entities
@@ -27,18 +29,17 @@ def ner(text: str, labels: str, threshold: float, nested_ner: bool):
27
  if ent["label"].lower() == "textile machinery"
28
  ]
29
 
30
- # Highlight entities with HTML
31
  highlighted_text = text
32
  for ent in sorted(textile_entities, key=lambda x: x['start'], reverse=True):
33
  highlighted_text = (
34
  highlighted_text[:ent['start']] +
35
- f"<span style='background-color: yellow'>{highlighted_text[ent['start']:ent['end']]}</span>" +
36
  highlighted_text[ent['end']:]
37
  )
38
 
39
- return gr.HTML(highlighted_text), textile_entities
40
 
41
- # Build Gradio interface
42
  with gr.Blocks(title="Textile Machinery NER Demo") as demo:
43
  gr.Markdown(
44
  """
@@ -79,6 +80,7 @@ Textile Machine 2 => textile machinery
79
  """
80
  )
81
 
 
82
  input_text = gr.Textbox(
83
  value="Amelia Earhart flew her single engine Lockheed Vega 5B across the Atlantic to Paris.",
84
  label="Text input",
@@ -109,28 +111,32 @@ Textile Machine 2 => textile machinery
109
  scale=0,
110
  )
111
 
112
- output = gr.HighlightedText(label="Predicted Entities")
 
 
113
 
114
  submit_btn = gr.Button("Analyze Random Snippet")
115
  refresh_btn = gr.Button("Get New Snippet")
116
 
117
  # Function to fetch a new random snippet
118
  def get_new_snippet():
119
- # WARNING: Streaming datasets may have performance implications
120
  try:
121
- sample = next(iter(subset_dataset))['text']
122
  return sample
123
  except StopIteration:
124
  return "No more snippets available."
125
 
 
126
  refresh_btn.click(fn=get_new_snippet, outputs=input_text)
127
 
 
128
  submit_btn.click(
129
  fn=ner,
130
  inputs=[input_text, labels, threshold, nested_ner],
131
- outputs=[output, gr.JSON(label="Entities")]
132
  )
133
 
 
134
  examples = [
135
  [
136
  "However, both models lack other frequent DM symptoms including the fibre-type dependent atrophy, myotonia, cataract and male-infertility.",
@@ -138,13 +144,25 @@ Textile Machine 2 => textile machinery
138
  0.3,
139
  False,
140
  ],
 
 
 
 
 
 
 
 
 
 
 
 
141
  # Add more examples as needed
142
  ]
143
 
144
  gr.Examples(
145
  examples=examples,
146
  inputs=[input_text, labels, threshold, nested_ner],
147
- outputs=[output, gr.JSON(label="Entities")],
148
  fn=ner,
149
  label="Examples",
150
  cache_examples=True,
 
3
  import gradio as gr
4
  from datasets import load_dataset
5
 
6
+ # Stream the dataset and keep a single shared iterator so each fetch advances to the next record
7
+ dataset_iter = iter(load_dataset("TheBritishLibrary/blbooks", split="train", streaming=True, trust_remote_code=True))
8
 
9
  # Load the GLiNER model
10
  model = GLiNER.from_pretrained("max-long/textile_machines_3_oct", trust_remote_code=True)
11
 
 
12
  def ner(text: str, labels: str, threshold: float, nested_ner: bool):
13
+ # Split and clean labels
14
  labels = [label.strip() for label in labels.split(",")]
15
+
16
+ # Predict entities using GLiNER
17
  entities = model.predict_entities(text, labels, flat_ner=not nested_ner, threshold=threshold)
18
 
19
  # Filter for "textile machinery" entities
 
29
  if ent["label"].lower() == "textile machinery"
30
  ]
31
 
32
+ # Build the HTML output: wrap each entity in a highlighted <span>, working right-to-left so earlier offsets stay valid
33
  highlighted_text = text
34
  for ent in sorted(textile_entities, key=lambda x: x['start'], reverse=True):
35
  highlighted_text = (
36
  highlighted_text[:ent['start']] +
37
+ f"<span style='background-color: yellow; font-weight: bold;'>{highlighted_text[ent['start']:ent['end']]}</span>" +
38
  highlighted_text[ent['end']:]
39
  )
40
 
41
+ return highlighted_text, textile_entities
42
 
 
43
  with gr.Blocks(title="Textile Machinery NER Demo") as demo:
44
  gr.Markdown(
45
  """
 
80
  """
81
  )
82
 
83
+ # Text input, pre-filled with a fixed example sentence
84
  input_text = gr.Textbox(
85
  value="Amelia Earhart flew her single engine Lockheed Vega 5B across the Atlantic to Paris.",
86
  label="Text input",
 
111
  scale=0,
112
  )
113
 
114
+ # Define output components
115
+ output_highlighted = gr.HTML(label="Predicted Entities")
116
+ output_entities = gr.JSON(label="Entities")
117
 
118
  submit_btn = gr.Button("Analyze Random Snippet")
119
  refresh_btn = gr.Button("Get New Snippet")
120
 
121
  # Function to fetch a new random snippet
122
  def get_new_snippet():
 
123
  try:
124
+ sample = next(dataset_iter)['text']
125
  return sample
126
  except StopIteration:
127
  return "No more snippets available."
128
 
129
+ # Connect refresh button
130
  refresh_btn.click(fn=get_new_snippet, outputs=input_text)
131
 
132
+ # Connect submit button
133
  submit_btn.click(
134
  fn=ner,
135
  inputs=[input_text, labels, threshold, nested_ner],
136
+ outputs=[output_highlighted, output_entities]
137
  )
138
 
139
+ # Define examples
140
  examples = [
141
  [
142
  "However, both models lack other frequent DM symptoms including the fibre-type dependent atrophy, myotonia, cataract and male-infertility.",
 
144
  0.3,
145
  False,
146
  ],
147
+ [
148
+ "Amelia Earhart flew her single engine Lockheed Vega 5B across the Atlantic to Paris.",
149
+ "textile machinery",
150
+ 0.3,
151
+ False,
152
+ ],
153
+ [
154
+ "The Shore Line route of the CNS & M until 1955 served, from south to north, the Illinois communities of Chicago, Evanston, Wilmette...",
155
+ "textile machinery",
156
+ 0.3,
157
+ False,
158
+ ],
159
  # Add more examples as needed
160
  ]
161
 
162
  gr.Examples(
163
  examples=examples,
164
  inputs=[input_text, labels, threshold, nested_ner],
165
+ outputs=[output_highlighted, output_entities],
166
  fn=ner,
167
  label="Examples",
168
  cache_examples=True,