AlGe committed on
Commit
5e6d454
·
verified ·
1 Parent(s): f7882cb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -36
app.py CHANGED
@@ -79,7 +79,7 @@ def process_ner(text: str, pipeline) -> dict:
79
  if current_entity is not None:
80
  entities.append(current_entity)
81
 
82
- return {"text": text, "entities": entities}
83
 
84
  def process_classification(text: str, model1, model2, tokenizer1) -> Tuple[str, str, str]:
85
  inputs1 = tokenizer1(text, max_length=512, return_tensors='pt', truncation=True, padding=True)
@@ -97,7 +97,7 @@ def process_classification(text: str, model1, model2, tokenizer1) -> Tuple[str,
97
  import plotly.graph_objects as go
98
  from typing import Tuple
99
 
100
- def generate_charts(ner_output_bin: dict, ner_output_ext: dict) -> Tuple[go.Figure, go.Figure]:
101
  entities_bin = [entity['entity'] for entity in ner_output_bin['entities']]
102
  entities_ext = [entity['entity'] for entity in ner_output_ext['entities']]
103
 
@@ -151,47 +151,51 @@ def generate_charts(ner_output_bin: dict, ner_output_ext: dict) -> Tuple[go.Figu
151
  paper_bgcolor='rgba(0,0,0,0)'
152
  )
153
 
154
- # Create word cloud for extended classification entities
155
- wordcloud_fig = generate_wordcloud(ner_output_ext['entities'], 'tab10')
156
-
157
- return fig1, fig2, wordcloud_fig
158
-
159
- def generate_wordcloud(entities: list, color_map: dict) -> plt.Figure:
160
- word_freq = {entity['entity']: entity['score'] for entity in entities}
161
- wc = WordCloud(width=800, height=400, background_color='black', colormap=color_map).generate_from_frequencies(word_freq)
162
 
163
- fig, ax = plt.subplots(figsize=(10, 5))
164
- ax.imshow(wc, interpolation='bilinear')
165
- ax.axis('off')
166
- return fig
167
 
168
- @spaces.GPU
169
- def all(text: str) -> Tuple[gr.HighlightedText, gr.HighlightedText, int, int, float, go.Figure, go.Figure, plt.Figure]:
170
- ner_output_bin = process_ner(text, pipe_bin)
171
- ner_output_ext = process_ner(text, pipe_ext)
172
 
173
- binary_entities = [{"entity": ent["entity"], "score": ent["score"]} for ent in ner_output_bin["entities"]]
174
- extended_entities = [{"entity": ent["entity"], "score": ent["score"]} for ent in ner_output_ext["entities"]]
175
 
176
- bin_text = [{"text": text[ent["start"]:ent["end"]], "entity": ent["entity"]} for ent in ner_output_bin["entities"]]
177
- ext_text = [{"text": text[ent["start"]:ent["end"]], "entity": ent["entity"]} for ent in ner_output_ext["entities"]]
 
178
 
179
- fig1, fig2, wordcloud_fig = generate_charts(ner_output_bin, ner_output_ext)
180
 
181
- # Convert the word cloud figure to an image for Gradio
182
- buf = io.BytesIO()
183
- wordcloud_fig.savefig(buf, format='png')
184
- buf.seek(0)
 
185
 
186
- internal_count = sum(1 for ent in ner_output_bin["entities"] if ent["entity"] == "Internal")
187
- external_count = sum(1 for ent in ner_output_bin["entities"] if ent["entity"] == "External")
188
- internal_ratio = internal_count / (internal_count + external_count) if (internal_count + external_count) > 0 else 0
 
 
 
189
 
190
- return bin_text, ext_text, internal_count, external_count, internal_ratio, fig1, fig2, buf
191
 
192
- examples = [
193
- ['Bevor ich meinen Hund kaufte bin ich immer alleine durch den Park gelaufen. Gestern war ich aber mit dem Hund losgelaufen. Das Wetter war sehr schön, nicht wie sonst im Winter. Ich weiß nicht genau. Mir fällt sonst nichts dazu ein. Wir trafen auf mehrere Spaziergänger. Ein Mann mit seinem Kind. Das Kind hat ein Eis gegessen.'],
194
- ]
 
 
 
 
 
 
 
 
195
 
196
  iface = gr.Interface(
197
  fn=all,
@@ -209,7 +213,6 @@ iface = gr.Interface(
209
  "INTtime": "#FF6347", # Tomato
210
  "INTplace": "#FFD700", # Gold
211
  "INTevent": "#FFA500", # Orange
212
-
213
  "EXTsemantic": "#4682B4", # SteelBlue
214
  "EXTrepetition": "#5F9EA0", # CadetBlue
215
  "EXTother": "#00CED1", # DarkTurquoise
@@ -220,7 +223,7 @@ iface = gr.Interface(
220
  gr.Label(label="Approximated Internal Detail Ratio"),
221
  gr.Plot(label="Extended SeqClass Entity Distribution Pie Chart"),
222
  gr.Plot(label="Binary SeqClass Entity Count Bar Chart"),
223
- gr.Image(label="Word Cloud of Extended Entities")
224
  ],
225
  title="Scoring Demo",
226
  description="Autobiographical Memory Analysis: This demo combines two text - and two sequence classification models to showcase our automated Autobiographical Interview scoring method. Submit a narrative to see the results.",
 
79
  if current_entity is not None:
80
  entities.append(current_entity)
81
 
82
+ return {"entities": entities}
83
 
84
  def process_classification(text: str, model1, model2, tokenizer1) -> Tuple[str, str, str]:
85
  inputs1 = tokenizer1(text, max_length=512, return_tensors='pt', truncation=True, padding=True)
 
97
  import plotly.graph_objects as go
98
  from typing import Tuple
99
 
100
+ def generate_charts(ner_output_bin: dict, ner_output_ext: dict) -> Tuple[go.Figure, go.Figure, np.ndarray]:
101
  entities_bin = [entity['entity'] for entity in ner_output_bin['entities']]
102
  entities_ext = [entity['entity'] for entity in ner_output_ext['entities']]
103
 
 
151
  paper_bgcolor='rgba(0,0,0,0)'
152
  )
153
 
154
+ # Generate word cloud
155
+ wordcloud_image = generate_wordcloud(ner_output_ext['entities'], ext_color_map)
 
 
 
 
 
 
156
 
157
+ return fig1, fig2, wordcloud_image
 
 
 
158
 
159
+ def generate_wordcloud(entities: List[Dict], color_map: Dict[str, str]) -> np.ndarray:
160
+ entity_texts = [entity['entity'] for entity in entities]
161
+ entity_scores = [entity['score'] for entity in entities]
162
+ entity_types = [entity['entity'] for entity in entities]
163
 
164
+ # Create a dictionary for word cloud
165
+ word_freq = {text: score for text, score in zip(entity_texts, entity_scores)}
166
 
167
+ def color_func(word, font_size, position, orientation, random_state=None, **kwargs):
168
+ entity_type = next(entity['entity'] for entity in entities if entity['entity'] == word)
169
+ return color_map.get(entity_type, "#FFFFFF")
170
 
171
+ wordcloud = WordCloud(width=800, height=400, background_color='black', color_func=color_func).generate_from_frequencies(word_freq)
172
 
173
+ # Convert to image array
174
+ plt.figure(figsize=(10, 5))
175
+ plt.imshow(wordcloud, interpolation='bilinear')
176
+ plt.axis('off')
177
+ plt.tight_layout(pad=0)
178
 
179
+ # Convert plt to numpy array
180
+ plt_image = plt.gcf()
181
+ plt_image.canvas.draw()
182
+ image_array = np.frombuffer(plt_image.canvas.tostring_rgb(), dtype=np.uint8)
183
+ image_array = image_array.reshape(plt_image.canvas.get_width_height()[::-1] + (3,))
184
+ plt.close()
185
 
186
+ return image_array
187
 
188
+ @spaces.GPU
189
+ def all(text: str):
190
+ ner_output_bin = process_ner(text, pipe_bin)
191
+ ner_output_ext = process_ner(text, pipe_ext)
192
+ classification_output = process_classification(text, model1, model2, tokenizer1)
193
+
194
+ pie_chart, bar_chart, wordcloud_image = generate_charts(ner_output_bin, ner_output_ext)
195
+
196
+ return (ner_output_bin, ner_output_ext,
197
+ classification_output[0], classification_output[1], classification_output[2],
198
+ pie_chart, bar_chart, wordcloud_image)
199
 
200
  iface = gr.Interface(
201
  fn=all,
 
213
  "INTtime": "#FF6347", # Tomato
214
  "INTplace": "#FFD700", # Gold
215
  "INTevent": "#FFA500", # Orange
 
216
  "EXTsemantic": "#4682B4", # SteelBlue
217
  "EXTrepetition": "#5F9EA0", # CadetBlue
218
  "EXTother": "#00CED1", # DarkTurquoise
 
223
  gr.Label(label="Approximated Internal Detail Ratio"),
224
  gr.Plot(label="Extended SeqClass Entity Distribution Pie Chart"),
225
  gr.Plot(label="Binary SeqClass Entity Count Bar Chart"),
226
+ gr.Image(label="Entity Word Cloud")
227
  ],
228
  title="Scoring Demo",
229
  description="Autobiographical Memory Analysis: This demo combines two text - and two sequence classification models to showcase our automated Autobiographical Interview scoring method. Submit a narrative to see the results.",