Update app.py
Browse files
app.py
CHANGED
@@ -79,7 +79,7 @@ def process_ner(text: str, pipeline) -> dict:
|
|
79 |
if current_entity is not None:
|
80 |
entities.append(current_entity)
|
81 |
|
82 |
-
return {"
|
83 |
|
84 |
def process_classification(text: str, model1, model2, tokenizer1) -> Tuple[str, str, str]:
|
85 |
inputs1 = tokenizer1(text, max_length=512, return_tensors='pt', truncation=True, padding=True)
|
@@ -97,7 +97,7 @@ def process_classification(text: str, model1, model2, tokenizer1) -> Tuple[str,
|
|
97 |
import plotly.graph_objects as go
|
98 |
from typing import Tuple
|
99 |
|
100 |
-
def generate_charts(ner_output_bin: dict, ner_output_ext: dict) -> Tuple[go.Figure, go.Figure]:
|
101 |
entities_bin = [entity['entity'] for entity in ner_output_bin['entities']]
|
102 |
entities_ext = [entity['entity'] for entity in ner_output_ext['entities']]
|
103 |
|
@@ -151,47 +151,51 @@ def generate_charts(ner_output_bin: dict, ner_output_ext: dict) -> Tuple[go.Figu
|
|
151 |
paper_bgcolor='rgba(0,0,0,0)'
|
152 |
)
|
153 |
|
154 |
-
#
|
155 |
-
|
156 |
-
|
157 |
-
return fig1, fig2, wordcloud_fig
|
158 |
-
|
159 |
-
def generate_wordcloud(entities: list, color_map: dict) -> plt.Figure:
|
160 |
-
word_freq = {entity['entity']: entity['score'] for entity in entities}
|
161 |
-
wc = WordCloud(width=800, height=400, background_color='black', colormap=color_map).generate_from_frequencies(word_freq)
|
162 |
|
163 |
-
|
164 |
-
ax.imshow(wc, interpolation='bilinear')
|
165 |
-
ax.axis('off')
|
166 |
-
return fig
|
167 |
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
|
173 |
-
|
174 |
-
|
175 |
|
176 |
-
|
177 |
-
|
|
|
178 |
|
179 |
-
|
180 |
|
181 |
-
# Convert
|
182 |
-
|
183 |
-
|
184 |
-
|
|
|
185 |
|
186 |
-
|
187 |
-
|
188 |
-
|
|
|
|
|
|
|
189 |
|
190 |
-
return
|
191 |
|
192 |
-
|
193 |
-
|
194 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
195 |
|
196 |
iface = gr.Interface(
|
197 |
fn=all,
|
@@ -209,7 +213,6 @@ iface = gr.Interface(
|
|
209 |
"INTtime": "#FF6347", # Tomato
|
210 |
"INTplace": "#FFD700", # Gold
|
211 |
"INTevent": "#FFA500", # Orange
|
212 |
-
|
213 |
"EXTsemantic": "#4682B4", # SteelBlue
|
214 |
"EXTrepetition": "#5F9EA0", # CadetBlue
|
215 |
"EXTother": "#00CED1", # DarkTurquoise
|
@@ -220,7 +223,7 @@ iface = gr.Interface(
|
|
220 |
gr.Label(label="Approximated Internal Detail Ratio"),
|
221 |
gr.Plot(label="Extended SeqClass Entity Distribution Pie Chart"),
|
222 |
gr.Plot(label="Binary SeqClass Entity Count Bar Chart"),
|
223 |
-
gr.Image(label="Word Cloud
|
224 |
],
|
225 |
title="Scoring Demo",
|
226 |
description="Autobiographical Memory Analysis: This demo combines two text - and two sequence classification models to showcase our automated Autobiographical Interview scoring method. Submit a narrative to see the results.",
|
|
|
79 |
if current_entity is not None:
|
80 |
entities.append(current_entity)
|
81 |
|
82 |
+
return {"entities": entities}
|
83 |
|
84 |
def process_classification(text: str, model1, model2, tokenizer1) -> Tuple[str, str, str]:
|
85 |
inputs1 = tokenizer1(text, max_length=512, return_tensors='pt', truncation=True, padding=True)
|
|
|
97 |
import plotly.graph_objects as go
|
98 |
from typing import Tuple
|
99 |
|
100 |
+
def generate_charts(ner_output_bin: dict, ner_output_ext: dict) -> Tuple[go.Figure, go.Figure, np.ndarray]:
|
101 |
entities_bin = [entity['entity'] for entity in ner_output_bin['entities']]
|
102 |
entities_ext = [entity['entity'] for entity in ner_output_ext['entities']]
|
103 |
|
|
|
151 |
paper_bgcolor='rgba(0,0,0,0)'
|
152 |
)
|
153 |
|
154 |
+
# Generate word cloud
|
155 |
+
wordcloud_image = generate_wordcloud(ner_output_ext['entities'], ext_color_map)
|
|
|
|
|
|
|
|
|
|
|
|
|
156 |
|
157 |
+
return fig1, fig2, wordcloud_image
|
|
|
|
|
|
|
158 |
|
159 |
+
def generate_wordcloud(entities: List[Dict], color_map: Dict[str, str]) -> np.ndarray:
    """Render the entities' labels as a word cloud and return it as an RGB image.

    Args:
        entities: Dicts carrying at least an ``'entity'`` key (used as the
            word) and a ``'score'`` key (used as the word's weight).
        color_map: Maps a word to the colour it is drawn in; words without
            an entry are drawn in white (``"#FFFFFF"``).

    Returns:
        A ``uint8`` array of shape ``(height, width, 3)``. When ``entities``
        is empty, a black placeholder with the word cloud's dimensions is
        returned instead of letting ``WordCloud`` raise on empty input.
    """
    width, height = 800, 400

    # A label that occurs several times keeps the score of its last occurrence.
    word_freq = {entity['entity']: entity['score'] for entity in entities}

    # WordCloud refuses to lay out zero words; return an empty (black) canvas
    # so callers always receive an image.
    if not word_freq:
        return np.zeros((height, width, 3), dtype=np.uint8)

    def color_func(word, font_size, position, orientation, random_state=None, **kwargs):
        # Every word is itself a key of word_freq, so it can be looked up in
        # the colour map directly; no scan over `entities` is needed.
        return color_map.get(word, "#FFFFFF")

    wordcloud = WordCloud(
        width=width,
        height=height,
        background_color='black',
        color_func=color_func,
    ).generate_from_frequencies(word_freq)

    # Draw on an explicit figure so it is always closed, even if rendering fails.
    fig = plt.figure(figsize=(10, 5))
    try:
        ax = fig.add_subplot(111)
        ax.imshow(wordcloud, interpolation='bilinear')
        ax.axis('off')
        fig.tight_layout(pad=0)

        # Rasterise, then read the canvas back. buffer_rgba() replaces the
        # deprecated tostring_rgb(); the alpha channel is dropped and the
        # pixels are copied so the array outlives the figure.
        fig.canvas.draw()
        rgba = np.asarray(fig.canvas.buffer_rgba())
        image_array = np.ascontiguousarray(rgba[..., :3])
    finally:
        plt.close(fig)

    return image_array
|
187 |
|
188 |
+
@spaces.GPU
def all(text: str):
    """Run the full analysis on ``text`` and collect every result in one tuple.

    Both NER pipelines, the classification step and the chart generation are
    invoked in that order.

    Returns:
        An 8-tuple: binary NER output, extended NER output, the first three
        items of the classification result, then the pie chart, the bar
        chart and the word-cloud image.
    """
    binary_entities = process_ner(text, pipe_bin)
    extended_entities = process_ner(text, pipe_ext)

    labels = process_classification(text, model1, model2, tokenizer1)

    charts = generate_charts(binary_entities, extended_entities)
    pie_chart, bar_chart, wordcloud_image = charts

    return (
        binary_entities,
        extended_entities,
        labels[0],
        labels[1],
        labels[2],
        pie_chart,
        bar_chart,
        wordcloud_image,
    )
|
199 |
|
200 |
iface = gr.Interface(
|
201 |
fn=all,
|
|
|
213 |
"INTtime": "#FF6347", # Tomato
|
214 |
"INTplace": "#FFD700", # Gold
|
215 |
"INTevent": "#FFA500", # Orange
|
|
|
216 |
"EXTsemantic": "#4682B4", # SteelBlue
|
217 |
"EXTrepetition": "#5F9EA0", # CadetBlue
|
218 |
"EXTother": "#00CED1", # DarkTurquoise
|
|
|
223 |
gr.Label(label="Approximated Internal Detail Ratio"),
|
224 |
gr.Plot(label="Extended SeqClass Entity Distribution Pie Chart"),
|
225 |
gr.Plot(label="Binary SeqClass Entity Count Bar Chart"),
|
226 |
+
gr.Image(label="Entity Word Cloud")
|
227 |
],
|
228 |
title="Scoring Demo",
|
229 |
description="Autobiographical Memory Analysis: This demo combines two text - and two sequence classification models to showcase our automated Autobiographical Interview scoring method. Submit a narrative to see the results.",
|