Spaces:
Build error
Build error
Minh Q. Le
committed on
Commit
·
ed47213
1
Parent(s):
40cd566
Added Label Analysis Plot
Browse files- app/cosmic_view.py +34 -4
- app/deberta_view.py +34 -3
- app/gpt_view.py +34 -5
- app/utils.py +32 -1
- requirements.txt +1 -0
app/cosmic_view.py
CHANGED
|
@@ -7,12 +7,11 @@ from app.utils import (
|
|
| 7 |
create_input_instruction,
|
| 8 |
format_prediction_ouptut,
|
| 9 |
remove_temp_dir,
|
| 10 |
-
decode_numeric_label,
|
| 11 |
-
decode_speaker_role,
|
| 12 |
display_sentiment_score_table,
|
| 13 |
sentiment_flow_plot,
|
| 14 |
sentiment_intensity_analysis,
|
| 15 |
EXAMPLE_CONVERSATIONS,
|
|
|
|
| 16 |
)
|
| 17 |
from fairseq.data.data_utils import collate_tokens
|
| 18 |
|
|
@@ -22,6 +21,7 @@ sys.path.insert(0, "../") # neccesary to load modules outside of app
|
|
| 22 |
|
| 23 |
from app import roberta, comet, COSMIC_MODEL, cosmic_args
|
| 24 |
from preprocessing import preprocess
|
|
|
|
| 25 |
from Model.COSMIC.erc_training.predict_epik import predict, get_valid_dataloader
|
| 26 |
|
| 27 |
|
|
@@ -227,6 +227,36 @@ def cosmic_ui():
|
|
| 227 |
|
| 228 |
# reset the output whenever a change in the input is detected
|
| 229 |
conversation_input.change(lambda x: "", conversation_input, output)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
|
| 231 |
gr.Markdown("# Sentiment Flow Plot")
|
| 232 |
with gr.Row():
|
|
@@ -275,8 +305,8 @@ def cosmic_ui():
|
|
| 275 |
|
| 276 |
# reset all outputs whenever a change in the input is detected
|
| 277 |
conversation_input.change(
|
| 278 |
-
lambda x: ("", None, None),
|
| 279 |
conversation_input,
|
| 280 |
-
outputs=[output, plot_box, intensity_plot],
|
| 281 |
)
|
| 282 |
return cosmic_model
|
|
|
|
| 7 |
create_input_instruction,
|
| 8 |
format_prediction_ouptut,
|
| 9 |
remove_temp_dir,
|
|
|
|
|
|
|
| 10 |
display_sentiment_score_table,
|
| 11 |
sentiment_flow_plot,
|
| 12 |
sentiment_intensity_analysis,
|
| 13 |
EXAMPLE_CONVERSATIONS,
|
| 14 |
+
label_analysis,
|
| 15 |
)
|
| 16 |
from fairseq.data.data_utils import collate_tokens
|
| 17 |
|
|
|
|
| 21 |
|
| 22 |
from app import roberta, comet, COSMIC_MODEL, cosmic_args
|
| 23 |
from preprocessing import preprocess
|
| 24 |
+
from preprocessing.preprocess import decode_numeric_label, decode_speaker_role
|
| 25 |
from Model.COSMIC.erc_training.predict_epik import predict, get_valid_dataloader
|
| 26 |
|
| 27 |
|
|
|
|
| 227 |
|
| 228 |
# reset the output whenever a change in the input is detected
|
| 229 |
conversation_input.change(lambda x: "", conversation_input, output)
|
| 230 |
+
|
| 231 |
+
gr.Markdown("# Analysis of Labels")
|
| 232 |
+
with gr.Row():
|
| 233 |
+
with gr.Column(scale=1):
|
| 234 |
+
gr.Markdown(
|
| 235 |
+
"""
|
| 236 |
+
<b>Frequency Analysis of Labels</b>
|
| 237 |
+
One key aspect of our analysis involves examining the
|
| 238 |
+
frequency distribution of labels assigned to different
|
| 239 |
+
parts of the conversation. This includes tracking the
|
| 240 |
+
occurrence of labels such as "Interest," "Curiosity,"
|
| 241 |
+
"Confused," "Openness," and "Acceptance." The resulting
|
| 242 |
+
distribution provides insights into the prevalence of
|
| 243 |
+
various sentiments during the interaction.
|
| 244 |
+
|
| 245 |
+
<b>Word Cloud Visualization</b>
|
| 246 |
+
In addition to label frequency, we employ word cloud
|
| 247 |
+
visualization to depict the prominent terms in the input
|
| 248 |
+
conversations. This visual representation highlights the
|
| 249 |
+
most frequently used words, shedding light on the key
|
| 250 |
+
themes and topics discussed.
|
| 251 |
+
"""
|
| 252 |
+
)
|
| 253 |
+
with gr.Column(scale=3):
|
| 254 |
+
labels_plot = gr.Plot(label="Analysis of Labels Plot")
|
| 255 |
+
with gr.Column(scale=3):
|
| 256 |
+
wordcloud_plot = gr.Plot(label="Analysis of Labels Plot")
|
| 257 |
+
|
| 258 |
+
labels_btn = gr.Button(value="Plot Label Analysis")
|
| 259 |
+
labels_btn.click(label_analysis, inputs=[output], outputs=[labels_plot,wordcloud_plot])
|
| 260 |
|
| 261 |
gr.Markdown("# Sentiment Flow Plot")
|
| 262 |
with gr.Row():
|
|
|
|
| 305 |
|
| 306 |
# reset all outputs whenever a change in the input is detected
|
| 307 |
conversation_input.change(
|
| 308 |
+
lambda x: ("", None, None, None, None),
|
| 309 |
conversation_input,
|
| 310 |
+
outputs=[output, labels_plot, wordcloud_plot, plot_box, intensity_plot],
|
| 311 |
)
|
| 312 |
return cosmic_model
|
app/deberta_view.py
CHANGED
|
@@ -6,6 +6,7 @@ from app.utils import (
|
|
| 6 |
sentiment_flow_plot,
|
| 7 |
sentiment_intensity_analysis,
|
| 8 |
EXAMPLE_CONVERSATIONS,
|
|
|
|
| 9 |
)
|
| 10 |
|
| 11 |
import sys
|
|
@@ -106,6 +107,36 @@ def deberta_ui():
|
|
| 106 |
# reset the output whenever a change in the input is detected
|
| 107 |
conversation_input.change(lambda x: "", conversation_input, output)
|
| 108 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
gr.Markdown("# Sentiment Flow Plot")
|
| 110 |
with gr.Row():
|
| 111 |
with gr.Column(scale=1):
|
|
@@ -153,8 +184,8 @@ def deberta_ui():
|
|
| 153 |
|
| 154 |
# reset all outputs whenever a change in the input is detected
|
| 155 |
conversation_input.change(
|
| 156 |
-
lambda x: ("", None, None),
|
| 157 |
conversation_input,
|
| 158 |
-
outputs=[output, plot_box, intensity_plot],
|
| 159 |
)
|
| 160 |
-
return deberta_model
|
|
|
|
| 6 |
sentiment_flow_plot,
|
| 7 |
sentiment_intensity_analysis,
|
| 8 |
EXAMPLE_CONVERSATIONS,
|
| 9 |
+
label_analysis,
|
| 10 |
)
|
| 11 |
|
| 12 |
import sys
|
|
|
|
| 107 |
# reset the output whenever a change in the input is detected
|
| 108 |
conversation_input.change(lambda x: "", conversation_input, output)
|
| 109 |
|
| 110 |
+
gr.Markdown("# Analysis of Labels")
|
| 111 |
+
with gr.Row():
|
| 112 |
+
with gr.Column(scale=1):
|
| 113 |
+
gr.Markdown(
|
| 114 |
+
"""
|
| 115 |
+
<b>Frequency Analysis of Labels</b>
|
| 116 |
+
One key aspect of our analysis involves examining the
|
| 117 |
+
frequency distribution of labels assigned to different
|
| 118 |
+
parts of the conversation. This includes tracking the
|
| 119 |
+
occurrence of labels such as "Interest," "Curiosity,"
|
| 120 |
+
"Confused," "Openness," and "Acceptance." The resulting
|
| 121 |
+
distribution provides insights into the prevalence of
|
| 122 |
+
various sentiments during the interaction.
|
| 123 |
+
|
| 124 |
+
<b>Word Cloud Visualization</b>
|
| 125 |
+
In addition to label frequency, we employ word cloud
|
| 126 |
+
visualization to depict the prominent terms in the input
|
| 127 |
+
conversations. This visual representation highlights the
|
| 128 |
+
most frequently used words, shedding light on the key
|
| 129 |
+
themes and topics discussed.
|
| 130 |
+
"""
|
| 131 |
+
)
|
| 132 |
+
with gr.Column(scale=3):
|
| 133 |
+
labels_plot = gr.Plot(label="Analysis of Labels Plot")
|
| 134 |
+
with gr.Column(scale=3):
|
| 135 |
+
wordcloud_plot = gr.Plot(label="Analysis of Labels Plot")
|
| 136 |
+
|
| 137 |
+
labels_btn = gr.Button(value="Plot Label Analysis")
|
| 138 |
+
labels_btn.click(label_analysis, inputs=[output], outputs=[labels_plot,wordcloud_plot])
|
| 139 |
+
|
| 140 |
gr.Markdown("# Sentiment Flow Plot")
|
| 141 |
with gr.Row():
|
| 142 |
with gr.Column(scale=1):
|
|
|
|
| 184 |
|
| 185 |
# reset all outputs whenever a change in the input is detected
|
| 186 |
conversation_input.change(
|
| 187 |
+
lambda x: ("", None, None, None, None),
|
| 188 |
conversation_input,
|
| 189 |
+
outputs=[output, labels_plot, wordcloud_plot, plot_box, intensity_plot],
|
| 190 |
)
|
| 191 |
+
return deberta_model
|
app/gpt_view.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import seaborn as sns
|
| 3 |
from openai import OpenAI
|
| 4 |
import gradio as gr
|
| 5 |
import re
|
|
@@ -11,6 +9,7 @@ from app.utils import (
|
|
| 11 |
display_sentiment_score_table,
|
| 12 |
sentiment_intensity_analysis,
|
| 13 |
EXAMPLE_CONVERSATIONS,
|
|
|
|
| 14 |
)
|
| 15 |
|
| 16 |
openai_args = {"api_key": ""}
|
|
@@ -332,6 +331,36 @@ def gpt_ui():
|
|
| 332 |
outputs=[output_box, report_md],
|
| 333 |
)
|
| 334 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 335 |
gr.Markdown("# Sentiment Flow Plot")
|
| 336 |
with gr.Row():
|
| 337 |
with gr.Column(scale=1):
|
|
@@ -379,9 +408,9 @@ def gpt_ui():
|
|
| 379 |
|
| 380 |
# reset all outputs whenever a change in the input is detected
|
| 381 |
conversation_input.change(
|
| 382 |
-
lambda x: ("", "", None, None),
|
| 383 |
conversation_input,
|
| 384 |
-
outputs=[output_box, report_md, plot_box, intensity_plot],
|
| 385 |
)
|
| 386 |
|
| 387 |
-
return gpt_model
|
|
|
|
|
|
|
|
|
|
| 1 |
from openai import OpenAI
|
| 2 |
import gradio as gr
|
| 3 |
import re
|
|
|
|
| 9 |
display_sentiment_score_table,
|
| 10 |
sentiment_intensity_analysis,
|
| 11 |
EXAMPLE_CONVERSATIONS,
|
| 12 |
+
label_analysis,
|
| 13 |
)
|
| 14 |
|
| 15 |
openai_args = {"api_key": ""}
|
|
|
|
| 331 |
outputs=[output_box, report_md],
|
| 332 |
)
|
| 333 |
|
| 334 |
+
gr.Markdown("# Analysis of Labels")
|
| 335 |
+
with gr.Row():
|
| 336 |
+
with gr.Column(scale=1):
|
| 337 |
+
gr.Markdown(
|
| 338 |
+
"""
|
| 339 |
+
<b>Frequency Analysis of Labels</b>
|
| 340 |
+
One key aspect of our analysis involves examining the
|
| 341 |
+
frequency distribution of labels assigned to different
|
| 342 |
+
parts of the conversation. This includes tracking the
|
| 343 |
+
occurrence of labels such as "Interest," "Curiosity,"
|
| 344 |
+
"Confused," "Openness," and "Acceptance." The resulting
|
| 345 |
+
distribution provides insights into the prevalence of
|
| 346 |
+
various sentiments during the interaction.
|
| 347 |
+
|
| 348 |
+
<b>Word Cloud Visualization</b>
|
| 349 |
+
In addition to label frequency, we employ word cloud
|
| 350 |
+
visualization to depict the prominent terms in the input
|
| 351 |
+
conversations. This visual representation highlights the
|
| 352 |
+
most frequently used words, shedding light on the key
|
| 353 |
+
themes and topics discussed.
|
| 354 |
+
"""
|
| 355 |
+
)
|
| 356 |
+
with gr.Column(scale=3):
|
| 357 |
+
labels_plot = gr.Plot(label="Analysis of Labels Plot")
|
| 358 |
+
with gr.Column(scale=3):
|
| 359 |
+
wordcloud_plot = gr.Plot(label="Analysis of Labels Plot")
|
| 360 |
+
|
| 361 |
+
labels_btn = gr.Button(value="Plot Label Analysis")
|
| 362 |
+
labels_btn.click(label_analysis, inputs=[output_box], outputs=[labels_plot,wordcloud_plot])
|
| 363 |
+
|
| 364 |
gr.Markdown("# Sentiment Flow Plot")
|
| 365 |
with gr.Row():
|
| 366 |
with gr.Column(scale=1):
|
|
|
|
| 408 |
|
| 409 |
# reset all outputs whenever a change in the input is detected
|
| 410 |
conversation_input.change(
|
| 411 |
+
lambda x: ("", "", None, None, None, None),
|
| 412 |
conversation_input,
|
| 413 |
+
outputs=[output_box, report_md, labels_plot, wordcloud_plot, plot_box, intensity_plot],
|
| 414 |
)
|
| 415 |
|
| 416 |
+
return gpt_model
|
app/utils.py
CHANGED
|
@@ -6,9 +6,10 @@ import pandas as pd
|
|
| 6 |
import seaborn as sns
|
| 7 |
from statistics import mean
|
| 8 |
import matplotlib.pyplot as plt
|
|
|
|
|
|
|
| 9 |
from nltk.sentiment import SentimentIntensityAnalyzer
|
| 10 |
from preprocessing.preprocess import process_user_input
|
| 11 |
-
from preprocessing.preprocess import decode_numeric_label, decode_speaker_role
|
| 12 |
|
| 13 |
|
| 14 |
def create_input_instruction():
|
|
@@ -74,6 +75,36 @@ SENTIMENT_GROUP_MAPPING = {
|
|
| 74 |
"Obscene": -3,
|
| 75 |
}
|
| 76 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
|
| 78 |
def sentiment_flow_plot(labeled_conv):
|
| 79 |
"""Generate the sentiment flow plot using the output from the label predecting
|
|
|
|
| 6 |
import seaborn as sns
|
| 7 |
from statistics import mean
|
| 8 |
import matplotlib.pyplot as plt
|
| 9 |
+
from collections import Counter
|
| 10 |
+
from wordcloud import WordCloud
|
| 11 |
from nltk.sentiment import SentimentIntensityAnalyzer
|
| 12 |
from preprocessing.preprocess import process_user_input
|
|
|
|
| 13 |
|
| 14 |
|
| 15 |
def create_input_instruction():
|
|
|
|
| 75 |
"Obscene": -3,
|
| 76 |
}
|
| 77 |
|
| 78 |
+
def label_analysis(labeled_conv):
    """Analyze the sentiment labels of a labeled conversation transcript.

    :param labeled_conv: conversation text where each message follows the
        format ``<Speaker>: <message>\\n[<label>]`` with Speaker being
        ``Agent`` or ``Visitor`` (the format produced by
        ``format_prediction_ouptut``)
    :return: tuple ``(fig1, fig2)`` of matplotlib figures — ``fig1`` is a
        bar plot of label frequencies, ``fig2`` is a word cloud built from
        the label text
    """
    msg_pattern = r"(Agent|Visitor): (.*)\n\[(.*)\]"
    # Extract (speaker, message, label) triples; only the labels are needed.
    labels = [label for _, _, label in re.findall(msg_pattern, labeled_conv)]

    label_counts = Counter(labels)

    # Bar plot of label frequencies. Operate on the Axes object directly
    # instead of the pyplot global "current figure" state, which is fragile
    # when multiple figures are alive (as in a Gradio server).
    fig1, ax1 = plt.subplots(figsize=(12, 6))
    sns.barplot(x=list(label_counts.keys()), y=list(label_counts.values()), ax=ax1)
    ax1.set_title('Label Frequency Distribution')
    ax1.set_xlabel('Labels')
    ax1.set_ylabel('Frequency')
    # Rotate the tick labels so long label names stay readable.
    plt.setp(ax1.get_xticklabels(), rotation=45, ha='right')

    # Word cloud over the concatenated label text.
    labels_text = " ".join(labels)
    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(labels_text)

    # Separate figure for the word cloud, rendered as an image with no axes.
    fig2, ax2 = plt.subplots(figsize=(10, 5))
    ax2.imshow(wordcloud, interpolation='bilinear')
    ax2.axis('off')  # Turn off the axis

    return fig1, fig2
|
| 108 |
|
| 109 |
def sentiment_flow_plot(labeled_conv):
|
| 110 |
"""Generate the sentiment flow plot using the output from the label predecting
|
requirements.txt
CHANGED
|
@@ -167,4 +167,5 @@ wcwidth==0.2.12
|
|
| 167 |
weasel==0.3.4
|
| 168 |
websockets==11.0.3
|
| 169 |
Werkzeug==3.0.1
|
|
|
|
| 170 |
wrapt==1.14.1
|
|
|
|
| 167 |
weasel==0.3.4
|
| 168 |
websockets==11.0.3
|
| 169 |
Werkzeug==3.0.1
|
| 170 |
+
wordcloud==1.9.3
|
| 171 |
wrapt==1.14.1
|