Spaces:
Build error
Build error
Minh Q. Le
committed on
Commit
·
ed47213
1
Parent(s):
40cd566
Added Label Analysis Plot
Browse files- app/cosmic_view.py +34 -4
- app/deberta_view.py +34 -3
- app/gpt_view.py +34 -5
- app/utils.py +32 -1
- requirements.txt +1 -0
app/cosmic_view.py
CHANGED
|
@@ -7,12 +7,11 @@ from app.utils import (
|
|
| 7 |
create_input_instruction,
|
| 8 |
format_prediction_ouptut,
|
| 9 |
remove_temp_dir,
|
| 10 |
-
decode_numeric_label,
|
| 11 |
-
decode_speaker_role,
|
| 12 |
display_sentiment_score_table,
|
| 13 |
sentiment_flow_plot,
|
| 14 |
sentiment_intensity_analysis,
|
| 15 |
EXAMPLE_CONVERSATIONS,
|
|
|
|
| 16 |
)
|
| 17 |
from fairseq.data.data_utils import collate_tokens
|
| 18 |
|
|
@@ -22,6 +21,7 @@ sys.path.insert(0, "../") # neccesary to load modules outside of app
|
|
| 22 |
|
| 23 |
from app import roberta, comet, COSMIC_MODEL, cosmic_args
|
| 24 |
from preprocessing import preprocess
|
|
|
|
| 25 |
from Model.COSMIC.erc_training.predict_epik import predict, get_valid_dataloader
|
| 26 |
|
| 27 |
|
|
@@ -227,6 +227,36 @@ def cosmic_ui():
|
|
| 227 |
|
| 228 |
# reset the output whenever a change in the input is detected
|
| 229 |
conversation_input.change(lambda x: "", conversation_input, output)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
|
| 231 |
gr.Markdown("# Sentiment Flow Plot")
|
| 232 |
with gr.Row():
|
|
@@ -275,8 +305,8 @@ def cosmic_ui():
|
|
| 275 |
|
| 276 |
# reset all outputs whenever a change in the input is detected
|
| 277 |
conversation_input.change(
|
| 278 |
-
lambda x: ("", None, None),
|
| 279 |
conversation_input,
|
| 280 |
-
outputs=[output, plot_box, intensity_plot],
|
| 281 |
)
|
| 282 |
return cosmic_model
|
|
|
|
| 7 |
create_input_instruction,
|
| 8 |
format_prediction_ouptut,
|
| 9 |
remove_temp_dir,
|
|
|
|
|
|
|
| 10 |
display_sentiment_score_table,
|
| 11 |
sentiment_flow_plot,
|
| 12 |
sentiment_intensity_analysis,
|
| 13 |
EXAMPLE_CONVERSATIONS,
|
| 14 |
+
label_analysis,
|
| 15 |
)
|
| 16 |
from fairseq.data.data_utils import collate_tokens
|
| 17 |
|
|
|
|
| 21 |
|
| 22 |
from app import roberta, comet, COSMIC_MODEL, cosmic_args
|
| 23 |
from preprocessing import preprocess
|
| 24 |
+
from preprocessing.preprocess import decode_numeric_label, decode_speaker_role
|
| 25 |
from Model.COSMIC.erc_training.predict_epik import predict, get_valid_dataloader
|
| 26 |
|
| 27 |
|
|
|
|
| 227 |
|
| 228 |
# reset the output whenever a change in the input is detected
|
| 229 |
conversation_input.change(lambda x: "", conversation_input, output)
|
| 230 |
+
|
| 231 |
+
gr.Markdown("# Analysis of Labels")
|
| 232 |
+
with gr.Row():
|
| 233 |
+
with gr.Column(scale=1):
|
| 234 |
+
gr.Markdown(
|
| 235 |
+
"""
|
| 236 |
+
<b>Frequency Analysis of Labels</b>
|
| 237 |
+
One key aspect of our analysis involves examining the
|
| 238 |
+
frequency distribution of labels assigned to different
|
| 239 |
+
parts of the conversation. This includes tracking the
|
| 240 |
+
occurrence of labels such as "Interest," "Curiosity,"
|
| 241 |
+
"Confused," "Openness," and "Acceptance." The resulting
|
| 242 |
+
distribution provides insights into the prevalence of
|
| 243 |
+
various sentiments during the interaction.
|
| 244 |
+
|
| 245 |
+
<b>Word Cloud Visualization</b>
|
| 246 |
+
In addition to label frequency, we employ word cloud
|
| 247 |
+
visualization to depict the prominent terms in the input
|
| 248 |
+
conversations. This visual representation highlights the
|
| 249 |
+
most frequently used words, shedding light on the key
|
| 250 |
+
themes and topics discussed.
|
| 251 |
+
"""
|
| 252 |
+
)
|
| 253 |
+
with gr.Column(scale=3):
|
| 254 |
+
labels_plot = gr.Plot(label="Analysis of Labels Plot")
|
| 255 |
+
with gr.Column(scale=3):
|
| 256 |
+
wordcloud_plot = gr.Plot(label="Analysis of Labels Plot")
|
| 257 |
+
|
| 258 |
+
labels_btn = gr.Button(value="Plot Label Analysis")
|
| 259 |
+
labels_btn.click(label_analysis, inputs=[output], outputs=[labels_plot,wordcloud_plot])
|
| 260 |
|
| 261 |
gr.Markdown("# Sentiment Flow Plot")
|
| 262 |
with gr.Row():
|
|
|
|
| 305 |
|
| 306 |
# reset all outputs whenever a change in the input is detected
|
| 307 |
conversation_input.change(
|
| 308 |
+
lambda x: ("", None, None, None, None),
|
| 309 |
conversation_input,
|
| 310 |
+
outputs=[output, labels_plot, wordcloud_plot, plot_box, intensity_plot],
|
| 311 |
)
|
| 312 |
return cosmic_model
|
app/deberta_view.py
CHANGED
|
@@ -6,6 +6,7 @@ from app.utils import (
|
|
| 6 |
sentiment_flow_plot,
|
| 7 |
sentiment_intensity_analysis,
|
| 8 |
EXAMPLE_CONVERSATIONS,
|
|
|
|
| 9 |
)
|
| 10 |
|
| 11 |
import sys
|
|
@@ -106,6 +107,36 @@ def deberta_ui():
|
|
| 106 |
# reset the output whenever a change in the input is detected
|
| 107 |
conversation_input.change(lambda x: "", conversation_input, output)
|
| 108 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
gr.Markdown("# Sentiment Flow Plot")
|
| 110 |
with gr.Row():
|
| 111 |
with gr.Column(scale=1):
|
|
@@ -153,8 +184,8 @@ def deberta_ui():
|
|
| 153 |
|
| 154 |
# reset all outputs whenever a change in the input is detected
|
| 155 |
conversation_input.change(
|
| 156 |
-
lambda x: ("", None, None),
|
| 157 |
conversation_input,
|
| 158 |
-
outputs=[output, plot_box, intensity_plot],
|
| 159 |
)
|
| 160 |
-
return deberta_model
|
|
|
|
| 6 |
sentiment_flow_plot,
|
| 7 |
sentiment_intensity_analysis,
|
| 8 |
EXAMPLE_CONVERSATIONS,
|
| 9 |
+
label_analysis,
|
| 10 |
)
|
| 11 |
|
| 12 |
import sys
|
|
|
|
| 107 |
# reset the output whenever a change in the input is detected
|
| 108 |
conversation_input.change(lambda x: "", conversation_input, output)
|
| 109 |
|
| 110 |
+
gr.Markdown("# Analysis of Labels")
|
| 111 |
+
with gr.Row():
|
| 112 |
+
with gr.Column(scale=1):
|
| 113 |
+
gr.Markdown(
|
| 114 |
+
"""
|
| 115 |
+
<b>Frequency Analysis of Labels</b>
|
| 116 |
+
One key aspect of our analysis involves examining the
|
| 117 |
+
frequency distribution of labels assigned to different
|
| 118 |
+
parts of the conversation. This includes tracking the
|
| 119 |
+
occurrence of labels such as "Interest," "Curiosity,"
|
| 120 |
+
"Confused," "Openness," and "Acceptance." The resulting
|
| 121 |
+
distribution provides insights into the prevalence of
|
| 122 |
+
various sentiments during the interaction.
|
| 123 |
+
|
| 124 |
+
<b>Word Cloud Visualization</b>
|
| 125 |
+
In addition to label frequency, we employ word cloud
|
| 126 |
+
visualization to depict the prominent terms in the input
|
| 127 |
+
conversations. This visual representation highlights the
|
| 128 |
+
most frequently used words, shedding light on the key
|
| 129 |
+
themes and topics discussed.
|
| 130 |
+
"""
|
| 131 |
+
)
|
| 132 |
+
with gr.Column(scale=3):
|
| 133 |
+
labels_plot = gr.Plot(label="Analysis of Labels Plot")
|
| 134 |
+
with gr.Column(scale=3):
|
| 135 |
+
wordcloud_plot = gr.Plot(label="Analysis of Labels Plot")
|
| 136 |
+
|
| 137 |
+
labels_btn = gr.Button(value="Plot Label Analysis")
|
| 138 |
+
labels_btn.click(label_analysis, inputs=[output], outputs=[labels_plot,wordcloud_plot])
|
| 139 |
+
|
| 140 |
gr.Markdown("# Sentiment Flow Plot")
|
| 141 |
with gr.Row():
|
| 142 |
with gr.Column(scale=1):
|
|
|
|
| 184 |
|
| 185 |
# reset all outputs whenever a change in the input is detected
|
| 186 |
conversation_input.change(
|
| 187 |
+
lambda x: ("", None, None, None, None),
|
| 188 |
conversation_input,
|
| 189 |
+
outputs=[output, labels_plot, wordcloud_plot, plot_box, intensity_plot],
|
| 190 |
)
|
| 191 |
+
return deberta_model
|
app/gpt_view.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import seaborn as sns
|
| 3 |
from openai import OpenAI
|
| 4 |
import gradio as gr
|
| 5 |
import re
|
|
@@ -11,6 +9,7 @@ from app.utils import (
|
|
| 11 |
display_sentiment_score_table,
|
| 12 |
sentiment_intensity_analysis,
|
| 13 |
EXAMPLE_CONVERSATIONS,
|
|
|
|
| 14 |
)
|
| 15 |
|
| 16 |
openai_args = {"api_key": ""}
|
|
@@ -332,6 +331,36 @@ def gpt_ui():
|
|
| 332 |
outputs=[output_box, report_md],
|
| 333 |
)
|
| 334 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 335 |
gr.Markdown("# Sentiment Flow Plot")
|
| 336 |
with gr.Row():
|
| 337 |
with gr.Column(scale=1):
|
|
@@ -379,9 +408,9 @@ def gpt_ui():
|
|
| 379 |
|
| 380 |
# reset all outputs whenever a change in the input is detected
|
| 381 |
conversation_input.change(
|
| 382 |
-
lambda x: ("", "", None, None),
|
| 383 |
conversation_input,
|
| 384 |
-
outputs=[output_box, report_md, plot_box, intensity_plot],
|
| 385 |
)
|
| 386 |
|
| 387 |
-
return gpt_model
|
|
|
|
|
|
|
|
|
|
| 1 |
from openai import OpenAI
|
| 2 |
import gradio as gr
|
| 3 |
import re
|
|
|
|
| 9 |
display_sentiment_score_table,
|
| 10 |
sentiment_intensity_analysis,
|
| 11 |
EXAMPLE_CONVERSATIONS,
|
| 12 |
+
label_analysis,
|
| 13 |
)
|
| 14 |
|
| 15 |
openai_args = {"api_key": ""}
|
|
|
|
| 331 |
outputs=[output_box, report_md],
|
| 332 |
)
|
| 333 |
|
| 334 |
+
gr.Markdown("# Analysis of Labels")
|
| 335 |
+
with gr.Row():
|
| 336 |
+
with gr.Column(scale=1):
|
| 337 |
+
gr.Markdown(
|
| 338 |
+
"""
|
| 339 |
+
<b>Frequency Analysis of Labels</b>
|
| 340 |
+
One key aspect of our analysis involves examining the
|
| 341 |
+
frequency distribution of labels assigned to different
|
| 342 |
+
parts of the conversation. This includes tracking the
|
| 343 |
+
occurrence of labels such as "Interest," "Curiosity,"
|
| 344 |
+
"Confused," "Openness," and "Acceptance." The resulting
|
| 345 |
+
distribution provides insights into the prevalence of
|
| 346 |
+
various sentiments during the interaction.
|
| 347 |
+
|
| 348 |
+
<b>Word Cloud Visualization</b>
|
| 349 |
+
In addition to label frequency, we employ word cloud
|
| 350 |
+
visualization to depict the prominent terms in the input
|
| 351 |
+
conversations. This visual representation highlights the
|
| 352 |
+
most frequently used words, shedding light on the key
|
| 353 |
+
themes and topics discussed.
|
| 354 |
+
"""
|
| 355 |
+
)
|
| 356 |
+
with gr.Column(scale=3):
|
| 357 |
+
labels_plot = gr.Plot(label="Analysis of Labels Plot")
|
| 358 |
+
with gr.Column(scale=3):
|
| 359 |
+
wordcloud_plot = gr.Plot(label="Analysis of Labels Plot")
|
| 360 |
+
|
| 361 |
+
labels_btn = gr.Button(value="Plot Label Analysis")
|
| 362 |
+
labels_btn.click(label_analysis, inputs=[output_box], outputs=[labels_plot,wordcloud_plot])
|
| 363 |
+
|
| 364 |
gr.Markdown("# Sentiment Flow Plot")
|
| 365 |
with gr.Row():
|
| 366 |
with gr.Column(scale=1):
|
|
|
|
| 408 |
|
| 409 |
# reset all outputs whenever a change in the input is detected
|
| 410 |
conversation_input.change(
|
| 411 |
+
lambda x: ("", "", None, None, None, None),
|
| 412 |
conversation_input,
|
| 413 |
+
outputs=[output_box, report_md, labels_plot, wordcloud_plot, plot_box, intensity_plot],
|
| 414 |
)
|
| 415 |
|
| 416 |
+
return gpt_model
|
app/utils.py
CHANGED
|
@@ -6,9 +6,10 @@ import pandas as pd
|
|
| 6 |
import seaborn as sns
|
| 7 |
from statistics import mean
|
| 8 |
import matplotlib.pyplot as plt
|
|
|
|
|
|
|
| 9 |
from nltk.sentiment import SentimentIntensityAnalyzer
|
| 10 |
from preprocessing.preprocess import process_user_input
|
| 11 |
-
from preprocessing.preprocess import decode_numeric_label, decode_speaker_role
|
| 12 |
|
| 13 |
|
| 14 |
def create_input_instruction():
|
|
@@ -74,6 +75,36 @@ SENTIMENT_GROUP_MAPPING = {
|
|
| 74 |
"Obscene": -3,
|
| 75 |
}
|
| 76 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
|
| 78 |
def sentiment_flow_plot(labeled_conv):
|
| 79 |
"""Generate the sentiment flow plot using the output from the label predecting
|
|
|
|
| 6 |
import seaborn as sns
|
| 7 |
from statistics import mean
|
| 8 |
import matplotlib.pyplot as plt
|
| 9 |
+
from collections import Counter
|
| 10 |
+
from wordcloud import WordCloud
|
| 11 |
from nltk.sentiment import SentimentIntensityAnalyzer
|
| 12 |
from preprocessing.preprocess import process_user_input
|
|
|
|
| 13 |
|
| 14 |
|
| 15 |
def create_input_instruction():
|
|
|
|
| 75 |
"Obscene": -3,
|
| 76 |
}
|
| 77 |
|
| 78 |
+
def label_analysis(labeled_conv):
    """Analyze the sentiment labels of a labeled conversation transcript.

    :param labeled_conv: conversation text where each message follows the
        format ``<Speaker>: <message>\\n[<label>]`` with Speaker being
        ``Agent`` or ``Visitor`` (the format produced by
        ``format_prediction_ouptut``)
    :return: tuple ``(fig1, fig2)`` of matplotlib figures — ``fig1`` is a
        bar plot of label frequencies, ``fig2`` is a word cloud built from
        the label text
    """
    msg_pattern = r"(Agent|Visitor): (.*)\n\[(.*)\]"
    # Extract (speaker, message, label) triples; only the labels are needed.
    labels = [label for _, _, label in re.findall(msg_pattern, labeled_conv)]

    label_counts = Counter(labels)

    # Bar plot of label frequencies. Operate on the Axes object directly
    # instead of the pyplot global "current figure" state, which is fragile
    # when multiple figures are alive (as in a Gradio server).
    fig1, ax1 = plt.subplots(figsize=(12, 6))
    sns.barplot(x=list(label_counts.keys()), y=list(label_counts.values()), ax=ax1)
    ax1.set_title('Label Frequency Distribution')
    ax1.set_xlabel('Labels')
    ax1.set_ylabel('Frequency')
    # Rotate the tick labels so long label names stay readable.
    plt.setp(ax1.get_xticklabels(), rotation=45, ha='right')

    # Word cloud over the concatenated label text.
    labels_text = " ".join(labels)
    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(labels_text)

    # Separate figure for the word cloud, rendered as an image with no axes.
    fig2, ax2 = plt.subplots(figsize=(10, 5))
    ax2.imshow(wordcloud, interpolation='bilinear')
    ax2.axis('off')  # Turn off the axis

    return fig1, fig2
|
| 108 |
|
| 109 |
def sentiment_flow_plot(labeled_conv):
|
| 110 |
"""Generate the sentiment flow plot using the output from the label predecting
|
requirements.txt
CHANGED
|
@@ -167,4 +167,5 @@ wcwidth==0.2.12
|
|
| 167 |
weasel==0.3.4
|
| 168 |
websockets==11.0.3
|
| 169 |
Werkzeug==3.0.1
|
|
|
|
| 170 |
wrapt==1.14.1
|
|
|
|
| 167 |
weasel==0.3.4
|
| 168 |
websockets==11.0.3
|
| 169 |
Werkzeug==3.0.1
|
| 170 |
+
wordcloud==1.9.3
|
| 171 |
wrapt==1.14.1
|