kovacsvi committed on
Commit 52bbc14 · 1 Parent(s): 51b9447

removed piechart

Files changed (1)
  1. app.py +101 -85
app.py CHANGED
@@ -15,20 +15,24 @@ import plotly.express as px
15
  import seaborn as sns
16
  from tqdm import tqdm
17
 
18
- PATH = '/data/' # at least 150GB storage needs to be attached
19
- os.environ['TRANSFORMERS_CACHE'] = PATH
20
- os.environ['HF_HOME'] = PATH
21
- os.environ['HF_DATASETS_CACHE'] = PATH
22
- os.environ['TORCH_HOME'] = PATH
23
 
24
- css = '''
25
  .info {font-size: 3em; !important}
26
  .title_ {text-align: center;}
27
- '''
28
 
29
  HF_TOKEN = os.environ["hf_read"]
30
 
31
- SENTIMENT_LABEL_NAMES = {0: "Negative", 1: "No sentiment or Neutral sentiment", 2: "Positive"}
32
  LANGUAGES = ["Czech", "English", "French", "German", "Hungarian", "Polish", "Slovakian"]
33
 
34
  id2label = {
@@ -37,7 +41,7 @@ id2label = {
37
  2: "Disgust",
38
  3: "Sadness",
39
  4: "Joy",
40
- 5: "None of Them"
41
  }
42
 
43
  emotion_colors = {
@@ -46,8 +50,10 @@ emotion_colors = {
46
  "Disgust": "#A4C639",
47
  "Sadness": "#9DBCD4",
48
  "Joy": "#F3E9A8",
49
- "None of Them": "#C0C0C0"
50
  }
51
  def load_spacy_model(model_name="xx_sent_ud_sm"):
52
  try:
53
  model = spacy.load(model_name)
@@ -56,31 +62,38 @@ def load_spacy_model(model_name="xx_sent_ud_sm"):
56
  model = spacy.load(model_name)
57
  return model
58
 
 
59
  def split_sentences(text, model):
60
  # disable pipeline components not necessary for splitting
61
  model.disable_pipes(model.pipe_names) # first disable all the pipes
62
- model.enable_pipe("senter") # then enable the sentence splitter only
63
 
64
  doc = model(text)
65
  sentences = [sent.text for sent in doc.sents]
66
 
67
  return sentences
68
 
 
69
  def build_huggingface_path(language: str):
70
  if language == "Czech" or language == "Slovakian":
71
  return "visegradmedia-emotion/Emotion_RoBERTa_pooled_V4"
72
  return "poltextlab/xlm-roberta-large-pooled-emotions6"
73
 
 
74
  @spaces.GPU
75
  def predict(text, model_id, tokenizer_id):
76
- model = AutoModelForSequenceClassification.from_pretrained(model_id, low_cpu_mem_usage=True, device_map="auto", offload_folder="offload", token=HF_TOKEN)
77
  tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
78
 
79
- inputs = tokenizer(text,
80
- max_length=64,
81
- truncation=True,
82
- padding="do_not_pad",
83
- return_tensors="pt")
84
  model.eval()
85
 
86
  with torch.no_grad():
@@ -89,6 +102,7 @@ def predict(text, model_id, tokenizer_id):
89
  probs = torch.nn.functional.softmax(logits, dim=1).cpu().numpy().flatten()
90
  return probs
91
 
 
92
  def get_most_probable_label(probs):
93
  label = id2label[probs.argmax()]
94
  probability = f"{round(100 * probs.max(), 2)}%"
@@ -104,9 +118,10 @@ def prepare_heatmap_data(data):
104
  emotion = id2label[idy]
105
  heatmap_data.at[emotion, idx] = round(confidence, 4)
106
 
107
- heatmap_data.columns = [item["sentence"][:18]+"..." for item in data]
108
  return heatmap_data
109
 
 
110
  def plot_emotion_heatmap(heatmap_data):
111
  # Transpose: now rows = sentences, columns = emotions
112
  heatmap_data = heatmap_data.T
@@ -115,10 +130,14 @@ def plot_emotion_heatmap(heatmap_data):
115
  normalized_data = heatmap_data.copy()
116
  for row in normalized_data.index:
117
  max_val = normalized_data.loc[row].max()
118
- normalized_data.loc[row] = normalized_data.loc[row] / max_val if max_val > 0 else 0
119
 
120
  # Create color matrix
121
- color_matrix = np.empty((len(normalized_data.index), len(normalized_data.columns), 3))
122
  for i, sentence in enumerate(normalized_data.index):
123
  for j, emotion in enumerate(normalized_data.columns):
124
  val = normalized_data.loc[sentence, emotion]
@@ -127,12 +146,17 @@ def plot_emotion_heatmap(heatmap_data):
127
  blended = tuple(1 - val * (1 - c) for c in base_rgb)
128
  color_matrix[i, j] = blended
129
 
130
- fig, ax = plt.subplots(figsize=(len(normalized_data.columns) * 0.8 + 2, len(normalized_data.index) * 0.5 + 2))
131
- ax.imshow(color_matrix, aspect='auto')
132
 
133
  # Set ticks and labels
134
  ax.set_xticks(np.arange(len(normalized_data.columns)))
135
- ax.set_xticklabels(normalized_data.columns, rotation=45, ha='right', fontsize=10)
136
 
137
  ax.set_yticks(np.arange(len(normalized_data.index)))
138
  ax.set_yticklabels(normalized_data.index, rotation=0, fontsize=10)
@@ -143,63 +167,50 @@ def plot_emotion_heatmap(heatmap_data):
143
  plt.tight_layout()
144
  return fig
145
 
146
- def plot_average_emotion_pie(heatmap_data):
147
- all_emotion_scores = np.array([item['emotions'] for item in heatmap_data])
148
  mean_scores = all_emotion_scores.mean(axis=0)
149
 
150
  labels = [id2label[i] for i in range(len(mean_scores))]
151
- sizes = mean_scores
152
 
153
- # optional: remove emotions with near-zero average
154
  labels_filtered = []
155
- sizes_filtered = []
156
- for l, s in zip(labels, sizes):
157
- if s > 0.01: # You can change this threshold
158
  labels_filtered.append(l)
159
- sizes_filtered.append(s)
160
-
161
- fig, ax = plt.subplots(figsize=(6, 6))
162
- wedges, texts, autotexts = ax.pie(
163
- sizes_filtered,
164
- labels=labels_filtered,
165
- autopct='%1.1f%%',
166
- startangle=140,
167
- textprops={'fontsize': 12},
168
- colors=[emotion_colors[l] for l in labels_filtered]
169
- )
170
 
171
- ax.axis('equal') # Equal aspect ratio ensures a circle
172
- plt.title("Average Emotion Confidence Across Sentences", fontsize=14, pad=15)
173
 
174
- return fig
175
 
176
- def plot_emotion_barplot(heatmap_data):
177
- most_probable_emotions = heatmap_data.idxmax(axis=0)
178
- emotion_counts = most_probable_emotions.value_counts()
179
- all_emotions = heatmap_data.index
180
-
181
- # Convert to percentage, round to integer
182
- emotion_frequencies = (emotion_counts.reindex(all_emotions, fill_value=0) / emotion_counts.sum() * 100).round(0)
183
- emotion_frequencies = emotion_frequencies.sort_values(ascending=False)
184
-
185
- palette = [emotion_colors[emotion] for emotion in emotion_frequencies.index]
186
-
187
  fig, ax = plt.subplots(figsize=(8, 6))
188
- bars = sns.barplot(x=emotion_frequencies.values, y=emotion_frequencies.index, palette=palette, ax=ax)
189
-
190
- ax.xaxis.set_major_formatter(PercentFormatter(xmax=100, decimals=0))
191
-
192
- # Add % labels on the bars
193
- for i, (value, label) in enumerate(zip(emotion_frequencies.values, emotion_frequencies.index)):
194
- ax.text(value + 1, i, f"{int(value)}%", va='center')
195
-
196
- ax.set_title("Relative Frequencies of Predicted Emotions")
197
- ax.set_xlabel("Relative Frequency")
198
  ax.set_ylabel("Emotions")
199
  plt.tight_layout()
200
-
201
  return fig
202
 
 
203
  def predict_wrapper(text, language):
204
  model_id = build_huggingface_path(language)
205
  tokenizer_id = "xlm-roberta-large"
@@ -213,18 +224,17 @@ def predict_wrapper(text, language):
213
  probs = predict(sentence, model_id, tokenizer_id)
214
  label, probability = get_most_probable_label(probs)
215
  results.append([sentence, label, probability])
216
- results_heatmap.append({"sentence":sentence, "emotions":probs})
217
 
218
  # let's see...
219
  print(results)
220
  print(results_heatmap)
221
 
222
- figure = plot_emotion_barplot(prepare_heatmap_data(results_heatmap))
223
  heatmap = plot_emotion_heatmap(prepare_heatmap_data(results_heatmap))
224
- piechart = plot_average_emotion_pie(results_heatmap)
225
  output_info = f'Prediction was made using the <a href="https://huggingface.co/{model_id}">{model_id}</a> model. '
226
  funding_info = "The research was funded by European Union’s Horizon 2020 research and innovation program, “MORES” project (Grant No.: 101132601)"
227
- return results, figure, piechart, heatmap, output_info + funding_info
228
 
229
 
230
  with gr.Blocks(css=css) as demo:
@@ -234,15 +244,19 @@ with gr.Blocks(css=css) as demo:
234
  The [model](https://huggingface.co/poltextlab/xlm-roberta-large-pooled-emotions6) is optimized for sentence-level analysis and makes predictions in the following languages: Czech, English, French, German, Hungarian, Polish, and Slovak.
235
  The text you enter in the input box is automatically divided into sentences, and the analysis is performed on each sentence. Depending on the length of the text, this process may take a few seconds, but for longer texts, it can take up to 2-3 minutes.
236
  """
237
-
238
  gr.HTML("<h1>MORES Pulse</h1>", elem_classes="title_")
239
  gr.Markdown(introduction, elem_classes="info")
240
  with gr.Row():
241
  with gr.Column():
242
- input_text = gr.Textbox(lines=6, label="Input", placeholder="Enter your text here...")
243
  with gr.Column():
244
  with gr.Row():
245
- language_choice = gr.Dropdown(choices=LANGUAGES, label="Language", value="English")
246
  with gr.Row():
247
  predict_button = gr.Button("Submit")
248
 
@@ -250,29 +264,32 @@ with gr.Blocks(css=css) as demo:
250
  with gr.Column(scale=7):
251
  piechart = gr.Plot()
252
  with gr.Column(scale=3):
253
- gr.Markdown("The chart gives an overview of the main emotions found in the text and how strongly each one is present.", elem_classes="info")
254
-
255
- with gr.Row():
256
- with gr.Column(scale=7):
257
- plot = gr.Plot()
258
- with gr.Column(scale=3):
259
- gr.Markdown("This bar chart shows how often each emotion appears in the sentences of the text.", elem_classes="info")
260
 
261
  with gr.Row():
262
  with gr.Column(scale=7):
263
  result_table = gr.Dataframe(
264
  headers=["Sentence", "Prediction", "Confidence"],
265
  column_widths=["65%", "25%", "10%"],
266
- wrap=True # important
267
  )
268
  with gr.Column(scale=3):
269
- gr.Markdown("This table shows the emotion detected in each sentence, along with how confident our prediction is.", elem_classes="info")
270
 
271
  with gr.Row():
272
  with gr.Column(scale=7):
273
  heatmap = gr.Plot()
274
  with gr.Column(scale=3):
275
- gr.Markdown("This heatmap shows how strongly each emotion appears in every sentence. Darker colours mean stronger presence.", elem_classes="info")
276
 
277
  with gr.Row():
278
  model_info = gr.Markdown()
@@ -280,9 +297,8 @@ with gr.Blocks(css=css) as demo:
280
  predict_button.click(
281
  fn=predict_wrapper,
282
  inputs=[input_text, language_choice],
283
- outputs=[result_table, plot, piechart, heatmap, model_info]
284
  )
285
 
286
  if __name__ == "__main__":
287
  demo.launch()
288
-
 
15
  import seaborn as sns
16
  from tqdm import tqdm
17
 
18
+ PATH = "/data/" # at least 150GB storage needs to be attached
19
+ os.environ["TRANSFORMERS_CACHE"] = PATH
20
+ os.environ["HF_HOME"] = PATH
21
+ os.environ["HF_DATASETS_CACHE"] = PATH
22
+ os.environ["TORCH_HOME"] = PATH
23
 
24
+ css = """
25
  .info {font-size: 3em; !important}
26
  .title_ {text-align: center;}
27
+ """
28
 
29
  HF_TOKEN = os.environ["hf_read"]
30
 
31
+ SENTIMENT_LABEL_NAMES = {
32
+ 0: "Negative",
33
+ 1: "No sentiment or Neutral sentiment",
34
+ 2: "Positive",
35
+ }
36
  LANGUAGES = ["Czech", "English", "French", "German", "Hungarian", "Polish", "Slovakian"]
37
 
38
  id2label = {
 
41
  2: "Disgust",
42
  3: "Sadness",
43
  4: "Joy",
44
+ 5: "None of Them",
45
  }
46
 
47
  emotion_colors = {
 
50
  "Disgust": "#A4C639",
51
  "Sadness": "#9DBCD4",
52
  "Joy": "#F3E9A8",
53
+ "None of Them": "#C0C0C0",
54
  }
55
+
56
+
57
  def load_spacy_model(model_name="xx_sent_ud_sm"):
58
  try:
59
  model = spacy.load(model_name)
 
62
  model = spacy.load(model_name)
63
  return model
64
 
65
+
66
  def split_sentences(text, model):
67
  # disable pipeline components not necessary for splitting
68
  model.disable_pipes(model.pipe_names) # first disable all the pipes
69
+ model.enable_pipe("senter") # then enable the sentence splitter only
70
 
71
  doc = model(text)
72
  sentences = [sent.text for sent in doc.sents]
73
 
74
  return sentences
75
 
76
+
77
  def build_huggingface_path(language: str):
78
  if language == "Czech" or language == "Slovakian":
79
  return "visegradmedia-emotion/Emotion_RoBERTa_pooled_V4"
80
  return "poltextlab/xlm-roberta-large-pooled-emotions6"
81
 
82
+
83
  @spaces.GPU
84
  def predict(text, model_id, tokenizer_id):
85
+ model = AutoModelForSequenceClassification.from_pretrained(
86
+ model_id,
87
+ low_cpu_mem_usage=True,
88
+ device_map="auto",
89
+ offload_folder="offload",
90
+ token=HF_TOKEN,
91
+ )
92
  tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
93
 
94
+ inputs = tokenizer(
95
+ text, max_length=64, truncation=True, padding="do_not_pad", return_tensors="pt"
96
+ )
97
  model.eval()
98
 
99
  with torch.no_grad():
 
102
  probs = torch.nn.functional.softmax(logits, dim=1).cpu().numpy().flatten()
103
  return probs
104
 
105
+
106
  def get_most_probable_label(probs):
107
  label = id2label[probs.argmax()]
108
  probability = f"{round(100 * probs.max(), 2)}%"
 
118
  emotion = id2label[idy]
119
  heatmap_data.at[emotion, idx] = round(confidence, 4)
120
 
121
+ heatmap_data.columns = [item["sentence"][:18] + "..." for item in data]
122
  return heatmap_data
123
 
124
+
125
  def plot_emotion_heatmap(heatmap_data):
126
  # Transpose: now rows = sentences, columns = emotions
127
  heatmap_data = heatmap_data.T
 
130
  normalized_data = heatmap_data.copy()
131
  for row in normalized_data.index:
132
  max_val = normalized_data.loc[row].max()
133
+ normalized_data.loc[row] = (
134
+ normalized_data.loc[row] / max_val if max_val > 0 else 0
135
+ )
136
 
137
  # Create color matrix
138
+ color_matrix = np.empty(
139
+ (len(normalized_data.index), len(normalized_data.columns), 3)
140
+ )
141
  for i, sentence in enumerate(normalized_data.index):
142
  for j, emotion in enumerate(normalized_data.columns):
143
  val = normalized_data.loc[sentence, emotion]
 
146
  blended = tuple(1 - val * (1 - c) for c in base_rgb)
147
  color_matrix[i, j] = blended
148
 
149
+ fig, ax = plt.subplots(
150
+ figsize=(
151
+ len(normalized_data.columns) * 0.8 + 2,
152
+ len(normalized_data.index) * 0.5 + 2,
153
+ )
154
+ )
155
+ ax.imshow(color_matrix, aspect="auto")
156
 
157
  # Set ticks and labels
158
  ax.set_xticks(np.arange(len(normalized_data.columns)))
159
+ ax.set_xticklabels(normalized_data.columns, rotation=45, ha="right", fontsize=10)
160
 
161
  ax.set_yticks(np.arange(len(normalized_data.index)))
162
  ax.set_yticklabels(normalized_data.index, rotation=0, fontsize=10)
 
167
  plt.tight_layout()
168
  return fig
169
 
170
+
171
+ def plot_average_emotion_barplot(heatmap_data):
172
+ # Compute average emotion scores
173
+ all_emotion_scores = np.array([item["emotions"] for item in heatmap_data])
174
  mean_scores = all_emotion_scores.mean(axis=0)
175
 
176
  labels = [id2label[i] for i in range(len(mean_scores))]
177
+ scores = mean_scores
178
 
179
+ # Optional: filter out near-zero average emotions
180
  labels_filtered = []
181
+ scores_filtered = []
182
+ for l, s in zip(labels, scores):
183
+ if s > 0.01:
184
  labels_filtered.append(l)
185
+ scores_filtered.append(s)
186
 
187
+ # Sort for better visualization
188
+ sorted_data = sorted(
189
+ zip(labels_filtered, scores_filtered), key=lambda x: x[1], reverse=True
190
+ )
191
+ sorted_labels, sorted_scores = zip(*sorted_data)
192
 
193
+ colors = [emotion_colors[label] for label in sorted_labels]
194
 
195
  fig, ax = plt.subplots(figsize=(8, 6))
196
+ bars = sns.barplot(
197
+ x=list(sorted_scores), y=list(sorted_labels), palette=colors, ax=ax
198
+ )
199
+
200
+ ax.xaxis.set_major_formatter(PercentFormatter(xmax=1.0, decimals=0))
201
+
202
+ # Add percentage labels
203
+ for i, score in enumerate(sorted_scores):
204
+ ax.text(score + 0.01, i, f"{score*100:.1f}%", va="center")
205
+
206
+ ax.set_title("Average Emotion Confidence Across Sentences", fontsize=14)
207
+ ax.set_xlabel("Average Confidence")
208
  ax.set_ylabel("Emotions")
209
  plt.tight_layout()
210
+
211
  return fig
212
 
213
+
214
  def predict_wrapper(text, language):
215
  model_id = build_huggingface_path(language)
216
  tokenizer_id = "xlm-roberta-large"
 
224
  probs = predict(sentence, model_id, tokenizer_id)
225
  label, probability = get_most_probable_label(probs)
226
  results.append([sentence, label, probability])
227
+ results_heatmap.append({"sentence": sentence, "emotions": probs})
228
 
229
  # let's see...
230
  print(results)
231
  print(results_heatmap)
232
 
233
+ figure = plot_average_emotion_barplot(prepare_heatmap_data(results_heatmap))
234
  heatmap = plot_emotion_heatmap(prepare_heatmap_data(results_heatmap))
 
235
  output_info = f'Prediction was made using the <a href="https://huggingface.co/{model_id}">{model_id}</a> model. '
236
  funding_info = "The research was funded by European Union’s Horizon 2020 research and innovation program, “MORES” project (Grant No.: 101132601)"
237
+ return results, figure, heatmap, output_info + funding_info
238
 
239
 
240
  with gr.Blocks(css=css) as demo:
 
244
  The [model](https://huggingface.co/poltextlab/xlm-roberta-large-pooled-emotions6) is optimized for sentence-level analysis and makes predictions in the following languages: Czech, English, French, German, Hungarian, Polish, and Slovak.
245
  The text you enter in the input box is automatically divided into sentences, and the analysis is performed on each sentence. Depending on the length of the text, this process may take a few seconds, but for longer texts, it can take up to 2-3 minutes.
246
  """
247
+
248
  gr.HTML("<h1>MORES Pulse</h1>", elem_classes="title_")
249
  gr.Markdown(introduction, elem_classes="info")
250
  with gr.Row():
251
  with gr.Column():
252
+ input_text = gr.Textbox(
253
+ lines=6, label="Input", placeholder="Enter your text here..."
254
+ )
255
  with gr.Column():
256
  with gr.Row():
257
+ language_choice = gr.Dropdown(
258
+ choices=LANGUAGES, label="Language", value="English"
259
+ )
260
  with gr.Row():
261
  predict_button = gr.Button("Submit")
262
 
 
264
  with gr.Column(scale=7):
265
  piechart = gr.Plot()
266
  with gr.Column(scale=3):
267
+ gr.Markdown(
268
+ "The chart gives an overview of the main emotions found in the text and how strongly each one is present.",
269
+ elem_classes="info",
270
+ )
271
 
272
  with gr.Row():
273
  with gr.Column(scale=7):
274
  result_table = gr.Dataframe(
275
  headers=["Sentence", "Prediction", "Confidence"],
276
  column_widths=["65%", "25%", "10%"],
277
+ wrap=True, # important
278
  )
279
  with gr.Column(scale=3):
280
+ gr.Markdown(
281
+ "This table shows the emotion detected in each sentence, along with how confident our prediction is.",
282
+ elem_classes="info",
283
+ )
284
 
285
  with gr.Row():
286
  with gr.Column(scale=7):
287
  heatmap = gr.Plot()
288
  with gr.Column(scale=3):
289
+ gr.Markdown(
290
+ "This heatmap shows how strongly each emotion appears in every sentence. Darker colours mean stronger presence.",
291
+ elem_classes="info",
292
+ )
293
 
294
  with gr.Row():
295
  model_info = gr.Markdown()
 
297
  predict_button.click(
298
  fn=predict_wrapper,
299
  inputs=[input_text, language_choice],
300
+ outputs=[result_table, piechart, heatmap, model_info],
301
  )
302
 
303
  if __name__ == "__main__":
304
  demo.launch()
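
For context (not part of the commit): a minimal standalone sketch of the per-sentence records that predict_wrapper collects and that the new plot_average_emotion_barplot() averages before drawing the bars that replace the removed pie chart. The sentences and probability values below are invented for illustration; only numpy is assumed.

import numpy as np

# Invented per-sentence softmax outputs, one probability per label in id2label
# (same shape as the probs array returned by predict()).
results_heatmap = [
    {"sentence": "First example sentence.", "emotions": np.array([0.05, 0.10, 0.05, 0.10, 0.65, 0.05])},
    {"sentence": "Second example sentence.", "emotions": np.array([0.40, 0.10, 0.05, 0.30, 0.10, 0.05])},
]

# The same averaging step the new barplot performs before sorting and plotting.
mean_scores = np.array([r["emotions"] for r in results_heatmap]).mean(axis=0)
print(mean_scores.round(3))  # averages: 0.225, 0.1, 0.05, 0.2, 0.375, 0.05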