dhanikitkat committed on
Commit
e3d3834
·
1 Parent(s): dee7862

Update Word Cloud & Some buttons

Browse files
Files changed (1) hide show
  1. app.py +138 -31
app.py CHANGED
@@ -15,7 +15,7 @@ import os
15
 
16
 
17
  nltk.download('punkt')
18
-
19
 
20
  # Load pipelines
21
  sentiment_pipe = pipeline("text-classification", model="ayameRushia/bert-base-indonesian-1.5G-sentiment-analysis-smsa")
@@ -48,25 +48,40 @@ def preprocess_text(text, slank_formal_df):
48
  preprocessed_text = ' '.join(tokens)
49
  return preprocessed_text
50
 
51
- def generate_wordcloud(text, font_path, title, colormap):
 
 
 
 
 
 
 
 
 
 
 
52
  wordcloud = WordCloud(
53
- width=600,
54
- height=600,
55
  background_color='white',
56
  font_path=font_path,
57
  prefer_horizontal=1.0,
58
  colormap=colormap,
59
- max_words=100
 
60
  ).generate(text)
61
 
62
- plt.figure(figsize=(10, 10))
63
- plt.title(title, fontsize=20)
64
  plt.imshow(wordcloud, interpolation='bilinear')
65
  plt.axis('off')
66
- st.pyplot(plt)
67
-
68
- # Save word cloud to file
69
- wordcloud.to_file(f"{title}.png")
 
 
 
70
 
71
  # Add download link for word cloud
72
  st.markdown(get_image_download_link(f"{title}.png"), unsafe_allow_html=True)
@@ -102,7 +117,6 @@ def get_example_download_link(file_path, link_text):
102
  b64 = base64.b64encode(file.read()).decode()
103
  return f'<a href="data:file/txt;base64,{b64}" download="{os.path.basename(file_path)}">{link_text}</a>'
104
 
105
-
106
  def combined_analysis(text, slank_formal_df):
107
  texts = text.split('\n')
108
  results = []
@@ -114,30 +128,76 @@ def combined_analysis(text, slank_formal_df):
114
  results.append((text, cleaned_text, sentiment_result['label'].lower(), sentiment_result['score'], emotion_result['label'].lower(), emotion_result['score']))
115
  df = pd.DataFrame(results, columns=['Content', 'Cleaned Content', 'Sentiment', 'Score Sentiment', 'Emotion', 'Score Emotion'])
116
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  # Sentiment pie chart
118
  sentiment_counts = df['Sentiment'].value_counts()
119
- fig_sentiment = px.pie(sentiment_counts, values=sentiment_counts.values, names=sentiment_counts.index, title='Sentiment Distribution')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  st.plotly_chart(fig_sentiment, use_container_width=True)
 
121
 
122
  # Emotion pie chart
123
  emotion_counts = df['Emotion'].value_counts()
124
- fig_emotion = px.pie(emotion_counts, values=emotion_counts.values, names=emotion_counts.index, title='Emotion Distribution')
125
- st.plotly_chart(fig_emotion, use_container_width=True)
 
 
126
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  # Generate word clouds
128
  font_path = os.path.join('assets', 'Poppins-Regular.ttf')
129
 
130
- # Overall word cloud
131
  overall_text = ' '.join(df['Cleaned Content'].dropna())
132
- generate_wordcloud(overall_text, font_path, 'Overall Word Cloud', 'viridis')
133
-
134
- # Positive sentiment and happy emotion word cloud
135
  positive_happy_text = ' '.join(df[(df['Sentiment'] == 'positive') & (df['Emotion'] == 'senang')]['Cleaned Content'].dropna())
136
- generate_wordcloud(positive_happy_text, font_path, 'Positive Sentiment & Happy Emotion Word Cloud', 'Greens')
137
 
138
- # Negative sentiment and angry or sad emotion word cloud
139
  negative_angry_sad_text = ' '.join(df[(df['Sentiment'] == 'negative') & (df['Emotion'].isin(['marah', 'sedih']))]['Cleaned Content'].dropna())
140
- generate_wordcloud(negative_angry_sad_text, font_path, 'Negative Sentiment & Angry or Sad Emotion Word Cloud', 'Reds')
141
 
142
  # Word frequency
143
  word_freq = pd.Series(' '.join(df['Cleaned Content'].dropna()).split()).value_counts()
@@ -176,30 +236,77 @@ def process_file(file, slank_formal_df):
176
  df['Emotion'] = [r[4] for r in results]
177
  df['Score Emotion'] = [r[5] for r in results]
178
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  # Sentiment pie chart
180
  sentiment_counts = df['Sentiment'].value_counts()
181
- fig_sentiment = px.pie(sentiment_counts, values=sentiment_counts.values, names=sentiment_counts.index, title='Sentiment Distribution')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  st.plotly_chart(fig_sentiment, use_container_width=True)
 
183
 
184
  # Emotion pie chart
185
  emotion_counts = df['Emotion'].value_counts()
186
- fig_emotion = px.pie(emotion_counts, values=emotion_counts.values, names=emotion_counts.index, title='Emotion Distribution')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  st.plotly_chart(fig_emotion, use_container_width=True)
 
188
 
189
  # Generate word clouds
190
  font_path = os.path.join('assets', 'Poppins-Regular.ttf')
191
 
192
- # Overall word cloud
193
  overall_text = ' '.join(df['Cleaned Content'].dropna())
194
- generate_wordcloud(overall_text, font_path, 'Overall Word Cloud', 'viridis')
195
-
196
- # Positive sentiment and happy emotion word cloud
197
  positive_happy_text = ' '.join(df[(df['Sentiment'] == 'positive') & (df['Emotion'] == 'senang')]['Cleaned Content'].dropna())
198
- generate_wordcloud(positive_happy_text, font_path, 'Positive Sentiment & Happy Emotion Word Cloud', 'Greens')
199
 
200
- # Negative sentiment and angry or sad emotion word cloud
201
  negative_angry_sad_text = ' '.join(df[(df['Sentiment'] == 'negative') & (df['Emotion'].isin(['marah', 'sedih']))]['Cleaned Content'].dropna())
202
- generate_wordcloud(negative_angry_sad_text, font_path, 'Negative Sentiment & Angry or Sad Emotion Word Cloud', 'Reds')
203
 
204
  # Word frequency
205
  word_freq = pd.Series(' '.join(df['Cleaned Content'].dropna()).split()).value_counts()
 
15
 
16
 
17
  nltk.download('punkt')
18
+ nltk.download('stopwords')
19
 
20
  # Load pipelines
21
  sentiment_pipe = pipeline("text-classification", model="ayameRushia/bert-base-indonesian-1.5G-sentiment-analysis-smsa")
 
48
  preprocessed_text = ' '.join(tokens)
49
  return preprocessed_text
50
 
51
+ def generate_wordcloud(text, font_path, colormap, title):
52
+ # Create a circular mask on a 1400x1400 canvas
53
+ x, y = np.ogrid[:1400, :1400] # Adjusted for 1400x1400 resolution
54
+ mask = (x - 700) ** 2 + (y - 700) ** 2 > 630 ** 2 # Adjusted mask size for 1400x1400 resolution
55
+ mask = 255 * mask.astype(int)
56
+
57
+ # Remove Indonesian stopwords
58
+ indo_stopwords = set(stopwords.words('indonesian'))
59
+ words = text.split()
60
+ words = [word for word in words if word.lower() not in indo_stopwords]
61
+ text = ' '.join(words)
62
+
63
  wordcloud = WordCloud(
64
+ width=1400,
65
+ height=1400,
66
  background_color='white',
67
  font_path=font_path,
68
  prefer_horizontal=1.0,
69
  colormap=colormap,
70
+ max_words=100,
71
+ mask=mask
72
  ).generate(text)
73
 
74
+ # Configure plot settings for high-quality output
75
+ plt.figure(figsize=(14, 14)) # Adjusted figure size for 1400x1400 resolution
76
  plt.imshow(wordcloud, interpolation='bilinear')
77
  plt.axis('off')
78
+ plt.title(title, fontsize=20, pad=20) # Title directly in matplotlib plot
79
+
80
+ # Save word cloud to file with high DPI for better quality
81
+ plt.savefig(f"{title}.png", dpi=300, bbox_inches='tight', pad_inches=0.1)
82
+
83
+ # Display word cloud in Streamlit
84
+ st.image(f"{title}.png", use_column_width=True)
85
 
86
  # Add download link for word cloud
87
  st.markdown(get_image_download_link(f"{title}.png"), unsafe_allow_html=True)
 
117
  b64 = base64.b64encode(file.read()).decode()
118
  return f'<a href="data:file/txt;base64,{b64}" download="{os.path.basename(file_path)}">{link_text}</a>'
119
 
 
120
  def combined_analysis(text, slank_formal_df):
121
  texts = text.split('\n')
122
  results = []
 
128
  results.append((text, cleaned_text, sentiment_result['label'].lower(), sentiment_result['score'], emotion_result['label'].lower(), emotion_result['score']))
129
  df = pd.DataFrame(results, columns=['Content', 'Cleaned Content', 'Sentiment', 'Score Sentiment', 'Emotion', 'Score Emotion'])
130
 
131
+ # Define custom CSS to adjust the height
132
+ st.markdown(
133
+ """
134
+ <style>
135
+ .chart-container {
136
+ display: flex;
137
+ justify-content: center;
138
+ }
139
+ .user-select-none.svg-container {
140
+ height: 350px !important;
141
+ }
142
+ .average-score {
143
+ text-align: center;
144
+ }
145
+ </style>
146
+ """,
147
+ unsafe_allow_html=True
148
+ )
149
+
150
  # Sentiment pie chart
151
  sentiment_counts = df['Sentiment'].value_counts()
152
+ fig_sentiment = px.pie(sentiment_counts, values=sentiment_counts.values, names=sentiment_counts.index, title='Sentiment Distribution', width=400, height=400)
153
+
154
+ # Calculate sentiment average
155
+ sentiment_average = df['Score Sentiment'].mean()
156
+
157
+ # Add average sentiment score as an annotation
158
+ fig_sentiment.add_annotation(
159
+ text=f"Average Sentiment Score: {sentiment_average:.4f}",
160
+ xref="paper", yref="paper",
161
+ x=0.5, y=-0.2,
162
+ showarrow=False,
163
+ font=dict(size=18)
164
+ )
165
+
166
+ st.markdown('<div class="chart-container">', unsafe_allow_html=True)
167
  st.plotly_chart(fig_sentiment, use_container_width=True)
168
+ st.markdown('</div>', unsafe_allow_html=True)
169
 
170
  # Emotion pie chart
171
  emotion_counts = df['Emotion'].value_counts()
172
+ fig_emotion = px.pie(emotion_counts, values=emotion_counts.values, names=emotion_counts.index, title='Emotion Distribution', width=400, height=400)
173
+
174
+ # Calculate emotion average
175
+ emotion_average = df['Score Emotion'].mean()
176
 
177
+ # Add average emotion score as an annotation
178
+ fig_emotion.add_annotation(
179
+ text=f"Average Emotion Score: {emotion_average:.4f}",
180
+ xref="paper", yref="paper",
181
+ x=0.5, y=-0.2,
182
+ showarrow=False,
183
+ font=dict(size=18)
184
+ )
185
+
186
+ st.markdown('<div class="chart-container">', unsafe_allow_html=True)
187
+ st.plotly_chart(fig_emotion, use_container_width=True)
188
+ st.markdown('</div>', unsafe_allow_html=True)
189
  # Generate word clouds
190
  font_path = os.path.join('assets', 'Poppins-Regular.ttf')
191
 
192
+ # Requires the 'Cleaned Content', 'Sentiment', and 'Emotion' columns in `df`
193
  overall_text = ' '.join(df['Cleaned Content'].dropna())
194
+ generate_wordcloud(overall_text, font_path, 'hsv_r', 'Overall Word Cloud')
195
+
 
196
  positive_happy_text = ' '.join(df[(df['Sentiment'] == 'positive') & (df['Emotion'] == 'senang')]['Cleaned Content'].dropna())
197
+ generate_wordcloud(positive_happy_text, font_path, 'gist_rainbow_r', 'Positive Sentiment & Happy Emotion Word Cloud')
198
 
 
199
  negative_angry_sad_text = ' '.join(df[(df['Sentiment'] == 'negative') & (df['Emotion'].isin(['marah', 'sedih']))]['Cleaned Content'].dropna())
200
+ generate_wordcloud(negative_angry_sad_text, font_path, 'inferno', 'Negative Sentiment & Angry or Sad Emotion Word Cloud')
201
 
202
  # Word frequency
203
  word_freq = pd.Series(' '.join(df['Cleaned Content'].dropna()).split()).value_counts()
 
236
  df['Emotion'] = [r[4] for r in results]
237
  df['Score Emotion'] = [r[5] for r in results]
238
 
239
+ # Define custom CSS to adjust the height
240
+ st.markdown(
241
+ """
242
+ <style>
243
+ .chart-container {
244
+ display: flex;
245
+ justify-content: center;
246
+ }
247
+ .user-select-none.svg-container {
248
+ height: 350px !important;
249
+ }
250
+ .average-score {
251
+ text-align: center;
252
+ }
253
+ </style>
254
+ """,
255
+ unsafe_allow_html=True
256
+ )
257
+
258
  # Sentiment pie chart
259
  sentiment_counts = df['Sentiment'].value_counts()
260
+ fig_sentiment = px.pie(sentiment_counts, values=sentiment_counts.values, names=sentiment_counts.index, title='Sentiment Distribution', width=400, height=400)
261
+
262
+ # Calculate sentiment average
263
+ sentiment_average = df['Score Sentiment'].mean()
264
+
265
+ # Add average sentiment score as an annotation
266
+ fig_sentiment.add_annotation(
267
+ text=f"Average Sentiment Score: {sentiment_average:.4f}",
268
+ xref="paper", yref="paper",
269
+ x=0.5, y=-0.2,
270
+ showarrow=False,
271
+ font=dict(size=18)
272
+ )
273
+
274
+ st.markdown('<div class="chart-container">', unsafe_allow_html=True)
275
  st.plotly_chart(fig_sentiment, use_container_width=True)
276
+ st.markdown('</div>', unsafe_allow_html=True)
277
 
278
  # Emotion pie chart
279
  emotion_counts = df['Emotion'].value_counts()
280
+ fig_emotion = px.pie(emotion_counts, values=emotion_counts.values, names=emotion_counts.index, title='Emotion Distribution', width=400, height=400)
281
+
282
+ # Calculate emotion average
283
+ emotion_average = df['Score Emotion'].mean()
284
+
285
+ # Add average emotion score as an annotation
286
+ fig_emotion.add_annotation(
287
+ text=f"Average Emotion Score: {emotion_average:.4f}",
288
+ xref="paper", yref="paper",
289
+ x=0.5, y=-0.2,
290
+ showarrow=False,
291
+ font=dict(size=18)
292
+ )
293
+
294
+ st.markdown('<div class="chart-container">', unsafe_allow_html=True)
295
  st.plotly_chart(fig_emotion, use_container_width=True)
296
+ st.markdown('</div>', unsafe_allow_html=True)
297
 
298
  # Generate word clouds
299
  font_path = os.path.join('assets', 'Poppins-Regular.ttf')
300
 
301
+ # Requires the 'Cleaned Content', 'Sentiment', and 'Emotion' columns in `df`
302
  overall_text = ' '.join(df['Cleaned Content'].dropna())
303
+ generate_wordcloud(overall_text, font_path, 'hsv_r', 'Overall Word Cloud')
304
+
 
305
  positive_happy_text = ' '.join(df[(df['Sentiment'] == 'positive') & (df['Emotion'] == 'senang')]['Cleaned Content'].dropna())
306
+ generate_wordcloud(positive_happy_text, font_path, 'gist_rainbow_r', 'Positive Sentiment & Happy Emotion Word Cloud')
307
 
 
308
  negative_angry_sad_text = ' '.join(df[(df['Sentiment'] == 'negative') & (df['Emotion'].isin(['marah', 'sedih']))]['Cleaned Content'].dropna())
309
+ generate_wordcloud(negative_angry_sad_text, font_path, 'inferno', 'Negative Sentiment & Angry or Sad Emotion Word Cloud')
310
 
311
  # Word frequency
312
  word_freq = pd.Series(' '.join(df['Cleaned Content'].dropna()).split()).value_counts()