Commit e3d3834
Parent(s): dee7862
Update Word Cloud & Some buttons
app.py
CHANGED
@@ -15,7 +15,7 @@ import os
 
 
 nltk.download('punkt')
-
+nltk.download('stopwords')
 
 # Load pipelines
 sentiment_pipe = pipeline("text-classification", model="ayameRushia/bert-base-indonesian-1.5G-sentiment-analysis-smsa")
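
The extra download matters for the new generate_wordcloud below, which filters Indonesian stopwords. A minimal sketch of what the two downloads make available (assuming NLTK is installed and has network access on first run):

import nltk
from nltk.corpus import stopwords

# Both corpora are fetched once and cached under ~/nltk_data.
nltk.download('punkt', quiet=True)
nltk.download('stopwords', quiet=True)

# The Indonesian stopword list used later by generate_wordcloud().
indo_stopwords = set(stopwords.words('indonesian'))
print(f"{len(indo_stopwords)} Indonesian stopwords loaded")
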
@@ -48,25 +48,40 @@ def preprocess_text(text, slank_formal_df):
     preprocessed_text = ' '.join(tokens)
     return preprocessed_text
 
-def generate_wordcloud(text, font_path,
+def generate_wordcloud(text, font_path, colormap, title):
+    # Create a circular mask for Full HD resolution
+    x, y = np.ogrid[:1400, :1400]  # Adjusted for 1400x1400 resolution
+    mask = (x - 700) ** 2 + (y - 700) ** 2 > 630 ** 2  # Adjusted mask size for 1400x1400 resolution
+    mask = 255 * mask.astype(int)
+
+    # Remove Indonesian stopwords
+    indo_stopwords = set(stopwords.words('indonesian'))
+    words = text.split()
+    words = [word for word in words if word.lower() not in indo_stopwords]
+    text = ' '.join(words)
+
     wordcloud = WordCloud(
-        width=
-        height=
+        width=1400,
+        height=1400,
         background_color='white',
         font_path=font_path,
         prefer_horizontal=1.0,
         colormap=colormap,
-        max_words=100
+        max_words=100,
+        mask=mask
     ).generate(text)
 
-
-    plt.
+    # Configure plot settings for high-quality output
+    plt.figure(figsize=(14, 14))  # Adjusted figure size for 1400x1400 resolution
     plt.imshow(wordcloud, interpolation='bilinear')
     plt.axis('off')
-
-
-    # Save word cloud to file
-
+    plt.title(title, fontsize=20, pad=20)  # Title directly in matplotlib plot
+
+    # Save word cloud to file with high DPI for better quality
+    plt.savefig(f"{title}.png", dpi=300, bbox_inches='tight', pad_inches=0.1)
+
+    # Display word cloud in Streamlit
+    st.image(f"{title}.png", use_column_width=True)
 
     # Add download link for word cloud
     st.markdown(get_image_download_link(f"{title}.png"), unsafe_allow_html=True)
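
The rewritten generate_wordcloud builds a circular mask and renders at 1400x1400 with a 300-DPI export. A standalone sketch of the same masking technique, runnable outside Streamlit (the sample text and output filename are placeholders, and font_path is omitted so the default font is used):

import numpy as np
import matplotlib.pyplot as plt
from wordcloud import WordCloud

sample_text = "aplikasi bagus sekali mudah dipakai cepat responsif bagus"

# Pixels where the mask equals 255 are masked out, so words are drawn only inside the circle.
size, radius = 1400, 630
x, y = np.ogrid[:size, :size]
mask = 255 * ((x - size // 2) ** 2 + (y - size // 2) ** 2 > radius ** 2).astype(int)

wc = WordCloud(background_color='white', prefer_horizontal=1.0,
               colormap='hsv_r', max_words=100, mask=mask).generate(sample_text)

plt.figure(figsize=(14, 14))
plt.imshow(wc, interpolation='bilinear')
plt.axis('off')
plt.title('Sample Word Cloud', fontsize=20, pad=20)
plt.savefig('sample_wordcloud.png', dpi=300, bbox_inches='tight', pad_inches=0.1)
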
@@ -102,7 +117,6 @@ def get_example_download_link(file_path, link_text):
         b64 = base64.b64encode(file.read()).decode()
     return f'<a href="data:file/txt;base64,{b64}" download="{os.path.basename(file_path)}">{link_text}</a>'
 
-
 def combined_analysis(text, slank_formal_df):
     texts = text.split('\n')
     results = []
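
For context, get_example_download_link (and the similar get_image_download_link used above) embeds the file directly in the page as a base64 data URI. A small self-contained sketch of that pattern; the helper name, file name, and contents below are made up for illustration:

import base64
import os

def make_download_link(file_path, link_text):
    # Read the file, base64-encode it, and embed it in an <a download> tag.
    with open(file_path, 'rb') as file:
        b64 = base64.b64encode(file.read()).decode()
    return f'<a href="data:file/txt;base64,{b64}" download="{os.path.basename(file_path)}">{link_text}</a>'

with open('example.txt', 'w') as f:
    f.write('contoh ulasan\n')

# In app.py the resulting HTML is rendered via st.markdown(..., unsafe_allow_html=True).
print(make_download_link('example.txt', 'Download example'))
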
@@ -114,30 +128,76 @@ def combined_analysis(text, slank_formal_df):
         results.append((text, cleaned_text, sentiment_result['label'].lower(), sentiment_result['score'], emotion_result['label'].lower(), emotion_result['score']))
     df = pd.DataFrame(results, columns=['Content', 'Cleaned Content', 'Sentiment', 'Score Sentiment', 'Emotion', 'Score Emotion'])
 
+    # Define custom CSS to adjust the height
+    st.markdown(
+        """
+        <style>
+        .chart-container {
+            display: flex;
+            justify-content: center;
+        }
+        .user-select-none.svg-container {
+            height: 350px !important;
+        }
+        .average-score {
+            text-align: center;
+        }
+        </style>
+        """,
+        unsafe_allow_html=True
+    )
+
     # Sentiment pie chart
     sentiment_counts = df['Sentiment'].value_counts()
-    fig_sentiment = px.pie(sentiment_counts, values=sentiment_counts.values, names=sentiment_counts.index, title='Sentiment Distribution')
+    fig_sentiment = px.pie(sentiment_counts, values=sentiment_counts.values, names=sentiment_counts.index, title='Sentiment Distribution', width=400, height=400)
+
+    # Calculate sentiment average
+    sentiment_average = df['Score Sentiment'].mean()
+
+    # Add average sentiment score as an annotation
+    fig_sentiment.add_annotation(
+        text=f"Average Sentiment Score: {sentiment_average:.4f}",
+        xref="paper", yref="paper",
+        x=0.5, y=-0.2,
+        showarrow=False,
+        font=dict(size=18)
+    )
+
+    st.markdown('<div class="chart-container">', unsafe_allow_html=True)
     st.plotly_chart(fig_sentiment, use_container_width=True)
+    st.markdown('</div>', unsafe_allow_html=True)
 
     # Emotion pie chart
    emotion_counts = df['Emotion'].value_counts()
-    fig_emotion = px.pie(emotion_counts, values=emotion_counts.values, names=emotion_counts.index, title='Emotion Distribution')
-
+    fig_emotion = px.pie(emotion_counts, values=emotion_counts.values, names=emotion_counts.index, title='Emotion Distribution', width=400, height=400)
+
+    # Calculate emotion average
+    emotion_average = df['Score Emotion'].mean()
 
+    # Add average emotion score as an annotation
+    fig_emotion.add_annotation(
+        text=f"Average Emotion Score: {emotion_average:.4f}",
+        xref="paper", yref="paper",
+        x=0.5, y=-0.2,
+        showarrow=False,
+        font=dict(size=18)
+    )
+
+    st.markdown('<div class="chart-container">', unsafe_allow_html=True)
+    st.plotly_chart(fig_emotion, use_container_width=True)
+    st.markdown('</div>', unsafe_allow_html=True)
     # Generate word clouds
     font_path = os.path.join('assets', 'Poppins-Regular.ttf')
 
-    #
+    # Ensure `df` is your DataFrame and 'Cleaned Content', 'Sentiment', and 'Emotion' columns exist
     overall_text = ' '.join(df['Cleaned Content'].dropna())
-    generate_wordcloud(overall_text, font_path, 'Overall Word Cloud'
-
-    # Positive sentiment and happy emotion word cloud
+    generate_wordcloud(overall_text, font_path, 'hsv_r', 'Overall Word Cloud')
+
     positive_happy_text = ' '.join(df[(df['Sentiment'] == 'positive') & (df['Emotion'] == 'senang')]['Cleaned Content'].dropna())
-    generate_wordcloud(positive_happy_text, font_path, 'Positive Sentiment & Happy Emotion Word Cloud'
+    generate_wordcloud(positive_happy_text, font_path, 'gist_rainbow_r', 'Positive Sentiment & Happy Emotion Word Cloud')
 
-    # Negative sentiment and angry or sad emotion word cloud
     negative_angry_sad_text = ' '.join(df[(df['Sentiment'] == 'negative') & (df['Emotion'].isin(['marah', 'sedih']))]['Cleaned Content'].dropna())
-    generate_wordcloud(negative_angry_sad_text, font_path, 'Negative Sentiment & Angry or Sad Emotion Word Cloud'
+    generate_wordcloud(negative_angry_sad_text, font_path, 'inferno', 'Negative Sentiment & Angry or Sad Emotion Word Cloud')
 
     # Word frequency
     word_freq = pd.Series(' '.join(df['Cleaned Content'].dropna()).split()).value_counts()
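
This hunk replaces the plain pie charts with fixed-size charts that carry the mean score as a figure annotation. A minimal sketch of that annotation technique, with made-up counts and a made-up average (the real code derives both from df):

import pandas as pd
import plotly.express as px

sentiment_counts = pd.Series({'positive': 12, 'negative': 5, 'neutral': 3})
sentiment_average = 0.8731  # placeholder for df['Score Sentiment'].mean()

fig = px.pie(values=sentiment_counts.values, names=sentiment_counts.index,
             title='Sentiment Distribution', width=400, height=400)

# "paper" coordinates are relative to the whole figure, so x=0.5, y=-0.2
# centres the text just below the pie regardless of the data.
fig.add_annotation(text=f"Average Sentiment Score: {sentiment_average:.4f}",
                   xref="paper", yref="paper", x=0.5, y=-0.2,
                   showarrow=False, font=dict(size=18))
fig.show()
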
@@ -176,30 +236,77 @@ def process_file(file, slank_formal_df):
     df['Emotion'] = [r[4] for r in results]
     df['Score Emotion'] = [r[5] for r in results]
 
+    # Define custom CSS to adjust the height
+    st.markdown(
+        """
+        <style>
+        .chart-container {
+            display: flex;
+            justify-content: center;
+        }
+        .user-select-none.svg-container {
+            height: 350px !important;
+        }
+        .average-score {
+            text-align: center;
+        }
+        </style>
+        """,
+        unsafe_allow_html=True
+    )
+
     # Sentiment pie chart
     sentiment_counts = df['Sentiment'].value_counts()
-    fig_sentiment = px.pie(sentiment_counts, values=sentiment_counts.values, names=sentiment_counts.index, title='Sentiment Distribution')
+    fig_sentiment = px.pie(sentiment_counts, values=sentiment_counts.values, names=sentiment_counts.index, title='Sentiment Distribution', width=400, height=400)
+
+    # Calculate sentiment average
+    sentiment_average = df['Score Sentiment'].mean()
+
+    # Add average sentiment score as an annotation
+    fig_sentiment.add_annotation(
+        text=f"Average Sentiment Score: {sentiment_average:.4f}",
+        xref="paper", yref="paper",
+        x=0.5, y=-0.2,
+        showarrow=False,
+        font=dict(size=18)
+    )
+
+    st.markdown('<div class="chart-container">', unsafe_allow_html=True)
     st.plotly_chart(fig_sentiment, use_container_width=True)
+    st.markdown('</div>', unsafe_allow_html=True)
 
     # Emotion pie chart
     emotion_counts = df['Emotion'].value_counts()
-    fig_emotion = px.pie(emotion_counts, values=emotion_counts.values, names=emotion_counts.index, title='Emotion Distribution')
+    fig_emotion = px.pie(emotion_counts, values=emotion_counts.values, names=emotion_counts.index, title='Emotion Distribution', width=400, height=400)
+
+    # Calculate emotion average
+    emotion_average = df['Score Emotion'].mean()
+
+    # Add average emotion score as an annotation
+    fig_emotion.add_annotation(
+        text=f"Average Emotion Score: {emotion_average:.4f}",
+        xref="paper", yref="paper",
+        x=0.5, y=-0.2,
+        showarrow=False,
+        font=dict(size=18)
+    )
+
+    st.markdown('<div class="chart-container">', unsafe_allow_html=True)
     st.plotly_chart(fig_emotion, use_container_width=True)
+    st.markdown('</div>', unsafe_allow_html=True)
 
     # Generate word clouds
     font_path = os.path.join('assets', 'Poppins-Regular.ttf')
 
-    #
+    # Ensure `df` is your DataFrame and 'Cleaned Content', 'Sentiment', and 'Emotion' columns exist
     overall_text = ' '.join(df['Cleaned Content'].dropna())
-    generate_wordcloud(overall_text, font_path, 'Overall Word Cloud'
-
-    # Positive sentiment and happy emotion word cloud
+    generate_wordcloud(overall_text, font_path, 'hsv_r', 'Overall Word Cloud')
+
     positive_happy_text = ' '.join(df[(df['Sentiment'] == 'positive') & (df['Emotion'] == 'senang')]['Cleaned Content'].dropna())
-    generate_wordcloud(positive_happy_text, font_path, 'Positive Sentiment & Happy Emotion Word Cloud'
+    generate_wordcloud(positive_happy_text, font_path, 'gist_rainbow_r', 'Positive Sentiment & Happy Emotion Word Cloud')
 
-    # Negative sentiment and angry or sad emotion word cloud
     negative_angry_sad_text = ' '.join(df[(df['Sentiment'] == 'negative') & (df['Emotion'].isin(['marah', 'sedih']))]['Cleaned Content'].dropna())
-    generate_wordcloud(negative_angry_sad_text, font_path, 'Negative Sentiment & Angry or Sad Emotion Word Cloud'
+    generate_wordcloud(negative_angry_sad_text, font_path, 'inferno', 'Negative Sentiment & Angry or Sad Emotion Word Cloud')
 
     # Word frequency
     word_freq = pd.Series(' '.join(df['Cleaned Content'].dropna()).split()).value_counts()
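
Both combined_analysis and process_file now inject the same <style> block and wrap each chart call in a chart-container div. A condensed sketch of that pattern as a Streamlit script (assuming it is run with streamlit run; whether the injected CSS actually reaches Plotly's container depends on Streamlit's rendered DOM, so treat the selectors as app-specific):

import plotly.express as px
import streamlit as st

# One-time CSS injection, as in the commit.
st.markdown(
    """
    <style>
    .chart-container { display: flex; justify-content: center; }
    .user-select-none.svg-container { height: 350px !important; }
    </style>
    """,
    unsafe_allow_html=True,
)

fig = px.pie(values=[3, 1], names=['positive', 'negative'],
             title='Sentiment Distribution', width=400, height=400)

# Open/close div pair around the chart call, mirroring the commit.
st.markdown('<div class="chart-container">', unsafe_allow_html=True)
st.plotly_chart(fig, use_container_width=True)
st.markdown('</div>', unsafe_allow_html=True)
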