Spaces:
Sleeping
Sleeping
Commit
·
5618f88
1
Parent(s):
c390fc9
add assets, update req, update func
Browse files- app.py +189 -128
- assets/Poppins-Regular.ttf +0 -0
- assets/slanks.txt +0 -0
- requirements.txt +6 -1
app.py
CHANGED
@@ -1,180 +1,241 @@
|
|
1 |
-
import os
|
2 |
import streamlit as st
|
3 |
import pandas as pd
|
4 |
from transformers import pipeline
|
5 |
import base64
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
-
# Set to use CPU only
|
8 |
-
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
|
9 |
|
10 |
# Load pipelines
|
11 |
sentiment_pipe = pipeline("text-classification", model="ayameRushia/bert-base-indonesian-1.5G-sentiment-analysis-smsa")
|
12 |
emotion_pipe = pipeline("text-classification", model="azizp128/prediksi-emosi-indobert")
|
13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
-
def
|
16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
results.append((text, result['label'].lower(), result['score']))
|
24 |
|
25 |
-
#
|
26 |
-
|
27 |
-
return df
|
28 |
|
|
|
|
|
29 |
|
30 |
-
def
|
31 |
-
|
32 |
-
|
33 |
-
|
|
|
|
|
|
|
|
|
|
|
34 |
results = []
|
35 |
for text in texts:
|
36 |
if text.strip():
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
df = pd.DataFrame(results, columns=['Content', 'Emotion', 'Score'])
|
42 |
-
return df
|
43 |
-
|
44 |
-
def process_file_sentiment(file):
|
45 |
-
if file.name.endswith('.xlsx'):
|
46 |
-
df = pd.read_excel(file) # Baca file XLSX
|
47 |
-
elif file.name.endswith('.csv'):
|
48 |
-
df = pd.read_csv(file) # Baca file CSV
|
49 |
-
else:
|
50 |
-
st.error("Format file tidak didukung. Harap unggah file CSV atau XLSX.")
|
51 |
-
return None
|
52 |
|
53 |
-
#
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
|
|
|
|
|
|
|
|
61 |
|
62 |
-
|
63 |
-
df['
|
|
|
64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
return df
|
66 |
|
67 |
-
|
|
|
|
|
68 |
if file.name.endswith('.xlsx'):
|
69 |
-
df = pd.read_excel(file)
|
70 |
elif file.name.endswith('.csv'):
|
71 |
-
df = pd.read_csv(file)
|
72 |
else:
|
73 |
st.error("Format file tidak didukung. Harap unggah file CSV atau XLSX.")
|
74 |
return None
|
75 |
-
|
76 |
-
# Prediksi emosi dan tambahkan hasil ke DataFrame
|
77 |
results = []
|
78 |
for index, row in df.iterrows():
|
79 |
if pd.notna(row['content']) and isinstance(row['content'], str):
|
80 |
-
|
81 |
-
|
|
|
|
|
82 |
else:
|
83 |
-
results.append((row['content'], None, None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
|
85 |
-
|
86 |
-
df['
|
|
|
87 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
return df
|
89 |
|
|
|
90 |
def analyze_sentiment(text):
|
91 |
result = sentiment_pipe(text)[0]
|
92 |
return result['label'].lower(), result['score']
|
93 |
|
94 |
-
def
|
95 |
result = emotion_pipe(text)[0]
|
96 |
return result['label'].lower(), result['score']
|
97 |
|
98 |
-
def
|
99 |
-
# Generate a link to download the dataframe with Sentimen and Skor Sentimen as CSV
|
100 |
csv = df.to_csv(index=False)
|
101 |
-
b64 = base64.b64encode(csv.encode()).decode()
|
102 |
-
href = f'<a href="data:file/csv;base64,{b64}" download="
|
103 |
return href
|
104 |
|
105 |
-
def
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
href = f'<a href="data:file/csv;base64,{b64}" download="prediksi_emosi.csv">Download CSV</a>'
|
110 |
return href
|
111 |
|
112 |
def main():
|
113 |
st.title("Aplikasi Analisis Sentimen dan Prediksi Emosi by Ramdhani")
|
114 |
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
st.write("Hasil Analisis Sentimen:")
|
144 |
-
st.write(df)
|
145 |
-
|
146 |
-
# Tambahkan tombol download CSV
|
147 |
-
st.markdown(get_download_link_sentiment(df), unsafe_allow_html=True)
|
148 |
-
|
149 |
-
elif program == "Prediksi Emosi":
|
150 |
-
# Menu untuk prediksi emosi
|
151 |
-
st.header("Prediksi Emosi")
|
152 |
-
menu_emot = st.sidebar.selectbox("Pilih Metode", ["Prediksi Langsung", "Import dari File"])
|
153 |
-
|
154 |
-
if menu_emot == "Prediksi Langsung":
|
155 |
-
user_input = st.text_area("Masukkan teks yang ingin dianalisis (pisahkan dengan enter):")
|
156 |
-
|
157 |
-
if st.button("Analisis Sentimen"):
|
158 |
-
df = direct_emotion_analysis(user_input)
|
159 |
-
st.write("Hasil Analisis Sentimen:")
|
160 |
-
st.write(df)
|
161 |
-
|
162 |
-
# Tambahkan tombol download CSV
|
163 |
-
st.markdown(get_download_link_emotion(df), unsafe_allow_html=True)
|
164 |
-
|
165 |
-
elif menu_emot == "Import dari File":
|
166 |
-
st.subheader("Import dari File")
|
167 |
-
uploaded_file = st.file_uploader("Upload file CSV atau XLSX", type=["csv", "xlsx"])
|
168 |
-
|
169 |
-
if uploaded_file is not None:
|
170 |
-
df = process_file_emotion(uploaded_file)
|
171 |
-
|
172 |
-
# Tampilkan hasil prediksi emosi
|
173 |
-
st.write("Hasil Prediksi Emosi:")
|
174 |
-
st.write(df)
|
175 |
-
|
176 |
-
# Tambahkan tombol download CSV
|
177 |
-
st.markdown(get_download_link_emotion(df), unsafe_allow_html=True)
|
178 |
-
|
179 |
-
if __name__ == "__main__":
|
180 |
main()
|
|
|
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
3 |
from transformers import pipeline
|
4 |
import base64
|
5 |
+
import re
|
6 |
+
import nltk
|
7 |
+
from nltk.corpus import stopwords
|
8 |
+
from nltk.tokenize import word_tokenize
|
9 |
+
import plotly.express as px
|
10 |
+
import matplotlib.pyplot as plt
|
11 |
+
from wordcloud import WordCloud
|
12 |
+
import numpy as np
|
13 |
+
from PIL import ImageFont
|
14 |
|
|
|
|
|
15 |
|
16 |
# Load the HuggingFace text-classification pipelines once at module import.
# Both models are Indonesian-language classifiers; loading happens at app
# startup, before any Streamlit UI is rendered.
sentiment_pipe = pipeline("text-classification", model="ayameRushia/bert-base-indonesian-1.5G-sentiment-analysis-smsa")
emotion_pipe = pipeline("text-classification", model="azizp128/prediksi-emosi-indobert")
19 |
|
20 |
+
def load_slank_formal(file):
    """Load a slang->formal word mapping from an uploaded ';'-separated TXT file.

    Parameters
    ----------
    file : uploaded-file object with a ``.name`` attribute (e.g. from
        ``st.file_uploader``) that pandas can read from.

    Returns
    -------
    pandas.DataFrame with columns ``['Slank', 'Formal']``, or ``None`` (after
    showing a Streamlit error) when the file is not a ``.txt``.
    """
    # Guard clause: reject anything that is not a .txt upload.
    if not file.name.endswith('.txt'):
        st.error("Format file tidak didukung. Harap unggah file TXT.")
        return None
    # `names=` already labels the columns, so no separate rename is needed.
    return pd.read_csv(file, sep=';', header=None, names=['Slank', 'Formal'])
28 |
|
29 |
+
def replace_slank_to_formal(sentence, slank_formal_df):
    """Replace slang tokens in *sentence* with their formal equivalents.

    Tokenization keeps word-like runs (including apostrophes and ``./:-``),
    punctuation runs, and non-ASCII runs (e.g. emoji) as separate tokens.
    Matching is case-insensitive on the input side; for duplicate slang
    entries the first DataFrame row wins (same as the original per-word scan).
    Returns the tokens re-joined with single spaces.
    """
    # Build the lookup table once instead of scanning the DataFrame for every
    # word: O(1) per token instead of O(rows).
    first_rows = slank_formal_df.drop_duplicates(subset='Slank', keep='first')
    mapping = dict(zip(first_rows['Slank'], first_rows['Formal']))
    words = re.findall(r'[\w\',./:-]+|[.,]+|[^\x00-\x7F]+', sentence)
    for i, word in enumerate(words):
        replacement = mapping.get(word.lower())
        if replacement is not None:
            words[i] = str(replacement)
    return ' '.join(words)
36 |
+
|
37 |
+
def preprocess_text(text, slank_formal_df):
    """Normalize raw text before classification.

    Lowercases, strips URLs, @mentions and '#' markers, removes remaining
    punctuation, rewrites slang into formal words, then re-joins the NLTK
    word tokens with single spaces.
    """
    lowered = text.lower()
    without_urls = re.sub(r'http\S+|www\S+|https\S+', '', lowered, flags=re.MULTILINE)
    without_handles = re.sub(r'\@\w+|\#', '', without_urls)
    without_punct = re.sub(r'[^\w\s]', '', without_handles)
    formalized = replace_slank_to_formal(without_punct, slank_formal_df)
    return ' '.join(word_tokenize(formalized))
46 |
+
|
47 |
+
def generate_wordcloud(text, font_path, title, colormap):
    """Render a word cloud for *text* in the Streamlit page and offer it as a download.

    Parameters
    ----------
    text : str
        Space-separated words to visualize.
    font_path : str
        NOTE(review): currently ignored — the bundled asset font is always
        used; confirm intent before wiring this parameter through.
    title : str
        Figure title; also used as the saved PNG filename.
    colormap : str
        Matplotlib colormap name for the word colors.
    """
    wordcloud = WordCloud(
        width=800,
        height=800,
        background_color='white',
        # Forward slash instead of a backslash literal: portable across
        # platforms and avoids the invalid "\P" escape in a non-raw string.
        font_path='assets/Poppins-Regular.ttf',
        prefer_horizontal=1.0,
        colormap=colormap,
        max_words=100
    ).generate(text)

    plt.figure(figsize=(10, 10))
    plt.title(title, fontsize=20)
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    st.pyplot(plt)
    # Close the figure so repeated calls don't accumulate open matplotlib figures.
    plt.close()

    # Save word cloud to file so it can be offered for download.
    wordcloud.to_file(f"{title}.png")

    # Add download link for the saved PNG.
    st.markdown(get_image_download_link(f"{title}.png"), unsafe_allow_html=True)
69 |
|
70 |
+
def get_image_download_link(image_path):
    """Return an HTML <a> tag that downloads the image at *image_path*.

    The file bytes are embedded inline as base64, so the link works without
    any server-side file route.
    """
    with open(image_path, "rb") as image_file:
        b64 = base64.b64encode(image_file.read()).decode()
    # "image/png" is the registered MIME type; the previous "file/png" was invalid.
    href = f'<a href="data:image/png;base64,{b64}" download="{image_path}">Download {image_path}</a>'
    return href
75 |
+
|
76 |
+
|
77 |
+
def combined_analysis(text, slank_formal_df):
    """Run sentiment and emotion analysis over newline-separated input text.

    Side effects: renders pie charts, three word clouds and a word-frequency
    table into the Streamlit page. Returns a DataFrame with one row per
    non-blank input line.
    """
    texts = text.split('\n')
    results = []
    # Renamed the loop variable (originally it shadowed the `text` parameter,
    # which is not used again after the split).
    for line in texts:
        if line.strip():
            cleaned_text = preprocess_text(line, slank_formal_df)
            sentiment_result = sentiment_pipe(cleaned_text)[0]
            emotion_result = emotion_pipe(cleaned_text)[0]
            results.append((line, cleaned_text, sentiment_result['label'].lower(), sentiment_result['score'], emotion_result['label'].lower(), emotion_result['score']))
    df = pd.DataFrame(results, columns=['Content', 'Cleaned Content', 'Sentiment', 'Score Sentiment', 'Emotion', 'Score Emotion'])

    # Sentiment pie chart
    sentiment_counts = df['Sentiment'].value_counts()
    fig_sentiment = px.pie(sentiment_counts, values=sentiment_counts.values, names=sentiment_counts.index, title='Sentiment Distribution')
    st.plotly_chart(fig_sentiment, use_container_width=True)

    # Emotion pie chart
    emotion_counts = df['Emotion'].value_counts()
    fig_emotion = px.pie(emotion_counts, values=emotion_counts.values, names=emotion_counts.index, title='Emotion Distribution')
    st.plotly_chart(fig_emotion, use_container_width=True)

    # Generate word clouds (NOTE(review): generate_wordcloud currently ignores
    # this font_path and uses its own bundled asset font).
    font_path = 'Poppins-Regular.ttf'

    # Overall word cloud
    overall_text = ' '.join(df['Cleaned Content'].dropna())
    generate_wordcloud(overall_text, font_path, 'Overall Word Cloud', 'viridis')

    # Positive sentiment and happy ("senang") emotion word cloud
    positive_happy_text = ' '.join(df[(df['Sentiment'] == 'positive') & (df['Emotion'] == 'senang')]['Cleaned Content'].dropna())
    generate_wordcloud(positive_happy_text, font_path, 'Positive Sentiment & Happy Emotion Word Cloud', 'Greens')

    # Negative sentiment and angry ("marah") or sad ("sedih") emotion word cloud
    negative_angry_sad_text = ' '.join(df[(df['Sentiment'] == 'negative') & (df['Emotion'].isin(['marah', 'sedih']))]['Cleaned Content'].dropna())
    generate_wordcloud(negative_angry_sad_text, font_path, 'Negative Sentiment & Angry or Sad Emotion Word Cloud', 'Reds')

    # Word frequency over all cleaned text
    word_freq = pd.Series(' '.join(df['Cleaned Content'].dropna()).split()).value_counts()
    st.write("Word Frequency:")
    st.write(word_freq)

    # Download link for the word-frequency table
    word_freq_df = word_freq.reset_index()
    word_freq_df.columns = ['Word', 'Frequency']
    st.markdown(get_word_freq_download_link(word_freq_df), unsafe_allow_html=True)

    return df
124 |
|
125 |
+
|
126 |
+
|
127 |
+
def process_file(file, slank_formal_df):
    """Analyze the 'content' column of an uploaded CSV/XLSX file.

    Adds cleaned-text, sentiment and emotion columns to the uploaded table,
    renders the same charts as ``combined_analysis`` into the Streamlit page,
    and returns the augmented DataFrame (``None`` on an unsupported format).
    """
    if file.name.endswith('.xlsx'):
        df = pd.read_excel(file)
    elif file.name.endswith('.csv'):
        df = pd.read_csv(file)
    else:
        st.error("Format file tidak didukung. Harap unggah file CSV atau XLSX.")
        return None

    results = []
    for index, row in df.iterrows():
        # Only analyze non-empty string cells; other rows get empty result columns.
        if pd.notna(row['content']) and isinstance(row['content'], str):
            cleaned_text = preprocess_text(row['content'], slank_formal_df)
            sentiment, score_sentiment = analyze_sentiment(cleaned_text)
            emotion, score_emotion = analyze_emotion(cleaned_text)
            results.append((row['content'], cleaned_text, sentiment, score_sentiment, emotion, score_emotion))
        else:
            results.append((row['content'], None, None, None, None, None))

    df['Cleaned Content'] = [r[1] for r in results]
    df['Sentiment'] = [r[2] for r in results]
    df['Score Sentiment'] = [r[3] for r in results]
    df['Emotion'] = [r[4] for r in results]
    df['Score Emotion'] = [r[5] for r in results]

    # Sentiment pie chart
    sentiment_counts = df['Sentiment'].value_counts()
    fig_sentiment = px.pie(sentiment_counts, values=sentiment_counts.values, names=sentiment_counts.index, title='Sentiment Distribution')
    st.plotly_chart(fig_sentiment, use_container_width=True)

    # Emotion pie chart
    emotion_counts = df['Emotion'].value_counts()
    fig_emotion = px.pie(emotion_counts, values=emotion_counts.values, names=emotion_counts.index, title='Emotion Distribution')
    st.plotly_chart(fig_emotion, use_container_width=True)

    # Generate word clouds (NOTE(review): generate_wordcloud currently ignores
    # this font_path and uses its own bundled asset font).
    font_path = 'Poppins-Regular.ttf'

    # Overall word cloud
    overall_text = ' '.join(df['Cleaned Content'].dropna())
    generate_wordcloud(overall_text, font_path, 'Overall Word Cloud', 'viridis')

    # Positive sentiment and happy ("senang") emotion word cloud
    positive_happy_text = ' '.join(df[(df['Sentiment'] == 'positive') & (df['Emotion'] == 'senang')]['Cleaned Content'].dropna())
    generate_wordcloud(positive_happy_text, font_path, 'Positive Sentiment & Happy Emotion Word Cloud', 'Greens')

    # Negative sentiment and angry ("marah") or sad ("sedih") emotion word cloud
    # (fixed typo: 'Angry or stSad' -> 'Angry or Sad', matching combined_analysis)
    negative_angry_sad_text = ' '.join(df[(df['Sentiment'] == 'negative') & (df['Emotion'].isin(['marah', 'sedih']))]['Cleaned Content'].dropna())
    generate_wordcloud(negative_angry_sad_text, font_path, 'Negative Sentiment & Angry or Sad Emotion Word Cloud', 'Reds')

    # Word frequency over all cleaned text
    word_freq = pd.Series(' '.join(df['Cleaned Content'].dropna()).split()).value_counts()
    st.write("Word Frequency:")
    st.write(word_freq)

    # Download link for the word-frequency table
    word_freq_df = word_freq.reset_index()
    word_freq_df.columns = ['Word', 'Frequency']
    st.markdown(get_word_freq_download_link(word_freq_df), unsafe_allow_html=True)

    return df
188 |
|
189 |
+
|
190 |
def analyze_sentiment(text):
    """Classify *text* with the Indonesian sentiment model.

    Returns a ``(label, score)`` tuple with the label lowercased.
    """
    prediction = sentiment_pipe(text)[0]
    label = prediction['label'].lower()
    return label, prediction['score']
193 |
|
194 |
+
def analyze_emotion(text):
    """Classify *text* with the Indonesian emotion model.

    Returns a ``(label, score)`` tuple with the label lowercased.
    """
    prediction = emotion_pipe(text)[0]
    label = prediction['label'].lower()
    return label, prediction['score']
197 |
|
198 |
+
def get_download_link(df, filename):
    """Return an HTML <a> link that downloads *df* as ``<filename>.csv``.

    The CSV payload is embedded inline as base64, so no server-side file is
    created.
    """
    # Renamed the local so it no longer shadows the stdlib `csv` module name.
    csv_data = df.to_csv(index=False)
    b64 = base64.b64encode(csv_data.encode()).decode()
    # Bug fix: the `filename` parameter was ignored and the download name was
    # hard-coded; use the caller-supplied name instead.
    href = f'<a href="data:file/csv;base64,{b64}" download="{filename}.csv">Download CSV</a>'
    return href
203 |
|
204 |
+
def get_word_freq_download_link(word_freq_df):
    """Return an HTML <a> link that downloads the word-frequency table as CSV.

    The CSV (index included) is embedded inline as base64.
    """
    encoded = base64.b64encode(word_freq_df.to_csv(index=True).encode()).decode()
    return f'<a href="data:file/csv;base64,{encoded}" download="word_frequency.csv">Download Word Frequency CSV</a>'
209 |
|
210 |
def main():
    """Streamlit entry point: gate on the slang-dictionary upload, then run
    either direct-text or file-based analysis."""
    st.title("Aplikasi Analisis Sentimen dan Prediksi Emosi by Ramdhani")

    # The slang dictionary is required by every analysis path, so the app
    # stops rendering here until a usable file has been uploaded.
    slank_file = st.file_uploader("Upload file slank (CSV atau TXT)", type=["csv", "txt"])
    if slank_file is not None:
        df_slank_formal = load_slank_formal(slank_file)
        if df_slank_formal is None:
            # Unsupported format: load_slank_formal already showed the error.
            st.stop()
    else:
        st.warning("Harap upload file slank terlebih dahulu.")
        st.stop()

    menu = st.sidebar.selectbox("Pilih Metode", ["Analisis Langsung", "Import dari File"])

    if menu == "Analisis Langsung":
        # One input line per text to analyze.
        user_input = st.text_area("Masukkan teks yang ingin dianalisis (pisahkan dengan enter):")
        if st.button("Analisis"):
            df = combined_analysis(user_input, df_slank_formal)
            st.write("Hasil Analisis:")
            st.write(df)
            st.markdown(get_download_link(df, "analisis_sentimen_emosi"), unsafe_allow_html=True)

    elif menu == "Import dari File":
        # Batch mode: the uploaded table must have a 'content' column.
        uploaded_file = st.file_uploader("Upload file CSV atau XLSX", type=["csv", "xlsx"])
        if uploaded_file is not None:
            df = process_file(uploaded_file, df_slank_formal)
            st.write("Hasil Analisis:")
            st.write(df)
            st.markdown(get_download_link(df, "analisis_sentimen_emosi"), unsafe_allow_html=True)
239 |
+
|
240 |
+
# Script entry point (run via `streamlit run app.py`).
if __name__ == '__main__':
    main()
assets/Poppins-Regular.ttf
ADDED
Binary file (158 kB). View file
|
|
assets/slanks.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
requirements.txt
CHANGED
@@ -4,4 +4,9 @@ pandas
|
|
4 |
tensorflow-cpu
|
5 |
torch
|
6 |
tf-keras
|
7 |
-
openpyxl
|
|
|
|
|
|
|
|
|
|
|
|
4 |
tensorflow-cpu
|
5 |
torch
|
6 |
tf-keras
|
7 |
+
openpyxl
|
8 |
+
nltk
|
9 |
+
plotly
|
10 |
+
matplotlib
|
11 |
+
wordcloud
|
12 |
+
numpy
|