dhanikitkat committed
Commit 5618f88 · Parent: c390fc9

add assets, update req, update func

Files changed (4)
  1. app.py +189 -128
  2. assets/Poppins-Regular.ttf +0 -0
  3. assets/slanks.txt +0 -0
  4. requirements.txt +6 -1
app.py CHANGED
@@ -1,180 +1,241 @@
- import os
  import streamlit as st
  import pandas as pd
  from transformers import pipeline
  import base64

- # Set to use CPU only
- os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

  # Load pipelines
  sentiment_pipe = pipeline("text-classification", model="ayameRushia/bert-base-indonesian-1.5G-sentiment-analysis-smsa")
  emotion_pipe = pipeline("text-classification", model="azizp128/prediksi-emosi-indobert")


- def direct_sentiment_analysis(text):
-     texts = text.split('\n') # Memisahkan teks berdasarkan baris

-     # Hasil analisis sentiment
-     results = []
-     for text in texts:
-         if text.strip():
-             result = sentiment_pipe(text)[0] # Melakukan analisis sentiment pada setiap teks
-             results.append((text, result['label'].lower(), result['score']))

-     # Ubah ke DataFrame untuk tampilan tabel
-     df = pd.DataFrame(results, columns=['Content', 'Sentiment', 'Score'])
-     return df


- def direct_emotion_analysis(text):
-     texts = text.split('\n') # Memisahkan teks berdasarkan baris
-
-     # Hasil analisis sentiment
      results = []
      for text in texts:
          if text.strip():
-             result = emotion_pipe(text)[0] # Melakukan analisis sentiment pada setiap teks
-             results.append((text, result['label'].lower(), result['score']))
-
-     # Ubah ke DataFrame untuk tampilan tabel
-     df = pd.DataFrame(results, columns=['Content', 'Emotion', 'Score'])
-     return df
-
- def process_file_sentiment(file):
-     if file.name.endswith('.xlsx'):
-         df = pd.read_excel(file) # Baca file XLSX
-     elif file.name.endswith('.csv'):
-         df = pd.read_csv(file) # Baca file CSV
-     else:
-         st.error("Format file tidak didukung. Harap unggah file CSV atau XLSX.")
-         return None

-     # Analisis sentimen dan tambahkan hasil ke DataFrame
-     results = []
-     for index, row in df.iterrows():
-         if pd.notna(row['content']) and isinstance(row['content'], str):
-             sentiment, score = analyze_sentiment(row['content'])
-             results.append((row['content'], sentiment, score))
-         else:
-             results.append((row['content'], None, None)) # Menambahkan nilai None jika kosong

-     df['Sentimen'] = [r[1] for r in results]
-     df['Skor Sentimen'] = [r[2] for r in results]

      return df

- def process_file_emotion(file):
      if file.name.endswith('.xlsx'):
-         df = pd.read_excel(file) # Baca file XLSX
      elif file.name.endswith('.csv'):
-         df = pd.read_csv(file) # Baca file CSV
      else:
          st.error("Format file tidak didukung. Harap unggah file CSV atau XLSX.")
          return None
-
-     # Prediksi emosi dan tambahkan hasil ke DataFrame
      results = []
      for index, row in df.iterrows():
          if pd.notna(row['content']) and isinstance(row['content'], str):
-             emotion, score = emotion_prediction(row['content'])
-             results.append((row['content'], emotion, score))
          else:
-             results.append((row['content'], None, None)) # Menambahkan nilai None jika kosong

-     df['Emosi'] = [r[1] for r in results]
-     df['Skor Emosi'] = [r[2] for r in results]

      return df

  def analyze_sentiment(text):
      result = sentiment_pipe(text)[0]
      return result['label'].lower(), result['score']

- def emotion_prediction(text):
      result = emotion_pipe(text)[0]
      return result['label'].lower(), result['score']

- def get_download_link_sentiment(df):
-     # Generate a link to download the dataframe with Sentimen and Skor Sentimen as CSV
      csv = df.to_csv(index=False)
-     b64 = base64.b64encode(csv.encode()).decode() # Encode CSV to base64
-     href = f'<a href="data:file/csv;base64,{b64}" download="analisis_sentimen.csv">Download CSV</a>'
      return href

- def get_download_link_emotion(df):
-     # Generate a link to download the dataframe with Emosi and Skor Emosi as CSV
-     csv = df.to_csv(index=False)
-     b64 = base64.b64encode(csv.encode()).decode() # Encode CSV to base64
-     href = f'<a href="data:file/csv;base64,{b64}" download="prediksi_emosi.csv">Download CSV</a>'
      return href

  def main():
      st.title("Aplikasi Analisis Sentimen dan Prediksi Emosi by Ramdhani")

-     # Pilihan Program
-     program = st.sidebar.selectbox("Pilih Program", ["Analisis Sentiment", "Prediksi Emosi"])
-
-     if program == "Analisis Sentiment":
-         # Menu untuk analisis sentimen
-         st.header("Analisis Sentiment")
-         menu_sentiment = st.sidebar.selectbox("Pilih Metode", ["Analisis Langsung", "Import dari File"])
-
-         if menu_sentiment == "Analisis Langsung":
-             # Masukan teks untuk analisis sentimen
-             user_input = st.text_area("Masukkan teks yang ingin dianalisis (pisahkan dengan enter):")
-
-             if st.button("Analisis Sentimen"):
-                 df = direct_sentiment_analysis(user_input)
-                 st.write("Hasil Analisis Sentimen:")
-                 st.write(df)
-
-                 # Tambahkan tombol download CSV
-                 st.markdown(get_download_link_sentiment(df), unsafe_allow_html=True)
-
-         elif menu_sentiment == "Import dari File":
-             st.subheader("Import dari File")
-             uploaded_file = st.file_uploader("Upload file CSV atau XLSX", type=["csv", "xlsx"])
-
-             if uploaded_file is not None:
-                 df = process_file_sentiment(uploaded_file)
-
-                 # Tampilkan hasil analisis sentimen
-                 st.write("Hasil Analisis Sentimen:")
-                 st.write(df)
-
-                 # Tambahkan tombol download CSV
-                 st.markdown(get_download_link_sentiment(df), unsafe_allow_html=True)
-
-     elif program == "Prediksi Emosi":
-         # Menu untuk prediksi emosi
-         st.header("Prediksi Emosi")
-         menu_emot = st.sidebar.selectbox("Pilih Metode", ["Prediksi Langsung", "Import dari File"])
-
-         if menu_emot == "Prediksi Langsung":
-             user_input = st.text_area("Masukkan teks yang ingin dianalisis (pisahkan dengan enter):")
-
-             if st.button("Analisis Sentimen"):
-                 df = direct_emotion_analysis(user_input)
-                 st.write("Hasil Analisis Sentimen:")
-                 st.write(df)
-
-                 # Tambahkan tombol download CSV
-                 st.markdown(get_download_link_emotion(df), unsafe_allow_html=True)
-
-         elif menu_emot == "Import dari File":
-             st.subheader("Import dari File")
-             uploaded_file = st.file_uploader("Upload file CSV atau XLSX", type=["csv", "xlsx"])
-
-             if uploaded_file is not None:
-                 df = process_file_emotion(uploaded_file)
-
-                 # Tampilkan hasil prediksi emosi
-                 st.write("Hasil Prediksi Emosi:")
-                 st.write(df)
-
-                 # Tambahkan tombol download CSV
-                 st.markdown(get_download_link_emotion(df), unsafe_allow_html=True)
-
- if __name__ == "__main__":
      main()

  import streamlit as st
  import pandas as pd
  from transformers import pipeline
  import base64
+ import re
+ import nltk
+ from nltk.corpus import stopwords
+ from nltk.tokenize import word_tokenize
+ import plotly.express as px
+ import matplotlib.pyplot as plt
+ from wordcloud import WordCloud
+ import numpy as np
+ from PIL import ImageFont


  # Load pipelines
  sentiment_pipe = pipeline("text-classification", model="ayameRushia/bert-base-indonesian-1.5G-sentiment-analysis-smsa")
  emotion_pipe = pipeline("text-classification", model="azizp128/prediksi-emosi-indobert")

+ def load_slank_formal(file):
+     if file.name.endswith('.txt'):
+         df = pd.read_csv(file, sep=';', header=None, names=['Slank', 'Formal'])
+     else:
+         st.error("Format file tidak didukung. Harap unggah file TXT.")
+         return None
+     df.columns = ['Slank', 'Formal']
+     return df

+ def replace_slank_to_formal(sentence, slank_formal_df):
+     words = re.findall(r'[\w\',./:-]+|[.,]+|[^\x00-\x7F]+', sentence)
+     for i, word in enumerate(words):
+         replacement = slank_formal_df.loc[slank_formal_df['Slank'] == word.lower(), 'Formal'].values
+         if replacement.size > 0:
+             words[i] = str(replacement[0])
+     return ' '.join(words)
+
+ def preprocess_text(text, slank_formal_df):
+     text = text.lower()
+     text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
+     text = re.sub(r'\@\w+|\#', '', text)
+     text = re.sub(r'[^\w\s]', '', text)
+     text = replace_slank_to_formal(text, slank_formal_df)
+     tokens = word_tokenize(text)
+     preprocessed_text = ' '.join(tokens)
+     return preprocessed_text
+
+ def generate_wordcloud(text, font_path, title, colormap):
+     wordcloud = WordCloud(
+         width=800,
+         height=800,
+         background_color='white',
+         font_path='assets/Poppins-Regular.ttf',
+         prefer_horizontal=1.0,
+         colormap=colormap,
+         max_words=100
+     ).generate(text)

+     plt.figure(figsize=(10, 10))
+     plt.title(title, fontsize=20)
+     plt.imshow(wordcloud, interpolation='bilinear')
+     plt.axis('off')
+     st.pyplot(plt)

+     # Save word cloud to file
+     wordcloud.to_file(f"{title}.png")

+     # Add download link for word cloud
+     st.markdown(get_image_download_link(f"{title}.png"), unsafe_allow_html=True)

+ def get_image_download_link(image_path):
+     with open(image_path, "rb") as image_file:
+         b64 = base64.b64encode(image_file.read()).decode()
+     href = f'<a href="data:file/png;base64,{b64}" download="{image_path}">Download {image_path}</a>'
+     return href
+
+
+ def combined_analysis(text, slank_formal_df):
+     texts = text.split('\n')
      results = []
      for text in texts:
          if text.strip():
+             cleaned_text = preprocess_text(text, slank_formal_df)
+             sentiment_result = sentiment_pipe(cleaned_text)[0]
+             emotion_result = emotion_pipe(cleaned_text)[0]
+             results.append((text, cleaned_text, sentiment_result['label'].lower(), sentiment_result['score'], emotion_result['label'].lower(), emotion_result['score']))
+     df = pd.DataFrame(results, columns=['Content', 'Cleaned Content', 'Sentiment', 'Score Sentiment', 'Emotion', 'Score Emotion'])

+     # Sentiment pie chart
+     sentiment_counts = df['Sentiment'].value_counts()
+     fig_sentiment = px.pie(sentiment_counts, values=sentiment_counts.values, names=sentiment_counts.index, title='Sentiment Distribution')
+     st.plotly_chart(fig_sentiment, use_container_width=True)
+
+     # Emotion pie chart
+     emotion_counts = df['Emotion'].value_counts()
+     fig_emotion = px.pie(emotion_counts, values=emotion_counts.values, names=emotion_counts.index, title='Emotion Distribution')
+     st.plotly_chart(fig_emotion, use_container_width=True)
+
+     # Generate word clouds
+     font_path = 'Poppins-Regular.ttf'

+     # Overall word cloud
+     overall_text = ' '.join(df['Cleaned Content'].dropna())
+     generate_wordcloud(overall_text, font_path, 'Overall Word Cloud', 'viridis')

+     # Positive sentiment and happy emotion word cloud
+     positive_happy_text = ' '.join(df[(df['Sentiment'] == 'positive') & (df['Emotion'] == 'senang')]['Cleaned Content'].dropna())
+     generate_wordcloud(positive_happy_text, font_path, 'Positive Sentiment & Happy Emotion Word Cloud', 'Greens')
+
+     # Negative sentiment and angry or sad emotion word cloud
+     negative_angry_sad_text = ' '.join(df[(df['Sentiment'] == 'negative') & (df['Emotion'].isin(['marah', 'sedih']))]['Cleaned Content'].dropna())
+     generate_wordcloud(negative_angry_sad_text, font_path, 'Negative Sentiment & Angry or Sad Emotion Word Cloud', 'Reds')
+
+     # Word frequency
+     word_freq = pd.Series(' '.join(df['Cleaned Content'].dropna()).split()).value_counts()
+     st.write("Word Frequency:")
+     st.write(word_freq)
+
+     # Download link for word frequency
+     word_freq_df = word_freq.reset_index()
+     word_freq_df.columns = ['Word', 'Frequency']
+     st.markdown(get_word_freq_download_link(word_freq_df), unsafe_allow_html=True)
+
      return df

+
+
+ def process_file(file, slank_formal_df):
      if file.name.endswith('.xlsx'):
+         df = pd.read_excel(file)
      elif file.name.endswith('.csv'):
+         df = pd.read_csv(file)
      else:
          st.error("Format file tidak didukung. Harap unggah file CSV atau XLSX.")
          return None
+
      results = []
      for index, row in df.iterrows():
          if pd.notna(row['content']) and isinstance(row['content'], str):
+             cleaned_text = preprocess_text(row['content'], slank_formal_df)
+             sentiment, score_sentiment = analyze_sentiment(cleaned_text)
+             emotion, score_emotion = analyze_emotion(cleaned_text)
+             results.append((row['content'], cleaned_text, sentiment, score_sentiment, emotion, score_emotion))
          else:
+             results.append((row['content'], None, None, None, None, None))
+
+     df['Cleaned Content'] = [r[1] for r in results]
+     df['Sentiment'] = [r[2] for r in results]
+     df['Score Sentiment'] = [r[3] for r in results]
+     df['Emotion'] = [r[4] for r in results]
+     df['Score Emotion'] = [r[5] for r in results]
+
+     # Sentiment pie chart
+     sentiment_counts = df['Sentiment'].value_counts()
+     fig_sentiment = px.pie(sentiment_counts, values=sentiment_counts.values, names=sentiment_counts.index, title='Sentiment Distribution')
+     st.plotly_chart(fig_sentiment, use_container_width=True)
+
+     # Emotion pie chart
+     emotion_counts = df['Emotion'].value_counts()
+     fig_emotion = px.pie(emotion_counts, values=emotion_counts.values, names=emotion_counts.index, title='Emotion Distribution')
+     st.plotly_chart(fig_emotion, use_container_width=True)
+
+     # Generate word clouds
+     font_path = 'Poppins-Regular.ttf'

+     # Overall word cloud
+     overall_text = ' '.join(df['Cleaned Content'].dropna())
+     generate_wordcloud(overall_text, font_path, 'Overall Word Cloud', 'viridis')

+     # Positive sentiment and happy emotion word cloud
+     positive_happy_text = ' '.join(df[(df['Sentiment'] == 'positive') & (df['Emotion'] == 'senang')]['Cleaned Content'].dropna())
+     generate_wordcloud(positive_happy_text, font_path, 'Positive Sentiment & Happy Emotion Word Cloud', 'Greens')
+
+     # Negative sentiment and angry or sad emotion word cloud
+     negative_angry_sad_text = ' '.join(df[(df['Sentiment'] == 'negative') & (df['Emotion'].isin(['marah', 'sedih']))]['Cleaned Content'].dropna())
+     generate_wordcloud(negative_angry_sad_text, font_path, 'Negative Sentiment & Angry or Sad Emotion Word Cloud', 'Reds')
+
+     # Word frequency
+     word_freq = pd.Series(' '.join(df['Cleaned Content'].dropna()).split()).value_counts()
+     st.write("Word Frequency:")
+     st.write(word_freq)
+
+     # Download link for word frequency
+     word_freq_df = word_freq.reset_index()
+     word_freq_df.columns = ['Word', 'Frequency']
+     st.markdown(get_word_freq_download_link(word_freq_df), unsafe_allow_html=True)
+
      return df

+
  def analyze_sentiment(text):
      result = sentiment_pipe(text)[0]
      return result['label'].lower(), result['score']

+ def analyze_emotion(text):
      result = emotion_pipe(text)[0]
      return result['label'].lower(), result['score']

+ def get_download_link(df, filename):
      csv = df.to_csv(index=False)
+     b64 = base64.b64encode(csv.encode()).decode()
+     href = f'<a href="data:file/csv;base64,{b64}" download="{filename}.csv">Download CSV</a>'
      return href

+ def get_word_freq_download_link(word_freq_df):
+     csv = word_freq_df.to_csv(index=True)
+     b64 = base64.b64encode(csv.encode()).decode()
+     href = f'<a href="data:file/csv;base64,{b64}" download="word_frequency.csv">Download Word Frequency CSV</a>'
      return href

  def main():
      st.title("Aplikasi Analisis Sentimen dan Prediksi Emosi by Ramdhani")

+     slank_file = st.file_uploader("Upload file slank (CSV atau TXT)", type=["csv", "txt"])
+     if slank_file is not None:
+         df_slank_formal = load_slank_formal(slank_file)
+         if df_slank_formal is None:
+             st.stop()
+     else:
+         st.warning("Harap upload file slank terlebih dahulu.")
+         st.stop()
+
+     menu = st.sidebar.selectbox("Pilih Metode", ["Analisis Langsung", "Import dari File"])
+
+     if menu == "Analisis Langsung":
+         user_input = st.text_area("Masukkan teks yang ingin dianalisis (pisahkan dengan enter):")
+         if st.button("Analisis"):
+             df = combined_analysis(user_input, df_slank_formal)
+             st.write("Hasil Analisis:")
+             st.write(df)
+             st.markdown(get_download_link(df, "analisis_sentimen_emosi"), unsafe_allow_html=True)
+
+     elif menu == "Import dari File":
+         uploaded_file = st.file_uploader("Upload file CSV atau XLSX", type=["csv", "xlsx"])
+         if uploaded_file is not None:
+             df = process_file(uploaded_file, df_slank_formal)
+             st.write("Hasil Analisis:")
+             st.write(df)
+             st.markdown(get_download_link(df, "analisis_sentimen_emosi"), unsafe_allow_html=True)
+
+ if __name__ == '__main__':
      main()
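
The functional change in app.py is that every input line now passes through preprocess_text (lower-casing, URL/mention/punctuation stripping, slang-to-formal replacement from the uploaded table, then word_tokenize) before reaching the two Hugging Face pipelines. Below is a minimal standalone sketch of that normalization step, using a hypothetical three-row slang table in place of the uploaded slanks.txt and omitting the tokenization call:

import re
import pandas as pd

# Hypothetical slang table; the real app builds this from the uploaded slanks.txt
slank_formal_df = pd.DataFrame(
    {"Slank": ["gak", "bgt", "udah"], "Formal": ["tidak", "banget", "sudah"]}
)

def replace_slank_to_formal(sentence, slank_formal_df):
    # Same lookup as in app.py: match each token against the 'Slank' column
    words = re.findall(r"[\w\',./:-]+|[.,]+|[^\x00-\x7F]+", sentence)
    for i, word in enumerate(words):
        replacement = slank_formal_df.loc[slank_formal_df["Slank"] == word.lower(), "Formal"].values
        if replacement.size > 0:
            words[i] = str(replacement[0])
    return " ".join(words)

text = "filmnya bagus bgt, gak nyesel nonton"         # hypothetical input line
text = re.sub(r"[^\w\s]", "", text.lower())           # strip punctuation, as in preprocess_text
print(replace_slank_to_formal(text, slank_formal_df)) # -> filmnya bagus banget tidak nyesel nonton

Because the lookup is an exact, lower-cased match per token, multi-word slang entries or inflected forms would pass through unchanged.
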
assets/Poppins-Regular.ttf ADDED
Binary file (158 kB).
 
assets/slanks.txt ADDED
The diff for this file is too large to render.
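
The raw contents are not shown here, but load_slank_formal in app.py reads this asset with pd.read_csv(file, sep=';', header=None, names=['Slank', 'Formal']), i.e. one semicolon-separated slang;formal pair per line. A hypothetical excerpt in that format (illustrative entries, not taken from the actual file) parses as follows:

import io
import pandas as pd

sample = "gak;tidak\nbgt;banget\nudah;sudah\n"  # hypothetical slanks.txt lines
df = pd.read_csv(io.StringIO(sample), sep=';', header=None, names=['Slank', 'Formal'])
print(df.to_dict('records'))
# -> [{'Slank': 'gak', 'Formal': 'tidak'}, {'Slank': 'bgt', 'Formal': 'banget'}, {'Slank': 'udah', 'Formal': 'sudah'}]
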
 
requirements.txt CHANGED
@@ -4,4 +4,9 @@ pandas
  tensorflow-cpu
  torch
  tf-keras
- openpyxl

  tensorflow-cpu
  torch
  tf-keras
+ openpyxl
+ nltk
+ plotly
+ matplotlib
+ wordcloud
+ numpy
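
One runtime dependency the updated requirements.txt cannot express: word_tokenize in app.py needs NLTK's Punkt tokenizer data, which pip-installing nltk does not include. A minimal setup sketch, assuming the Space (or a local clone) runs app.py as committed:

# Fetch the tokenizer data used by nltk.tokenize.word_tokenize.
# 'punkt' covers most NLTK versions; newer releases also look up 'punkt_tab'.
import nltk

nltk.download('punkt')
nltk.download('punkt_tab')

With the packages from requirements.txt installed, launching the app is the usual Streamlit flow: pip install -r requirements.txt, then streamlit run app.py.
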