"""Streamlit app for sentiment analysis and emotion prediction.

The user uploads a slang-to-formal dictionary, then either types text directly
(one entry per line) or uploads a CSV/XLSX file with a ``content`` column.
Each entry is normalised, classified by two Hugging Face text-classification
pipelines, and summarised with pie charts, word clouds and a word-frequency
table, all of which can be downloaded.
"""
import base64
import html
import os
import re

import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import plotly.express as px
import streamlit as st
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from PIL import ImageFont
from transformers import pipeline
from wordcloud import WordCloud

# NOTE: np, stopwords and ImageFont are not referenced below; the imports are
# retained so the module's import surface stays unchanged.

# word_tokenize needs the Punkt tokenizer data. Newer NLTK releases look it up
# under 'punkt_tab', older ones under 'punkt', so fetch both.
for _nltk_resource in ('punkt', 'punkt_tab'):
    nltk.download(_nltk_resource)

# Load pipelines
# NOTE(review): these are built at import time, i.e. on every execution of the
# script. Consider wrapping the loaders in Streamlit's resource cache if the
# deployed Streamlit version provides it.
sentiment_pipe = pipeline("text-classification", model="ayameRushia/bert-base-indonesian-1.5G-sentiment-analysis-smsa")
emotion_pipe = pipeline("text-classification", model="azizp128/prediksi-emosi-indobert")

# Tokens considered for slang replacement: word-like runs (including a few
# punctuation characters), runs of '.'/',' and runs of non-ASCII characters.
_SLANG_TOKEN_PATTERN = re.compile(r'[\w\',./:-]+|[.,]+|[^\x00-\x7F]+')

# Columns produced by the analysis, in the order they are reported.
_ANALYSIS_COLUMNS = ['Cleaned Content', 'Sentiment', 'Score Sentiment', 'Emotion', 'Score Emotion']


def load_slank_formal(file):
    """Read the slang dictionary from an uploaded ``.txt`` file.

    The file is parsed as header-less, semicolon-separated text with two
    columns, which are named ``Slank`` and ``Formal``.

    Args:
        file: Uploaded file object exposing a ``name`` attribute.

    Returns:
        A DataFrame with columns ``Slank`` and ``Formal``, or ``None`` (after
        showing an error in the UI) when the file is not a ``.txt`` file.
    """
    if not file.name.lower().endswith('.txt'):
        st.error("Format file tidak didukung. Harap unggah file TXT.")
        return None
    return pd.read_csv(file, sep=';', header=None, names=['Slank', 'Formal'])


def _build_slang_lookup(slank_formal_df):
    """Return a ``{slang: formal}`` dict; the first row wins on duplicates."""
    lookup = {}
    for slang, formal in zip(slank_formal_df['Slank'], slank_formal_df['Formal']):
        lookup.setdefault(slang, formal)
    return lookup


def replace_slank_to_formal(sentence, slank_formal_df, lookup=None):
    """Replace slang tokens in ``sentence`` with their formal equivalents.

    Tokens are matched case-insensitively (the token is lower-cased before the
    lookup) against the ``Slank`` column; unmatched tokens are kept as they
    are. The resulting tokens are joined with single spaces.

    Args:
        sentence: Text to normalise.
        slank_formal_df: DataFrame with ``Slank`` and ``Formal`` columns.
        lookup: Optional pre-built mapping from ``_build_slang_lookup``. Pass
            it when processing many sentences so the dictionary is converted
            only once; when omitted it is built from ``slank_formal_df``.

    Returns:
        The sentence with slang tokens replaced.
    """
    if lookup is None:
        lookup = _build_slang_lookup(slank_formal_df)

    words = _SLANG_TOKEN_PATTERN.findall(sentence)
    for i, word in enumerate(words):
        key = word.lower()
        if key in lookup:
            words[i] = str(lookup[key])
    return ' '.join(words)


def preprocess_text(text, slank_formal_df, lookup=None):
    """Clean one text entry before classification.

    Steps, in order: lower-case, strip URLs, strip ``@mentions`` and ``#``
    characters, strip remaining punctuation, replace slang, tokenize with
    NLTK and re-join the tokens with single spaces.

    Args:
        text: Raw text.
        slank_formal_df: DataFrame with ``Slank`` and ``Formal`` columns.
        lookup: Optional pre-built slang mapping (see
            ``replace_slank_to_formal``).

    Returns:
        The cleaned text.
    """
    text = text.lower()
    text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
    text = re.sub(r'\@\w+|\#', '', text)
    text = re.sub(r'[^\w\s]', '', text)
    text = replace_slank_to_formal(text, slank_formal_df, lookup)
    tokens = word_tokenize(text)
    return ' '.join(tokens)


def generate_wordcloud(text, font_path, title, colormap):
    """Render a word cloud in the app and offer it as a PNG download.

    The image is also written to ``"<title>.png"`` in the working directory,
    which is the file the download link is built from.

    Args:
        text: Space-separated words to plot.
        font_path: Path to a TTF font. If the file does not exist, WordCloud's
            default font is used instead.
        title: Figure title; also used as the PNG file name (without suffix).
        colormap: Matplotlib colormap name.

    Returns:
        None. When ``text`` yields no plottable words, an info message is
        shown and nothing is drawn or written.
    """
    if font_path is not None and not os.path.isfile(font_path):
        font_path = None

    cloud = WordCloud(
        width=600,
        height=600,
        background_color='white',
        font_path=font_path,
        prefer_horizontal=1.0,
        colormap=colormap,
        max_words=100,
    )
    try:
        wordcloud = cloud.generate(text)
    except ValueError:
        # WordCloud refuses to plot when no words survive its own filtering,
        # which is common for the sentiment/emotion subsets.
        st.info(f"Tidak ada kata untuk ditampilkan pada '{title}'.")
        return

    fig, ax = plt.subplots(figsize=(10, 10))
    ax.set_title(title, fontsize=20)
    ax.imshow(wordcloud, interpolation='bilinear')
    ax.axis('off')
    st.pyplot(fig)
    # Close explicitly so figures do not accumulate across script executions.
    plt.close(fig)

    # Save word cloud to file
    image_path = f"{title}.png"
    wordcloud.to_file(image_path)

    # Add download link for word cloud
    st.markdown(get_image_download_link(image_path), unsafe_allow_html=True)


def get_image_download_link(image_path):
    """Return an HTML anchor that downloads the PNG at ``image_path``.

    The file content is embedded in the link as a base64 ``data:`` URI.
    """
    with open(image_path, "rb") as image_file:
        b64 = base64.b64encode(image_file.read()).decode()
    download_name = html.escape(os.path.basename(image_path), quote=True)
    label = html.escape(image_path)
    return f'<a href="data:image/png;base64,{b64}" download="{download_name}">Download {label}</a>'


def _plot_distribution(series, title):
    """Show a pie chart of the value counts of ``series``."""
    counts = series.value_counts()
    fig = px.pie(values=counts.values, names=counts.index, title=title)
    st.plotly_chart(fig, use_container_width=True)


def _render_report(df):
    """Show charts, word clouds and the word-frequency table for ``df``.

    ``df`` must contain the columns ``Cleaned Content``, ``Sentiment`` and
    ``Emotion``. Nothing is drawn when no row has cleaned content.
    """
    cleaned = df['Cleaned Content'].dropna()
    if cleaned.empty:
        st.warning("Tidak ada teks yang dapat dianalisis.")
        return

    _plot_distribution(df['Sentiment'], 'Sentiment Distribution')
    _plot_distribution(df['Emotion'], 'Emotion Distribution')

    # Generate word clouds
    font_path = os.path.join('assets', 'Poppins-Regular.ttf')

    # Overall word cloud
    overall_text = ' '.join(cleaned)
    generate_wordcloud(overall_text, font_path, 'Overall Word Cloud', 'viridis')

    # Positive sentiment and happy emotion word cloud
    # (label names are those expected from the two models — TODO confirm)
    is_positive_happy = (df['Sentiment'] == 'positive') & (df['Emotion'] == 'senang')
    positive_happy_text = ' '.join(df.loc[is_positive_happy, 'Cleaned Content'].dropna())
    generate_wordcloud(positive_happy_text, font_path, 'Positive Sentiment & Happy Emotion Word Cloud', 'Greens')

    # Negative sentiment and angry or sad emotion word cloud
    is_negative_upset = (df['Sentiment'] == 'negative') & (df['Emotion'].isin(['marah', 'sedih']))
    negative_angry_sad_text = ' '.join(df.loc[is_negative_upset, 'Cleaned Content'].dropna())
    generate_wordcloud(negative_angry_sad_text, font_path, 'Negative Sentiment & Angry or Sad Emotion Word Cloud', 'Reds')

    # Word frequency
    word_freq = pd.Series(overall_text.split(), dtype='object').value_counts()
    st.write("Word Frequency:")
    st.write(word_freq)

    # Download link for word frequency
    word_freq_df = word_freq.reset_index()
    word_freq_df.columns = ['Word', 'Frequency']
    st.markdown(get_word_freq_download_link(word_freq_df), unsafe_allow_html=True)


def combined_analysis(text, slank_formal_df):
    """Analyse text typed by the user, one entry per line.

    Blank lines are skipped. Each remaining line is cleaned and classified,
    then the summary report (charts, word clouds, word frequencies) is shown.

    Args:
        text: Newline-separated entries.
        slank_formal_df: DataFrame with ``Slank`` and ``Formal`` columns.

    Returns:
        A DataFrame with columns ``Content``, ``Cleaned Content``,
        ``Sentiment``, ``Score Sentiment``, ``Emotion`` and ``Score Emotion``;
        empty when ``text`` has no non-blank line.
    """
    lookup = _build_slang_lookup(slank_formal_df)
    results = []
    for line in text.split('\n'):
        if not line.strip():
            continue
        cleaned_text = preprocess_text(line, slank_formal_df, lookup)
        sentiment, score_sentiment = analyze_sentiment(cleaned_text)
        emotion, score_emotion = analyze_emotion(cleaned_text)
        results.append((line, cleaned_text, sentiment, score_sentiment, emotion, score_emotion))

    df = pd.DataFrame(results, columns=['Content'] + _ANALYSIS_COLUMNS)
    _render_report(df)
    return df


def process_file(file, slank_formal_df):
    """Analyse the ``content`` column of an uploaded CSV or XLSX file.

    String cells are cleaned and classified; any other cell (missing or
    non-string) gets ``None`` in every analysis column. The analysis columns
    are added to the loaded table and the summary report is shown.

    Args:
        file: Uploaded file object exposing a ``name`` attribute.
        slank_formal_df: DataFrame with ``Slank`` and ``Formal`` columns.

    Returns:
        The loaded DataFrame with the analysis columns added, or ``None``
        (after showing an error in the UI) when the format is unsupported or
        the ``content`` column is missing.
    """
    filename = file.name.lower()
    if filename.endswith('.xlsx'):
        df = pd.read_excel(file)
    elif filename.endswith('.csv'):
        df = pd.read_csv(file)
    else:
        st.error("Format file tidak didukung. Harap unggah file CSV atau XLSX.")
        return None

    if 'content' not in df.columns:
        st.error("Kolom 'content' tidak ditemukan pada file yang diunggah.")
        return None

    lookup = _build_slang_lookup(slank_formal_df)
    results = []
    for content in df['content']:
        if isinstance(content, str):
            cleaned_text = preprocess_text(content, slank_formal_df, lookup)
            sentiment, score_sentiment = analyze_sentiment(cleaned_text)
            emotion, score_emotion = analyze_emotion(cleaned_text)
            results.append((cleaned_text, sentiment, score_sentiment, emotion, score_emotion))
        else:
            results.append((None, None, None, None, None))

    for position, column in enumerate(_ANALYSIS_COLUMNS):
        df[column] = [row[position] for row in results]

    _render_report(df)
    return df


def _classify(pipe, text):
    """Run ``pipe`` on ``text`` and return ``(lower-cased label, score)``."""
    # truncation=True keeps over-long inputs within the model's maximum
    # sequence length instead of failing inside the model.
    result = pipe(text, truncation=True)[0]
    return result['label'].lower(), result['score']


def analyze_sentiment(text):
    """Return ``(label, score)`` from the sentiment pipeline for ``text``."""
    return _classify(sentiment_pipe, text)


def analyze_emotion(text):
    """Return ``(label, score)`` from the emotion pipeline for ``text``."""
    return _classify(emotion_pipe, text)


def get_download_link(df, filename):
    """Return an HTML anchor that downloads ``df`` as ``<filename>.csv``.

    The CSV (without the index) is embedded as a base64 ``data:`` URI.
    """
    csv_text = df.to_csv(index=False)
    b64 = base64.b64encode(csv_text.encode()).decode()
    download_name = html.escape(f"{filename}.csv", quote=True)
    return f'<a href="data:text/csv;base64,{b64}" download="{download_name}">Download CSV</a>'


def get_word_freq_download_link(word_freq_df):
    """Return an HTML anchor that downloads the word-frequency table as CSV.

    The positional index is not exported, only the table's columns.
    """
    csv_text = word_freq_df.to_csv(index=False)
    b64 = base64.b64encode(csv_text.encode()).decode()
    return f'<a href="data:text/csv;base64,{b64}" download="word_frequency.csv">Download Word Frequency CSV</a>'


def _show_results(df):
    """Display the result table and its CSV download link."""
    st.write("Hasil Analisis:")
    st.write(df)
    st.markdown(get_download_link(df, "analisis_sentimen_emosi"), unsafe_allow_html=True)


def main():
    """Build the Streamlit page and dispatch to the selected analysis mode."""
    st.title("Aplikasi Analisis Sentimen dan Prediksi Emosi")

    # Only TXT is offered because load_slank_formal accepts nothing else.
    slank_file = st.file_uploader("Upload file slank (TXT)", type=["txt"])
    if slank_file is None:
        st.warning("Harap upload file slank terlebih dahulu.")
        st.stop()

    df_slank_formal = load_slank_formal(slank_file)
    if df_slank_formal is None:
        st.stop()

    menu = st.sidebar.selectbox("Pilih Metode", ["Analisis Langsung", "Import dari File"])

    if menu == "Analisis Langsung":
        user_input = st.text_area("Masukkan teks yang ingin dianalisis (pisahkan dengan enter):")
        if st.button("Analisis"):
            if not user_input.strip():
                st.warning("Harap masukkan teks terlebih dahulu.")
            else:
                _show_results(combined_analysis(user_input, df_slank_formal))
    elif menu == "Import dari File":
        uploaded_file = st.file_uploader("Upload file CSV atau XLSX", type=["csv", "xlsx"])
        if uploaded_file is not None:
            df = process_file(uploaded_file, df_slank_formal)
            # process_file already reported the problem when it returns None.
            if df is not None:
                _show_results(df)


if __name__ == '__main__':
    main()