import base64
import html
import os
import re

import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import plotly.express as px
import streamlit as st
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from PIL import ImageFont
from transformers import pipeline
from wordcloud import WordCloud
# Tokenizer data used by word_tokenize. Newer NLTK releases look up the
# 'punkt_tab' resource instead of the pickled 'punkt' models, so request both;
# a resource that is already present is not downloaded again.
for _tokenizer_resource in ('punkt', 'punkt_tab'):
    nltk.download(_tokenizer_resource)

# Load pipelines
# NOTE(review): these run at import time; if Streamlit re-executes the script
# on each interaction the models are rebuilt every time — consider wrapping
# the loads in st.cache_resource once the minimum Streamlit version is confirmed.
sentiment_pipe = pipeline("text-classification", model="ayameRushia/bert-base-indonesian-1.5G-sentiment-analysis-smsa")
emotion_pipe = pipeline("text-classification", model="azizp128/prediksi-emosi-indobert")
def load_slank_formal(file):
    """Load the slang-to-formal dictionary from an uploaded text file.

    The file is parsed as header-less, semicolon-separated text whose two
    columns are the slang word and its formal replacement.

    Args:
        file: Uploaded file object exposing a ``name`` attribute and readable
            by ``pandas.read_csv``.

    Returns:
        A DataFrame with the columns ``Slank`` and ``Formal``, or ``None``
        (after reporting the problem with ``st.error``) when the file name
        does not end in ``.txt``.
    """
    # Compare the extension case-insensitively so "KAMUS.TXT" is accepted too.
    if not file.name.lower().endswith('.txt'):
        st.error("Format file tidak didukung. Harap unggah file TXT.")
        return None

    # `names=` already labels both columns, so no second assignment is needed.
    return pd.read_csv(file, sep=';', header=None, names=['Slank', 'Formal'])
def replace_slank_to_formal(sentence, slank_formal_df):
    """Replace slang tokens in ``sentence`` with their formal equivalents.

    Args:
        sentence: Text to normalise.
        slank_formal_df: DataFrame with a ``Slank`` column (looked up with the
            lower-cased token) and a ``Formal`` column (the replacement).

    Returns:
        The tokens of ``sentence`` joined by single spaces, with every token
        whose lower-cased form appears in ``Slank`` replaced by the matching
        ``Formal`` value converted to ``str``.
    """
    # Build the lookup once instead of scanning the whole frame for every
    # token. setdefault keeps the first row for a duplicated slang word, which
    # is the row a positional "first match" lookup would pick.
    formal_by_slank = {}
    for slank, formal in zip(slank_formal_df['Slank'], slank_formal_df['Formal']):
        formal_by_slank.setdefault(slank, formal)

    tokens = re.findall(r'[\w\',./:-]+|[.,]+|[^\x00-\x7F]+', sentence)
    normalised = []
    for token in tokens:
        key = token.lower()
        if key in formal_by_slank:
            normalised.append(str(formal_by_slank[key]))
        else:
            normalised.append(token)
    return ' '.join(normalised)
def preprocess_text(text, slank_formal_df):
    """Normalise raw text before it is passed to the classifiers.

    The text is lower-cased, stripped of URLs, @mentions, ``#`` signs and any
    remaining punctuation, run through the slang dictionary, and finally
    re-tokenised so that tokens are separated by single spaces.

    Args:
        text: Raw input string.
        slank_formal_df: Slang dictionary forwarded to
            ``replace_slank_to_formal``.

    Returns:
        The cleaned text as a single space-joined string.
    """
    # (pattern, flags) pairs applied in order; each match is deleted.
    removals = (
        (r'http\S+|www\S+|https\S+', re.MULTILINE),  # URLs
        (r'\@\w+|\#', 0),                            # mentions and hash signs
        (r'[^\w\s]', 0),                             # leftover punctuation
    )
    cleaned = text.lower()
    for pattern, flags in removals:
        cleaned = re.sub(pattern, '', cleaned, flags=flags)

    formalised = replace_slank_to_formal(cleaned, slank_formal_df)
    return ' '.join(word_tokenize(formalised))
def generate_wordcloud(text, font_path, title, colormap):
    """Render a word cloud on the page and offer it as a PNG download.

    Args:
        text: Space-separated words to visualise.
        font_path: Path of the font file handed to ``WordCloud``.
        title: Figure title; also used as the stem of the saved
            ``"<title>.png"`` file.
        colormap: Matplotlib colormap name handed to ``WordCloud``.

    Returns:
        None. When ``text`` contains no usable words an informational message
        is shown and nothing is drawn or saved.
    """
    try:
        wordcloud = WordCloud(
            width=600,
            height=600,
            background_color='white',
            font_path=font_path,
            prefer_horizontal=1.0,
            colormap=colormap,
            max_words=100,
        ).generate(text)
    except ValueError:
        # WordCloud raises ValueError when the text yields no words, which
        # happens whenever a filtered subset (e.g. "positive & happy") is empty.
        st.info(f"Tidak ada kata untuk ditampilkan pada '{title}'.")
        return

    # Draw on an explicit figure so it can be handed to Streamlit and then
    # released, instead of accumulating open pyplot figures on every call.
    fig, ax = plt.subplots(figsize=(10, 10))
    ax.set_title(title, fontsize=20)
    ax.imshow(wordcloud, interpolation='bilinear')
    ax.axis('off')
    st.pyplot(fig)
    plt.close(fig)

    # Save word cloud to file
    image_path = f"{title}.png"
    wordcloud.to_file(image_path)
    # Add download link for word cloud
    st.markdown(get_image_download_link(image_path), unsafe_allow_html=True)
def get_image_download_link(image_path):
    """Build an HTML download link that embeds the image at ``image_path``.

    Args:
        image_path: Path of the image file to embed.

    Returns:
        An ``<a>`` element whose ``href`` is a base64 ``data:`` URI holding
        the file's bytes and whose ``download`` attribute is the file's base
        name. The link text is ``"Download <image_path>"``.

    Raises:
        OSError: If the file cannot be opened for reading.
    """
    with open(image_path, "rb") as image_file:
        b64 = base64.b64encode(image_file.read()).decode()

    # Escape the pieces that end up in markup; word-cloud titles contain "&".
    file_name = html.escape(os.path.basename(image_path), quote=True)
    label = html.escape(f'Download {image_path}')
    # The callers in this file only save PNG files, hence the fixed MIME type.
    return f'<a href="data:image/png;base64,{b64}" download="{file_name}">{label}</a>'
def _render_analysis_report(df):
    """Render charts, word clouds and the word-frequency table for ``df``.

    Shared by ``combined_analysis`` and ``process_file``. ``df`` must carry
    the ``Cleaned Content``, ``Sentiment`` and ``Emotion`` columns. When no
    row has cleaned text a warning is shown and nothing else is rendered.
    """
    cleaned = df['Cleaned Content'].dropna()
    if cleaned.empty:
        # Nothing was analysed, so there is nothing to chart or count.
        st.warning("Tidak ada teks yang dapat dianalisis.")
        return

    # Sentiment pie chart
    sentiment_counts = df['Sentiment'].value_counts()
    fig_sentiment = px.pie(sentiment_counts, values=sentiment_counts.values, names=sentiment_counts.index, title='Sentiment Distribution')
    st.plotly_chart(fig_sentiment, use_container_width=True)

    # Emotion pie chart
    emotion_counts = df['Emotion'].value_counts()
    fig_emotion = px.pie(emotion_counts, values=emotion_counts.values, names=emotion_counts.index, title='Emotion Distribution')
    st.plotly_chart(fig_emotion, use_container_width=True)

    # Generate word clouds
    font_path = os.path.join('assets', 'Poppins-Regular.ttf')

    # Overall word cloud
    overall_text = ' '.join(cleaned)
    generate_wordcloud(overall_text, font_path, 'Overall Word Cloud', 'viridis')

    # Positive sentiment and happy emotion word cloud
    positive_happy = (df['Sentiment'] == 'positive') & (df['Emotion'] == 'senang')
    positive_happy_text = ' '.join(df[positive_happy]['Cleaned Content'].dropna())
    generate_wordcloud(positive_happy_text, font_path, 'Positive Sentiment & Happy Emotion Word Cloud', 'Greens')

    # Negative sentiment and angry or sad emotion word cloud
    negative_angry_sad = (df['Sentiment'] == 'negative') & (df['Emotion'].isin(['marah', 'sedih']))
    negative_angry_sad_text = ' '.join(df[negative_angry_sad]['Cleaned Content'].dropna())
    generate_wordcloud(negative_angry_sad_text, font_path, 'Negative Sentiment & Angry or Sad Emotion Word Cloud', 'Reds')

    # Word frequency
    word_freq = pd.Series(overall_text.split()).value_counts()
    st.write("Word Frequency:")
    st.write(word_freq)

    # Download link for word frequency
    word_freq_df = word_freq.reset_index()
    word_freq_df.columns = ['Word', 'Frequency']
    st.markdown(get_word_freq_download_link(word_freq_df), unsafe_allow_html=True)


def combined_analysis(text, slank_formal_df):
    """Analyse free text, one entry per line, and render the report.

    Args:
        text: Input in which each non-blank line is analysed separately.
        slank_formal_df: Slang dictionary forwarded to ``preprocess_text``.

    Returns:
        A DataFrame with the columns ``Content``, ``Cleaned Content``,
        ``Sentiment``, ``Score Sentiment``, ``Emotion`` and ``Score Emotion``;
        it has no rows when ``text`` contains no non-blank line.
    """
    results = []
    # A separate loop name keeps the `text` parameter from being overwritten.
    for line in text.split('\n'):
        if not line.strip():
            continue
        cleaned_text = preprocess_text(line, slank_formal_df)
        sentiment, score_sentiment = analyze_sentiment(cleaned_text)
        emotion, score_emotion = analyze_emotion(cleaned_text)
        results.append((line, cleaned_text, sentiment, score_sentiment, emotion, score_emotion))

    df = pd.DataFrame(results, columns=['Content', 'Cleaned Content', 'Sentiment', 'Score Sentiment', 'Emotion', 'Score Emotion'])
    _render_analysis_report(df)
    return df


def process_file(file, slank_formal_df):
    """Analyse the ``content`` column of an uploaded CSV/XLSX file.

    Args:
        file: Uploaded file object exposing ``name`` and readable by pandas.
        slank_formal_df: Slang dictionary forwarded to ``preprocess_text``.

    Returns:
        The loaded DataFrame extended with ``Cleaned Content``, ``Sentiment``,
        ``Score Sentiment``, ``Emotion`` and ``Score Emotion`` (``None`` in
        those columns for rows whose ``content`` is not a string), or ``None``
        after an ``st.error`` message when the format is unsupported or the
        ``content`` column is missing.
    """
    # Case-insensitive so "DATA.CSV" / "Data.XLSX" are accepted as well.
    file_name = file.name.lower()
    if file_name.endswith('.xlsx'):
        df = pd.read_excel(file)
    elif file_name.endswith('.csv'):
        df = pd.read_csv(file)
    else:
        st.error("Format file tidak didukung. Harap unggah file CSV atau XLSX.")
        return None

    if 'content' not in df.columns:
        # Report the problem instead of failing with a KeyError below.
        st.error("Kolom 'content' tidak ditemukan pada file.")
        return None

    result_columns = ['Cleaned Content', 'Sentiment', 'Score Sentiment', 'Emotion', 'Score Emotion']
    results = []
    for content in df['content']:
        # Missing cells (NaN/None) and non-text values are left un-analysed.
        if isinstance(content, str):
            cleaned_text = preprocess_text(content, slank_formal_df)
            sentiment, score_sentiment = analyze_sentiment(cleaned_text)
            emotion, score_emotion = analyze_emotion(cleaned_text)
            results.append((cleaned_text, sentiment, score_sentiment, emotion, score_emotion))
        else:
            results.append((None, None, None, None, None))

    for position, column in enumerate(result_columns):
        df[column] = [row[position] for row in results]

    _render_analysis_report(df)
    return df
def analyze_sentiment(text):
    """Classify the sentiment of ``text`` with ``sentiment_pipe``.

    Args:
        text: Pre-processed input string.

    Returns:
        A ``(label, score)`` tuple taken from the pipeline's first result,
        with the label lower-cased.
    """
    # Truncate over-long inputs rather than letting the model fail on them.
    # NOTE(review): 512 is presumably the limit of this BERT-base checkpoint — confirm.
    result = sentiment_pipe(text, truncation=True, max_length=512)[0]
    return result['label'].lower(), result['score']
def analyze_emotion(text):
    """Classify the emotion of ``text`` with ``emotion_pipe``.

    Args:
        text: Pre-processed input string.

    Returns:
        A ``(label, score)`` tuple taken from the pipeline's first result,
        with the label lower-cased.
    """
    # Truncate over-long inputs rather than letting the model fail on them.
    # NOTE(review): 512 is presumably the limit of this IndoBERT checkpoint — confirm.
    result = emotion_pipe(text, truncation=True, max_length=512)[0]
    return result['label'].lower(), result['score']
def get_download_link(df, filename):
    """Build an HTML download link that embeds ``df`` as a CSV file.

    Args:
        df: DataFrame to export (written without its index).
        filename: Name offered to the browser; ``.csv`` is appended when the
            name does not already end with it.

    Returns:
        An ``<a>`` element whose ``href`` is a base64 ``data:`` URI holding
        the CSV text and whose link text is ``"Download CSV"``.
    """
    csv = df.to_csv(index=False)
    b64 = base64.b64encode(csv.encode()).decode()
    download_name = filename if filename.lower().endswith('.csv') else f'{filename}.csv'
    safe_name = html.escape(download_name, quote=True)
    return f'<a href="data:text/csv;base64,{b64}" download="{safe_name}">Download CSV</a>'
def get_word_freq_download_link(word_freq_df):
    """Build an HTML download link that embeds the word-frequency table as CSV.

    Args:
        word_freq_df: DataFrame holding the word-frequency table.

    Returns:
        An ``<a>`` element whose ``href`` is a base64 ``data:`` URI holding
        the CSV text, offered to the browser as ``word_frequency.csv``, with
        the link text ``"Download Word Frequency CSV"``.
    """
    # index=False: the data lives in regular columns, so writing the index
    # would only add an unnamed row-number column to the export.
    csv = word_freq_df.to_csv(index=False)
    b64 = base64.b64encode(csv.encode()).decode()
    return f'<a href="data:text/csv;base64,{b64}" download="word_frequency.csv">Download Word Frequency CSV</a>'
def main():
    """Run the Streamlit page: load the slang dictionary, then analyse input.

    A slang dictionary must be uploaded first. The sidebar then selects
    between analysing text typed into the page and analysing an uploaded
    CSV/XLSX file; both paths show the result table and a CSV download link.
    """
    st.title("Aplikasi Analisis Sentimen dan Prediksi Emosi")

    # load_slank_formal only accepts .txt files, so the uploader offers
    # exactly that instead of advertising CSV and then rejecting it.
    slank_file = st.file_uploader("Upload file slank (TXT)", type=["txt"])
    if slank_file is None:
        st.warning("Harap upload file slank terlebih dahulu.")
        st.stop()

    df_slank_formal = load_slank_formal(slank_file)
    if df_slank_formal is None:
        st.stop()

    menu = st.sidebar.selectbox("Pilih Metode", ["Analisis Langsung", "Import dari File"])

    if menu == "Analisis Langsung":
        user_input = st.text_area("Masukkan teks yang ingin dianalisis (pisahkan dengan enter):")
        if st.button("Analisis"):
            df = combined_analysis(user_input, df_slank_formal)
            st.write("Hasil Analisis:")
            st.write(df)
            st.markdown(get_download_link(df, "analisis_sentimen_emosi"), unsafe_allow_html=True)
    elif menu == "Import dari File":
        uploaded_file = st.file_uploader("Upload file CSV atau XLSX", type=["csv", "xlsx"])
        if uploaded_file is not None:
            df = process_file(uploaded_file, df_slank_formal)
            if df is None:
                # process_file has already reported the problem; there is no
                # table to show and nothing to build a download link from.
                return
            st.write("Hasil Analisis:")
            st.write(df)
            st.markdown(get_download_link(df, "analisis_sentimen_emosi"), unsafe_allow_html=True)


if __name__ == '__main__':
    main()