Spaces:

dhanikitkat
/

sentiment_emotion

Sleeping

App Files Files Community

sentiment_emotion / app.py

dhanikitkat

update os

9790e56 about 1 year ago

raw

history blame

10 kB

	import streamlit as st
	import pandas as pd
	from transformers import pipeline
	import base64
	import re
	import nltk
	from nltk.corpus import stopwords
	from nltk.tokenize import word_tokenize
	import plotly.express as px
	import matplotlib.pyplot as plt
	from wordcloud import WordCloud
	import numpy as np
	from PIL import ImageFont
	import os


	# Tokenizer data required by nltk.tokenize.word_tokenize().
	# Newer NLTK releases (3.9+) look up the 'punkt_tab' resource instead of the
	# pickled 'punkt' models, so both are fetched to keep tokenization working
	# whichever NLTK version is installed.
	for _resource in ('punkt', 'punkt_tab'):
	    nltk.download(_resource)


	# Load pipelines
	# Both are transformers text-classification pipelines, created once at import
	# time and used by the analysis functions below.
	sentiment_pipe = pipeline("text-classification", model="ayameRushia/bert-base-indonesian-1.5G-sentiment-analysis-smsa")
	emotion_pipe = pipeline("text-classification", model="azizp128/prediksi-emosi-indobert")

	def load_slank_formal(file):
	    """Load the slang-to-formal dictionary from an uploaded ``.txt`` file.

	    The file is read as header-less, semicolon-separated text whose two
	    fields are the slang word and its formal replacement.

	    Args:
	        file: Uploaded file object exposing a ``name`` attribute and
	            readable by ``pandas.read_csv``.

	    Returns:
	        A DataFrame with the string columns ``'Slank'`` and ``'Formal'``,
	        or ``None`` (after showing an error in the UI) when the file name
	        does not end in ``.txt``.
	    """
	    # Compare case-insensitively so "KAMUS.TXT" is accepted as well.
	    if not file.name.lower().endswith('.txt'):
	        st.error("Format file tidak didukung. Harap unggah file TXT.")
	        return None

	    # dtype=str / keep_default_na=False keep every entry as literal text;
	    # otherwise slang such as "NA" or "null" would be parsed as NaN and
	    # could never match a word.
	    return pd.read_csv(
	        file,
	        sep=';',
	        header=None,
	        names=['Slank', 'Formal'],
	        dtype=str,
	        keep_default_na=False,
	    )

	def replace_slank_to_formal(sentence, slank_formal_df):
	    """Replace slang words in *sentence* with their formal equivalents.

	    Args:
	        sentence: Text to normalise.
	        slank_formal_df: DataFrame with a ``'Slank'`` column (slang word)
	            and a ``'Formal'`` column (replacement).

	    Returns:
	        The tokens of *sentence* joined by single spaces, where every token
	        whose lower-cased form appears in ``'Slank'`` is replaced by the
	        matching ``'Formal'`` value. Tokens without a match are kept as-is.
	    """
	    # The alternation must use a bare "|"; an escaped "\|" matches a literal
	    # pipe character, which made the pattern match nothing in normal text.
	    words = re.findall(r'[\w\',./:-]+|[.,]+|[^\x00-\x7F]+', sentence)

	    # Build the lookup once per call instead of scanning the DataFrame for
	    # every word. setdefault keeps the first entry for a duplicated slang
	    # word, and rows without a replacement value are skipped.
	    lookup = {}
	    for slang, formal in zip(slank_formal_df['Slank'], slank_formal_df['Formal']):
	        if pd.notna(formal):
	            lookup.setdefault(slang, str(formal))

	    return ' '.join(lookup.get(word.lower(), word) for word in words)

	def preprocess_text(text, slank_formal_df):
	    """Normalise raw text before it is passed to the classifiers.

	    Steps, in order: lower-case, drop URLs, drop @mentions and the ``#``
	    sign, drop remaining punctuation, replace slang words, then tokenise
	    and re-join with single spaces.

	    Args:
	        text: Raw input string.
	        slank_formal_df: Slang dictionary passed on to
	            ``replace_slank_to_formal``.

	    Returns:
	        The cleaned text as a single space-separated string.
	    """
	    text = text.lower()
	    # Alternatives are separated by a bare "|"; the previous "\|" matched a
	    # literal pipe, so URLs, mentions and hashtags were never removed.
	    # "http\S+" already covers "https...".
	    text = re.sub(r'http\S+|www\S+', '', text)
	    text = re.sub(r'@\w+|#', '', text)
	    text = re.sub(r'[^\w\s]', '', text)
	    text = replace_slank_to_formal(text, slank_formal_df)
	    tokens = word_tokenize(text)
	    return ' '.join(tokens)

	def generate_wordcloud(text, font_path, title, colormap):
	    """Render a word cloud in the Streamlit page and offer it for download.

	    The cloud is drawn with matplotlib, shown with ``st.pyplot``, saved as
	    ``"<title>.png"`` in the current working directory and linked through
	    ``get_image_download_link``.

	    Args:
	        text: Space-separated words to visualise.
	        font_path: Path of the TrueType font used to draw the words.
	        title: Figure title; also used as the PNG file name stem.
	        colormap: Matplotlib colormap name used for the words.

	    Returns:
	        None. When no word cloud can be built from *text*, a warning is
	        shown and nothing else is rendered.
	    """
	    cloud = WordCloud(
	        width=600,
	        height=600,
	        background_color='white',
	        font_path=font_path,
	        prefer_horizontal=1.0,
	        colormap=colormap,
	        max_words=100,
	    )
	    try:
	        wordcloud = cloud.generate(text)
	    except ValueError:
	        # WordCloud raises ValueError when the text yields no words (for
	        # example an empty sentiment/emotion subset); report it instead of
	        # crashing the whole page.
	        st.warning(f"Tidak ada kata untuk ditampilkan pada {title}.")
	        return

	    # Use an explicit figure and close it afterwards so figures do not
	    # accumulate in memory across Streamlit reruns.
	    fig, ax = plt.subplots(figsize=(10, 10))
	    ax.set_title(title, fontsize=20)
	    ax.imshow(wordcloud, interpolation='bilinear')
	    ax.axis('off')
	    st.pyplot(fig)
	    plt.close(fig)

	    # Save word cloud to file
	    image_path = f"{title}.png"
	    wordcloud.to_file(image_path)

	    # Add download link for word cloud
	    st.markdown(get_image_download_link(image_path), unsafe_allow_html=True)

	def get_image_download_link(image_path):
	    """Build an HTML download link that embeds a PNG file as a data URI.

	    Args:
	        image_path: Path of the PNG file to embed.

	    Returns:
	        An ``<a>`` element (as a string) whose ``href`` is the
	        base64-encoded file content and whose ``download`` attribute and
	        link text use the file's base name.
	    """
	    import html

	    with open(image_path, "rb") as image_file:
	        b64 = base64.b64encode(image_file.read()).decode()

	    # The name ends up inside an HTML attribute and in the link text, so it
	    # must be escaped (the word-cloud titles contain "&").
	    file_name = html.escape(os.path.basename(image_path))
	    # "image/png" is the registered media type for PNG; "file/png" is not.
	    return f'<a href="data:image/png;base64,{b64}" download="{file_name}">Download {file_name}</a>'


	def _render_analysis_report(df):
	    """Render pie charts, word clouds and the word-frequency table for *df*.

	    *df* must contain the columns ``'Sentiment'``, ``'Emotion'`` and
	    ``'Cleaned Content'``. Shared by ``combined_analysis`` and
	    ``process_file`` so both entry points produce the same report.
	    """
	    # Sentiment and emotion pie charts
	    for column, chart_title in (
	        ('Sentiment', 'Sentiment Distribution'),
	        ('Emotion', 'Emotion Distribution'),
	    ):
	        counts = df[column].value_counts()
	        fig = px.pie(counts, values=counts.values, names=counts.index, title=chart_title)
	        st.plotly_chart(fig, use_container_width=True)

	    # Generate word clouds
	    font_path = os.path.join('assets', 'Poppins-Regular.ttf')
	    positive_happy = (df['Sentiment'] == 'positive') & (df['Emotion'] == 'senang')
	    negative_angry_sad = (df['Sentiment'] == 'negative') & df['Emotion'].isin(['marah', 'sedih'])
	    clouds = (
	        (df, 'Overall Word Cloud', 'viridis'),
	        (df[positive_happy], 'Positive Sentiment & Happy Emotion Word Cloud', 'Greens'),
	        (df[negative_angry_sad], 'Negative Sentiment & Angry or Sad Emotion Word Cloud', 'Reds'),
	    )
	    for subset, title, colormap in clouds:
	        cloud_text = ' '.join(subset['Cleaned Content'].dropna())
	        if cloud_text.strip():
	            generate_wordcloud(cloud_text, font_path, title, colormap)
	        else:
	            # A subset without any text cannot be drawn as a word cloud.
	            st.info(f"{title}: tidak ada data.")

	    # Word frequency
	    all_words = ' '.join(df['Cleaned Content'].dropna()).split()
	    word_freq = pd.Series(all_words, dtype=object).value_counts()
	    st.write("Word Frequency:")
	    st.write(word_freq)

	    # Download link for word frequency
	    word_freq_df = word_freq.reset_index()
	    word_freq_df.columns = ['Word', 'Frequency']
	    st.markdown(get_word_freq_download_link(word_freq_df), unsafe_allow_html=True)


	def combined_analysis(text, slank_formal_df):
	    """Analyse free text line by line and render the report.

	    Args:
	        text: Input in which every non-blank line is one document.
	        slank_formal_df: Slang dictionary used during preprocessing.

	    Returns:
	        A DataFrame with the columns ``'Content'``, ``'Cleaned Content'``,
	        ``'Sentiment'``, ``'Score Sentiment'``, ``'Emotion'`` and
	        ``'Score Emotion'``. It is empty (and no report is rendered) when
	        *text* contains no non-blank line.
	    """
	    rows = []
	    for line in text.split('\n'):
	        if not line.strip():
	            continue
	        cleaned_text = preprocess_text(line, slank_formal_df)
	        sentiment, score_sentiment = analyze_sentiment(cleaned_text)
	        emotion, score_emotion = analyze_emotion(cleaned_text)
	        rows.append((line, cleaned_text, sentiment, score_sentiment, emotion, score_emotion))

	    df = pd.DataFrame(
	        rows,
	        columns=['Content', 'Cleaned Content', 'Sentiment', 'Score Sentiment', 'Emotion', 'Score Emotion'],
	    )
	    if df.empty:
	        st.warning("Tidak ada teks untuk dianalisis.")
	        return df

	    _render_analysis_report(df)
	    return df



	def process_file(file, slank_formal_df):
	    """Analyse the ``content`` column of an uploaded CSV/XLSX file.

	    Args:
	        file: Uploaded file object exposing ``name``; ``.xlsx`` is read
	            with ``pandas.read_excel`` and ``.csv`` with ``pandas.read_csv``.
	        slank_formal_df: Slang dictionary used during preprocessing.

	    Returns:
	        The loaded DataFrame extended with ``'Cleaned Content'``,
	        ``'Sentiment'``, ``'Score Sentiment'``, ``'Emotion'`` and
	        ``'Score Emotion'`` (``None`` in rows whose content is not a
	        string), or ``None`` after an error message when the format is
	        unsupported or the ``content`` column is missing.
	    """
	    file_name = file.name.lower()
	    if file_name.endswith('.xlsx'):
	        df = pd.read_excel(file)
	    elif file_name.endswith('.csv'):
	        df = pd.read_csv(file)
	    else:
	        st.error("Format file tidak didukung. Harap unggah file CSV atau XLSX.")
	        return None

	    if 'content' not in df.columns:
	        st.error("Kolom 'content' tidak ditemukan pada file.")
	        return None

	    analysed = []
	    for content in df['content']:
	        # Only real strings are analysed; NaN and other cell types are
	        # kept in the output with empty result fields.
	        if isinstance(content, str):
	            cleaned_text = preprocess_text(content, slank_formal_df)
	            sentiment, score_sentiment = analyze_sentiment(cleaned_text)
	            emotion, score_emotion = analyze_emotion(cleaned_text)
	            analysed.append((cleaned_text, sentiment, score_sentiment, emotion, score_emotion))
	        else:
	            analysed.append((None, None, None, None, None))

	    result_columns = ['Cleaned Content', 'Sentiment', 'Score Sentiment', 'Emotion', 'Score Emotion']
	    for position, column in enumerate(result_columns):
	        df[column] = [row[position] for row in analysed]

	    if not df['Cleaned Content'].notna().any():
	        st.warning("Tidak ada teks untuk dianalisis.")
	        return df

	    _render_analysis_report(df)
	    return df


	def analyze_sentiment(text):
	    """Classify the sentiment of *text* with ``sentiment_pipe``.

	    Args:
	        text: Preprocessed text to classify.

	    Returns:
	        A ``(label, score)`` tuple taken from the first pipeline result,
	        with the label lower-cased.
	    """
	    # truncation=True is forwarded to the tokenizer so that inputs longer
	    # than the model's maximum sequence length are cut instead of raising.
	    result = sentiment_pipe(text, truncation=True)[0]
	    return result['label'].lower(), result['score']

	def analyze_emotion(text):
	    """Classify the emotion of *text* with ``emotion_pipe``.

	    Args:
	        text: Preprocessed text to classify.

	    Returns:
	        A ``(label, score)`` tuple taken from the first pipeline result,
	        with the label lower-cased.
	    """
	    # truncation=True is forwarded to the tokenizer so that inputs longer
	    # than the model's maximum sequence length are cut instead of raising.
	    result = emotion_pipe(text, truncation=True)[0]
	    return result['label'].lower(), result['score']

	def get_download_link(df, filename):
	    """Build an HTML link that downloads *df* as a CSV file.

	    Args:
	        df: DataFrame to export (written without its index).
	        filename: Download file name without the ``.csv`` extension.

	    Returns:
	        An ``<a>`` element (as a string) whose ``href`` is a base64 data
	        URI containing the CSV text.
	    """
	    import html

	    csv = df.to_csv(index=False)
	    b64 = base64.b64encode(csv.encode()).decode()
	    # Escape the name because it is placed inside an HTML attribute.
	    safe_name = html.escape(filename)
	    # "text/csv" is the registered media type for CSV; "file/csv" is not.
	    return f'<a href="data:text/csv;base64,{b64}" download="{safe_name}.csv">Download CSV</a>'

	def get_word_freq_download_link(word_freq_df):
	    """Build an HTML link that downloads the word-frequency table as CSV.

	    Args:
	        word_freq_df: DataFrame holding the word and frequency columns.

	    Returns:
	        An ``<a>`` element (as a string) whose ``href`` is a base64 data
	        URI containing the CSV text; the download is named
	        ``word_frequency.csv``.
	    """
	    # index=False: the positional index carries no information and would
	    # otherwise appear as an unnamed first column in the exported file.
	    csv = word_freq_df.to_csv(index=False)
	    b64 = base64.b64encode(csv.encode()).decode()
	    # "text/csv" is the registered media type for CSV; "file/csv" is not.
	    return f'<a href="data:text/csv;base64,{b64}" download="word_frequency.csv">Download Word Frequency CSV</a>'

	def _show_results(df):
	    """Display the analysed DataFrame together with its CSV download link."""
	    st.write("Hasil Analisis:")
	    st.write(df)
	    st.markdown(get_download_link(df, "analisis_sentimen_emosi"), unsafe_allow_html=True)


	def main():
	    """Run the Streamlit page.

	    The user first uploads the slang dictionary and then either types text
	    to analyse directly or uploads a CSV/XLSX file. The script stops early
	    (``st.stop``) until a valid slang dictionary is available.
	    """
	    st.title("Aplikasi Analisis Sentimen dan Prediksi Emosi")

	    # load_slank_formal only accepts .txt files, so only that type is
	    # offered here; offering CSV led straight to an error message.
	    slank_file = st.file_uploader("Upload file slank (TXT)", type=["txt"])
	    if slank_file is None:
	        st.warning("Harap upload file slank terlebih dahulu.")
	        st.stop()

	    df_slank_formal = load_slank_formal(slank_file)
	    if df_slank_formal is None:
	        st.stop()

	    menu = st.sidebar.selectbox("Pilih Metode", ["Analisis Langsung", "Import dari File"])

	    if menu == "Analisis Langsung":
	        user_input = st.text_area("Masukkan teks yang ingin dianalisis (pisahkan dengan enter):")
	        if st.button("Analisis"):
	            if user_input.strip():
	                _show_results(combined_analysis(user_input, df_slank_formal))
	            else:
	                # Nothing to analyse; avoid running the pipelines on no input.
	                st.warning("Harap masukkan teks terlebih dahulu.")

	    elif menu == "Import dari File":
	        uploaded_file = st.file_uploader("Upload file CSV atau XLSX", type=["csv", "xlsx"])
	        if uploaded_file is not None:
	            df = process_file(uploaded_file, df_slank_formal)
	            # process_file returns None after reporting an error; there is
	            # nothing to show or export in that case.
	            if df is not None:
	                _show_results(df)


	if __name__ == '__main__':
	    main()