Spaces:
Sleeping
Sleeping
Commit
·
5618f88
1
Parent(s):
c390fc9
add assets, update req, update func
Browse files- app.py +189 -128
- assets/Poppins-Regular.ttf +0 -0
- assets/slanks.txt +0 -0
- requirements.txt +6 -1
app.py
CHANGED
@@ -1,180 +1,241 @@
|
|
1 |
-
import os
|
2 |
import streamlit as st
|
3 |
import pandas as pd
|
4 |
from transformers import pipeline
|
5 |
import base64
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
-
# Set to use CPU only
|
8 |
-
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
|
9 |
|
10 |
# Load pipelines
|
11 |
sentiment_pipe = pipeline("text-classification", model="ayameRushia/bert-base-indonesian-1.5G-sentiment-analysis-smsa")
|
12 |
emotion_pipe = pipeline("text-classification", model="azizp128/prediksi-emosi-indobert")
|
13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
-
def
|
16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
results.append((text, result['label'].lower(), result['score']))
|
24 |
|
25 |
-
#
|
26 |
-
|
27 |
-
return df
|
28 |
|
|
|
|
|
29 |
|
30 |
-
def
|
31 |
-
|
32 |
-
|
33 |
-
|
|
|
|
|
|
|
|
|
|
|
34 |
results = []
|
35 |
for text in texts:
|
36 |
if text.strip():
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
df = pd.DataFrame(results, columns=['Content', 'Emotion', 'Score'])
|
42 |
-
return df
|
43 |
-
|
44 |
-
def process_file_sentiment(file):
|
45 |
-
if file.name.endswith('.xlsx'):
|
46 |
-
df = pd.read_excel(file) # Baca file XLSX
|
47 |
-
elif file.name.endswith('.csv'):
|
48 |
-
df = pd.read_csv(file) # Baca file CSV
|
49 |
-
else:
|
50 |
-
st.error("Format file tidak didukung. Harap unggah file CSV atau XLSX.")
|
51 |
-
return None
|
52 |
|
53 |
-
#
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
|
|
|
|
|
|
|
|
61 |
|
62 |
-
|
63 |
-
df['
|
|
|
64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
return df
|
66 |
|
67 |
-
|
|
|
|
|
68 |
if file.name.endswith('.xlsx'):
|
69 |
-
df = pd.read_excel(file)
|
70 |
elif file.name.endswith('.csv'):
|
71 |
-
df = pd.read_csv(file)
|
72 |
else:
|
73 |
st.error("Format file tidak didukung. Harap unggah file CSV atau XLSX.")
|
74 |
return None
|
75 |
-
|
76 |
-
# Prediksi emosi dan tambahkan hasil ke DataFrame
|
77 |
results = []
|
78 |
for index, row in df.iterrows():
|
79 |
if pd.notna(row['content']) and isinstance(row['content'], str):
|
80 |
-
|
81 |
-
|
|
|
|
|
82 |
else:
|
83 |
-
results.append((row['content'], None, None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
|
85 |
-
|
86 |
-
df['
|
|
|
87 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
return df
|
89 |
|
|
|
90 |
def analyze_sentiment(text):
|
91 |
result = sentiment_pipe(text)[0]
|
92 |
return result['label'].lower(), result['score']
|
93 |
|
94 |
-
def
|
95 |
result = emotion_pipe(text)[0]
|
96 |
return result['label'].lower(), result['score']
|
97 |
|
98 |
-
def
|
99 |
-
# Generate a link to download the dataframe with Sentimen and Skor Sentimen as CSV
|
100 |
csv = df.to_csv(index=False)
|
101 |
-
b64 = base64.b64encode(csv.encode()).decode()
|
102 |
-
href = f'<a href="data:file/csv;base64,{b64}" download="
|
103 |
return href
|
104 |
|
105 |
-
def
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
href = f'<a href="data:file/csv;base64,{b64}" download="prediksi_emosi.csv">Download CSV</a>'
|
110 |
return href
|
111 |
|
112 |
def main():
|
113 |
st.title("Aplikasi Analisis Sentimen dan Prediksi Emosi by Ramdhani")
|
114 |
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
st.write("Hasil Analisis Sentimen:")
|
144 |
-
st.write(df)
|
145 |
-
|
146 |
-
# Tambahkan tombol download CSV
|
147 |
-
st.markdown(get_download_link_sentiment(df), unsafe_allow_html=True)
|
148 |
-
|
149 |
-
elif program == "Prediksi Emosi":
|
150 |
-
# Menu untuk prediksi emosi
|
151 |
-
st.header("Prediksi Emosi")
|
152 |
-
menu_emot = st.sidebar.selectbox("Pilih Metode", ["Prediksi Langsung", "Import dari File"])
|
153 |
-
|
154 |
-
if menu_emot == "Prediksi Langsung":
|
155 |
-
user_input = st.text_area("Masukkan teks yang ingin dianalisis (pisahkan dengan enter):")
|
156 |
-
|
157 |
-
if st.button("Analisis Sentimen"):
|
158 |
-
df = direct_emotion_analysis(user_input)
|
159 |
-
st.write("Hasil Analisis Sentimen:")
|
160 |
-
st.write(df)
|
161 |
-
|
162 |
-
# Tambahkan tombol download CSV
|
163 |
-
st.markdown(get_download_link_emotion(df), unsafe_allow_html=True)
|
164 |
-
|
165 |
-
elif menu_emot == "Import dari File":
|
166 |
-
st.subheader("Import dari File")
|
167 |
-
uploaded_file = st.file_uploader("Upload file CSV atau XLSX", type=["csv", "xlsx"])
|
168 |
-
|
169 |
-
if uploaded_file is not None:
|
170 |
-
df = process_file_emotion(uploaded_file)
|
171 |
-
|
172 |
-
# Tampilkan hasil prediksi emosi
|
173 |
-
st.write("Hasil Prediksi Emosi:")
|
174 |
-
st.write(df)
|
175 |
-
|
176 |
-
# Tambahkan tombol download CSV
|
177 |
-
st.markdown(get_download_link_emotion(df), unsafe_allow_html=True)
|
178 |
-
|
179 |
-
if __name__ == "__main__":
|
180 |
main()
|
|
|
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
3 |
from transformers import pipeline
|
4 |
import base64
|
5 |
+
import re
|
6 |
+
import nltk
|
7 |
+
from nltk.corpus import stopwords
|
8 |
+
from nltk.tokenize import word_tokenize
|
9 |
+
import plotly.express as px
|
10 |
+
import matplotlib.pyplot as plt
|
11 |
+
from wordcloud import WordCloud
|
12 |
+
import numpy as np
|
13 |
+
from PIL import ImageFont
|
14 |
|
|
|
|
|
15 |
|
16 |
# Load the HuggingFace text-classification pipelines once at module import.
# Both models are Indonesian-language classifiers; loading happens at app
# startup, before any Streamlit UI is rendered.
sentiment_pipe = pipeline("text-classification", model="ayameRushia/bert-base-indonesian-1.5G-sentiment-analysis-smsa")
emotion_pipe = pipeline("text-classification", model="azizp128/prediksi-emosi-indobert")
19 |
|
20 |
+
def load_slank_formal(file):
    """Load a slang->formal word mapping from an uploaded ';'-separated TXT file.

    Parameters
    ----------
    file : uploaded-file object with a ``.name`` attribute (e.g. from
        ``st.file_uploader``) that pandas can read from.

    Returns
    -------
    pandas.DataFrame with columns ``['Slank', 'Formal']``, or ``None`` (after
    showing a Streamlit error) when the file is not a ``.txt``.
    """
    # Guard clause: reject anything that is not a .txt upload.
    if not file.name.endswith('.txt'):
        st.error("Format file tidak didukung. Harap unggah file TXT.")
        return None
    # `names=` already labels the columns, so no separate rename is needed.
    return pd.read_csv(file, sep=';', header=None, names=['Slank', 'Formal'])
28 |
|
29 |
+
def replace_slank_to_formal(sentence, slank_formal_df):
    """Replace slang tokens in *sentence* with their formal equivalents.

    Tokenization keeps word-like runs (including apostrophes and ``./:-``),
    punctuation runs, and non-ASCII runs (e.g. emoji) as separate tokens.
    Matching is case-insensitive on the input side; for duplicate slang
    entries the first DataFrame row wins (same as the original per-word scan).
    Returns the tokens re-joined with single spaces.
    """
    # Build the lookup table once instead of scanning the DataFrame for every
    # word: O(1) per token instead of O(rows).
    first_rows = slank_formal_df.drop_duplicates(subset='Slank', keep='first')
    mapping = dict(zip(first_rows['Slank'], first_rows['Formal']))
    words = re.findall(r'[\w\',./:-]+|[.,]+|[^\x00-\x7F]+', sentence)
    for i, word in enumerate(words):
        replacement = mapping.get(word.lower())
        if replacement is not None:
            words[i] = str(replacement)
    return ' '.join(words)
36 |
+
|
37 |
+
def preprocess_text(text, slank_formal_df):
    """Normalize raw text before classification.

    Lowercases, strips URLs, @mentions and '#' markers, removes remaining
    punctuation, rewrites slang into formal words, then re-joins the NLTK
    word tokens with single spaces.
    """
    lowered = text.lower()
    without_urls = re.sub(r'http\S+|www\S+|https\S+', '', lowered, flags=re.MULTILINE)
    without_handles = re.sub(r'\@\w+|\#', '', without_urls)
    without_punct = re.sub(r'[^\w\s]', '', without_handles)
    formalized = replace_slank_to_formal(without_punct, slank_formal_df)
    return ' '.join(word_tokenize(formalized))
46 |
+
|
47 |
+
def generate_wordcloud(text, font_path, title, colormap):
    """Render a word cloud for *text* in the Streamlit page and offer it as a download.

    Parameters
    ----------
    text : str
        Space-separated words to visualize.
    font_path : str
        NOTE(review): currently ignored — the bundled asset font is always
        used; confirm intent before wiring this parameter through.
    title : str
        Figure title; also used as the saved PNG filename.
    colormap : str
        Matplotlib colormap name for the word colors.
    """
    wordcloud = WordCloud(
        width=800,
        height=800,
        background_color='white',
        # Forward slash instead of a backslash literal: portable across
        # platforms and avoids the invalid "\P" escape in a non-raw string.
        font_path='assets/Poppins-Regular.ttf',
        prefer_horizontal=1.0,
        colormap=colormap,
        max_words=100
    ).generate(text)

    plt.figure(figsize=(10, 10))
    plt.title(title, fontsize=20)
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    st.pyplot(plt)
    # Close the figure so repeated calls don't accumulate open matplotlib figures.
    plt.close()

    # Save word cloud to file so it can be offered for download.
    wordcloud.to_file(f"{title}.png")

    # Add download link for the saved PNG.
    st.markdown(get_image_download_link(f"{title}.png"), unsafe_allow_html=True)
69 |
|
70 |
+
def get_image_download_link(image_path):
    """Return an HTML <a> tag that downloads the image at *image_path*.

    The file bytes are embedded inline as base64, so the link works without
    any server-side file route.
    """
    with open(image_path, "rb") as image_file:
        b64 = base64.b64encode(image_file.read()).decode()
    # "image/png" is the registered MIME type; the previous "file/png" was invalid.
    href = f'<a href="data:image/png;base64,{b64}" download="{image_path}">Download {image_path}</a>'
    return href
75 |
+
|
76 |
+
|
77 |
+
def combined_analysis(text, slank_formal_df):
    """Run sentiment and emotion analysis over newline-separated input text.

    Side effects: renders pie charts, three word clouds and a word-frequency
    table into the Streamlit page. Returns a DataFrame with one row per
    non-blank input line.
    """
    texts = text.split('\n')
    results = []
    # Renamed the loop variable (originally it shadowed the `text` parameter,
    # which is not used again after the split).
    for line in texts:
        if line.strip():
            cleaned_text = preprocess_text(line, slank_formal_df)
            sentiment_result = sentiment_pipe(cleaned_text)[0]
            emotion_result = emotion_pipe(cleaned_text)[0]
            results.append((line, cleaned_text, sentiment_result['label'].lower(), sentiment_result['score'], emotion_result['label'].lower(), emotion_result['score']))
    df = pd.DataFrame(results, columns=['Content', 'Cleaned Content', 'Sentiment', 'Score Sentiment', 'Emotion', 'Score Emotion'])

    # Sentiment pie chart
    sentiment_counts = df['Sentiment'].value_counts()
    fig_sentiment = px.pie(sentiment_counts, values=sentiment_counts.values, names=sentiment_counts.index, title='Sentiment Distribution')
    st.plotly_chart(fig_sentiment, use_container_width=True)

    # Emotion pie chart
    emotion_counts = df['Emotion'].value_counts()
    fig_emotion = px.pie(emotion_counts, values=emotion_counts.values, names=emotion_counts.index, title='Emotion Distribution')
    st.plotly_chart(fig_emotion, use_container_width=True)

    # Generate word clouds (NOTE(review): generate_wordcloud currently ignores
    # this font_path and uses its own bundled asset font).
    font_path = 'Poppins-Regular.ttf'

    # Overall word cloud
    overall_text = ' '.join(df['Cleaned Content'].dropna())
    generate_wordcloud(overall_text, font_path, 'Overall Word Cloud', 'viridis')

    # Positive sentiment and happy ("senang") emotion word cloud
    positive_happy_text = ' '.join(df[(df['Sentiment'] == 'positive') & (df['Emotion'] == 'senang')]['Cleaned Content'].dropna())
    generate_wordcloud(positive_happy_text, font_path, 'Positive Sentiment & Happy Emotion Word Cloud', 'Greens')

    # Negative sentiment and angry ("marah") or sad ("sedih") emotion word cloud
    negative_angry_sad_text = ' '.join(df[(df['Sentiment'] == 'negative') & (df['Emotion'].isin(['marah', 'sedih']))]['Cleaned Content'].dropna())
    generate_wordcloud(negative_angry_sad_text, font_path, 'Negative Sentiment & Angry or Sad Emotion Word Cloud', 'Reds')

    # Word frequency over all cleaned text
    word_freq = pd.Series(' '.join(df['Cleaned Content'].dropna()).split()).value_counts()
    st.write("Word Frequency:")
    st.write(word_freq)

    # Download link for the word-frequency table
    word_freq_df = word_freq.reset_index()
    word_freq_df.columns = ['Word', 'Frequency']
    st.markdown(get_word_freq_download_link(word_freq_df), unsafe_allow_html=True)

    return df
124 |
|
125 |
+
|
126 |
+
|
127 |
+
def process_file(file, slank_formal_df):
    """Analyze the 'content' column of an uploaded CSV/XLSX file.

    Adds cleaned-text, sentiment and emotion columns to the uploaded table,
    renders the same charts as ``combined_analysis`` into the Streamlit page,
    and returns the augmented DataFrame (``None`` on an unsupported format).
    """
    if file.name.endswith('.xlsx'):
        df = pd.read_excel(file)
    elif file.name.endswith('.csv'):
        df = pd.read_csv(file)
    else:
        st.error("Format file tidak didukung. Harap unggah file CSV atau XLSX.")
        return None

    results = []
    for index, row in df.iterrows():
        # Only analyze non-empty string cells; other rows get empty result columns.
        if pd.notna(row['content']) and isinstance(row['content'], str):
            cleaned_text = preprocess_text(row['content'], slank_formal_df)
            sentiment, score_sentiment = analyze_sentiment(cleaned_text)
            emotion, score_emotion = analyze_emotion(cleaned_text)
            results.append((row['content'], cleaned_text, sentiment, score_sentiment, emotion, score_emotion))
        else:
            results.append((row['content'], None, None, None, None, None))

    df['Cleaned Content'] = [r[1] for r in results]
    df['Sentiment'] = [r[2] for r in results]
    df['Score Sentiment'] = [r[3] for r in results]
    df['Emotion'] = [r[4] for r in results]
    df['Score Emotion'] = [r[5] for r in results]

    # Sentiment pie chart
    sentiment_counts = df['Sentiment'].value_counts()
    fig_sentiment = px.pie(sentiment_counts, values=sentiment_counts.values, names=sentiment_counts.index, title='Sentiment Distribution')
    st.plotly_chart(fig_sentiment, use_container_width=True)

    # Emotion pie chart
    emotion_counts = df['Emotion'].value_counts()
    fig_emotion = px.pie(emotion_counts, values=emotion_counts.values, names=emotion_counts.index, title='Emotion Distribution')
    st.plotly_chart(fig_emotion, use_container_width=True)

    # Generate word clouds (NOTE(review): generate_wordcloud currently ignores
    # this font_path and uses its own bundled asset font).
    font_path = 'Poppins-Regular.ttf'

    # Overall word cloud
    overall_text = ' '.join(df['Cleaned Content'].dropna())
    generate_wordcloud(overall_text, font_path, 'Overall Word Cloud', 'viridis')

    # Positive sentiment and happy ("senang") emotion word cloud
    positive_happy_text = ' '.join(df[(df['Sentiment'] == 'positive') & (df['Emotion'] == 'senang')]['Cleaned Content'].dropna())
    generate_wordcloud(positive_happy_text, font_path, 'Positive Sentiment & Happy Emotion Word Cloud', 'Greens')

    # Negative sentiment and angry ("marah") or sad ("sedih") emotion word cloud
    # (fixed typo: 'Angry or stSad' -> 'Angry or Sad', matching combined_analysis)
    negative_angry_sad_text = ' '.join(df[(df['Sentiment'] == 'negative') & (df['Emotion'].isin(['marah', 'sedih']))]['Cleaned Content'].dropna())
    generate_wordcloud(negative_angry_sad_text, font_path, 'Negative Sentiment & Angry or Sad Emotion Word Cloud', 'Reds')

    # Word frequency over all cleaned text
    word_freq = pd.Series(' '.join(df['Cleaned Content'].dropna()).split()).value_counts()
    st.write("Word Frequency:")
    st.write(word_freq)

    # Download link for the word-frequency table
    word_freq_df = word_freq.reset_index()
    word_freq_df.columns = ['Word', 'Frequency']
    st.markdown(get_word_freq_download_link(word_freq_df), unsafe_allow_html=True)

    return df
188 |
|
189 |
+
|
190 |
def analyze_sentiment(text):
    """Classify *text* with the Indonesian sentiment model.

    Returns a ``(label, score)`` tuple with the label lowercased.
    """
    prediction = sentiment_pipe(text)[0]
    label = prediction['label'].lower()
    return label, prediction['score']
193 |
|
194 |
+
def analyze_emotion(text):
    """Classify *text* with the Indonesian emotion model.

    Returns a ``(label, score)`` tuple with the label lowercased.
    """
    prediction = emotion_pipe(text)[0]
    label = prediction['label'].lower()
    return label, prediction['score']
197 |
|
198 |
+
def get_download_link(df, filename):
    """Return an HTML <a> link that downloads *df* as ``<filename>.csv``.

    The CSV payload is embedded inline as base64, so no server-side file is
    created.
    """
    # Renamed the local so it no longer shadows the stdlib `csv` module name.
    csv_data = df.to_csv(index=False)
    b64 = base64.b64encode(csv_data.encode()).decode()
    # Bug fix: the `filename` parameter was ignored and the download name was
    # hard-coded; use the caller-supplied name instead.
    href = f'<a href="data:file/csv;base64,{b64}" download="{filename}.csv">Download CSV</a>'
    return href
203 |
|
204 |
+
def get_word_freq_download_link(word_freq_df):
    """Return an HTML <a> link that downloads the word-frequency table as CSV.

    The CSV (index included) is embedded inline as base64.
    """
    encoded = base64.b64encode(word_freq_df.to_csv(index=True).encode()).decode()
    return f'<a href="data:file/csv;base64,{encoded}" download="word_frequency.csv">Download Word Frequency CSV</a>'
209 |
|
210 |
def main():
    """Streamlit entry point: gate on the slang-dictionary upload, then run
    either direct-text or file-based analysis."""
    st.title("Aplikasi Analisis Sentimen dan Prediksi Emosi by Ramdhani")

    # The slang dictionary is required by every analysis path, so the app
    # stops rendering here until a usable file has been uploaded.
    slank_file = st.file_uploader("Upload file slank (CSV atau TXT)", type=["csv", "txt"])
    if slank_file is not None:
        df_slank_formal = load_slank_formal(slank_file)
        if df_slank_formal is None:
            # Unsupported format: load_slank_formal already showed the error.
            st.stop()
    else:
        st.warning("Harap upload file slank terlebih dahulu.")
        st.stop()

    menu = st.sidebar.selectbox("Pilih Metode", ["Analisis Langsung", "Import dari File"])

    if menu == "Analisis Langsung":
        # One input line per text to analyze.
        user_input = st.text_area("Masukkan teks yang ingin dianalisis (pisahkan dengan enter):")
        if st.button("Analisis"):
            df = combined_analysis(user_input, df_slank_formal)
            st.write("Hasil Analisis:")
            st.write(df)
            st.markdown(get_download_link(df, "analisis_sentimen_emosi"), unsafe_allow_html=True)

    elif menu == "Import dari File":
        # Batch mode: the uploaded table must have a 'content' column.
        uploaded_file = st.file_uploader("Upload file CSV atau XLSX", type=["csv", "xlsx"])
        if uploaded_file is not None:
            df = process_file(uploaded_file, df_slank_formal)
            st.write("Hasil Analisis:")
            st.write(df)
            st.markdown(get_download_link(df, "analisis_sentimen_emosi"), unsafe_allow_html=True)
239 |
+
|
240 |
+
# Script entry point (run via `streamlit run app.py`).
if __name__ == '__main__':
    main()
assets/Poppins-Regular.ttf
ADDED
Binary file (158 kB). View file
|
|
assets/slanks.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
requirements.txt
CHANGED
@@ -4,4 +4,9 @@ pandas
|
|
4 |
tensorflow-cpu
|
5 |
torch
|
6 |
tf-keras
|
7 |
-
openpyxl
|
|
|
|
|
|
|
|
|
|
|
|
4 |
tensorflow-cpu
|
5 |
torch
|
6 |
tf-keras
|
7 |
+
openpyxl
|
8 |
+
nltk
|
9 |
+
plotly
|
10 |
+
matplotlib
|
11 |
+
wordcloud
|
12 |
+
numpy
|