"""Streamlit app that labels the sentiment of each text row in an uploaded file.

Flow: the user uploads a CSV or Excel file, the app previews it, determines
which column holds the text (``text`` by default, otherwise a column named by
the user), runs a Hugging Face sentiment-analysis pipeline over every row,
and shows the labelled rows together with a pie chart of the label counts.
"""

import os
import time

import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

st.title("Sentiment Analysis App")
st.write("Upload a CSV or Excel file containing text data for sentiment analysis.")

# File uploader
uploaded_file = st.file_uploader("Upload a CSV or Excel file", type=["csv", "xlsx"])

# Load sentiment analysis model. Any failure here (download, missing backend,
# ...) is reported in the UI and halts the script run instead of crashing it.
try:
    sentiment_pipeline = pipeline(
        "sentiment-analysis",
        model="distilbert-base-uncased-finetuned-sst-2-english",
    )
except Exception as e:
    st.error(f"Error loading model: {e}")
    st.stop()
else:
    st.success("Sentiment analysis model loaded successfully!")

if uploaded_file:
    # Pick the reader from the file extension; compare case-insensitively so
    # that names such as "DATA.CSV" are accepted as well.
    file_name = uploaded_file.name.lower()
    if file_name.endswith(".csv"):
        df = pd.read_csv(uploaded_file)
    elif file_name.endswith(".xlsx"):
        df = pd.read_excel(uploaded_file)
    else:
        st.error("Unsupported file format.")
        st.stop()

    st.write("Data Preview:", df.head())

    # Make sure the text to analyse lives in a column called 'text'. If the
    # file has no such column, ask the user which column to use instead.
    if "text" not in df.columns:
        text_column = st.text_input("Enter the name of the column containing text values:")
        if not text_column:
            # Nothing typed yet: prompt instead of reporting a bogus
            # "Column '' not found" error on the first render.
            st.info("Enter the name of the text column to continue.")
            st.stop()
        if text_column not in df.columns:
            st.error(f"Column '{text_column}' not found in the file.")
            st.stop()
        # From here on the data is always addressed as 'text'. The original
        # code kept indexing by the old name after renaming, which raised a
        # KeyError as soon as the analysis was started.
        df = df.rename(columns={text_column: "text"})

    if st.button("Run Sentiment Analysis"):
        total_rows = len(df)
        if total_rows == 0:
            st.warning("The uploaded file contains no rows to analyze.")
            st.stop()

        # Progress bar
        progress_bar = st.progress(0)
        sentiments = []

        for i, text in enumerate(df["text"]):
            try:
                result = sentiment_pipeline(text)[0]
                sentiments.append(result["label"])
            except Exception as e:
                # Keep going on a bad row (e.g. an empty cell) so that one
                # failure does not discard the rest of the results.
                sentiments.append("Error")
                st.error(f"Error processing text at row {i + 1}: {e}")
            progress_bar.progress((i + 1) / total_rows)

        df["Sentiment"] = sentiments
        st.write("Sentiment Analysis Output:", df[["text", "Sentiment"]])

        # Pie chart
        sentiment_counts = df["Sentiment"].value_counts()
        fig, ax = plt.subplots()
        ax.pie(
            sentiment_counts,
            labels=sentiment_counts.index,
            autopct="%1.1f%%",
            startangle=90,
        )
        ax.axis("equal")
        st.pyplot(fig)
        # Release the figure: pyplot keeps every figure it creates alive
        # until it is closed, and this script is re-run on every interaction.
        plt.close(fig)

        # Clear progress bar
        progress_bar.empty()