|
import streamlit as st |
|
import pandas as pd |
|
import torch |
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification |
|
import matplotlib.pyplot as plt |
|
|
|
|
|
model_name = "tabularisai/multilingual-sentiment-analysis"


@st.cache_resource
def _load_model(name):
    """Load the tokenizer and sequence classifier for the checkpoint *name*.

    Streamlit re-executes this script on every widget interaction. Wrapping
    the load in ``st.cache_resource`` keeps one shared tokenizer/model pair
    alive across reruns instead of re-instantiating them each time.

    Args:
        name: Checkpoint identifier passed to ``from_pretrained``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(name)
    loaded_model = AutoModelForSequenceClassification.from_pretrained(name)
    return loaded_tokenizer, loaded_model


# Module-level names kept so the rest of the script can keep using them.
tokenizer, model = _load_model(model_name)
|
|
|
# Class index -> human-readable label, as hard-coded by this script.
# NOTE(review): presumably mirrors the checkpoint's id2label order - confirm
# against the model config.
_SENTIMENT_LABELS = {
    0: "Very Negative",
    1: "Negative",
    2: "Neutral",
    3: "Positive",
    4: "Very Positive",
}


def predict_sentiment(texts, batch_size=32):
    """Classify each text and return its sentiment label.

    Texts are tokenized (truncated to 512 tokens, padded per batch) and run
    through the module-level ``model`` in chunks of ``batch_size``, so a large
    input is not pushed through the model as one single batch.

    Args:
        texts: A string, or an iterable of strings. A single string is
            treated as one document.
        batch_size: Maximum number of texts per forward pass.

    Returns:
        A list of label strings, one per input text, in input order. An empty
        input yields an empty list without calling the tokenizer or model.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # A bare string must stay one document; slicing it below would split it
    # into characters.
    if isinstance(texts, str):
        texts = [texts]
    else:
        texts = list(texts)

    labels = []
    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        inputs = tokenizer(
            batch,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=512,
        )
        with torch.no_grad():
            outputs = model(**inputs)
        # Softmax is monotonic, so argmax over the raw logits selects the
        # same class as argmax over the probabilities.
        predicted = torch.argmax(outputs.logits, dim=-1).tolist()
        labels.extend(_SENTIMENT_LABELS[index] for index in predicted)
    return labels
|
|
|
|
|
# --- Streamlit page: upload a spreadsheet, pick a text column, classify it,
# --- then show the results as tables, a pie chart and a CSV download.
st.title("Sentiment Analysis App")
st.write("Upload an Excel file containing text data, and we'll analyze its sentiment.")

uploaded_file = st.file_uploader("Upload Excel File", type=["xlsx", "xls"])

if uploaded_file is not None:
    df = pd.read_excel(uploaded_file)
    st.write("Preview of Uploaded Data:")
    st.dataframe(df.head())

    text_column = st.selectbox("Select the column containing text", df.columns)

    if st.button("Analyze Sentiment"):
        # Nothing to classify (no rows, or no column to choose from): say so
        # instead of passing an empty batch to the model and pie chart.
        if df.empty or text_column is None:
            st.warning("The uploaded file contains no rows to analyze.")
            st.stop()

        df["Sentiment"] = predict_sentiment(df[text_column].astype(str).tolist())

        st.write("Sentiment Analysis Results:")
        st.dataframe(df[[text_column, "Sentiment"]])

        # Colors are keyed by label. value_counts() orders by frequency, so a
        # positional color list would give the same class a different color
        # from one upload to the next.
        # NOTE(review): label -> color pairing assumes the original color list
        # was meant to run from "Very Negative" to "Very Positive" - confirm.
        sentiment_colors = {
            "Very Negative": "red",
            "Negative": "yellow",
            "Neutral": "pink",
            "Positive": "lightgreen",
            "Very Positive": "green",
        }
        sentiment_counts = df["Sentiment"].value_counts()
        present_labels = [
            label for label in sentiment_colors if label in sentiment_counts.index
        ]
        # Fixed negative-to-positive slice order, skipping absent classes.
        sentiment_counts = sentiment_counts.reindex(present_labels)

        fig, ax = plt.subplots()
        ax.pie(
            sentiment_counts,
            labels=sentiment_counts.index,
            autopct='%1.1f%%',
            colors=[sentiment_colors[label] for label in present_labels],
        )
        ax.set_title("Sentiment Distribution")
        st.pyplot(fig)
        # pyplot keeps every figure registered until it is closed; release it
        # so figures do not accumulate across script reruns.
        plt.close(fig)

        st.write("Detailed Sentiment Table:")
        st.table(df[[text_column, "Sentiment"]])

        st.download_button("Download Results", df.to_csv(index=False).encode('utf-8'), "sentiment_results.csv", "text/csv")