Saima335
committed on
Commit
·
2b0414b
1
Parent(s):
82a97f1
Created folder my_folder
Browse files- requirements.txt +8 -1
- src/emotion_final_model/config.json +42 -0
- src/streamlit_app.py +667 -37
requirements.txt
CHANGED
@@ -1,3 +1,10 @@
|
|
1 |
altair
|
2 |
pandas
|
3 |
-
streamlit
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
altair
|
2 |
pandas
|
3 |
+
streamlit
|
4 |
+
flask
|
5 |
+
torch
|
6 |
+
transformers
|
7 |
+
wordcloud
|
8 |
+
Pillow
|
9 |
+
plotly
|
10 |
+
requests
|
src/emotion_final_model/config.json
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "bert-base-multilingual-cased",
|
3 |
+
"architectures": [
|
4 |
+
"BertForSequenceClassification"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"directionality": "bidi",
|
9 |
+
"hidden_act": "gelu",
|
10 |
+
"hidden_dropout_prob": 0.1,
|
11 |
+
"hidden_size": 768,
|
12 |
+
"id2label": {
|
13 |
+
"0": "LABEL_0",
|
14 |
+
"1": "LABEL_1",
|
15 |
+
"2": "LABEL_2"
|
16 |
+
},
|
17 |
+
"initializer_range": 0.02,
|
18 |
+
"intermediate_size": 3072,
|
19 |
+
"label2id": {
|
20 |
+
"LABEL_0": 0,
|
21 |
+
"LABEL_1": 1,
|
22 |
+
"LABEL_2": 2
|
23 |
+
},
|
24 |
+
"layer_norm_eps": 1e-12,
|
25 |
+
"max_position_embeddings": 512,
|
26 |
+
"model_type": "bert",
|
27 |
+
"num_attention_heads": 12,
|
28 |
+
"num_hidden_layers": 12,
|
29 |
+
"pad_token_id": 0,
|
30 |
+
"pooler_fc_size": 768,
|
31 |
+
"pooler_num_attention_heads": 12,
|
32 |
+
"pooler_num_fc_layers": 3,
|
33 |
+
"pooler_size_per_head": 128,
|
34 |
+
"pooler_type": "first_token_transform",
|
35 |
+
"position_embedding_type": "absolute",
|
36 |
+
"problem_type": "single_label_classification",
|
37 |
+
"torch_dtype": "float32",
|
38 |
+
"transformers_version": "4.46.3",
|
39 |
+
"type_vocab_size": 2,
|
40 |
+
"use_cache": true,
|
41 |
+
"vocab_size": 119547
|
42 |
+
}
|
src/streamlit_app.py
CHANGED
@@ -1,40 +1,670 @@
|
|
1 |
-
import
|
2 |
-
import numpy as np
|
3 |
import pandas as pd
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
import streamlit as st
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
In the meantime, below is an example of what you can do with just a few lines of code:
|
14 |
-
"""
|
15 |
-
|
16 |
-
num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
|
17 |
-
num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
|
18 |
-
|
19 |
-
indices = np.linspace(0, 1, num_points)
|
20 |
-
theta = 2 * np.pi * num_turns * indices
|
21 |
-
radius = indices
|
22 |
-
|
23 |
-
x = radius * np.cos(theta)
|
24 |
-
y = radius * np.sin(theta)
|
25 |
-
|
26 |
-
df = pd.DataFrame({
|
27 |
-
"x": x,
|
28 |
-
"y": y,
|
29 |
-
"idx": indices,
|
30 |
-
"rand": np.random.randn(num_points),
|
31 |
-
})
|
32 |
-
|
33 |
-
st.altair_chart(alt.Chart(df, height=700, width=700)
|
34 |
-
.mark_point(filled=True)
|
35 |
-
.encode(
|
36 |
-
x=alt.X("x", axis=None),
|
37 |
-
y=alt.Y("y", axis=None),
|
38 |
-
color=alt.Color("idx", legend=None, scale=alt.Scale()),
|
39 |
-
size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
|
40 |
-
))
|
|
|
1 |
+
from flask import Flask, request, jsonify, send_from_directory
|
|
|
2 |
import pandas as pd
|
3 |
+
import torch
|
4 |
+
from transformers import BertTokenizer, BertForSequenceClassification
|
5 |
+
from wordcloud import WordCloud
|
6 |
+
import uuid
|
7 |
+
import io
|
8 |
+
import base64
|
9 |
+
import os
|
10 |
+
from PIL import Image
|
11 |
+
|
12 |
+
app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = 'uploads'  # folder where images are saved
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)


@app.route('/uploads/<filename>')
def uploaded_file(filename):
    """Serve a file from the upload folder by name.

    Args:
        filename: Name of the file inside the upload folder, taken from the
            URL path.

    Returns:
        The Flask response produced by ``send_from_directory``.
    """
    # send_from_directory resolves a *relative* directory against the Flask
    # application root, whereas os.makedirs above (and the word-cloud writer
    # in analyze) resolve 'uploads' against the current working directory.
    # Passing an absolute path makes both sides point at the same folder.
    upload_dir = os.path.abspath(app.config['UPLOAD_FOLDER'])
    return send_from_directory(upload_dir, filename)
|
19 |
+
|
20 |
+
# Load model and tokenizer at import time.
# NOTE(review): Streamlit re-executes the whole script on each interaction, so
# this section presumably runs more than once per process -- confirm and
# consider caching the loaded objects.
tokenizer = BertTokenizer.from_pretrained("bert-base-multilingual-cased")

# Prefer the model directory that sits next to this script so loading does not
# depend on the directory the app is launched from; fall back to the original
# working-directory-relative path when that directory does not exist.
try:
    _script_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:  # __file__ is undefined in some interactive contexts
    _script_dir = os.getcwd()
model_path = os.path.join(_script_dir, "emotion_final_model")
if not os.path.isdir(model_path):
    model_path = "./emotion_final_model"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = BertForSequenceClassification.from_pretrained(model_path).to(device)
model.eval()  # switch to inference mode

# Label Mapping: class index predicted by the model -> sentiment name.
# NOTE(review): the model config only names LABEL_0..LABEL_2; this ordering is
# assumed to match the training labels -- TODO confirm.
label_mapping = {0: "negative", 1: "neutral", 2: "positive"}
|
29 |
+
|
30 |
+
@app.route('/predict', methods=['POST'])
def predict():
    """Classify the sentiment of a single piece of text.

    Expects a JSON body of the form ``{"text": "..."}``.

    Returns:
        ``{"sentiment": <label>}`` where the label comes from
        ``label_mapping``, or ``{"error": "No text provided"}`` with HTTP 400
        when the body is missing, is not a JSON object, or carries no
        non-empty string under ``text``.
    """
    # silent=True yields None for a missing/invalid JSON body instead of
    # raising, so malformed requests get the 400 below rather than a 500.
    data = request.get_json(silent=True)
    text = data.get('text') if isinstance(data, dict) else None

    # Only a non-empty string is a valid single input for the tokenizer call
    # below (.item() further down requires exactly one prediction).
    if not isinstance(text, str) or not text:
        return jsonify({"error": "No text provided"}), 400

    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    inputs = {key: value.to(device) for key, value in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)

    predicted_class_idx = torch.argmax(outputs.logits, dim=-1).item()
    sentiment = label_mapping[predicted_class_idx]

    return jsonify({"sentiment": sentiment})
|
49 |
+
|
50 |
+
# Aspect name -> keywords (English and Urdu) whose presence in a review marks
# that aspect as mentioned.
aspect_keywords = {
    "Quality": ["quality", "material", "durable", "performance", "sturdy", "broken", "defective", "معیار", "ٹوٹا ہوا", "خراب"],
    "Price": ["price", "cheap", "expensive", "value", "cost", "قیمت", "مہنگا", "سستا", "قیمت زیادہ"],
    "Delivery": ["delivery", "shipping", "arrived", "late", "courier", "ترسیل", "شپنگ", "تاخیر", "دیر سے پہنچا"],
    "Usability": ["easy to use", "setup", "installation", "instructions", "user-friendly", "آسان", "استعمال میں آسان", "سیٹ اپ", "تنصیب"],
    "Design": ["design", "style", "appearance", "color", "looks", "ڈیزائن", "خوبصورتی", "رنگ", "ساخت"],
    "Warranty/Support": ["warranty", "support", "return", "replacement", "service center", "وارنٹی", "واپسی", "تبادلہ", "سروس سینٹر"]
}


def detect_aspects(text):
    """Return the aspects whose keywords occur in ``text``.

    Matching is a case-insensitive substring test, so a keyword also matches
    inside a longer word (e.g. "late" inside "chocolate").

    Args:
        text: The review text. ``None`` yields no aspects; any other
            non-string value is converted with ``str()`` first.

    Returns:
        A list of aspect names, in the order they appear in
        ``aspect_keywords``.
    """
    if text is None:
        return []
    # Coerce so numeric or other non-string cells do not raise on .lower().
    text_lower = str(text).lower()
    return [
        aspect
        for aspect, keywords in aspect_keywords.items()
        if any(keyword in text_lower for keyword in keywords)
    ]
|
66 |
+
|
67 |
+
def _save_wordcloud(all_text):
    """Render ``all_text`` as a word-cloud PNG in the upload folder.

    Returns:
        The path of the written PNG, or ``None`` when there is no text to
        render or the word-cloud library finds no words in it.
    """
    if not all_text.strip():
        return None

    # Use the Urdu-capable font when it is present; otherwise let the library
    # fall back to its default font instead of failing the whole request.
    font_path = 'urdu_font.ttf' if os.path.exists('urdu_font.ttf') else None
    try:
        wordcloud = WordCloud(
            width=800, height=400, background_color='white', font_path=font_path
        ).generate(all_text)
    except ValueError:
        # generate() raises ValueError when no usable words remain.
        return None

    upload_dir = app.config['UPLOAD_FOLDER']
    os.makedirs(upload_dir, exist_ok=True)
    wordcloud_path = os.path.join(upload_dir, f"wordcloud{uuid.uuid4()}.png")
    wordcloud.to_file(wordcloud_path)
    return wordcloud_path


@app.route("/analyze", methods=["POST"])
def analyze():
    """Classify every review in an uploaded CSV and summarise the results.

    Expects a multipart upload under the form field ``file`` holding a CSV
    with a ``Review`` column. Empty cells in that column are skipped.

    Returns:
        A JSON object with ``total_positive``, ``total_negative``,
        ``total_neutral``, ``aspect_summary`` (per-aspect sentiment counts)
        and ``wordcloud_image_path`` (``None`` when no word cloud could be
        produced). Responds with HTTP 400 and an ``error`` message when no
        file is uploaded, the CSV cannot be parsed, or the ``Review`` column
        is missing.
    """
    if 'file' not in request.files:
        return jsonify({"error": "No file uploaded"}), 400
    file = request.files['file']
    app.logger.info("Analyzing uploaded file %s", file.filename)

    try:
        df = pd.read_csv(file)
    except ValueError as exc:
        # pandas' parser/empty-data errors and UnicodeDecodeError all derive
        # from ValueError.
        return jsonify({"error": f"Could not read CSV: {exc}"}), 400
    if 'Review' not in df.columns:
        return jsonify({"error": "CSV must contain a 'Review' column"}), 400

    totals = {"positive": 0, "negative": 0, "neutral": 0}
    # Aspect summary
    aspect_summary = {
        aspect: {"positive": 0, "negative": 0, "neutral": 0, "total": 0}
        for aspect in aspect_keywords
    }
    review_texts = []

    for raw_review in df['Review'].dropna():
        text = str(raw_review)  # numeric cells would otherwise break the tokenizer
        inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
        inputs = {k: v.to(device) for k, v in inputs.items()}

        with torch.no_grad():
            outputs = model(**inputs)

        predicted_class_idx = torch.argmax(outputs.logits, dim=-1).item()
        sentiment = label_mapping[predicted_class_idx]

        totals[sentiment] += 1
        review_texts.append(text)

        for aspect in detect_aspects(text):
            aspect_summary[aspect][sentiment] += 1
            aspect_summary[aspect]["total"] += 1

    # Generate WordCloud from all reviews and save it in the uploads folder.
    # NOTE(review): generated PNGs are never deleted here.
    wordcloud_path = _save_wordcloud(" ".join(review_texts))

    return jsonify({
        "total_positive": totals["positive"],
        "total_negative": totals["negative"],
        "total_neutral": totals["neutral"],
        "aspect_summary": aspect_summary,
        "wordcloud_image_path": wordcloud_path,
    })
|
135 |
+
|
136 |
+
import threading


def run_flask():
    """Run the Flask API server (blocking call) on port 5000, all interfaces."""
    # NOTE(review): the Streamlit client in this file only calls 127.0.0.1;
    # binding 0.0.0.0 also exposes the API to other hosts -- confirm intended.
    app.run(host="0.0.0.0", port=5000)


# Streamlit re-executes this script on every widget interaction. Starting a new
# server thread each time would fail to bind port 5000 again, so start the API
# only if a thread with this name is not already running in the process.
_FLASK_THREAD_NAME = "flask-api"
if not any(t.name == _FLASK_THREAD_NAME for t in threading.enumerate()):
    threading.Thread(target=run_flask, name=_FLASK_THREAD_NAME).start()
|
141 |
+
|
142 |
import streamlit as st
|
143 |
+
import pandas as pd
|
144 |
+
import plotly.express as px
|
145 |
+
from io import BytesIO, StringIO
|
146 |
+
from PIL import Image
|
147 |
+
import random
|
148 |
+
import requests
|
149 |
+
import os
|
150 |
+
import uuid
|
151 |
+
import tempfile
|
152 |
+
|
153 |
+
API_URL = 'http://127.0.0.1:5000/analyze'
|
154 |
+
|
155 |
+
# -------------------
|
156 |
+
# PAGE CONFIG & THEME
|
157 |
+
# -------------------
|
158 |
+
st.set_page_config(
|
159 |
+
page_title="Multilingual Sentiment Analyzer",
|
160 |
+
layout="wide"
|
161 |
+
)
|
162 |
+
|
163 |
+
st.markdown("""
|
164 |
+
<style>
|
165 |
+
/* Light theme override */
|
166 |
+
html, body, .stApp {
|
167 |
+
background-color: #ffffff !important;
|
168 |
+
color: #000000 !important;
|
169 |
+
}
|
170 |
+
|
171 |
+
h1, h2, h3, h4, h5, h6, p, div, span, label, section, .markdown-text-container {
|
172 |
+
color: #000000 !important;
|
173 |
+
}
|
174 |
+
|
175 |
+
.stFileUploader > div, .stFileUploader div div {
|
176 |
+
background-color: #f9f9f9 !important;
|
177 |
+
border: 1px solid #ccc !important;
|
178 |
+
color: #000000 !important;
|
179 |
+
}
|
180 |
+
</style>
|
181 |
+
""", unsafe_allow_html=True)
|
182 |
+
|
183 |
+
st.markdown("""
|
184 |
+
<div style='text-align: center; padding-top: 10px;'>
|
185 |
+
<h1 style='font-size: 40px;'>🌍 Multilingual Sentiment Analysis Dashboard</h1>
|
186 |
+
<p style='font-size: 18px; color: #ccc; max-width: 720px; margin: auto;'>
|
187 |
+
Upload a CSV to explore sentiment Report. With sentiment analysis, you can catch early signals, reduce risk, and validate market fit — even across global audiences.
|
188 |
+
</p>
|
189 |
+
</div>
|
190 |
+
""", unsafe_allow_html=True)
|
191 |
+
|
192 |
+
# -------------------
|
193 |
+
# DUMMY DATA FUNCTION
|
194 |
+
# -------------------
|
195 |
+
def load_dummy_data():
    """Return the built-in demo data: a one-column ``Review`` DataFrame.

    The frame holds eight sample reviews written in several languages.
    """
    sample_reviews = [
        "La livraison était très rapide et le service excellent.",
        "The product quality was terrible, I want a refund.",
        "Servicio al cliente fue amable pero no resolvieron mi problema.",
        "Das Produkt kam beschädigt an und der Support war unhöflich.",
        "Great value for the price, I'm very happy!",
        "Muy mal embalaje, pero el envío fue rápido.",
        "客服很好,但产品描述不准确。",
        "Perfect fit, just as described. Will buy again!",
    ]
    demo_frame = pd.DataFrame({"Review": sample_reviews})
    return demo_frame
|
208 |
+
|
209 |
+
# -------------------
|
210 |
+
# MAIN UPLOAD BLOCK (VISIBLE)
|
211 |
+
# -------------------
|
212 |
+
with st.expander("📁 Upload Your CSV File", expanded=True):
|
213 |
+
uploaded_file = st.file_uploader("Choose a CSV file with reviews", type=["csv"])
|
214 |
+
# Analysis button moved here, right after file upload
|
215 |
+
run_analysis = st.button("🚀 Run Analysis", type="primary")
|
216 |
+
|
217 |
+
# Load Data: Uploaded or Dummy
|
218 |
+
if uploaded_file:
|
219 |
+
try:
|
220 |
+
# Read uploaded CSV file
|
221 |
+
df = pd.read_csv(uploaded_file)
|
222 |
+
if df.empty:
|
223 |
+
st.error("The uploaded CSV file is empty.")
|
224 |
+
df = load_dummy_data()
|
225 |
+
else:
|
226 |
+
st.success("✅ File uploaded successfully!")
|
227 |
+
except Exception as e:
|
228 |
+
st.error(f"Error reading CSV: {e}")
|
229 |
+
df = load_dummy_data()
|
230 |
+
else:
|
231 |
+
st.info("Using built-in demo data. Upload a CSV to use your own.")
|
232 |
+
df = load_dummy_data()
|
233 |
+
|
234 |
+
# Preview data
|
235 |
+
st.write("✅ App is running! Here's a sample:")
|
236 |
+
st.dataframe(df.head())
|
237 |
+
|
238 |
+
# Column selection
|
239 |
+
# text_column = st.selectbox("📝 Select the column with review text:", df.columns)
|
240 |
+
# enable_aspect = st.checkbox("🔍 Include Aspect Report (Optional)", value=True)
|
241 |
+
text_column = 'Review'
|
242 |
+
enable_aspect = True
|
243 |
+
|
244 |
+
# -------------------
|
245 |
+
# SENTIMENT METRICS
|
246 |
+
# -------------------
|
247 |
+
st.markdown("---")
|
248 |
+
st.markdown("### 🔎 Sentiment Analysis Results")
|
249 |
+
|
250 |
+
# MOCK SENTIMENT PREDICTION
|
251 |
+
def fake_sentiment_predict(text):
    """Return a random ``(label, confidence)`` pair as a mock prediction.

    ``text`` is not inspected. The label is one of "Positive", "Negative" or
    "Neutral"; the confidence is drawn between 0.65 and 0.99 and rounded to
    two decimals.
    """
    label = random.choice(["Positive", "Negative", "Neutral"])
    confidence = round(random.uniform(0.65, 0.99), 2)
    return label, confidence
|
253 |
+
|
254 |
+
# Store the analyzed dataframe in session state
|
255 |
+
if 'analyzed_df' not in st.session_state:
|
256 |
+
st.session_state.analyzed_df = df.copy()
|
257 |
+
|
258 |
+
# Initialize variables
|
259 |
+
positive, negative, neutral, total = 0, 0, 0, 0
|
260 |
+
|
261 |
+
# Process the data when the Run Analysis button is clicked
|
262 |
+
if run_analysis:
|
263 |
+
if not enable_aspect:
|
264 |
+
# Use fake predictions if not calling the API
|
265 |
+
fake_results = [fake_sentiment_predict(text) for text in df[text_column]]
|
266 |
+
sentiments, confidences = zip(*fake_results)
|
267 |
+
|
268 |
+
# Update the analyzed dataframe
|
269 |
+
st.session_state.analyzed_df = df.copy()
|
270 |
+
st.session_state.analyzed_df["Sentiment"] = sentiments
|
271 |
+
st.session_state.analyzed_df["Confidence"] = confidences
|
272 |
+
|
273 |
+
# Count sentiment
|
274 |
+
sentiment_counts = pd.Series(sentiments).value_counts()
|
275 |
+
positive = sentiment_counts.get("Positive", 0)
|
276 |
+
negative = sentiment_counts.get("Negative", 0)
|
277 |
+
neutral = sentiment_counts.get("Neutral", 0)
|
278 |
+
total = positive + negative + neutral
|
279 |
+
else:
|
280 |
+
# API Call will be made - handled below
|
281 |
+
pass
|
282 |
+
|
283 |
+
def percent(part):
    """Format ``part`` as a whole-number percentage of the module-level ``total``.

    Returns "0%" when ``total`` is zero (or otherwise falsy).
    """
    if not total:
        return "0%"
    share = round((part / total) * 100)
    return f"{share}%"
|
285 |
+
|
286 |
+
# Layout: Cards + Chart
|
287 |
+
card_col, chart_col = st.columns([1.2, 2])
|
288 |
+
|
289 |
+
with card_col:
|
290 |
+
st.markdown("""
|
291 |
+
<style>
|
292 |
+
.card-container {
|
293 |
+
max-width: 10px;
|
294 |
+
margin: 0 auto;
|
295 |
+
}
|
296 |
+
.card {
|
297 |
+
padding: 10px;
|
298 |
+
border-radius: 12px;
|
299 |
+
margin-bottom: 10px;
|
300 |
+
font-size: 16px;
|
301 |
+
font-weight: 500;
|
302 |
+
line-height: 1.5;
|
303 |
+
background-color: var(--secondary-background-color);
|
304 |
+
border: 1px solid rgba(255,255,255,0.15);
|
305 |
+
color: white;
|
306 |
+
text-align: center;
|
307 |
+
}
|
308 |
+
.card strong {
|
309 |
+
font-size: 20px;
|
310 |
+
display: block;
|
311 |
+
margin-top: 5px;
|
312 |
+
}
|
313 |
+
</style>
|
314 |
+
<div class="card-container">
|
315 |
+
""", unsafe_allow_html=True)
|
316 |
+
|
317 |
+
if total > 0: # Only display chart if we have data
|
318 |
+
with chart_col:
|
319 |
+
fig = px.pie(
|
320 |
+
names=["Positive", "Negative", "Neutral"],
|
321 |
+
values=[positive, negative, neutral],
|
322 |
+
color_discrete_map={
|
323 |
+
"Positive": "#66bb6a",
|
324 |
+
"Negative": "#ef5350",
|
325 |
+
"Neutral": "#42a5f5"
|
326 |
+
}
|
327 |
+
)
|
328 |
+
fig.update_traces(
|
329 |
+
textinfo='percent+label',
|
330 |
+
hoverinfo='label+percent+value',
|
331 |
+
pull=[0.03, 0.03, 0.03]
|
332 |
+
)
|
333 |
+
fig.update_layout(
|
334 |
+
margin=dict(t=20, b=20, l=10, r=10),
|
335 |
+
paper_bgcolor="rgba(0,0,0,0)",
|
336 |
+
plot_bgcolor="rgba(0,0,0,0)",
|
337 |
+
font_color="white"
|
338 |
+
)
|
339 |
+
st.plotly_chart(fig, use_container_width=True)
|
340 |
+
|
341 |
+
# -------------------
|
342 |
+
# ASPECT REPORT (API Call)
|
343 |
+
# -------------------
|
344 |
+
if run_analysis and enable_aspect:
    st.subheader("📌 Aspect Sentiment Summary")

    # Colours shared by the charts below.
    sentiment_colors = {
        "Positive": "#66bb6a",
        "Negative": "#ef5350",
        "Neutral": "#42a5f5"
    }
    # The page CSS forces a white background, so chart text must be dark to
    # stay readable.
    chart_font_color = "#000000"
    chart_grid_color = "rgba(0,0,0,0.1)"

    with st.spinner("Fetching Aspect Report from API..."):
        try:
            # Serialise the reviews in memory: the API only needs the bytes,
            # so no temporary file has to be created and cleaned up on disk.
            unique_filename = f"temp_reviews_{uuid.uuid4()}.csv"
            csv_bytes = df.to_csv(index=False).encode("utf-8")
            files = {'file': (unique_filename, csv_bytes, 'text/csv')}

            # (connect, read) timeouts: fail fast if the API is not listening,
            # but allow a long read because every review is classified
            # server-side before the response is sent.
            response = requests.post(API_URL, files=files, timeout=(10, 600))

            if response.status_code == 200:
                response_json = response.json()

                # Store API response in session state
                st.session_state.api_response_json = response_json

                # Update metrics from API response
                positive = response_json.get("total_positive", 0)
                negative = response_json.get("total_negative", 0)
                neutral = response_json.get("total_neutral", 0)
                total = positive + negative + neutral

                # Calculate percentages for Excel report if not provided by API
                if "positive_percentage" not in response_json and total > 0:
                    response_json["positive_percentage"] = round((positive / total) * 100)
                    response_json["negative_percentage"] = round((negative / total) * 100)
                    response_json["neutral_percentage"] = round((neutral / total) * 100)

                # Add total reviews to response_json if not present
                if "total_reviews" not in response_json:
                    response_json["total_reviews"] = total

                # Update the metrics cards with new data
                with card_col:
                    st.markdown(f"""
<div class="card" style="border-color:#bfbfbf;">
📊 <strong>Total Reviews</strong>
{total}
</div>
<div class="card" style="border-color:#66bb6a;">
✅ <strong>{positive} Positive</strong>
{percent(positive)} of total
</div>
<div class="card" style="border-color:#ef5350;">
❗ <strong>{negative} Negative</strong>
{percent(negative)} of total
</div>
<div class="card" style="border-color:#42a5f5;">
😐 <strong>{neutral} Neutral</strong>
{percent(neutral)} of total
</div>
</div>
""", unsafe_allow_html=True)

                # Update the pie chart
                with chart_col:
                    fig = px.pie(
                        names=["Positive", "Negative", "Neutral"],
                        values=[positive, negative, neutral],
                        color_discrete_map=sentiment_colors
                    )
                    fig.update_traces(
                        textinfo='percent+label',
                        hoverinfo='label+percent+value',
                        pull=[0.03, 0.03, 0.03]
                    )
                    fig.update_layout(
                        margin=dict(t=20, b=20, l=10, r=10),
                        paper_bgcolor="rgba(0,0,0,0)",
                        plot_bgcolor="rgba(0,0,0,0)",
                        font_color=chart_font_color
                    )
                    st.plotly_chart(fig, use_container_width=True)

                # Update the analyzed dataframe with sentiment results from API
                if "review_details" in response_json:
                    # Per-review rows supplied by the API replace the frame.
                    st.session_state.analyzed_df = pd.DataFrame(response_json["review_details"])
                else:
                    # If review_details not provided, start from the input
                    # data and attach whatever per-review fields are present.
                    st.session_state.analyzed_df = df.copy()
                    if "sentiments" in response_json:
                        st.session_state.analyzed_df["Sentiment"] = response_json["sentiments"]
                    for key in ["confidences", "languages"]:
                        if key in response_json:
                            column_name = key.rstrip("s").capitalize()  # Convert "confidences" to "Confidence"
                            st.session_state.analyzed_df[column_name] = response_json[key]

                # Prepare aspect DataFrame; tolerate a response without an
                # aspect summary or with partially filled entries.
                aspect_rows = [
                    {
                        "Aspect": aspect,
                        "Positive": values.get("positive", 0),
                        "Negative": values.get("negative", 0),
                        "Neutral": values.get("neutral", 0),
                        "Total": values.get("total", 0)
                    }
                    for aspect, values in response_json.get("aspect_summary", {}).items()
                ]
                aspect_df = pd.DataFrame(aspect_rows)

                # Display aspect data if we have any
                if not aspect_df.empty and aspect_df["Total"].sum() > 0:

                    # Store aspect dataframe in session state
                    st.session_state.aspect_dataframe = aspect_df
                    st.dataframe(aspect_df)

                    # Prepare data for bar chart
                    melted = aspect_df.melt(
                        id_vars="Aspect",
                        value_vars=["Positive", "Negative", "Neutral"],
                        var_name="Sentiment",
                        value_name="Count"
                    )

                    col1, col2 = st.columns([4, 2])

                    with col1:
                        st.markdown("### 📊 Sentiment by Aspect")
                        bar_chart = px.bar(
                            melted,
                            x="Aspect",
                            y="Count",
                            color="Sentiment",
                            barmode="group",
                            title=None,
                            color_discrete_map=sentiment_colors
                        )
                        bar_chart.update_layout(
                            paper_bgcolor="rgba(0,0,0,0)",
                            plot_bgcolor="rgba(0,0,0,0)",
                            font_color=chart_font_color,
                            xaxis=dict(gridcolor=chart_grid_color),
                            yaxis=dict(gridcolor=chart_grid_color)
                        )
                        st.plotly_chart(bar_chart, use_container_width=True)

                    with col2:
                        st.markdown("### 🌀 Review Keywords")

                        # Try to display wordcloud from API
                        if "wordcloud_image_base64" in response_json:
                            import base64
                            st.markdown("<div style='padding-top:60px'></div>", unsafe_allow_html=True)
                            st.image(
                                BytesIO(base64.b64decode(response_json["wordcloud_image_base64"])),
                                caption="Keyword Cloud",
                                use_container_width=True
                            )
                        else:
                            try:
                                # Try local wordcloud file as fallback
                                wordcloud_path = response_json.get("wordcloud_image_path")
                                if wordcloud_path and os.path.exists(wordcloud_path):
                                    image = Image.open(wordcloud_path)
                                    st.markdown("<div style='padding-top:60px'></div>", unsafe_allow_html=True)
                                    st.image(image, caption="Keywords", use_container_width=True)
                                elif os.path.exists("wordcloud.jpg"):
                                    # Try default wordcloud
                                    image = Image.open("wordcloud.jpg")
                                    st.markdown("<div style='padding-top:60px'></div>", unsafe_allow_html=True)
                                    st.image(image, caption="Keywords", use_container_width=True)
                            except Exception as e:
                                st.warning(f"⚠ Word cloud image not found: {e}")
                else:
                    st.info("No aspects detected in the reviews.")
            else:
                st.error(f"API Error: {response.status_code} - {response.text}")
        except Exception as e:
            # Top-level boundary for the API round trip: report the failure in
            # the page (with a traceback) instead of crashing the script.
            st.error(f"API call failed: {e}")
            import traceback
            st.code(traceback.format_exc(), language="python")
|
556 |
+
|
557 |
+
|
558 |
+
# -------------------
|
559 |
+
# DOWNLOAD BUTTON
|
560 |
+
# -------------------
|
561 |
+
if run_analysis or total > 0:
    st.subheader("📥 Download Analyzed File")

    def generate_excel_report(df, aspect_data=None, response_json=None):
        """Build an Excel workbook of the analysis results and return its bytes.

        Args:
            df: DataFrame written to the ``Sentiment_Report`` sheet.
            aspect_data: Optional DataFrame written to ``Aspect_Analysis``
                when it is not empty.
            response_json: Optional API response dict. When truthy, a
                ``Summary`` sheet is written from its totals and percentages,
                plus a ``Review_Details`` sheet if it has ``review_details``.

        Returns:
            The ``.xlsx`` file content as ``bytes``.

        Raises:
            ImportError: If the ``xlsxwriter`` engine is not installed.
        """
        output = BytesIO()
        # Sheet name -> column labels, recorded as each sheet is written so
        # the header-formatting pass below needs no per-sheet special cases.
        sheet_columns = {}

        with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
            # Sheet 1: Main sentiment results
            df.to_excel(writer, index=False, sheet_name='Sentiment_Report')
            sheet_columns['Sentiment_Report'] = list(df.columns)

            # Sheet 2: Aspect analysis (if available)
            if aspect_data is not None and not aspect_data.empty:
                aspect_data.to_excel(writer, index=False, sheet_name='Aspect_Analysis')
                sheet_columns['Aspect_Analysis'] = list(aspect_data.columns)

            # Sheet 3: Summary stats (if API response available)
            if response_json:
                summary_data = {
                    'Metric': ['Total Reviews', 'Positive', 'Negative', 'Neutral'],
                    'Count': [
                        response_json.get('total_reviews', 0),
                        response_json.get('total_positive', 0),
                        response_json.get('total_negative', 0),
                        response_json.get('total_neutral', 0)
                    ],
                    'Percentage': [
                        '100%',
                        f"{response_json.get('positive_percentage', 0)}%",
                        f"{response_json.get('negative_percentage', 0)}%",
                        f"{response_json.get('neutral_percentage', 0)}%"
                    ]
                }
                summary_df = pd.DataFrame(summary_data)
                summary_df.to_excel(writer, index=False, sheet_name='Summary')
                sheet_columns['Summary'] = list(summary_df.columns)

                # Add per-review details from the API response when present
                if 'review_details' in response_json:
                    details_df = pd.DataFrame(response_json['review_details'])
                    details_df.to_excel(writer, index=False, sheet_name='Review_Details')
                    sheet_columns['Review_Details'] = list(details_df.columns)

            header_format = writer.book.add_format({
                'bold': True,
                'text_wrap': True,
                'valign': 'top',
                'border': 1
            })

            for sheet_name, columns in sheet_columns.items():
                worksheet = writer.sheets[sheet_name]

                # Re-write the header row with formatting
                for col_num, value in enumerate(columns):
                    worksheet.write(0, col_num, value, header_format)

                # Auto-adjust columns' width (supported in newer versions)
                try:
                    worksheet.autofit()
                except AttributeError:
                    # Fallback for older xlsxwriter versions: size each
                    # column from its header text.
                    for col_num, value in enumerate(columns):
                        worksheet.set_column(col_num, col_num, max(10, len(str(value)) + 2))

        return output.getvalue()

    # Make sure the session keys read below exist
    if 'api_response_json' not in st.session_state:
        st.session_state.api_response_json = None
    if 'aspect_dataframe' not in st.session_state:
        st.session_state.aspect_dataframe = None

    # Update these values when an API response was received during this run
    if run_analysis and enable_aspect and 'response_json' in locals():
        st.session_state.api_response_json = response_json
        if 'aspect_df' in locals() and not aspect_df.empty:
            st.session_state.aspect_dataframe = aspect_df

    try:
        report_bytes = generate_excel_report(
            st.session_state.analyzed_df,  # Use the analyzed dataframe instead of original df
            st.session_state.aspect_dataframe,
            st.session_state.api_response_json
        )
    except ImportError as exc:
        # The Excel engine is an optional dependency; report its absence in
        # the page instead of aborting the whole script run.
        st.error(f"Excel export is unavailable: {exc}. Install the 'xlsxwriter' package to enable downloads.")
    else:
        st.download_button(
            label="📥 Download Results as Excel",
            data=report_bytes,
            file_name="sentiment_analysis_report.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
        )
|
664 |
|
665 |
+
# Add a footer with dark theme
|
666 |
+
st.markdown("""
|
667 |
+
<div style="text-align: center; margin-top: 50px; padding: 20px; color: #888; font-size: 14px;">
|
668 |
+
<p>Multilingual Sentiment Analysis Dashboard | Made with Streamlit</p>
|
669 |
+
</div>
|
670 |
+
""", unsafe_allow_html=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|