Spaces:
Sleeping
Sleeping
import base64
import io
import os

import matplotlib

matplotlib.use('Agg')  # select the non-interactive backend before pyplot is imported

import matplotlib.pyplot as plt
import pandas as pd
import torch
from datasets import load_dataset
from flask import Flask, render_template, request, redirect, url_for
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
from wordcloud import WordCloud
# Flask application object for this module.
app = Flask(__name__)

# Module-level state shared by the functions below. All three names start as
# None and are assigned by load_data_and_model().
product_data = tokenizer = model = None
def load_data_and_model(
    dataset_name="LoganKells/amazon_product_reviews_video_games",
    model_name="distilbert-base-uncased-finetuned-sst-2-english",
):
    """Load the review dataset and the sentiment model into module globals.

    Args:
        dataset_name: Dataset identifier passed to ``load_dataset``. Its
            ``"train"`` split is converted to a pandas DataFrame.
        model_name: Checkpoint identifier used for both the tokenizer and
            the sequence-classification model.

    Side effects:
        Assigns the module-level ``product_data``, ``tokenizer`` and
        ``model``.
    """
    global product_data, tokenizer, model

    # Load everything into locals first so that a failure part-way through
    # (e.g. a download error) does not leave the globals half-initialised.
    dataset = load_dataset(dataset_name)
    loaded_data = dataset["train"].to_pandas()
    loaded_tokenizer = DistilBertTokenizer.from_pretrained(model_name)
    loaded_model = DistilBertForSequenceClassification.from_pretrained(model_name)

    product_data = loaded_data
    tokenizer = loaded_tokenizer
    model = loaded_model
def generate_word_cloud(text):
    """Render a word cloud for *text* and return it as a base64 PNG string.

    Args:
        text: Text to visualise. ``None``, empty and whitespace-only values
            are all treated as "nothing to draw".

    Returns:
        The PNG image encoded as a base64 UTF-8 string, or ``None`` when
        there is no usable text.
    """
    if not text or not text.strip():
        return None

    try:
        wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text)
    except ValueError:
        # WordCloud raises ValueError when no words remain after its own
        # tokenisation/stop-word filtering; treat that like empty input.
        return None

    fig = plt.figure(figsize=(8, 4))
    try:
        # Draw on this figure explicitly instead of relying on pyplot's
        # "current figure" global state.
        ax = fig.add_subplot(111)
        ax.imshow(wordcloud, interpolation='bilinear')
        ax.axis('off')
        img_buffer = io.BytesIO()
        fig.savefig(img_buffer, format='png')
    finally:
        # Always release the figure, even if rendering or saving fails.
        plt.close(fig)

    return base64.b64encode(img_buffer.getvalue()).decode('utf-8')
def _classify_reviews(reviews):
    """Split *reviews* into ``(positive, negative)`` lists using the loaded model."""
    positive, negative = [], []
    for review in reviews:
        inputs = tokenizer(review, return_tensors="pt", truncation=True, padding=True)
        with torch.no_grad():
            logits = model(**inputs).logits
        label = model.config.id2label[logits.argmax().item()]
        # Every label other than 'POSITIVE' is counted as negative.
        if label == 'POSITIVE':
            positive.append(review)
        else:
            negative.append(review)
    return positive, negative


def _analyze_product(product_code):
    """Validate *product_code* and build the ``(result, error)`` pair for it."""
    # Validate product code (10 digits). ''.isdigit() is False, so an empty
    # code is rejected by the same check.
    if not product_code.isdigit() or len(product_code) != 10:
        return None, "Please enter a 10 digit number product code."

    # NOTE(review): assumes the 'asin' column holds strings - confirm against
    # the dataset schema.
    matching = product_data.loc[product_data['asin'] == product_code, 'reviewText']
    # Drop missing review texts: they cannot be tokenised or concatenated.
    product_reviews = matching.dropna().astype(str).tolist()
    if not product_reviews:
        return None, "Product code not found."

    total_reviews = len(product_reviews)
    positive_reviews, negative_reviews = _classify_reviews(product_reviews)

    result = {
        'product_code': product_code,
        'total_reviews': total_reviews,
        'positive_percentage': (len(positive_reviews) / total_reviews) * 100,
        'negative_percentage': (len(negative_reviews) / total_reviews) * 100,
        # Join once instead of growing a string inside the loop.
        'positive_wordcloud': generate_word_cloud(" ".join(positive_reviews)),
        'negative_wordcloud': generate_word_cloud(" ".join(negative_reviews)),
    }
    return result, None


# NOTE(review): no route registration was visible for this view; without one
# the handler is unreachable and url_for('index') cannot resolve. The path
# '/' is assumed - confirm.
@app.route('/', methods=['GET', 'POST'])
def index():
    """Serve the main page and handle the sentiment-analysis form.

    GET renders the empty form. A POST containing ``refresh`` redirects back
    to this view. A POST containing ``generate`` validates ``product_code``,
    classifies every review of that product and renders the positive and
    negative percentages together with one word cloud per sentiment.

    Returns:
        The rendered ``index.html`` template (with ``result`` and ``error``)
        or a redirect response.
    """
    global product_data, tokenizer, model

    # Load data and model lazily on the first request.
    if product_data is None or tokenizer is None or model is None:
        load_data_and_model()

    result = None
    error = None

    if request.method == 'POST':
        if 'refresh' in request.form:
            return redirect(url_for('index'))
        if 'generate' in request.form:
            product_code = request.form.get('product_code', '').strip()
            result, error = _analyze_product(product_code)

    return render_template('index.html', result=result, error=error)
if __name__ == '__main__':
    # Debug mode enables the Werkzeug interactive debugger, which lets anyone
    # who can reach the server execute arbitrary code. Keep it off unless it
    # is requested explicitly, e.g. FLASK_DEBUG=1 for local development.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in {'1', 'true', 'yes', 'on'}
    app.run(debug=debug_enabled)