import pandas as pd
import streamlit as st
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from keybert import KeyBERT
# ─── DeepSeek Model Client ────────────────────────────────────────────────────
@st.cache_resource
def load_generation_pipeline():
    # High-level helper pipeline for text generation, cached so the model is
    # only instantiated once per session instead of on every Streamlit rerun.
    return pipeline(
        "text-generation",
        model="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        trust_remote_code=True
    )
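# Note: the 1.5B-parameter distilled model is downloaded from the Hugging Face
# Hub on first use and cached locally, so the first run can take a few minutes.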
@st.cache_resource
def load_sentiment_pipeline():
    model_name = "mayf/amazon_reviews_bert_ft"
    tok = AutoTokenizer.from_pretrained(model_name, use_auth_token=True)
    mdl = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        use_auth_token=True
    )
    return pipeline(
        "sentiment-analysis",
        model=mdl,
        tokenizer=tok,
        return_all_scores=True
    )
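# With return_all_scores=True the pipeline returns one list of label scores per
# input, e.g. [[{'label': 'LABEL_0', 'score': 0.02}, ..., {'label': 'LABEL_4',
# 'score': 0.91}]] (scores here are illustrative).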
@st.cache_resource
def load_keybert_model():
    return KeyBERT(model="all-MiniLM-L6-v2")
LABEL_MAP = {
    "LABEL_0": "Very Negative",
    "LABEL_1": "Negative",
    "LABEL_2": "Neutral",
    "LABEL_3": "Positive",
    "LABEL_4": "Very Positive"
}
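# Assumes the fine-tuned checkpoint emits five labels (LABEL_0..LABEL_4) ordered
# from most negative to most positive, mirroring 1-5 star Amazon ratings.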
def main():
    st.title("📊 Amazon Review Analyzer")
    review = st.text_area("Enter your review:")

    if not st.button("Analyze Review"):
        return
    if not review:
        st.warning("Please enter a review to analyze.")
        return
    progress = st.progress(0)
    status = st.empty()  # separate placeholder so status text doesn't overwrite the bar

    # Load models
    status.text("Loading models...")
    sentiment_pipeline = load_sentiment_pipeline()
    kw_model = load_keybert_model()
    gen_pipe = load_generation_pipeline()
    progress.progress(20)
    # Sentiment analysis
    status.text("Analyzing sentiment...")
    raw_scores = sentiment_pipeline(review)[0]
    sentiment_results = {
        LABEL_MAP[item['label']]: float(item['score']) for item in raw_scores
    }
    progress.progress(40)
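    # At this point sentiment_results maps readable labels to probabilities,
    # e.g. {"Very Positive": 0.91, "Positive": 0.05, ...} (illustrative values).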
    # Keyword extraction
    status.text("Extracting keywords...")
    keywords = kw_model.extract_keywords(
        review,
        keyphrase_ngram_range=(1, 2),
        stop_words="english",
        top_n=3
    )
    progress.progress(60)
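    # keywords is a list of (phrase, relevance) tuples such as
    # [("battery life", 0.62), ("fast shipping", 0.54)] (illustrative values).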
    # Display results
    col1, col2 = st.columns(2)
    with col1:
        st.subheader("Sentiment Scores")
        st.json({k: round(v, 4) for k, v in sentiment_results.items()})
    with col2:
        st.subheader("Top Keywords")
        for kw, score in keywords:
            st.write(f"• {kw} ({score:.4f})")
    # Bar chart
    status.text("Rendering chart...")
    df_scores = pd.DataFrame.from_dict(
        sentiment_results, orient='index', columns=['score']
    )
    df_scores.index.name = 'Sentiment'
    st.bar_chart(df_scores)
    progress.progress(80)
    # Highlight highest sentiment
    max_label, max_score = max(
        sentiment_results.items(), key=lambda x: x[1]
    )
    st.markdown(f"**Highest Sentiment:** **{max_label}** ({max_score:.4f})")
    # LLM-Driven Analysis & Suggestions
    status.text("Generating insights...")

    # Build the prompt
    prompt = f"""
You are an analytical Amazon feedback expert.
Review: \"{review}\"
Sentiment Scores: {sentiment_results}
Top Keywords: {[kw for kw, _ in keywords]}
Tasks:
1. Analysis: Write a concise paragraph (3 sentences) interpreting customer sentiment by combining the scores and keywords.
2. Recommendations: Three separate paragraphs with actionable suggestions (max 30 words each).
"""
    # Prepare chat messages
    chat_input = [
        {"role": "system", "content": "You are a product-feedback analyst."},
        {"role": "user", "content": prompt}
    ]
    # Flatten into a single text prompt
    flat_prompt = "\n".join(
        f"{msg['role'].upper()}: {msg['content']}" for msg in chat_input
    )
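    # Note: recent transformers releases can format chat messages with the model's
    # own template, e.g. gen_pipe.tokenizer.apply_chat_template(chat_input,
    # tokenize=False, add_generation_prompt=True); the manual flattening above is
    # a simple, version-agnostic fallback.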
    # Generate; return_full_text=False keeps the prompt out of the displayed reply
    gen_output = gen_pipe(flat_prompt, max_new_tokens=200, return_full_text=False)
    llm_reply = gen_output[0]['generated_text']
    st.markdown(llm_reply)

    progress.progress(100)
    status.text("Done!")
if __name__ == "__main__":
    main()
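# To run locally (assumed dependencies: streamlit, transformers, torch, keybert, pandas):
#   streamlit run app.py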