import os
import numpy as np
import pandas as pd
import streamlit as st
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification, AutoModelForCausalLM
from keybert import KeyBERT

# ─── DeepSeek Model Client ────────────────────────────────────────────────────
# Option 1: High-level helper pipeline for chat-like generation.
# Wrapped in st.cache_resource so the model is not reloaded on every Streamlit rerun.
@st.cache_resource
def load_generation_pipeline():
    return pipeline(
        "text-generation",
        model="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        trust_remote_code=True
    )

# Option 2: Direct model & tokenizer instantiation (alternative)
# tokenizer_ds = AutoTokenizer.from_pretrained(
#     "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
#     trust_remote_code=True
# )
# model_ds = AutoModelForCausalLM.from_pretrained(
#     "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
#     trust_remote_code=True
# )


@st.cache_resource
def load_sentiment_pipeline():
    model_name = "mayf/amazon_reviews_bert_ft"
    tok = AutoTokenizer.from_pretrained(model_name, token=True)
    mdl = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        token=True
    )
    return pipeline(
        "sentiment-analysis",
        model=mdl,
        tokenizer=tok,
        top_k=None  # return scores for all labels (replaces deprecated return_all_scores=True)
    )


@st.cache_resource
def load_keybert_model():
    return KeyBERT(model="all-MiniLM-L6-v2")


LABEL_MAP = {
    "LABEL_0": "Very Negative",
    "LABEL_1": "Negative",
    "LABEL_2": "Neutral",
    "LABEL_3": "Positive",
    "LABEL_4": "Very Positive"
}


def main():
    st.title("📊 Amazon Review Analyzer")
    review = st.text_area("Enter your review:")

    if not st.button("Analyze Review"):
        return
    if not review:
        st.warning("Please enter a review to analyze.")
        return

    # Progress bar, plus a separate placeholder for status text
    # (the object returned by st.progress has no .text() method)
    progress = st.progress(0)
    status = st.empty()

    # Load models
    status.text("Loading models...")
    sentiment_pipeline = load_sentiment_pipeline()
    kw_model = load_keybert_model()
    gen_pipeline = load_generation_pipeline()
    progress.progress(20)

    # Run sentiment analysis
    status.text("Analyzing sentiment...")
    raw_scores = sentiment_pipeline(review)[0]
    sentiment_results = {
        LABEL_MAP[item['label']]: float(item['score']) for item in raw_scores
    }
    progress.progress(40)

    # Extract keywords
    status.text("Extracting keywords...")
    keywords = kw_model.extract_keywords(
        review,
        keyphrase_ngram_range=(1, 2),
        stop_words="english",
        top_n=3
    )
    progress.progress(60)

    # Display scores and keywords side by side
    col1, col2 = st.columns(2)
    with col1:
        st.subheader("Sentiment Scores")
        st.json({k: round(v, 4) for k, v in sentiment_results.items()})
    with col2:
        st.subheader("Top 3 Keywords")
        for kw, score in keywords:
            st.write(f"• {kw} ({score:.4f})")

    # Bar chart
    status.text("Rendering chart...")
    df_scores = pd.DataFrame.from_dict(
        sentiment_results, orient='index', columns=['score']
    )
    df_scores.index.name = 'Sentiment'
    st.bar_chart(df_scores)
    progress.progress(80)

    # Highlight highest sentiment
    max_label, max_score = max(sentiment_results.items(), key=lambda x: x[1])
    st.markdown(f"**Highest Sentiment:** **{max_label}** ({max_score:.4f})")

    # GPT-Driven Analysis & Suggestions
    status.text("Generating insights...")
    prompt = f"""
You are an analytical Amazon feedback expert.
Review: "{review}"
Sentiment Scores: {sentiment_results}
Top Keywords: {[kw for kw, _ in keywords]}

Tasks:
1. Analysis: Write a concise paragraph (3 sentences) interpreting customer sentiment by combining the scores and keywords.
2. Recommendations: Three separate paragraphs with actionable suggestions (max 30 words each).
"""

    # Use the high-level pipeline for generation
    chat_input = [
        {"role": "system", "content": "You are a product-feedback analyst."},
        {"role": "user", "content": prompt}
    ]
    gen_output = gen_pipeline(chat_input, max_new_tokens=400)
    # With chat-style input the pipeline returns the full message list;
    # the assistant's reply is the last message.
    gpt_reply = gen_output[0]['generated_text'][-1]['content']

    # Alternative: direct model invocation
    # inputs = tokenizer_ds(prompt, return_tensors="pt")
    # outputs = model_ds.generate(**inputs, max_new_tokens=200)
    # gpt_reply = tokenizer_ds.decode(outputs[0], skip_special_tokens=True)

    st.markdown(gpt_reply)

    progress.progress(100)
    status.text("Done!")


if __name__ == "__main__":
    main()