Spaces:

mayf
/

1

Sleeping

File size: 4,036 Bytes

59a3d03
 
 
 
40d3817
59a3d03
8aeff3e
40d3817
443a3e8
ef53265
 
63ef49e
40d3817
8aeff3e
443a3e8
59a3d03
 
 
 
40d3817
 
 
 
59a3d03
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fae49e7
 
 
443a3e8
05ec195
7204d99
9c2aa41
 
 
 
 
 
 
 
8aeff3e
 
 
 
dafe1b4
8aeff3e
 
 
443a3e8
8aeff3e
fae49e7
443a3e8
 
 
8aeff3e
3832b1b
443a3e8
8aeff3e
3832b1b
 
 
 
 
 
8aeff3e
f24967f
443a3e8
6268cef
 
 
 
 
443a3e8
6268cef
 
9c2aa41
8aeff3e
 
443a3e8
 
 
fae49e7
f24967f
8aeff3e
f24967f
8aeff3e
443a3e8
 
 
1341687
3832b1b
8aeff3e
 
443a3e8
8aeff3e
 
443a3e8
8aeff3e
 
 
 
 
 
443a3e8
 
 
 
 
 
 
 
 
 
ef53265
40d3817
 
 
8aeff3e
 
b7d5df4
8aeff3e
63ef49e
9359b84
443a3e8

import os
import numpy as np
import pandas as pd
import streamlit as st
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification, AutoModelForCausalLM
from keybert import KeyBERT

# ─── DeepSeek Model Client ────────────────────────────────────────────────────
# High-level helper pipeline for text-generation
@st.cache_resource
def load_generation_pipeline():
    """Build the DeepSeek text-generation pipeline once and cache it.

    Streamlit re-executes the script on every user interaction; constructing
    the pipeline in a bare module-level statement would therefore reload the
    model weights on each rerun. Caching it as a resource matches how the
    sentiment and KeyBERT models in this file are loaded.

    Returns:
        The ``transformers`` text-generation pipeline for
        ``deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B``.
    """
    return pipeline(
        "text-generation",
        model="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        trust_remote_code=True,
    )


# Kept under its original module-level name because main() reads `pipe`.
pipe = load_generation_pipeline()

@st.cache_resource
def load_sentiment_pipeline():
    """Create the sentiment-analysis pipeline for the fine-tuned checkpoint.

    The tokenizer and the sequence-classification model are both fetched
    from ``mayf/amazon_reviews_bert_ft`` with Hub authentication enabled.
    The resulting pipeline is configured to return a score for every label.

    Returns:
        A ``transformers`` ``sentiment-analysis`` pipeline.
    """
    checkpoint = "mayf/amazon_reviews_bert_ft"
    # Same authentication setting for both downloads.
    hub_kwargs = {"use_auth_token": True}

    tokenizer = AutoTokenizer.from_pretrained(checkpoint, **hub_kwargs)
    classifier = AutoModelForSequenceClassification.from_pretrained(
        checkpoint, **hub_kwargs
    )

    pipeline_kwargs = {
        "model": classifier,
        "tokenizer": tokenizer,
        "return_all_scores": True,
    }
    return pipeline("sentiment-analysis", **pipeline_kwargs)

@st.cache_resource
def load_keybert_model():
    """Create the KeyBERT keyword extractor used by the app.

    Returns:
        A ``KeyBERT`` instance constructed with the ``all-MiniLM-L6-v2``
        model.
    """
    extractor = KeyBERT(model="all-MiniLM-L6-v2")
    return extractor

# Maps the classifier's raw label ids to the names shown in the UI,
# ordered from most negative to most positive.
LABEL_MAP = dict(
    LABEL_0="Very Negative",
    LABEL_1="Negative",
    LABEL_2="Neutral",
    LABEL_3="Positive",
    LABEL_4="Very Positive",
)


def main():
    """Render the review analyzer page and run the analysis on demand.

    Nothing is computed until the "Analyze Review" button is pressed with a
    non-blank review. The function then, in order:

    1. loads the sentiment pipeline and the KeyBERT model,
    2. scores the review and maps raw labels through ``LABEL_MAP``,
    3. extracts the top three 1-2 word keyphrases,
    4. shows the scores, keywords, a bar chart and the top sentiment,
    5. asks the module-level ``pipe`` text-generation model for an analysis
       and recommendations, and renders the generated text.

    Returns:
        None. All output is written to the Streamlit page.
    """
    st.title("📊 Amazon Review Analyzer")

    review = st.text_area("Enter your review:")
    if not st.button("Analyze Review"):
        return
    # Strip first so a whitespace-only submission is treated as empty rather
    # than being sent through three models.
    review = (review or "").strip()
    if not review:
        st.warning("Please enter a review to analyze.")
        return

    # Status text is passed via `text=` so the bar and the message are shown
    # together; calling `.text()` on the element would replace the bar.
    progress = st.progress(0, text="Loading models...")

    # Load models
    sentiment_pipeline = load_sentiment_pipeline()
    kw_model = load_keybert_model()

    # Sentiment analysis
    progress.progress(20, text="Analyzing sentiment...")
    # truncation=True keeps reviews longer than the model's maximum sequence
    # length from raising inside the classifier.
    raw_scores = sentiment_pipeline(review, truncation=True)[0]
    sentiment_results = {
        # Fall back to the raw label if the model emits one not in LABEL_MAP.
        LABEL_MAP.get(item["label"], item["label"]): float(item["score"])
        for item in raw_scores
    }

    # Keyword extraction
    progress.progress(40, text="Extracting keywords...")
    keywords = kw_model.extract_keywords(
        review,
        keyphrase_ngram_range=(1, 2),
        stop_words="english",
        top_n=3
    )

    # Display results
    progress.progress(60, text="Rendering chart...")
    col1, col2 = st.columns(2)
    with col1:
        st.subheader("Sentiment Scores")
        st.json({k: round(v, 4) for k, v in sentiment_results.items()})
    with col2:
        st.subheader("Top Keywords")
        for kw, score in keywords:
            st.write(f"• {kw} ({score:.4f})")

    # Bar chart
    df_scores = pd.DataFrame.from_dict(
        sentiment_results, orient='index', columns=['score']
    )
    df_scores.index.name = 'Sentiment'
    st.bar_chart(df_scores)

    # Highlight highest sentiment
    max_label, max_score = max(
        sentiment_results.items(), key=lambda x: x[1]
    )
    st.markdown(f"**Highest Sentiment:** **{max_label}** ({max_score:.4f})")

    # GPT-Driven Analysis & Suggestions
    progress.progress(80, text="Generating insights...")
    # Build the prompt
    prompt = f"""
You are an analytical amazon feedback expert.
Review: \"{review}\"
Sentiment Scores: {sentiment_results}
Top Keywords: {[kw for kw, _ in keywords]}
Tasks:
1. Analysis: Write a concise paragraph (3 sentences) interpreting customer sentiment by combining the scores and keywords.
2. Recommendations: Three separate paragraphs with actionable suggestions (max 30 words each).
"""
    # Prepare chat messages
    chat_input = [
        {"role": "system", "content": "You are a product-feedback analyst."},
        {"role": "user", "content": prompt}
    ]
    # Flatten into a single text prompt
    flat_prompt = "\n".join(
        f"{msg['role'].upper()}: {msg['content']}" for msg in chat_input
    )
    # Generate. return_full_text=False returns only the completion; by
    # default the pipeline prepends the prompt, which would echo the whole
    # instruction block back to the user.
    gen_output = pipe(flat_prompt, max_new_tokens=200, return_full_text=False)
    gpt_reply = gen_output[0]['generated_text']

    st.markdown(gpt_reply)
    progress.progress(100, text="Done!")

# Script entry point: build the page only when this file is run directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()