1
File size: 4,890 Bytes
59a3d03
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d35b1e
59a3d03
1d35b1e
 
 
 
 
 
 
 
 
d26f340
601d686
d26f340
 
 
 
 
59a3d03
 
 
 
 
 
 
fae49e7
 
 
533636b
05ec195
7204d99
9c2aa41
 
 
 
 
 
 
 
dafe1b4
fe934dd
dafe1b4
fae49e7
1341687
3832b1b
fe934dd
dafe1b4
3832b1b
 
 
 
 
 
f24967f
fe934dd
6268cef
 
 
 
 
 
 
 
9c2aa41
fe934dd
 
 
 
 
 
fae49e7
f24967f
 
fe934dd
1341687
 
dafe1b4
1341687
1d35b1e
 
dafe1b4
1d35b1e
d26f340
 
 
1341687
1d35b1e
d26f340
 
 
601d686
dafe1b4
1d35b1e
 
 
 
 
 
 
 
 
 
 
 
 
3832b1b
dafe1b4
 
2ecaff0
b7d5df4
7c97534
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import os
import numpy as np
import pandas as pd
import streamlit as st
from transformers import (
    pipeline,
    AutoTokenizer,
    AutoModelForSequenceClassification,
    AutoModelForSeq2SeqLM
)
from keybert import KeyBERT

# ─── Sentiment & Keyword Models ─────────────────────────────────────────────
@st.cache_resource
def load_sentiment_pipeline():
    """Build the sentiment-analysis pipeline for the fine-tuned review model.

    The tokenizer and classifier are loaded from the same checkpoint and
    wrapped in a transformers pipeline. The result is wrapped by
    ``st.cache_resource``.

    Returns:
        A "sentiment-analysis" pipeline created with
        ``return_all_scores=True``, so a score is reported for every label.
    """
    checkpoint = "mayf/amazon_reviews_bert_ft"
    # Both loaders receive the same authentication flag.
    auth_kwargs = {"use_auth_token": True}

    tokenizer = AutoTokenizer.from_pretrained(checkpoint, **auth_kwargs)
    classifier = AutoModelForSequenceClassification.from_pretrained(
        checkpoint, **auth_kwargs
    )

    sentiment = pipeline(
        "sentiment-analysis",
        model=classifier,
        tokenizer=tokenizer,
        return_all_scores=True,
    )
    return sentiment

@st.cache_resource
def load_keybert_model():
    """Create the KeyBERT keyword extractor using the all-MiniLM-L6-v2 model.

    Returns:
        A ``KeyBERT`` instance; the result is wrapped by ``st.cache_resource``.
    """
    embedding_model_name = "all-MiniLM-L6-v2"
    extractor = KeyBERT(model=embedding_model_name)
    return extractor

# ─── BlenderBot Response Components ─────────────────────────────────────────
@st.cache_resource
def load_response_components():
    """Load the BlenderBot tokenizer and seq2seq model used for replies.

    The two components are returned separately, rather than wrapped in a
    ``pipeline``, so the caller can tokenize the prompt with truncation
    before calling ``generate`` itself.

    Returns:
        tuple: ``(tokenizer, model)`` loaded from
        "facebook/blenderbot-400M-distill".
    """
    model_name = "facebook/blenderbot-400M-distill"
    tok = AutoTokenizer.from_pretrained(model_name, use_fast=True)
    mdl = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    return tok, mdl

# Display names for the classifier's raw output labels LABEL_0 .. LABEL_4,
# listed from most negative to most positive.
LABEL_MAP = {
    f"LABEL_{index}": display_name
    for index, display_name in enumerate(
        ("Very Negative", "Negative", "Neutral", "Positive", "Very Positive")
    )
}


def main():
    """Render the review analyzer page and handle one analysis request.

    Shows a title, a text area and an "Analyze Review" button. Once the
    button is pressed with a non-blank review, the function:

    1. scores the review with the sentiment pipeline and maps the raw labels
       through ``LABEL_MAP``;
    2. extracts the top three keywords with KeyBERT;
    3. displays the scores, keywords, a bar chart and the highest-scoring
       sentiment;
    4. builds a prompt that depends on whether the highest sentiment is
       positive, and writes the reply generated by BlenderBot.

    Returns:
        None. All output is written to the Streamlit page.
    """
    st.title("📊 Amazon Review Analyzer")

    review = st.text_area("Enter your review:")
    if not st.button("Analyze Review"):
        return
    # Reject whitespace-only input as well as empty input, so the models are
    # never run on text that carries no content.
    if not review or not review.strip():
        st.warning("Please enter a review to analyze.")
        return

    # ─── KEEP THIS BLOCK UNCHANGED ─────────────────────────────────────────
    # Sentiment Analysis
    sentiment_pipeline = load_sentiment_pipeline()
    raw_scores = sentiment_pipeline(review)[0]
    sentiment_results = {LABEL_MAP[item['label']]: float(item['score']) for item in raw_scores}

    # Keyword Extraction
    kw_model = load_keybert_model()
    keywords = kw_model.extract_keywords(
        review,
        keyphrase_ngram_range=(1, 2),
        stop_words="english",
        top_n=3
    )

    # Display Results
    col1, col2 = st.columns(2)
    with col1:
        st.subheader("Sentiment Scores")
        st.json({k: round(v, 4) for k, v in sentiment_results.items()})
    with col2:
        st.subheader("Top 3 Keywords")
        for kw, score in keywords:
            st.write(f"• {kw} ({score:.4f})")

    # Bar Chart
    df_scores = pd.DataFrame.from_dict(
        sentiment_results,
        orient='index',
        columns=['score']
    )
    df_scores.index.name = 'Sentiment'
    st.bar_chart(df_scores)

    # Highlight Highest Sentiment
    max_label, max_score = max(sentiment_results.items(), key=lambda x: x[1])
    st.markdown(f"**Highest Sentiment:** **{max_label}** ({max_score:.4f})")
    # ────────────────────────────────────────────────────────────────────

    # Generate appropriate reply using manual tokenization & generation
    tok, mdl = load_response_components()
    if max_label in ["Positive", "Very Positive"]:
        # Positive reviews get a short thank-you style prompt.
        prompt_text = (
            f"You are a friendly customer success representative. The customer said: \"{review}\". "
            "Write two sentences to express gratitude and highlight their positive experience."
        )
    else:
        # Every other sentiment gets a support prompt that lists the
        # extracted keywords as the issues to address.
        prompt_text = (
            f"You are a helpful customer support specialist. The customer said: \"{review}\". "
            f"Identified issues: {', '.join([kw for kw, _ in keywords])}. "
            "First, ask 1-2 clarifying questions to understand their situation. "
            "Then provide two concrete suggestions or next steps to address these issues."
        )
    # Tokenize with truncation to avoid out-of-range embeddings
    inputs = tok(
        prompt_text,
        return_tensors="pt",
        truncation=True,
        max_length=tok.model_max_length
    )
    outputs = mdl.generate(
        **inputs,
        max_new_tokens=150,
        do_sample=False
    )
    # Only the first generated sequence is decoded and shown.
    reply = tok.decode(outputs[0], skip_special_tokens=True).strip()

    st.subheader("Generated Reply")
    st.write(reply)


# Run the app only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()