1
File size: 4,983 Bytes
05ec195
9c2aa41
 
4d1f328
fe934dd
 
 
 
 
 
05ec195
bd2216b
2ecaff0
05ec195
 
 
9c2aa41
fe934dd
 
 
 
05ec195
 
9c2aa41
 
05ec195
6a2dbfc
eec20c9
05ec195
 
 
 
2ecaff0
 
 
1341687
 
2ecaff0
 
31fca7a
 
1341687
31fca7a
 
2ecaff0
 
fae49e7
 
 
 
 
 
 
 
533636b
05ec195
7204d99
9c2aa41
 
 
 
 
 
 
 
3832b1b
 
6268cef
3832b1b
05ec195
 
2ecaff0
3832b1b
05ec195
fe934dd
3832b1b
fae49e7
1341687
3832b1b
 
fe934dd
3832b1b
 
 
 
 
 
 
 
f24967f
fe934dd
6268cef
 
 
 
 
 
 
 
9c2aa41
fe934dd
3832b1b
fe934dd
 
 
 
 
fae49e7
f24967f
3832b1b
f24967f
fe934dd
1341687
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9c2aa41
fe934dd
3832b1b
 
 
2ecaff0
05ec195
fae49e7
1341687
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
import os
import numpy as np
import pandas as pd
import streamlit as st
from transformers import (
    pipeline,
    AutoTokenizer,
    AutoModelForSequenceClassification,
    AutoModelForSeq2SeqLM
)
from keybert import KeyBERT

# ─── Sentiment & Keyword Models ─────────────────────────────────────────────
@st.cache_resource
def load_sentiment_pipeline():
    """Build the sentiment-analysis pipeline for the fine-tuned review model.

    Both the tokenizer and the sequence-classification weights are loaded
    from the ``mayf/amazon_reviews_bert_ft`` checkpoint with
    ``use_auth_token=True``. The pipeline is created with
    ``return_all_scores=True`` so that scores for all labels are requested
    instead of only the best one. The loader is wrapped by
    ``st.cache_resource``.

    Returns:
        The object produced by ``transformers.pipeline`` for the
        ``"sentiment-analysis"`` task.
    """
    checkpoint = "mayf/amazon_reviews_bert_ft"
    hub_kwargs = {"use_auth_token": True}

    # Tokenizer first, then weights: same load order as before.
    tokenizer = AutoTokenizer.from_pretrained(checkpoint, **hub_kwargs)
    classifier = AutoModelForSequenceClassification.from_pretrained(
        checkpoint, **hub_kwargs
    )

    sentiment = pipeline(
        "sentiment-analysis",
        model=classifier,
        tokenizer=tokenizer,
        return_all_scores=True,
    )
    return sentiment

@st.cache_resource
def load_keybert_model():
    """Create the KeyBERT keyword extractor backed by ``all-MiniLM-L6-v2``.

    The loader is wrapped by ``st.cache_resource``.

    Returns:
        A ``KeyBERT`` instance constructed with ``model="all-MiniLM-L6-v2"``.
    """
    embedding_model_name = "all-MiniLM-L6-v2"
    extractor = KeyBERT(model=embedding_model_name)
    return extractor

# ─── FLAN-T5 Generation Pipeline ────────────────────────────────────────────
@st.cache_resource
def load_flant5_pipeline():
    """Build the text2text-generation pipeline on ``google/flan-t5-base``.

    The pipeline is configured with sampling enabled (``do_sample=True``,
    ``temperature=0.7``) and ``max_new_tokens=300``. The loader is wrapped
    by ``st.cache_resource``.

    Returns:
        The object produced by ``transformers.pipeline`` for the
        ``"text2text-generation"`` task.
    """
    checkpoint = "google/flan-t5-base"
    generation_kwargs = {
        "max_new_tokens": 300,
        "do_sample": True,
        "temperature": 0.7,
    }

    # Tokenizer first, then weights: same load order as before.
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    generator = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

    return pipeline(
        "text2text-generation",
        model=generator,
        tokenizer=tokenizer,
        **generation_kwargs,
    )

# Display names for the classifier's raw ``LABEL_<n>`` outputs, listed in
# index order: LABEL_0 is "Very Negative" and LABEL_4 is "Very Positive".
LABEL_MAP = {
    f"LABEL_{index}": display_name
    for index, display_name in enumerate(
        (
            "Very Negative",
            "Negative",
            "Neutral",
            "Positive",
            "Very Positive",
        )
    )
}


def _build_recommendation_prompt(review):
    """Return the FLAN-T5 prompt that asks for three recommendations for *review*.

    Line breaks are written as ``\\n`` escapes: a single-quoted Python string
    literal cannot contain a raw line break, so embedding real newlines in
    these literals is a syntax error.
    """
    return (
        "You are a senior product quality and customer experience specialist at an e-commerce food retailer.\n\n"
        f"Customer Review:\n\"{review}\"\n\n"
        "Please analyze this feedback and provide **three** distinct, actionable improvement recommendations designed to reduce customer pain points.\n"
        "For each recommendation, include:\n"
        "  1. **Recommendation Title**: a concise summary of the action.\n"
        "  2. the specific issue or frustration extracted from the review.\n"
        "  3. why this action addresses the pain point and how it will improve the customer experience.\n"
        "  4. a bullet-point list of 3–5 clear steps for operations or product teams to execute.\n"
        "  5. how to measure the impact.\n\n"
        "Write each recommendation in at least 5–7 sentences, grounding every detail in the customer's own words. "
        "Avoid generic advice—focus on specifics from the review.\n\n"
        "Recommendations:\n"
    )


def main():
    """Render the review analyzer page and run the analysis on demand.

    Nothing is computed until the "Analyze Review" button is pressed and the
    text area holds a non-blank review. The function then:

    1. loads the sentiment, keyword and generation models,
    2. scores the review and maps raw labels through ``LABEL_MAP``,
    3. extracts the top three keywords (1-2 word phrases),
    4. shows the scores as JSON and as a bar chart, plus the top label,
    5. generates recommendations with the generation pipeline when the top
       label is "Very Negative", "Negative" or "Neutral"; otherwise shows
       an informational note.

    Returns:
        None. All output is written to the Streamlit page.
    """
    st.title("📊 Amazon Review Analyzer")

    review = st.text_area("Enter your review:")
    if not st.button("Analyze Review"):
        return
    # A review made only of spaces/newlines carries nothing to analyze.
    if not review or not review.strip():
        st.warning("Please enter a review to analyze.")
        return

    progress = st.progress(0)

    # Load models
    progress.text("Loading models...")
    sentiment_pipeline = load_sentiment_pipeline()
    kw_model = load_keybert_model()
    generation_pipeline = load_flant5_pipeline()
    progress.progress(20)

    # Sentiment Analysis
    progress.text("Analyzing sentiment...")
    # truncation=True keeps an overly long review within the tokenizer's
    # maximum length instead of failing inside the classifier.
    raw_scores = sentiment_pipeline(review, truncation=True)[0]
    # Fall back to the raw label when the model emits one LABEL_MAP lacks,
    # rather than aborting the whole page with a KeyError.
    sentiment_results = {
        LABEL_MAP.get(item["label"], item["label"]): float(item["score"])
        for item in raw_scores
    }
    progress.progress(40)

    # Keyword Extraction
    progress.text("Extracting keywords...")
    keywords = kw_model.extract_keywords(
        review,
        keyphrase_ngram_range=(1, 2),
        stop_words="english",
        top_n=3
    )
    progress.progress(60)

    # Display Results
    col1, col2 = st.columns(2)
    with col1:
        st.subheader("Sentiment Scores")
        st.json({k: round(v, 4) for k, v in sentiment_results.items()})
    with col2:
        st.subheader("Top 3 Keywords")
        for kw, score in keywords:
            st.write(f"• {kw} ({score:.4f})")

    # Bar Chart
    progress.text("Rendering chart...")
    df_scores = pd.DataFrame.from_dict(
        sentiment_results,
        orient='index',
        columns=['score']
    )
    df_scores.index.name = 'Sentiment'
    st.bar_chart(df_scores)
    progress.progress(80)

    # Highlight Highest Sentiment
    max_label, max_score = max(sentiment_results.items(), key=lambda x: x[1])
    st.markdown(f"**Highest Sentiment:** **{max_label}** ({max_score:.4f})")

    # Generate Detailed Recommendations for select sentiments
    progress.text("Generating detailed recommendations...")
    if max_label in ("Very Negative", "Negative", "Neutral"):
        prompt = _build_recommendation_prompt(review)
        response = generation_pipeline(prompt)
        detailed = response[0]["generated_text"]
        st.markdown(detailed)
    else:
        st.info("Detailed recommendations are provided only for Neutral, Negative, or Very Negative reviews.")

    # Done
    progress.progress(100)
    progress.text("Done!")


# Call main() only when this file is run as the main module, not on import.
if __name__ == "__main__":
    main()