1
File size: 4,063 Bytes
05ec195
9c2aa41
 
4d1f328
05ec195
 
 
9c2aa41
05ec195
9c2aa41
 
90db10a
 
 
9c2aa41
bd2216b
05ec195
 
 
9c2aa41
 
05ec195
 
9c2aa41
 
05ec195
6a2dbfc
eec20c9
05ec195
 
 
 
fae49e7
 
 
 
 
 
 
 
533636b
05ec195
7204d99
9c2aa41
 
 
 
 
 
 
 
3832b1b
 
 
6268cef
3832b1b
05ec195
 
3832b1b
05ec195
6268cef
3832b1b
fae49e7
 
 
3832b1b
 
 
 
 
 
 
 
 
 
 
f24967f
6268cef
 
 
 
 
 
 
 
 
9c2aa41
3832b1b
 
f24967f
fae49e7
f24967f
3832b1b
f24967f
3832b1b
f24967f
5e67ce7
f24967f
3832b1b
 
9c2aa41
7204d99
6268cef
5e67ce7
 
 
fae49e7
1caa239
9c2aa41
 
c7c90ac
f24967f
 
 
 
 
5e67ce7
 
f24967f
 
9c2aa41
05ec195
3832b1b
 
 
 
05ec195
fae49e7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import os
import numpy as np
import pandas as pd
import streamlit as st
from huggingface_hub import login
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from keybert import KeyBERT
from openai import AzureOpenAI  # new

# ─── Azure OpenAI Client ─────────────────────────────────────────────────────
# Credentials come from the environment so that no secret is committed with
# the source. Set AZURE_OPENAI_API_KEY before launching the app.
# NOTE: the key that used to be hard-coded here has been exposed in version
# control and should be rotated.
openai_client = AzureOpenAI(
    api_key=os.environ.get("AZURE_OPENAI_API_KEY"),
    # apparently HKUST uses a deprecated version; override via OPENAI_API_VERSION
    api_version=os.environ.get("OPENAI_API_VERSION", "2023-05-15"),
    # per HKUST instructions; override via AZURE_OPENAI_ENDPOINT
    azure_endpoint=os.environ.get(
        "AZURE_OPENAI_ENDPOINT", "https://hkust.azure-api.net"
    ),
)

@st.cache_resource
def load_sentiment_pipeline():
    """Build the sentiment-analysis pipeline for the fine-tuned review model.

    The tokenizer and the sequence-classification model are both loaded from
    the "mayf/amazon_reviews_bert_ft" checkpoint with ``use_auth_token=True``.
    The function is wrapped in ``st.cache_resource`` so Streamlit can reuse
    the constructed pipeline instead of rebuilding it.

    Returns:
        A ``transformers`` "sentiment-analysis" pipeline created with
        ``return_all_scores=True``.
    """
    checkpoint = "mayf/amazon_reviews_bert_ft"
    hub_options = {"use_auth_token": True}

    tokenizer = AutoTokenizer.from_pretrained(checkpoint, **hub_options)
    classifier = AutoModelForSequenceClassification.from_pretrained(
        checkpoint, **hub_options
    )

    pipeline_options = {
        "model": classifier,
        "tokenizer": tokenizer,
        "return_all_scores": True,
    }
    return pipeline("sentiment-analysis", **pipeline_options)

@st.cache_resource
def load_keybert_model():
    """Create the KeyBERT keyword extractor used by the app.

    Wrapped in ``st.cache_resource`` so Streamlit can reuse the instance.

    Returns:
        A ``KeyBERT`` instance built with the "all-MiniLM-L6-v2" model.
    """
    embedding_model_name = "all-MiniLM-L6-v2"
    extractor = KeyBERT(model=embedding_model_name)
    return extractor

# Maps the classifier's raw output labels ("LABEL_0" .. "LABEL_4") to the
# human-readable sentiment names shown in the UI. The position of each name
# in the tuple is the numeric suffix of its label.
LABEL_MAP = {
    f"LABEL_{index}": sentiment_name
    for index, sentiment_name in enumerate(
        (
            "Very Negative",
            "Negative",
            "Neutral",
            "Positive",
            "Very Positive",
        )
    )
}


def main():
    """Render the review-analyzer page and run the analysis on demand.

    Flow, once the "Analyze Review" button is pressed with non-blank text:
      1. load the sentiment pipeline and the KeyBERT model,
      2. score the review and map raw labels through ``LABEL_MAP``,
      3. extract the top 3 keywords (1- to 2-word phrases),
      4. show scores, keywords, a bar chart and the highest-scoring sentiment,
      5. ask the Azure OpenAI deployment for an interpretation and suggestions.

    A failure of the Azure OpenAI request is reported with ``st.error`` and
    does not discard the results already rendered.

    Returns:
        None. All output is written to the Streamlit page.
    """
    # Local import: only this function needs the SDK's base exception type.
    from openai import OpenAIError

    st.title("📊 Amazon Review Analyzer")

    review = st.text_area("Enter your review:")
    if not st.button("Analyze Review"):
        return
    # Whitespace-only input carries nothing to analyze either.
    if not review or not review.strip():
        st.warning("Please enter a review to analyze.")
        return

    # The status message gets its own placeholder: writing text through the
    # progress element would replace the bar itself with the text.
    progress = st.progress(0)
    status = st.empty()

    # Load models
    status.text("Loading models...")
    sentiment_pipeline = load_sentiment_pipeline()
    kw_model = load_keybert_model()
    progress.progress(20)

    # Run sentiment analysis; [0] selects the score list for the single input.
    status.text("Analyzing sentiment...")
    raw_scores = sentiment_pipeline(review)[0]
    # Map labels, falling back to the raw label if the model emits one that
    # LABEL_MAP does not know instead of raising KeyError.
    sentiment_results = {
        LABEL_MAP.get(item["label"], item["label"]): float(item["score"])
        for item in raw_scores
    }
    progress.progress(40)

    # Extract keywords
    status.text("Extracting keywords...")
    keywords = kw_model.extract_keywords(
        review,
        keyphrase_ngram_range=(1, 2),
        stop_words="english",
        top_n=3,
    )
    progress.progress(60)

    # Display scores and keywords side by side
    col1, col2 = st.columns(2)
    with col1:
        st.subheader("Sentiment Scores")
        st.json({k: round(v, 4) for k, v in sentiment_results.items()})
    with col2:
        st.subheader("Top 3 Keywords")
        for kw, score in keywords:
            st.write(f"• {kw} ({score:.4f})")

    # Bar chart
    status.text("Rendering chart...")
    df_scores = pd.DataFrame.from_dict(
        sentiment_results, orient="index", columns=["score"]
    )
    df_scores.index.name = "Sentiment"
    st.bar_chart(df_scores)
    progress.progress(80)

    # Highlight highest sentiment
    max_label, max_score = max(sentiment_results.items(), key=lambda x: x[1])
    st.markdown(f"**Highest Sentiment:** **{max_label}** ({max_score:.4f})")

    # GPT-Driven Analysis & Suggestions
    status.text("Generating insights...")
    prompt = f"""
You are an analytical amazon feedback expert.
Review: \"{review}\"
Sentiment Scores: {sentiment_results}
Top Keywords: {[kw for kw, _ in keywords]}
Tasks:
1. Write a concise paragraph (3 sentences) interpreting customer sentiment by combining the scores and keywords.
2. Provide 3 actionable suggestions with brief explanations (up to 3 sentences each).
"""

    try:
        # NOTE(review): max_tokens=200 may truncate the requested paragraph
        # plus three suggestions — confirm whether a larger budget is wanted.
        response = openai_client.chat.completions.create(
            model="gpt-35-turbo",
            messages=[
                {"role": "system", "content": "You are a product-feedback analyst."},
                {"role": "user", "content": prompt},
            ],
            temperature=0.7,
            max_tokens=200,
        )
    except OpenAIError as err:
        # Keep the locally computed results on screen; only insights are lost.
        st.error(f"Could not generate insights: {err}")
    else:
        # The message content can be None, so guard before calling .strip().
        content = response.choices[0].message.content if response.choices else None
        if content:
            st.markdown(content.strip())
        else:
            st.info("The model returned no insights for this review.")

    # Complete
    progress.progress(100)
    status.text("Done!")

# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()