import os
import numpy as np
import pandas as pd
import streamlit as st
from huggingface_hub import login
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from keybert import KeyBERT
from openai import AzureOpenAI  # Azure-hosted OpenAI client (openai >= 1.0 SDK)
# ─── Azure OpenAI Client ─────────────────────────────────────────────────────
openai_client = AzureOpenAI(
    api_key=os.getenv("AZURE_OPENAI_KEY", ""),    # read your key from the environment instead of hard-coding it (env var name is a placeholder)
    api_version="2023-05-15",                     # older API version required by the HKUST gateway
    azure_endpoint="https://hkust.azure-api.net"  # per HKUST instructions
)
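
# ─── Cached Model Loaders ────────────────────────────────────────────────────
# st.cache_resource keeps these objects alive across Streamlit reruns, so the
# model weights are downloaded and loaded only once per process.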
@st.cache_resource
def load_sentiment_pipeline():
    """Load the fine-tuned BERT sentiment model and return a scoring pipeline."""
    model_name = "mayf/amazon_reviews_bert_ft"
    tok = AutoTokenizer.from_pretrained(model_name, use_auth_token=True)
    mdl = AutoModelForSequenceClassification.from_pretrained(model_name, use_auth_token=True)
    return pipeline(
        "sentiment-analysis",
        model=mdl,
        tokenizer=tok,
        return_all_scores=True  # return a score for every label, not just the top one
    )
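
# KeyBERT extracts keyphrases by comparing candidate n-gram embeddings against
# the document embedding; "all-MiniLM-L6-v2" is the sentence-transformers model
# used to compute those embeddings.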
@st.cache_resource
def load_keybert_model():
    return KeyBERT(model="all-MiniLM-L6-v2")
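
# ─── Streamlit App ───────────────────────────────────────────────────────────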
def main():
    st.title("📊 Review Sentiment & Keyword Analyzer + GPT Insights")
    review = st.text_area("Enter your review:")
    if not st.button("Analyze Review"):
        return
    if not review:
        st.warning("Please enter a review to analyze.")
        return

    # Load models (cached after the first run)
    sentiment_pipeline = load_sentiment_pipeline()
    kw_model = load_keybert_model()

    # Run sentiment analysis: the pipeline returns a list of {label, score}
    # dicts for every class; convert it to a plain dict for display
    scores = sentiment_pipeline(review)[0]
    sentiment_results = {item['label']: float(item['score']) for item in scores}

    # Display scores and keywords side by side
    col1, col2 = st.columns(2)
    with col1:
        st.subheader("Sentiment Scores")
        st.json({k: round(v, 4) for k, v in sentiment_results.items()})
    with col2:
        st.subheader("Top 3 Keywords")
        keywords = kw_model.extract_keywords(
            review,
            keyphrase_ngram_range=(1, 2),
            stop_words="english",
            top_n=3
        )
        for kw, score in keywords:
            st.write(f"• {kw} ({score:.4f})")

    # Bar chart of sentiment scores
    df_scores = pd.DataFrame.from_dict(sentiment_results, orient='index', columns=['score'])
    df_scores.index.name = 'label'
    st.bar_chart(df_scores)

    # Highlight the highest-scoring sentiment (no subheader needed)
    max_label, max_score = max(sentiment_results.items(), key=lambda x: x[1])
    st.markdown(f"**Highest Sentiment:** **{max_label}** ({max_score:.4f})")

    # GPT-Driven Analysis & Suggestions (detailed)
    st.subheader("GPT Analysis & Seller Suggestions")
    prompt = f"""
You are an analytical e-commerce feedback expert.
Review: \"{review}\"
Sentiment Scores: {sentiment_results}
Top Keywords: {[kw for kw, _ in keywords]}
Tasks:
1. Write a concise paragraph (2 sentences) interpreting customer sentiment by combining the scores and keywords.
2. Provide 3 actionable suggestions with brief explanations (up to 12 words each).
"""
    response = openai_client.chat.completions.create(
        model="gpt-35-turbo",  # Azure deployment name for GPT-3.5 Turbo
        messages=[
            {"role": "system", "content": "You are a product-feedback analyst."},
            {"role": "user", "content": prompt}
        ],
        temperature=0.7,
        max_tokens=200
    )
    gpt_reply = response.choices[0].message.content.strip()
    st.markdown(gpt_reply)

if __name__ == "__main__":
    main()