import os
import numpy as np
import pandas as pd
import streamlit as st
from transformers import (
pipeline,
AutoTokenizer,
AutoModelForSequenceClassification,
AutoModelForSeq2SeqLM
)
from keybert import KeyBERT
# ─── Sentiment & Keyword Models ─────────────────────────────────────────────
@st.cache_resource
def load_sentiment_pipeline():
    """Load the fine-tuned Amazon-reviews BERT sentiment pipeline (cached).

    Returns:
        transformers.Pipeline: a "sentiment-analysis" pipeline that emits
        scores for all five labels (LABEL_0 .. LABEL_4) on every call.
    """
    model_name = "mayf/amazon_reviews_bert_ft"
    # `token=True` reads the cached Hugging Face auth token;
    # `use_auth_token` is deprecated in recent transformers releases.
    tok = AutoTokenizer.from_pretrained(model_name, token=True)
    mdl = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        token=True,
    )
    # NOTE(review): `return_all_scores=True` is deprecated in favour of
    # `top_k=None`, but it is kept because main() relies on the nested
    # [[{label, score}, ...]] output shape it produces — confirm before
    # migrating.
    return pipeline(
        "sentiment-analysis",
        model=mdl,
        tokenizer=tok,
        return_all_scores=True,
    )
@st.cache_resource
def load_keybert_model():
    """Return a cached KeyBERT extractor backed by the MiniLM sentence encoder."""
    keyword_extractor = KeyBERT(model="all-MiniLM-L6-v2")
    return keyword_extractor
# ─── BlenderBot Response Components ─────────────────────────────────────────
@st.cache_resource
def load_response_components():
    """Load the BlenderBot tokenizer and model used to draft replies (cached).

    The tokenizer/model pair is returned directly (rather than a pipeline)
    so the caller can truncate the prompt explicitly before generation.

    Returns:
        tuple: (tokenizer, model) for "facebook/blenderbot-400M-distill".
    """
    # Fix: removed a stray bare ":" line (a syntax error) and the
    # unreachable pipeline(...) fallback that followed this return.
    tok = AutoTokenizer.from_pretrained(
        "facebook/blenderbot-400M-distill",
        use_fast=True,
    )
    mdl = AutoModelForSeq2SeqLM.from_pretrained("facebook/blenderbot-400M-distill")
    return tok, mdl
# Map the classifier's raw labels (LABEL_0 .. LABEL_4) to readable sentiments,
# ordered from most negative to most positive.
_SENTIMENT_NAMES = (
    "Very Negative",
    "Negative",
    "Neutral",
    "Positive",
    "Very Positive",
)
LABEL_MAP = {f"LABEL_{i}": name for i, name in enumerate(_SENTIMENT_NAMES)}
def main():
    """Streamlit entry point.

    Runs sentiment analysis and keyword extraction on a user-supplied
    Amazon review, displays the results, then generates a tone-appropriate
    reply with BlenderBot.
    """
    st.title("📊 Amazon Review Analyzer")
    review = st.text_area("Enter your review:")
    if not st.button("Analyze Review"):
        return
    # Fix: also reject whitespace-only input, which previously slipped
    # past the empty-string check and hit the models with blank text.
    if not review.strip():
        st.warning("Please enter a review to analyze.")
        return
    # ─── KEEP THIS BLOCK UNCHANGED ─────────────────────────────────────────
    # Sentiment Analysis
    sentiment_pipeline = load_sentiment_pipeline()
    raw_scores = sentiment_pipeline(review)[0]
    sentiment_results = {LABEL_MAP[item['label']]: float(item['score']) for item in raw_scores}
    # Keyword Extraction
    kw_model = load_keybert_model()
    keywords = kw_model.extract_keywords(
        review,
        keyphrase_ngram_range=(1, 2),
        stop_words="english",
        top_n=3
    )
    # Display Results
    col1, col2 = st.columns(2)
    with col1:
        st.subheader("Sentiment Scores")
        st.json({k: round(v, 4) for k, v in sentiment_results.items()})
    with col2:
        st.subheader("Top 3 Keywords")
        for kw, score in keywords:
            st.write(f"• {kw} ({score:.4f})")
    # Bar Chart
    df_scores = pd.DataFrame.from_dict(
        sentiment_results,
        orient='index',
        columns=['score']
    )
    df_scores.index.name = 'Sentiment'
    st.bar_chart(df_scores)
    # Highlight Highest Sentiment
    max_label, max_score = max(sentiment_results.items(), key=lambda x: x[1])
    st.markdown(f"**Highest Sentiment:** **{max_label}** ({max_score:.4f})")
    # ────────────────────────────────────────────────────────────────────
    # Generate the reply via manual tokenization + generation so the prompt
    # can be truncated to the model's maximum input length.
    tok, mdl = load_response_components()
    if max_label in ["Positive", "Very Positive"]:
        prompt_text = (
            f"You are a friendly customer success representative. The customer said: \"{review}\". "
            "Write two sentences to express gratitude and highlight their positive experience."
        )
    else:
        prompt_text = (
            f"You are a helpful customer support specialist. The customer said: \"{review}\". "
            f"Identified issues: {', '.join([kw for kw, _ in keywords])}. "
            "First, ask 1-2 clarifying questions to understand their situation. "
            "Then provide two concrete suggestions or next steps to address these issues."
        )
    # Truncate to the tokenizer's max length to avoid out-of-range
    # position embeddings in BlenderBot.
    inputs = tok(
        prompt_text,
        return_tensors="pt",
        truncation=True,
        max_length=tok.model_max_length
    )
    outputs = mdl.generate(
        **inputs,
        max_new_tokens=150,
        do_sample=False
    )
    reply = tok.decode(outputs[0], skip_special_tokens=True).strip()
    st.subheader("Generated Reply")
    st.write(reply)
# Run the Streamlit app when executed as a script (not when imported).
if __name__ == '__main__':
    main()