mayf committed on
Commit 8aeff3e · verified · 1 Parent(s): 20eb2f2

Update app.py

Files changed (1)
  1. app.py +59 -75
app.py CHANGED
@@ -2,23 +2,23 @@ import os
 import numpy as np
 import pandas as pd
 import streamlit as st
-from transformers import (
-    pipeline,
-    AutoTokenizer,
-    AutoModelForSequenceClassification,
-    AutoModelForSeq2SeqLM
-)
+from huggingface_hub import login
+from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
 from keybert import KeyBERT
+from openai import AzureOpenAI  # new
+
+# ─── Azure OpenAI Client ─────────────────────────────────────────────────────
+openai_client = AzureOpenAI(
+    api_key = "fbca46bfd8814334be46a2e5c323904c",   # use your key here
+    api_version = "2023-05-15",                     # apparently HKUST uses a deprecated version
+    azure_endpoint = "https://hkust.azure-api.net"  # per HKUST instructions
+)
 
-# ─── Sentiment & Keyword Models ─────────────────────────────────────────────
 @st.cache_resource
 def load_sentiment_pipeline():
     model_name = "mayf/amazon_reviews_bert_ft"
     tok = AutoTokenizer.from_pretrained(model_name, use_auth_token=True)
-    mdl = AutoModelForSequenceClassification.from_pretrained(
-        model_name,
-        use_auth_token=True
-    )
+    mdl = AutoModelForSequenceClassification.from_pretrained(model_name, use_auth_token=True)
     return pipeline(
         "sentiment-analysis",
         model=mdl,
@@ -30,26 +30,6 @@ def load_sentiment_pipeline():
 def load_keybert_model():
     return KeyBERT(model="all-MiniLM-L6-v2")
 
-# ─── BlenderBot Response Components ─────────────────────────────────────────
-@st.cache_resource
-def load_response_components():
-    # Load tokenizer and model directly for manual generation with truncation
-    tok = AutoTokenizer.from_pretrained(
-        "facebook/blenderbot-400M-distill",
-        use_fast=True
-    )
-    mdl = AutoModelForSeq2SeqLM.from_pretrained("facebook/blenderbot-400M-distill")
-    return tok, mdl
-    :
-    # Use BlenderBot 400M Distill for text generation
-    return pipeline(
-        "text2text-generation",
-        model="facebook/blenderbot-400M-distill",
-        tokenizer="facebook/blenderbot-400M-distill",
-        max_new_tokens=150,
-        do_sample=False
-    )
-
 LABEL_MAP = {
     "LABEL_0": "Very Negative",
     "LABEL_1": "Negative",
@@ -69,22 +49,33 @@ def main():
         st.warning("Please enter a review to analyze.")
         return
 
-    # ─── KEEP THIS BLOCK UNCHANGED ─────────────────────────────────────────
-    # Sentiment Analysis
+    # Initialize progress bar
+    progress = st.progress(0)
+
+    # Load models
+    progress.text("Loading models...")
     sentiment_pipeline = load_sentiment_pipeline()
+    kw_model = load_keybert_model()
+    progress.progress(20)
+
+    # Run sentiment analysis
+    progress.text("Analyzing sentiment...")
     raw_scores = sentiment_pipeline(review)[0]
+    # Map labels
     sentiment_results = {LABEL_MAP[item['label']]: float(item['score']) for item in raw_scores}
+    progress.progress(40)
 
-    # Keyword Extraction
-    kw_model = load_keybert_model()
+    # Extract keywords
+    progress.text("Extracting keywords...")
     keywords = kw_model.extract_keywords(
         review,
         keyphrase_ngram_range=(1, 2),
         stop_words="english",
         top_n=3
     )
+    progress.progress(60)
 
-    # Display Results
+    # Display scores and keywords side by side
     col1, col2 = st.columns(2)
     with col1:
         st.subheader("Sentiment Scores")
@@ -94,51 +85,44 @@ def main():
         for kw, score in keywords:
             st.write(f"• {kw} ({score:.4f})")
 
-    # Bar Chart
-    df_scores = pd.DataFrame.from_dict(
-        sentiment_results,
-        orient='index',
-        columns=['score']
-    )
+    # Bar chart
+    progress.text("Rendering chart...")
+    df_scores = pd.DataFrame.from_dict(sentiment_results, orient='index', columns=['score'])
     df_scores.index.name = 'Sentiment'
     st.bar_chart(df_scores)
+    progress.progress(80)
 
-    # Highlight Highest Sentiment
+    # Highlight highest sentiment
    max_label, max_score = max(sentiment_results.items(), key=lambda x: x[1])
    st.markdown(f"**Highest Sentiment:** **{max_label}** ({max_score:.4f})")
-    # ────────────────────────────────────────────────────────────────────
-
-    # Generate appropriate reply using manual tokenization & generation
-    tok, mdl = load_response_components()
-    if max_label in ["Positive", "Very Positive"]:
-        prompt_text = (
-            f"You are a friendly customer success representative. The customer said: \"{review}\". "
-            "Write two sentences to express gratitude and highlight their positive experience."
-        )
-    else:
-        prompt_text = (
-            f"You are a helpful customer support specialist. The customer said: \"{review}\". "
-            f"Identified issues: {', '.join([kw for kw, _ in keywords])}. "
-            "First, ask 1-2 clarifying questions to understand their situation. "
-            "Then provide two concrete suggestions or next steps to address these issues."
-        )
-    # Tokenize with truncation to avoid out-of-range embeddings
-    inputs = tok(
-        prompt_text,
-        return_tensors="pt",
-        truncation=True,
-        max_length=tok.model_max_length
-    )
-    outputs = mdl.generate(
-        **inputs,
-        max_new_tokens=150,
-        do_sample=False
-    )
-    reply = tok.decode(outputs[0], skip_special_tokens=True).strip()
 
-    st.subheader("Generated Reply")
-    st.write(reply)
+    # GPT-Driven Analysis & Suggestions
+    progress.text("Generating insights...")
+    prompt = f"""
+You are an analytical amazon feedback expert.
+Review: \"{review}\"
+Sentiment Scores: {sentiment_results}
+Top Keywords: {[kw for kw, _ in keywords]}
+Tasks:
+1. Analysis: Write a concise paragraph (3 sentences) interpreting customer sentiment by combining the scores and keywords.
+2. Recommendations: Three separate paragraphs with actionable suggestions (max 30 words each).
+"""
+
+    response = openai_client.chat.completions.create(
+        model="gpt-35-turbo",
+        messages=[
+            {"role": "system", "content": "You are a product-feedback analyst."},
+            {"role": "user", "content": prompt}
+        ],
+        temperature=0.7,
+        max_tokens=200
+    )
+    gpt_reply = response.choices[0].message.content.strip()
+    st.markdown(gpt_reply)
 
+    # Complete
+    progress.progress(100)
+    progress.text("Done!")
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
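The new Azure OpenAI block above hardcodes the subscription key in app.py. Below is a minimal sketch of the same client setup and chat call with the key read from an environment variable instead; the variable name AZURE_OPENAI_KEY is illustrative and not part of the commit.

    import os
    from openai import AzureOpenAI

    # Same endpoint and API version as the commit; only the key source differs.
    # AZURE_OPENAI_KEY is a hypothetical environment variable name.
    openai_client = AzureOpenAI(
        api_key=os.environ["AZURE_OPENAI_KEY"],
        api_version="2023-05-15",
        azure_endpoint="https://hkust.azure-api.net"
    )

    response = openai_client.chat.completions.create(
        model="gpt-35-turbo",
        messages=[
            {"role": "system", "content": "You are a product-feedback analyst."},
            {"role": "user", "content": "Summarize: great battery life, but shipping was slow."}
        ],
        temperature=0.7,
        max_tokens=200
    )
    print(response.choices[0].message.content.strip())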
 
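The progress updates in main() call both .progress() and .text() on the single object returned by st.progress(0). A minimal sketch of the same status-plus-bar pattern using a separate st.empty() placeholder for the text, so the bar and the message are two independent elements (a sketch of the pattern only, not the commit's code):

    import time
    import streamlit as st

    status = st.empty()    # placeholder that holds the current status message
    bar = st.progress(0)   # progress bar starting at 0%

    steps = [
        (20, "Loading models..."),
        (40, "Analyzing sentiment..."),
        (60, "Extracting keywords..."),
        (80, "Rendering chart..."),
        (100, "Done!"),
    ]
    for pct, msg in steps:
        status.text(msg)   # update the message in place
        bar.progress(pct)  # advance the bar to pct percent
        time.sleep(0.2)    # stand-in for the real work done at each step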