Spaces:

mayf
/

1

Sleeping

App Files Files Community

mayf committed on May 20

Commit

8aeff3e

verified ·

1 Parent(s): 20eb2f2

Update app.py

Browse files

Files changed (1) hide show

app.py +59 -75

app.py CHANGED Viewed

@@ -2,23 +2,23 @@ import os
 import numpy as np
 import pandas as pd
 import streamlit as st
-from transformers import (
-    pipeline,
-    AutoTokenizer,
-    AutoModelForSequenceClassification,
-    AutoModelForSeq2SeqLM
-)
 from keybert import KeyBERT
-# ─── Sentiment & Keyword Models ─────────────────────────────────────────────
 @st.cache_resource
 def load_sentiment_pipeline():
     model_name = "mayf/amazon_reviews_bert_ft"
     tok = AutoTokenizer.from_pretrained(model_name, use_auth_token=True)
-    mdl = AutoModelForSequenceClassification.from_pretrained(
-        model_name,
-        use_auth_token=True
-    )
     return pipeline(
         "sentiment-analysis",
         model=mdl,
@@ -30,26 +30,6 @@ def load_sentiment_pipeline():
 def load_keybert_model():
     return KeyBERT(model="all-MiniLM-L6-v2")
-# ─── BlenderBot Response Components ─────────────────────────────────────────
-@st.cache_resource
-def load_response_components():
-    # Load tokenizer and model directly for manual generation with truncation
-    tok = AutoTokenizer.from_pretrained(
-        "facebook/blenderbot-400M-distill",
-        use_fast=True
-    )
-    mdl = AutoModelForSeq2SeqLM.from_pretrained("facebook/blenderbot-400M-distill")
-    return tok, mdl
-:
-    # Use BlenderBot 400M Distill for text generation
-    return pipeline(
-        "text2text-generation",
-        model="facebook/blenderbot-400M-distill",
-        tokenizer="facebook/blenderbot-400M-distill",
-        max_new_tokens=150,
-        do_sample=False
-    )
 LABEL_MAP = {
     "LABEL_0": "Very Negative",
     "LABEL_1": "Negative",
@@ -69,22 +49,33 @@ def main():
         st.warning("Please enter a review to analyze.")
         return
-    # ─── KEEP THIS BLOCK UNCHANGED ─────────────────────────────────────────
-    # Sentiment Analysis
     sentiment_pipeline = load_sentiment_pipeline()
     raw_scores = sentiment_pipeline(review)[0]
     sentiment_results = {LABEL_MAP[item['label']]: float(item['score']) for item in raw_scores}
-    # Keyword Extraction
-    kw_model = load_keybert_model()
     keywords = kw_model.extract_keywords(
         review,
         keyphrase_ngram_range=(1, 2),
         stop_words="english",
         top_n=3
     )
-    # Display Results
     col1, col2 = st.columns(2)
     with col1:
         st.subheader("Sentiment Scores")
@@ -94,51 +85,44 @@ def main():
         for kw, score in keywords:
             st.write(f"• {kw} ({score:.4f})")
-    # Bar Chart
-    df_scores = pd.DataFrame.from_dict(
-        sentiment_results,
-        orient='index',
-        columns=['score']
-    )
     df_scores.index.name = 'Sentiment'
     st.bar_chart(df_scores)
-    # Highlight Highest Sentiment
     max_label, max_score = max(sentiment_results.items(), key=lambda x: x[1])
     st.markdown(f"**Highest Sentiment:** **{max_label}** ({max_score:.4f})")
-    # ────────────────────────────────────────────────────────────────────
-    # Generate appropriate reply using manual tokenization & generation
-    tok, mdl = load_response_components()
-    if max_label in ["Positive", "Very Positive"]:
-        prompt_text = (
-            f"You are a friendly customer success representative. The customer said: \"{review}\". "
-            "Write two sentences to express gratitude and highlight their positive experience."
-        )
-    else:
-        prompt_text = (
-            f"You are a helpful customer support specialist. The customer said: \"{review}\". "
-            f"Identified issues: {', '.join([kw for kw, _ in keywords])}. "
-            "First, ask 1-2 clarifying questions to understand their situation. "
-            "Then provide two concrete suggestions or next steps to address these issues."
-        )
-    # Tokenize with truncation to avoid out-of-range embeddings
-    inputs = tok(
-        prompt_text,
-        return_tensors="pt",
-        truncation=True,
-        max_length=tok.model_max_length
-    )
-    outputs = mdl.generate(
-        **inputs,
-        max_new_tokens=150,
-        do_sample=False
-    )
-    reply = tok.decode(outputs[0], skip_special_tokens=True).strip()
-    st.subheader("Generated Reply")
-    st.write(reply)
-if __name__ == '__main__':
     main()

 import numpy as np
 import pandas as pd
 import streamlit as st
+from huggingface_hub import login
+from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
 from keybert import KeyBERT
+from openai import AzureOpenAI  # new
+# ─── Azure OpenAI Client ─────────────────────────────────────────────────────
+openai_client = AzureOpenAI(
+  api_key = os.environ.get("AZURE_OPENAI_API_KEY"), # never hard-code secrets: set AZURE_OPENAI_API_KEY in the Space's secrets/environment
+  api_version = "2023-05-15", # apparently HKUST uses a deprecated version
+  azure_endpoint = "https://hkust.azure-api.net" # per HKUST instructions
+)
 @st.cache_resource
 def load_sentiment_pipeline():
     model_name = "mayf/amazon_reviews_bert_ft"
     tok = AutoTokenizer.from_pretrained(model_name, use_auth_token=True)
+    mdl = AutoModelForSequenceClassification.from_pretrained(model_name, use_auth_token=True)
     return pipeline(
         "sentiment-analysis",
         model=mdl,
 def load_keybert_model():
     return KeyBERT(model="all-MiniLM-L6-v2")
 LABEL_MAP = {
     "LABEL_0": "Very Negative",
     "LABEL_1": "Negative",
         st.warning("Please enter a review to analyze.")
         return
+    # Initialize progress bar
+    progress = st.progress(0)
+    # Load models
+    progress.text("Loading models...")
     sentiment_pipeline = load_sentiment_pipeline()
+    kw_model = load_keybert_model()
+    progress.progress(20)
+    # Run sentiment analysis
+    progress.text("Analyzing sentiment...")
     raw_scores = sentiment_pipeline(review)[0]
+    # Map labels
     sentiment_results = {LABEL_MAP[item['label']]: float(item['score']) for item in raw_scores}
+    progress.progress(40)
+    # Extract keywords
+    progress.text("Extracting keywords...")
     keywords = kw_model.extract_keywords(
         review,
         keyphrase_ngram_range=(1, 2),
         stop_words="english",
         top_n=3
     )
+    progress.progress(60)
+    # Display scores and keywords side by side
     col1, col2 = st.columns(2)
     with col1:
         st.subheader("Sentiment Scores")
         for kw, score in keywords:
             st.write(f"• {kw} ({score:.4f})")
+    # Bar chart
+    progress.text("Rendering chart...")
+    df_scores = pd.DataFrame.from_dict(sentiment_results, orient='index', columns=['score'])
     df_scores.index.name = 'Sentiment'
     st.bar_chart(df_scores)
+    progress.progress(80)
+    # Highlight highest sentiment
     max_label, max_score = max(sentiment_results.items(), key=lambda x: x[1])
     st.markdown(f"**Highest Sentiment:** **{max_label}** ({max_score:.4f})")
+    # GPT-Driven Analysis & Suggestions
+    progress.text("Generating insights...")
+    prompt = f"""
+You are an analytical amazon feedback expert.
+Review: \"{review}\"
+Sentiment Scores: {sentiment_results}
+Top Keywords: {[kw for kw, _ in keywords]}
+Tasks:
+1. Analysis: Write a concise paragraph (3 sentences) interpreting customer sentiment by combining the scores and keywords.
+2. Recommendations: Three separate paragraphs with actionable suggestions (max 30 words each).
+"""
+    response = openai_client.chat.completions.create(
+        model="gpt-35-turbo",
+        messages=[
+            {"role": "system", "content": "You are a product-feedback analyst."},
+            {"role": "user", "content": prompt}
+        ],
+        temperature=0.7,
+        max_tokens=200
+    )
+    gpt_reply = response.choices[0].message.content.strip()
+    st.markdown(gpt_reply)
+    # Complete
+    progress.progress(100)
+    progress.text("Done!")
+if __name__ == "__main__":
     main()