import os
import numpy as np
import pandas as pd
import streamlit as st
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification, AutoModelForCausalLM
from keybert import KeyBERT

# ─── DeepSeek Model Client ────────────────────────────────────────────────────
# Option 1: High-level helper pipeline for chat-like generation.
# Wrapped in st.cache_resource so the model is not reloaded on every Streamlit rerun.
@st.cache_resource
def load_generation_pipeline():
    return pipeline(
        "text-generation",
        model="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        trust_remote_code=True
    )

# Option 2: Direct model & tokenizer instantiation (alternative)
# tokenizer_ds = AutoTokenizer.from_pretrained(
#     "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
#     trust_remote_code=True
# )
# model_ds = AutoModelForCausalLM.from_pretrained(
#     "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
#     trust_remote_code=True
# )


@st.cache_resource
def load_sentiment_pipeline():
    model_name = "mayf/amazon_reviews_bert_ft"
    tok = AutoTokenizer.from_pretrained(model_name, token=True)
    mdl = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        token=True
    )
    return pipeline(
        "sentiment-analysis",
        model=mdl,
        tokenizer=tok,
        top_k=None  # return scores for all labels (replaces deprecated return_all_scores=True)
    )


@st.cache_resource
def load_keybert_model():
    return KeyBERT(model="all-MiniLM-L6-v2")


LABEL_MAP = {
    "LABEL_0": "Very Negative",
    "LABEL_1": "Negative",
    "LABEL_2": "Neutral",
    "LABEL_3": "Positive",
    "LABEL_4": "Very Positive"
}


def main():
    st.title("📊 Amazon Review Analyzer")
    review = st.text_area("Enter your review:")

    if not st.button("Analyze Review"):
        return
    if not review:
        st.warning("Please enter a review to analyze.")
        return

    # Progress bar, plus a separate placeholder for status text
    # (the object returned by st.progress has no .text() method)
    progress = st.progress(0)
    status = st.empty()

    # Load models
    status.text("Loading models...")
    sentiment_pipeline = load_sentiment_pipeline()
    kw_model = load_keybert_model()
    gen_pipeline = load_generation_pipeline()
    progress.progress(20)

    # Run sentiment analysis
    status.text("Analyzing sentiment...")
    raw_scores = sentiment_pipeline(review)[0]
    sentiment_results = {
        LABEL_MAP[item['label']]: float(item['score']) for item in raw_scores
    }
    progress.progress(40)

    # Extract keywords
    status.text("Extracting keywords...")
    keywords = kw_model.extract_keywords(
        review,
        keyphrase_ngram_range=(1, 2),
        stop_words="english",
        top_n=3
    )
    progress.progress(60)

    # Display scores and keywords side by side
    col1, col2 = st.columns(2)
    with col1:
        st.subheader("Sentiment Scores")
        st.json({k: round(v, 4) for k, v in sentiment_results.items()})
    with col2:
        st.subheader("Top 3 Keywords")
        for kw, score in keywords:
            st.write(f"• {kw} ({score:.4f})")

    # Bar chart
    status.text("Rendering chart...")
    df_scores = pd.DataFrame.from_dict(
        sentiment_results, orient='index', columns=['score']
    )
    df_scores.index.name = 'Sentiment'
    st.bar_chart(df_scores)
    progress.progress(80)

    # Highlight highest sentiment
    max_label, max_score = max(sentiment_results.items(), key=lambda x: x[1])
    st.markdown(f"**Highest Sentiment:** **{max_label}** ({max_score:.4f})")

    # GPT-Driven Analysis & Suggestions
    status.text("Generating insights...")
    prompt = f"""
You are an analytical Amazon feedback expert.
Review: "{review}"
Sentiment Scores: {sentiment_results}
Top Keywords: {[kw for kw, _ in keywords]}

Tasks:
1. Analysis: Write a concise paragraph (3 sentences) interpreting customer sentiment by combining the scores and keywords.
2. Recommendations: Three separate paragraphs with actionable suggestions (max 30 words each).
"""

    # Use the high-level pipeline for generation
    chat_input = [
        {"role": "system", "content": "You are a product-feedback analyst."},
        {"role": "user", "content": prompt}
    ]
    gen_output = gen_pipeline(chat_input, max_new_tokens=400)
    # With chat-style input the pipeline returns the full message list;
    # the assistant's reply is the last message.
    gpt_reply = gen_output[0]['generated_text'][-1]['content']

    # Alternative: direct model invocation
    # inputs = tokenizer_ds(prompt, return_tensors="pt")
    # outputs = model_ds.generate(**inputs, max_new_tokens=200)
    # gpt_reply = tokenizer_ds.decode(outputs[0], skip_special_tokens=True)

    st.markdown(gpt_reply)

    progress.progress(100)
    status.text("Done!")


if __name__ == "__main__":
    main()