import pandas as pd
from pygwalker.api.streamlit import StreamlitRenderer
from io import BytesIO
import requests
import streamlit as st
from pymongo import MongoClient
import os
from dotenv import load_dotenv
import json

# Load environment variables (python-dotenv's load_dotenv() reads them from a
# .env file when one is available) and pull out the MongoDB settings.
# os.getenv returns None for any variable that is not set.
load_dotenv()
MONGO_URI = os.getenv("MONGO_URI")
DB_NAME = os.getenv("DB_NAME")
COLLECTION_NAME = os.getenv("COLLECTION_NAME")

# Module-level MongoDB handles, created once when this module is imported.
# `collection` is the handle queried by view_table_analysis_page below.
# NOTE(review): nothing here validates that the three settings are present;
# confirm the deployment always defines them.
mongo_client = MongoClient(MONGO_URI)
db = mongo_client[DB_NAME]
collection = db[COLLECTION_NAME]




# Load the CSV from a URL (replace with actual CSV download from S3)
def load_csv_from_url(object_url, timeout=30):
    """Download a CSV file over HTTP and parse it into a DataFrame.

    Args:
        object_url: URL of the CSV file to fetch.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``; without it a stalled server would
            block the calling script indefinitely.

    Returns:
        pandas.DataFrame parsed from the response body.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    response = requests.get(object_url, timeout=timeout)
    response.raise_for_status()  # Ensure the request was successful
    # Parse from the raw bytes so pandas handles decoding itself.
    return pd.read_csv(BytesIO(response.content))


# Analyzing each column based on data type
def analyze_column_data(df):
    """Build per-column summary statistics for a DataFrame.

    Numeric columns get ``Mean``, ``Median``, ``Mode``, ``Unique Values`` and
    ``Null Values``; every other column gets ``Unique Values``,
    ``Null Values`` and ``Top Categories`` (the five most frequent values
    mapped to their counts).

    Args:
        df: DataFrame to summarise. Column labels are assumed to be unique.

    Returns:
        dict mapping each column label to its statistics dict. ``Mode`` is
        ``None`` when the column has no non-null values. Null counts and the
        mode are returned as plain Python scalars rather than NumPy scalars
        so they stay JSON-friendly when rendered.
    """
    analysis = {}
    for col in df.columns:
        series = df[col]
        # isnull().sum() yields a NumPy integer; cast it to a built-in int.
        null_count = int(series.isnull().sum())
        unique_count = series.nunique()

        if pd.api.types.is_numeric_dtype(series):
            # Compute the mode once; it is empty when every value is null.
            modes = series.mode()
            mode_value = None
            if not modes.empty:
                mode_value = modes.iloc[0]
                # Unwrap NumPy scalars (e.g. int64) into built-in numbers.
                if hasattr(mode_value, "item"):
                    mode_value = mode_value.item()
            analysis[col] = {
                "Mean": series.mean(),
                "Median": series.median(),
                "Mode": mode_value,
                "Unique Values": unique_count,
                "Null Values": null_count,
            }
        else:
            analysis[col] = {
                "Unique Values": unique_count,
                "Null Values": null_count,
                "Top Categories": series.value_counts().head(5).to_dict(),
            }
    return analysis


# Main function to render the View Table Analysis page
def view_table_analysis_page(url):
    """Render the "Table Analysis" Streamlit page for one stored image.

    Looks up the MongoDB document whose ``object_url`` equals ``url``,
    downloads the CSV referenced by its ``csv_object_url`` field, and shows:
    a preview of the table, an Excel download button, the stored table
    description, a per-column summary with basic statistics, and a pygwalker
    explorer seeded with a default bar chart.

    Args:
        url: Value matched against the ``object_url`` field of the collection.

    Returns:
        None. If no document matches ``url`` or the document has no
        ``csv_object_url``, an error message is shown and rendering stops.

    Raises:
        Whatever ``load_csv_from_url`` raises when the CSV cannot be fetched
        or parsed.
    """
    if st.button("Back", key="back_button"):
        st.session_state.page = "view_image"
        st.rerun()

    # find_one returns None when nothing matches; stop cleanly instead of
    # failing with an AttributeError further down.
    image = collection.find_one({"object_url": url})
    if image is None:
        st.error("No record was found for this image.")
        return

    csv_url = image.get("csv_object_url")
    if not csv_url:
        st.error("No CSV file is associated with this image.")
        return

    # Fetch the nested metadata once; fall back to an empty dict so the
    # optional sections below are simply skipped when it is missing.
    table_data = image.get("table_data") or {}

    # Load CSV data
    df = load_csv_from_url(csv_url)
    # Drop the last row if any of its cells contains the word "total"
    # (case-insensitive). The emptiness check avoids indexing a CSV that has
    # no rows.
    if not df.empty and df.iloc[-1].apply(lambda x: "total" in str(x).lower()).any():
        df = df.iloc[:-1]

    # Page title
    st.title("Table Analysis")

    # CSV Preview
    st.subheader("CSV Preview")
    st.write("Below is a preview of the uploaded CSV file:")
    st.dataframe(df)  # Interactive, scrollable table

    # Build the Excel export in memory
    excel_buffer = BytesIO()
    with pd.ExcelWriter(excel_buffer, engine='openpyxl') as writer:
        df.to_excel(writer, index=False, sheet_name="Sheet1")
    excel_buffer.seek(0)  # Reset buffer position

    # Download Button
    st.download_button(
        label="Download Full Excel Sheet",
        data=excel_buffer,
        file_name="table_data.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    )

    st.markdown("<hr>", unsafe_allow_html=True)
    table_description = table_data.get("description", None)

    if table_description:
        # Table Description
        st.subheader("Table Description")
        st.write(table_description)

    # Column Summary
    st.markdown("<hr>", unsafe_allow_html=True)
    st.subheader("Column Summary")
    with st.container(height=400, border=False):

        column_summary = table_data.get("column_summary", None)

        if column_summary:
            # Column-level descriptions and analysis
            column_analysis = analyze_column_data(df)

            left, right = st.columns(2)
            for idx, (col_name, col_description) in enumerate(column_summary.items()):
                # Alternate between the two layout columns
                with left if idx % 2 == 0 else right:
                    st.markdown(f"Column Name : **{col_name}**")
                    st.write(f"Column Description : {col_description}")

                    # The stored summary may mention a column the CSV does
                    # not contain; show the description but skip statistics
                    # rather than raising a KeyError.
                    analysis = column_analysis.get(col_name)
                    if analysis is None:
                        st.write("No statistics available: this column is not present in the CSV.")
                        continue

                    # analyze_column_data already returns the numeric or
                    # categorical set of keys, so display it directly.
                    st.write(analysis)

    st.markdown("<hr>", unsafe_allow_html=True)
    st.subheader("Graphical Analysis of Table")

    # Default configuration for initial visualization
    best_col1 = table_data.get("best_col1")
    best_col2 = table_data.get("best_col2")
    default_chart_config = {
        "mark": "bar",
        "encoding": {
            "x": {"field": best_col1, "type": "nominal"},
            "y": {"field": best_col2, "type": "quantitative"}
        }
    }

    # Convert default_chart_config to JSON string for Pygwalker spec parameter
    pyg_app = StreamlitRenderer(df, spec=json.dumps(default_chart_config))
    pyg_app.explorer()