File size: 5,589 Bytes
eef9e83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
import pandas as pd
from pygwalker.api.streamlit import StreamlitRenderer
from io import BytesIO
import requests
import streamlit as st
from pymongo import MongoClient
import os
from dotenv import load_dotenv
import json

# Load environment variables
# Reads MONGO_URI, DB_NAME and COLLECTION_NAME from a .env file (or the
# process environment). Note: if any of these are unset, the values below
# are None and MongoClient/db lookup will fail at first use.
load_dotenv()
MONGO_URI = os.getenv("MONGO_URI")
DB_NAME = os.getenv("DB_NAME")
COLLECTION_NAME = os.getenv("COLLECTION_NAME")

# Module-level MongoDB handle shared by the page functions below.
# MongoClient connects lazily, so an unreachable server surfaces only
# when the collection is first queried.
mongo_client = MongoClient(MONGO_URI)
db = mongo_client[DB_NAME]
collection = db[COLLECTION_NAME]




# Load the CSV from a URL (replace with actual CSV download from S3)
def load_csv_from_url(object_url, timeout=30):
    """Download a CSV file from a URL and parse it into a DataFrame.

    Args:
        object_url: Direct URL to the CSV object (e.g. an S3 object URL).
        timeout: Seconds to wait for the HTTP response before giving up.
            Without a timeout, requests can block indefinitely on a
            stalled connection.

    Returns:
        pandas.DataFrame parsed from the downloaded CSV content.

    Raises:
        requests.HTTPError: if the server returns a 4xx/5xx status.
        requests.Timeout: if the request exceeds `timeout` seconds.
    """
    response = requests.get(object_url, timeout=timeout)
    response.raise_for_status()  # Ensure the request was successful
    csv_data = pd.read_csv(BytesIO(response.content))
    return csv_data


# Analyzing each column based on data type
def analyze_column_data(df):
    """Compute a per-column summary of *df*.

    Numeric columns get mean/median/mode plus unique and null counts;
    all other columns get unique/null counts and their five most
    frequent values.

    Args:
        df: pandas.DataFrame to summarize.

    Returns:
        dict mapping column name -> dict of summary statistics.
    """
    summary = {}
    for column in df.columns:
        series = df[column]
        if pd.api.types.is_numeric_dtype(series):
            modes = series.mode()
            summary[column] = {
                "Mean": series.mean(),
                "Median": series.median(),
                # mode() is empty for an all-null/empty column
                "Mode": modes.iloc[0] if not modes.empty else None,
                "Unique Values": series.nunique(),
                "Null Values": series.isnull().sum(),
            }
        else:
            summary[column] = {
                "Unique Values": series.nunique(),
                "Null Values": series.isnull().sum(),
                "Top Categories": series.value_counts().head(5).to_dict(),
            }
    return summary


# Main function to render the View Table Analysis page
# Main function to render the View Table Analysis page
def view_table_analysis_page(url):
    """Render the Table Analysis page for the image stored at *url*.

    Looks up the MongoDB document by its ``object_url``, downloads the
    associated CSV, and renders: a preview table, an Excel download
    button, the stored table/column descriptions, per-column statistics,
    and an interactive Pygwalker explorer.

    Args:
        url: The ``object_url`` of the image document in MongoDB.
    """
    if st.button("Back", key="back_button"):
        st.session_state.page = "view_image"
        st.rerun()

    image = collection.find_one({"object_url": url})
    # Guard: find_one returns None when no document matches; without this
    # the .get() calls below raise AttributeError.
    if image is None:
        st.error("No record found for this image.")
        return

    csv_url = image.get("csv_object_url")
    if not csv_url:
        st.error("No CSV file is associated with this image.")
        return

    # Load CSV data
    df = load_csv_from_url(csv_url)

    # Drop a trailing summary row: if any cell of the last row contains
    # the word "total" (case-insensitive), remove that row. Guarded so an
    # empty CSV does not raise IndexError on iloc[-1].
    if not df.empty and df.iloc[-1].apply(lambda x: "total" in str(x).lower()).any():
        df = df.iloc[:-1]

    # Cache the nested metadata dict once; the original re-fetched it for
    # every field and crashed if it was missing entirely.
    table_data = image.get("table_data") or {}

    # Page title
    st.title("Table Analysis")

    # CSV Preview
    st.subheader("CSV Preview")
    st.write("Below is a preview of the uploaded CSV file:")
    st.dataframe(df)  # Interactive, scrollable table

    # Build an in-memory Excel workbook for the download button.
    excel_buffer = BytesIO()
    with pd.ExcelWriter(excel_buffer, engine='openpyxl') as writer:
        df.to_excel(writer, index=False, sheet_name="Sheet1")
    excel_buffer.seek(0)  # Reset buffer position

    # Download Button
    st.download_button(
        label="Download Full Excel Sheet",
        data=excel_buffer,
        file_name="table_data.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    )

    st.markdown("<hr>", unsafe_allow_html=True)

    table_description = table_data.get("description", None)
    if table_description:
        # Table Description
        st.subheader("Table Description")
        st.write(table_description)

    # Column Summary
    st.markdown("<hr>", unsafe_allow_html=True)
    st.subheader("Column Summary")
    with st.container(height=400, border=False):

        column_summary = table_data.get("column_summary", None)

        if column_summary:
            # Column-level descriptions and analysis
            column_analysis = analyze_column_data(df)

            # Lay descriptions out two-up: even indices in the left
            # column, odd indices in the right.
            col1, col2 = st.columns(2)
            for idx, (col_name, col_description) in enumerate(column_summary.items()):
                with col1 if idx % 2 == 0 else col2:
                    st.markdown(f"Column Name : **{col_name}**")
                    st.write(f"Column Description : {col_description}")

                    # Display basic analysis
                    analysis = column_analysis.get(col_name, {})
                    if col_name in df.columns and pd.api.types.is_numeric_dtype(df[col_name]):
                        # Numeric column analysis
                        st.write({
                            "Mean": analysis.get("Mean"),
                            "Median": analysis.get("Median"),
                            "Mode": analysis.get("Mode"),
                            "Unique Values": analysis.get("Unique Values"),
                            "Null Values": analysis.get("Null Values")
                        })
                    else:
                        # Categorical column analysis
                        st.write({
                            "Unique Values": analysis.get("Unique Values"),
                            "Null Values": analysis.get("Null Values"),
                            "Top Categories": analysis.get("Top Categories")
                        })

    st.markdown("<hr>", unsafe_allow_html=True)
    st.subheader("Graphical Analysis of Table")

    # Default configuration for initial visualization: a bar chart using
    # the two columns the upstream pipeline flagged as most informative.
    best_col1 = table_data.get("best_col1")
    best_col2 = table_data.get("best_col2")
    default_chart_config = {
        "mark": "bar",
        "encoding": {
            "x": {"field": best_col1, "type": "nominal"},
            "y": {"field": best_col2, "type": "quantitative"}
        }
    }

    # Convert default_chart_config to JSON string for Pygwalker spec parameter
    pyg_app = StreamlitRenderer(df, spec=json.dumps(default_chart_config))
    pyg_app.explorer()