File size: 2,386 Bytes
eef9e83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import pandas as pd
from pygwalker.api.streamlit import StreamlitRenderer
from io import BytesIO
import requests
import streamlit as st
from pymongo import MongoClient
import os
from dotenv import load_dotenv
import json

# Load environment variables
# load_dotenv() runs at import time, before the os.getenv() lookups below,
# so values defined in a .env file are visible to them.
load_dotenv()
# os.getenv() returns None for a variable that is not set; nothing here
# validates the three settings before they are used.
MONGO_URI = os.getenv("MONGO_URI")
DB_NAME = os.getenv("DB_NAME")
COLLECTION_NAME = os.getenv("COLLECTION_NAME")

# Module-level MongoDB handles, created once when this module is imported.
# `collection` is the handle read by display_csv_analysis().
# NOTE(review): if DB_NAME or COLLECTION_NAME is unset, the subscript lookups
# below presumably fail at import time - confirm and consider validating.
mongo_client = MongoClient(MONGO_URI)
db = mongo_client[DB_NAME]
collection = db[COLLECTION_NAME]




def load_csv_from_url(csv_url, timeout=30):
    """Download a CSV file over HTTP and parse it into a DataFrame.

    Args:
        csv_url: URL of the CSV file to download.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Without a timeout a stalled
            server would block the caller indefinitely.

    Returns:
        The downloaded content parsed by ``pandas.read_csv``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: If the request fails or times out.
    """
    response = requests.get(csv_url, timeout=timeout)
    response.raise_for_status()  # Ensure the request was successful
    # Parse from the raw bytes so pandas handles the decoding itself.
    return pd.read_csv(BytesIO(response.content))

# Column Analysis Function
def analyze_column_data(df):
    """Build per-column summary statistics for a DataFrame.

    Columns for which ``pd.api.types.is_numeric_dtype`` is true get
    ``Mean``, ``Median``, ``Mode``, ``Unique Values`` and ``Null Values``.
    All other columns get ``Unique Values``, ``Null Values`` and
    ``Top Categories`` (the five most frequent values with their counts).

    Args:
        df: The DataFrame to summarise.

    Returns:
        A dict mapping each column name to its dict of statistics, in the
        DataFrame's column order. ``Mode`` is the first value returned by
        ``Series.mode()``, or ``None`` when that result is empty (for
        example an all-null column).
    """
    analysis = {}
    for col in df.columns:
        series = df[col]
        # Both branches report these two counts, so compute them once.
        unique_count = series.nunique()
        null_count = series.isnull().sum()

        if pd.api.types.is_numeric_dtype(series):
            # Compute the mode once; it is needed for both the emptiness
            # check and the reported value.
            modes = series.mode()
            analysis[col] = {
                "Mean": series.mean(),
                "Median": series.median(),
                "Mode": modes.iloc[0] if not modes.empty else None,
                "Unique Values": unique_count,
                "Null Values": null_count,
            }
        else:
            analysis[col] = {
                "Unique Values": unique_count,
                "Null Values": null_count,
                "Top Categories": series.value_counts().head(5).to_dict(),
            }
    return analysis

# Streamlit Interface
def display_csv_analysis(object_url):
    """Render the CSV analysis page for one stored document.

    Looks up the record whose ``object_url`` matches in the module-level
    ``collection``, downloads the CSV referenced by its ``csv_object_url``
    field, and shows a preview, per-column statistics and a PyGWalker
    explorer.

    Args:
        object_url: Value matched against the ``object_url`` field of the
            stored records.

    Returns:
        None. If no record matches, or the record has no
        ``csv_object_url``, an error message is shown and the rest of the
        page is skipped.
    """
    # Rendered first so the user can always navigate away, even when the
    # lookup below finds nothing.
    if st.button("Back", key="back_button"):
        st.session_state.page = "view_excel"
        st.rerun()

    # find_one() returns None when nothing matches, and the field itself
    # may be absent; guard both instead of failing on None.
    record = collection.find_one({"object_url": object_url})
    csv_url = record.get("csv_object_url") if record else None
    if not csv_url:
        st.error("No CSV file is available for this document.")
        return

    st.title("CSV File Analysis")

    # Load and display CSV data
    df = load_csv_from_url(csv_url)
    st.subheader("CSV Preview")
    st.dataframe(df)

    # Perform and display analysis
    st.subheader("Column Analysis")
    column_analysis = analyze_column_data(df)

    # Alternate the per-column summaries between two side-by-side columns.
    col1, col2 = st.columns(2)
    for idx, (col_name, col_data) in enumerate(column_analysis.items()):
        with col1 if idx % 2 == 0 else col2:
            st.markdown(f"**{col_name}**")
            st.write(col_data)

    st.markdown("<hr>", unsafe_allow_html=True)
    st.subheader("Graphical Analysis of Table")
    pyg_app = StreamlitRenderer(df)
    pyg_app.explorer()