Spaces:

youdata-ai
/

MOSPI_analysis_tool

Sleeping

File size: 2,386 Bytes

eef9e83

import pandas as pd
from pygwalker.api.streamlit import StreamlitRenderer
from io import BytesIO
import requests
import streamlit as st
from pymongo import MongoClient
import os
from dotenv import load_dotenv
import json

# Load environment variables
# load_dotenv() is called before the os.getenv lookups so that values from a
# .env file (if one exists) are visible to them.
load_dotenv()
# Connection settings. os.getenv returns None for any variable that is not set;
# no validation is performed here.
MONGO_URI = os.getenv("MONGO_URI")
DB_NAME = os.getenv("DB_NAME")
COLLECTION_NAME = os.getenv("COLLECTION_NAME")

# Module-level MongoDB handles, created once at import time.
# `collection` is the handle display_csv_analysis queries to map an
# object_url to its csv_object_url.
mongo_client = MongoClient(MONGO_URI)
db = mongo_client[DB_NAME]
collection = db[COLLECTION_NAME]




def load_csv_from_url(csv_url, timeout=30):
    """Download a CSV file over HTTP(S) and parse it into a DataFrame.

    Args:
        csv_url: URL of the CSV file to fetch.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``; may be a float or a
            ``(connect, read)`` tuple.

    Returns:
        A ``pandas.DataFrame`` built from the response body.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    # requests applies no timeout by default, so without one an unresponsive
    # server would block this call (and the page render) indefinitely.
    response = requests.get(csv_url, timeout=timeout)
    response.raise_for_status()  # Ensure the request was successful
    return pd.read_csv(BytesIO(response.content))

def _to_native(value):
    """Return *value* as a built-in Python scalar if it is a NumPy scalar."""
    # Local import: NumPy is a hard dependency of pandas, and importing it
    # here keeps this block independent of the file-level import list.
    import numpy as np

    return value.item() if isinstance(value, np.generic) else value


# Column Analysis Function
def analyze_column_data(df):
    """Compute per-column summary statistics for a DataFrame.

    Numeric columns get "Mean", "Median", "Mode", "Unique Values" and
    "Null Values". All other columns get "Unique Values", "Null Values"
    and "Top Categories" (the five most frequent values with their counts).

    NumPy scalar results are converted to built-in Python numbers so that
    the returned structure can be serialised with ``json`` as-is.

    Args:
        df: The ``pandas.DataFrame`` to analyse.

    Returns:
        A dict mapping each column name to a dict of the statistics above.
        "Mode" is ``None`` when the column has no non-null values.
    """
    analysis = {}
    for col in df.columns:
        series = df[col]
        if pd.api.types.is_numeric_dtype(series):
            # mode() scans the whole column, so compute it once and reuse it.
            modes = series.mode()
            analysis[col] = {
                "Mean": _to_native(series.mean()),
                "Median": _to_native(series.median()),
                "Mode": _to_native(modes[0]) if not modes.empty else None,
                "Unique Values": series.nunique(),
                "Null Values": _to_native(series.isnull().sum()),
            }
        else:
            analysis[col] = {
                "Unique Values": series.nunique(),
                "Null Values": _to_native(series.isnull().sum()),
                "Top Categories": series.value_counts().head(5).to_dict(),
            }
    return analysis

# Streamlit Interface
def display_csv_analysis(object_url):
    """Render the CSV analysis page for the file identified by *object_url*.

    Looks up the document whose ``object_url`` matches, downloads the CSV
    referenced by its ``csv_object_url`` field, and shows a preview table,
    per-column statistics in two columns, and a PyGWalker explorer.

    Args:
        object_url: Value matched against the ``object_url`` field of the
            MongoDB collection.

    Returns:
        None. If no matching document exists, or it has no
        ``csv_object_url``, an error message is shown and rendering stops.
    """
    if st.button("Back", key="back_button"):
        st.session_state.page = "view_excel"
        st.rerun()

    st.title("CSV File Analysis")

    # find_one returns None when nothing matches, and the field itself may be
    # absent; handle both instead of failing with an AttributeError or
    # handing None to the downloader.
    record = collection.find_one({"object_url": object_url})
    csv_url = record.get("csv_object_url") if record else None
    if not csv_url:
        st.error("No CSV file is available for the selected object.")
        return

    # Load and display CSV data
    df = load_csv_from_url(csv_url)
    st.subheader("CSV Preview")
    st.dataframe(df)

    # Perform and display analysis
    st.subheader("Column Analysis")
    column_analysis = analyze_column_data(df)

    # Alternate entries between the two layout columns: even index left,
    # odd index right.
    col1, col2 = st.columns(2)
    for idx, (col_name, col_data) in enumerate(column_analysis.items()):
        with col1 if idx % 2 == 0 else col2:
            st.markdown(f"**{col_name}**")
            st.write(col_data)

    st.markdown("<hr>", unsafe_allow_html=True)
    st.subheader("Graphical Analysis of Table")
    pyg_app = StreamlitRenderer(df)
    pyg_app.explorer()