"""Streamlit page that previews a CSV referenced in MongoDB and summarises its columns."""

import json
import os
from io import BytesIO

import pandas as pd
import requests
import streamlit as st
from dotenv import load_dotenv
from pygwalker.api.streamlit import StreamlitRenderer
from pymongo import MongoClient

# Load environment variables
load_dotenv()
MONGO_URI = os.getenv("MONGO_URI")
DB_NAME = os.getenv("DB_NAME")
COLLECTION_NAME = os.getenv("COLLECTION_NAME")

mongo_client = MongoClient(MONGO_URI)
db = mongo_client[DB_NAME]
collection = db[COLLECTION_NAME]


def load_csv_from_url(csv_url, *, timeout=30.0):
    """Download a CSV file over HTTP and parse it into a DataFrame.

    Args:
        csv_url: URL of the CSV file to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on an unresponsive host.

    Returns:
        A ``pandas.DataFrame`` parsed from the response body.

    Raises:
        requests.exceptions.HTTPError: If the server answers with an error status.
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    response = requests.get(csv_url, timeout=timeout)
    response.raise_for_status()  # Ensure the request was successful
    return pd.read_csv(BytesIO(response.content))


# Column Analysis Function
def analyze_column_data(df: pd.DataFrame) -> dict:
    """Build a per-column summary of ``df``.

    Numeric columns get "Mean", "Median", "Mode", "Unique Values" and
    "Null Values". All other columns get "Unique Values", "Null Values" and
    "Top Categories" (the five most frequent values with their counts).

    Args:
        df: The DataFrame to summarise.

    Returns:
        A dict mapping each column name to its summary dict.
    """
    analysis = {}
    for col in df.columns:
        series = df[col]
        # Counts are plain Python ints (``sum()`` would otherwise yield a NumPy integer).
        null_count = int(series.isnull().sum())
        if pd.api.types.is_numeric_dtype(series):
            # Compute the mode once; it is empty when the column has no non-null values.
            modes = series.mode()
            analysis[col] = {
                "Mean": series.mean(),
                "Median": series.median(),
                "Mode": modes.iloc[0] if not modes.empty else None,
                "Unique Values": series.nunique(),
                "Null Values": null_count,
            }
        else:
            analysis[col] = {
                "Unique Values": series.nunique(),
                "Null Values": null_count,
                "Top Categories": series.value_counts().head(5).to_dict(),
            }
    return analysis


# Streamlit Interface
def display_csv_analysis(object_url):
    """Render the CSV analysis page for the document matching ``object_url``.

    Shows a "Back" button that switches ``st.session_state.page`` to
    "view_excel" and reruns, then looks up the document's ``csv_object_url``,
    loads that CSV, displays a preview and writes the per-column analysis
    into two alternating Streamlit columns.

    NOTE(review): the source available for this review ends part-way through
    this function, so only the statements visible there are reproduced here.
    """
    if st.button("Back", key="back_button"):
        st.session_state.page = "view_excel"
        st.rerun()

    # NOTE(review): find_one returns None when no document matches, in which
    # case .get would raise AttributeError -- confirm callers always pass a
    # known object_url.
    csv_url = collection.find_one({"object_url": object_url}).get("csv_object_url")
    st.title("CSV File Analysis")

    # Load and display CSV data
    df = load_csv_from_url(csv_url)
    st.subheader("CSV Preview")
    st.dataframe(df)

    # Perform and display analysis
    st.subheader("Column Analysis")
    column_analysis = analyze_column_data(df)
    col1, col2 = st.columns(2)
    for idx, (col_name, col_data) in enumerate(column_analysis.items()):
        # Even-indexed entries go to the left column, odd-indexed to the right.
        with col1 if idx % 2 == 0 else col2:
            st.markdown(f"**{col_name}**")
            st.write(col_data)
            # NOTE(review): the visible source is cut off at this point, in the
            # middle of a further st.markdown(...) call; restore that call and
            # anything after it from the full file.