# MOSPI_analysis_tool / table_analysis_for_excel.py
# akshansh36's picture
# Upload 10 files
# eef9e83 verified
import pandas as pd
from pygwalker.api.streamlit import StreamlitRenderer
from io import BytesIO
import requests
import streamlit as st
from pymongo import MongoClient
import os
from dotenv import load_dotenv
import json
# Run python-dotenv's loader first so the lookups below can see any values it adds.
load_dotenv()

# MongoDB connection settings, read from the process environment.
# Each one is None when the corresponding variable is not set.
MONGO_URI, DB_NAME, COLLECTION_NAME = (
    os.environ.get(setting)
    for setting in ("MONGO_URI", "DB_NAME", "COLLECTION_NAME")
)

# Module-level handles: client -> database -> collection.
mongo_client = MongoClient(MONGO_URI)
db = mongo_client[DB_NAME]
collection = db[COLLECTION_NAME]
def load_csv_from_url(csv_url, timeout=30):
    """Download a CSV file over HTTP and parse it into a DataFrame.

    Args:
        csv_url: URL of the CSV file to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would hang the
            calling script if the server stalls.

    Returns:
        The downloaded content parsed by ``pandas.read_csv``.

    Raises:
        requests.HTTPError: If the server responds with an error status.
        requests.RequestException: For timeouts, connection failures and
            other request-level errors.
    """
    response = requests.get(csv_url, timeout=timeout)
    response.raise_for_status()  # Ensure the request was successful
    # Parse from the raw bytes so pandas handles decoding itself.
    return pd.read_csv(BytesIO(response.content))
# Column Analysis Function
def _to_builtin(value):
    """Return *value* as a plain Python scalar when it is a NumPy scalar."""
    # NumPy scalars expose .item(); other values (None, str, pd.NA, ...) pass through.
    return value.item() if hasattr(value, "item") else value


def analyze_column_data(df):
    """Build a per-column summary of a DataFrame.

    Numeric columns get "Mean", "Median", "Mode", "Unique Values" and
    "Null Values". All other columns get "Unique Values", "Null Values" and
    "Top Categories" (the five most frequent values with their counts).

    Scalar results are converted to builtin Python types so the summary can
    be serialised (for example with ``json.dumps``) without NumPy scalars
    such as ``np.int64`` getting in the way.

    Args:
        df: The DataFrame to summarise.

    Returns:
        A dict mapping each column name to its summary dict. An empty
        DataFrame yields an empty dict.
    """
    analysis = {}
    for col in df.columns:
        series = df[col]
        unique_count = _to_builtin(series.nunique())
        null_count = _to_builtin(series.isnull().sum())

        if pd.api.types.is_numeric_dtype(series):
            # Compute the mode once; it is empty when every value is null.
            modes = series.mode()
            analysis[col] = {
                "Mean": _to_builtin(series.mean()),
                "Median": _to_builtin(series.median()),
                "Mode": _to_builtin(modes[0]) if not modes.empty else None,
                "Unique Values": unique_count,
                "Null Values": null_count,
            }
        else:
            top_counts = series.value_counts().head(5).to_dict()
            analysis[col] = {
                "Unique Values": unique_count,
                "Null Values": null_count,
                "Top Categories": {
                    category: _to_builtin(count)
                    for category, count in top_counts.items()
                },
            }
    return analysis
# Streamlit Interface
def display_csv_analysis(object_url):
    """Render the Streamlit page that previews and analyses a stored CSV.

    Looks up the document whose ``object_url`` matches, downloads the CSV
    referenced by its ``csv_object_url`` field, then shows a data preview,
    a two-column per-column summary and a pygwalker explorer.

    If no document matches, the document has no CSV URL, or the download
    fails, an error message is shown and the rest of the page is skipped.

    Args:
        object_url: Value of the ``object_url`` field used to find the
            document in the collection.
    """
    if st.button("Back", key="back_button"):
        st.session_state.page = "view_excel"
        st.rerun()

    # find_one returns None when nothing matches, so guard before reading it.
    record = collection.find_one({"object_url": object_url})
    csv_url = record.get("csv_object_url") if record else None

    st.title("CSV File Analysis")

    if not csv_url:
        st.error("No CSV file is linked to this document.")
        return

    # Load and display CSV data
    try:
        df = load_csv_from_url(csv_url)
    except requests.RequestException as exc:
        st.error(f"Could not download the CSV file: {exc}")
        return

    st.subheader("CSV Preview")
    st.dataframe(df)

    # Perform and display analysis
    st.subheader("Column Analysis")
    column_analysis = analyze_column_data(df)

    # Alternate the summaries between the two columns: even index left, odd right.
    left, right = st.columns(2)
    for idx, (col_name, col_data) in enumerate(column_analysis.items()):
        with left if idx % 2 == 0 else right:
            st.markdown(f"**{col_name}**")
            st.write(col_data)
            st.markdown("<hr>", unsafe_allow_html=True)

    st.subheader("Graphical Analysis of Table")
    pyg_app = StreamlitRenderer(df)
    pyg_app.explorer()