Spaces:
Sleeping
Sleeping
File size: 2,386 Bytes
eef9e83 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
import pandas as pd
from pygwalker.api.streamlit import StreamlitRenderer
from io import BytesIO
import requests
import streamlit as st
from pymongo import MongoClient
import os
from dotenv import load_dotenv
import json
# Load environment variables
# load_dotenv() runs at import time, before the os.getenv() lookups below.
load_dotenv()
# os.getenv returns None for any variable that is not set; nothing here
# validates the three settings before they are used.
MONGO_URI = os.getenv("MONGO_URI")
DB_NAME = os.getenv("DB_NAME")
COLLECTION_NAME = os.getenv("COLLECTION_NAME")
# Module-level MongoDB handles, created once on import and shared by the
# functions in this file.
# NOTE(review): behavior when MONGO_URI / DB_NAME / COLLECTION_NAME are unset
# depends on pymongo — confirm that deployment always provides them.
mongo_client = MongoClient(MONGO_URI)
db = mongo_client[DB_NAME]
collection = db[COLLECTION_NAME]
def load_csv_from_url(csv_url, timeout=30):
    """Download a CSV file over HTTP and parse it into a DataFrame.

    Args:
        csv_url: URL of the CSV file to fetch.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``; may be a float or a
            ``(connect, read)`` tuple.

    Returns:
        pandas.DataFrame: The parsed CSV contents.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within *timeout*.
    """
    # Without an explicit timeout requests waits forever, so a stalled server
    # would hang the whole page.
    response = requests.get(csv_url, timeout=timeout)
    response.raise_for_status()  # Ensure the request was successful
    # Parse from the raw bytes so pandas handles decoding itself.
    return pd.read_csv(BytesIO(response.content))
# Column Analysis Function
def analyze_column_data(df):
    """Compute summary statistics for every column of *df*.

    Numeric columns get ``Mean``, ``Median``, ``Mode``, ``Unique Values`` and
    ``Null Values``. All other columns get ``Unique Values``, ``Null Values``
    and ``Top Categories`` (the five most frequent values with their counts).

    Args:
        df: The pandas DataFrame to analyse.

    Returns:
        dict: Maps each column name to a dict of the statistics above. Empty
        when *df* has no columns.
    """
    analysis = {}
    for col in df.columns:
        series = df[col]
        # NumPy integer scalars are rejected by the standard json encoder,
        # so hand back a plain int.
        null_count = int(series.isnull().sum())

        if pd.api.types.is_numeric_dtype(series):
            # Compute the mode once; it is empty when the column holds no
            # non-null values.
            modes = series.mode()
            if modes.empty:
                mode_value = None
            else:
                mode_value = modes.iloc[0]
                # Unwrap NumPy scalars to the equivalent Python number.
                if hasattr(mode_value, "item"):
                    mode_value = mode_value.item()

            analysis[col] = {
                "Mean": series.mean(),
                "Median": series.median(),
                "Mode": mode_value,
                "Unique Values": series.nunique(),
                "Null Values": null_count,
            }
        else:
            analysis[col] = {
                "Unique Values": series.nunique(),
                "Null Values": null_count,
                "Top Categories": series.value_counts().head(5).to_dict(),
            }
    return analysis
# Streamlit Interface
def display_csv_analysis(object_url):
    """Render the CSV analysis page for the record matching *object_url*.

    Looks up the document whose ``object_url`` field equals *object_url*,
    downloads the CSV named by its ``csv_object_url`` field, and renders a
    data preview, per-column statistics laid out in two columns, and a
    PyGWalker explorer. A "Back" button switches the session's page to
    ``"view_excel"`` and reruns the app.

    Args:
        object_url: Value matched against the ``object_url`` field of the
            module-level ``collection``.
    """
    if st.button("Back", key="back_button"):
        st.session_state.page = "view_excel"
        st.rerun()

    # find_one returns None when nothing matches, and the document may lack
    # the CSV link; report that instead of failing on a None value.
    record = collection.find_one({"object_url": object_url})
    csv_url = record.get("csv_object_url") if record else None
    if not csv_url:
        st.error("No CSV file is available for this item.")
        return

    st.title("CSV File Analysis")

    # Load and display CSV data
    df = load_csv_from_url(csv_url)
    st.subheader("CSV Preview")
    st.dataframe(df)

    # Perform and display analysis
    st.subheader("Column Analysis")
    column_analysis = analyze_column_data(df)
    col1, col2 = st.columns(2)
    for idx, (col_name, col_data) in enumerate(column_analysis.items()):
        # Alternate between the two layout columns.
        with col1 if idx % 2 == 0 else col2:
            st.markdown(f"**{col_name}**")
            st.write(col_data)
            st.markdown("<hr>", unsafe_allow_html=True)

    st.subheader("Graphical Analysis of Table")
    pyg_app = StreamlitRenderer(df)
    pyg_app.explorer()
|