import pandas as pd from pygwalker.api.streamlit import StreamlitRenderer from io import BytesIO import requests import streamlit as st from pymongo import MongoClient import os from dotenv import load_dotenv import json # Load environment variables load_dotenv() MONGO_URI = os.getenv("MONGO_URI") DB_NAME = os.getenv("DB_NAME") COLLECTION_NAME = os.getenv("COLLECTION_NAME") mongo_client = MongoClient(MONGO_URI) db = mongo_client[DB_NAME] collection = db[COLLECTION_NAME] # Load the CSV from a URL (replace with actual CSV download from S3) def load_csv_from_url(object_url): response = requests.get(object_url) response.raise_for_status() # Ensure the request was successful csv_data = pd.read_csv(BytesIO(response.content)) return csv_data # Analyzing each column based on data type def analyze_column_data(df): analysis = {} for col in df.columns: if pd.api.types.is_numeric_dtype(df[col]): analysis[col] = { "Mean": df[col].mean(), "Median": df[col].median(), "Mode": df[col].mode()[0] if not df[col].mode().empty else None, "Unique Values": df[col].nunique(), "Null Values": df[col].isnull().sum() } else: analysis[col] = { "Unique Values": df[col].nunique(), "Null Values": df[col].isnull().sum(), "Top Categories": df[col].value_counts().head(5).to_dict() } return analysis # Main function to render the View Table Analysis page def view_table_analysis_page(url): if st.button("Back",key="back_button"): st.session_state.page="view_image" st.rerun() image=collection.find_one({"object_url":url}) csv_url=image.get("csv_object_url") # Load CSV data df = load_csv_from_url(csv_url) # Check if the last row has any cell containing the word "total" (case-insensitive) if df.iloc[-1].apply(lambda x: "total" in str(x).lower()).any(): df = df.iloc[:-1] # Drop the last row if "total" is found in any cell # Page title st.title("Table Analysis") # CSV Preview st.subheader("CSV Preview") st.write("Below is a preview of the uploaded CSV file:") st.dataframe(df) # Interactive, scrollable table # Download Button excel_buffer = BytesIO() with pd.ExcelWriter(excel_buffer, engine='openpyxl') as writer: df.to_excel(writer, index=False, sheet_name="Sheet1") excel_buffer.seek(0) # Reset buffer position # Download Button st.download_button( label="Download Full Excel Sheet", data=excel_buffer, file_name="table_data.xlsx", mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" ) st.markdown("
", unsafe_allow_html=True) table_description=image.get("table_data").get("description",None) if table_description: # Table Description st.subheader("Table Description") st.write(table_description) # Column Summary st.markdown("
",unsafe_allow_html=True) st.subheader("Column Summary") with st.container(height=400, border=False): column_summary = image.get("table_data").get("column_summary", None) if column_summary: # Column-level descriptions and analysis column_analysis = analyze_column_data(df) col1, col2 = st.columns(2) for idx, (col_name, col_description) in enumerate(column_summary.items()): # Determine which column to use based on the index with col1 if idx % 2 == 0 else col2: st.markdown(f"Column Name : **{col_name}**") st.write(f"Column Description : {col_description}") # Display basic analysis analysis = column_analysis.get(col_name, {}) if pd.api.types.is_numeric_dtype(df[col_name]): # Numeric column analysis st.write({ "Mean": analysis.get("Mean"), "Median": analysis.get("Median"), "Mode": analysis.get("Mode"), "Unique Values": analysis.get("Unique Values"), "Null Values": analysis.get("Null Values") }) else: # Categorical column analysis st.write({ "Unique Values": analysis.get("Unique Values"), "Null Values": analysis.get("Null Values"), "Top Categories": analysis.get("Top Categories") }) st.markdown("
", unsafe_allow_html=True) st.subheader("Graphical Analysis of Table") # Default configuration for initial visualization best_col1=image.get("table_data").get("best_col1") best_col2 = image.get("table_data").get("best_col2") default_chart_config = { "mark": "bar", "encoding": { "x": {"field": best_col1, "type": "nominal"}, "y": {"field": best_col2, "type": "quantitative"} } } # Convert default_chart_config to JSON string for Pygwalker spec parameter pyg_app = StreamlitRenderer(df, spec=json.dumps(default_chart_config)) pyg_app.explorer()