import json
import os
from io import BytesIO

import pandas as pd
import requests
import streamlit as st
from dotenv import load_dotenv
from pygwalker.api.streamlit import StreamlitRenderer
from pymongo import MongoClient

# Load environment variables
load_dotenv()

MONGO_URI = os.getenv("MONGO_URI")
DB_NAME = os.getenv("DB_NAME")
COLLECTION_NAME = os.getenv("COLLECTION_NAME")

mongo_client = MongoClient(MONGO_URI)
db = mongo_client[DB_NAME]
collection = db[COLLECTION_NAME]


# Load the CSV from a URL (replace with actual CSV download from S3)
def load_csv_from_url(object_url, timeout=30):
    """Download a CSV file over HTTP and parse it into a DataFrame.

    Args:
        object_url: URL of the CSV file to fetch.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
            Without one, a stalled server would block the caller forever.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_csv`` on the
        response body.

    Raises:
        requests.exceptions.HTTPError: If the response status signals an
            error (via ``raise_for_status``).
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    response = requests.get(object_url, timeout=timeout)
    response.raise_for_status()  # Ensure the request was successful
    return pd.read_csv(BytesIO(response.content))


# Analyzing each column based on data type
def analyze_column_data(df):
    """Build a per-column summary of a DataFrame.

    Numeric columns (as judged by ``pd.api.types.is_numeric_dtype``) get
    "Mean", "Median", "Mode", "Unique Values" and "Null Values". All other
    columns get "Unique Values", "Null Values" and "Top Categories" (the
    five most frequent values with their counts).

    Args:
        df: The DataFrame to summarise.

    Returns:
        A dict mapping each column label to its summary dict.
    """
    analysis = {}
    for col in df.columns:
        series = df[col]
        if pd.api.types.is_numeric_dtype(series):
            # Compute the mode once; the result can be empty, in which case
            # there is no mode to report.
            modes = series.mode()
            analysis[col] = {
                "Mean": series.mean(),
                "Median": series.median(),
                "Mode": modes.iloc[0] if not modes.empty else None,
                "Unique Values": series.nunique(),
                "Null Values": series.isnull().sum(),
            }
        else:
            analysis[col] = {
                "Unique Values": series.nunique(),
                "Null Values": series.isnull().sum(),
                "Top Categories": series.value_counts().head(5).to_dict(),
            }
    return analysis


# Main function to render the View Table Analysis page
def view_table_analysis_page(url):
    if st.button("Back",key="back_button"):
        st.session_state.page="view_image"
        st.rerun()
    image=collection.find_one({"object_url":url})
    csv_url=image.get("csv_object_url")
    # Load CSV data
    df = load_csv_from_url(csv_url)
    # Check if the last row has any cell containing the word "total" (case-insensitive)
    if df.iloc[-1].apply(lambda x: "total" in str(x).lower()).any():
        df = df.iloc[:-1]  # Drop the last row if "total" is found in any cell
    # Page title
    st.title("Table Analysis")
    # CSV Preview
    st.subheader("CSV Preview")
    st.write("Below is a preview of the uploaded CSV file:")
    st.dataframe(df)  # Interactive, scrollable table
    #
Download Button excel_buffer = BytesIO() with pd.ExcelWriter(excel_buffer, engine='openpyxl') as writer: df.to_excel(writer, index=False, sheet_name="Sheet1") excel_buffer.seek(0) # Reset buffer position # Download Button st.download_button( label="Download Full Excel Sheet", data=excel_buffer, file_name="table_data.xlsx", mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" ) st.markdown("