CosmickVisions committed on
Commit
35d44ea
·
verified ·
1 Parent(s): 2722789

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +761 -182
app.py CHANGED
@@ -1,209 +1,788 @@
1
  import streamlit as st
2
  import pandas as pd
 
3
  import plotly.express as px
 
 
 
 
 
 
 
 
4
  from pycaret.classification import setup, compare_models, pull
5
- import numpy as np
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
- # ================== 🔹 AUTO-PLOT FUNCTION ==================
8
- def generate_auto_plot(df, selected_columns, chart_type, analysis_type):
9
- try:
10
- if chart_type == "Auto-Detect":
11
- if analysis_type == "Single Variable":
12
- if pd.api.types.is_numeric_dtype(df[selected_columns[0]]):
13
- chart_type = "Histogram"
14
- else:
15
- chart_type = "Bar Chart"
16
- elif analysis_type == "Multi-Variable":
17
- if all(pd.api.types.is_numeric_dtype(df[col]) for col in selected_columns[:2]):
18
- chart_type = "Scatter Plot"
19
- else:
20
- chart_type = "Box Plot"
21
 
22
- if analysis_type == "Single Variable":
23
- col = selected_columns[0]
24
- fig = generate_chart(df, chart_type, col)
25
- stats = calculate_statistics(df, col)
26
 
27
- col1, col2 = st.columns([2, 1])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  with col1:
29
- st.plotly_chart(fig, use_container_width=True)
 
 
 
30
  with col2:
31
- st.subheader("📌 Key Insights")
32
- if pd.api.types.is_numeric_dtype(df[col]):
33
- st.metric("Mean", f"{stats['mean']:.2f}")
34
- st.metric("Median", f"{stats['median']:.2f}")
35
- st.metric("Std Dev", f"{stats['std']:.2f}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  else:
37
- st.metric("Unique Values", stats['unique_values'])
38
- st.metric("Most Common", stats['most_common'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  elif analysis_type == "Multi-Variable":
41
- if len(selected_columns) < 2:
42
- st.warning("Please select at least two columns")
43
- else:
44
- fig = generate_chart(df, chart_type, selected_columns[0], selected_columns[1])
45
- st.plotly_chart(fig, use_container_width=True)
46
- if chart_type in ["Scatter Plot", "Heatmap"]:
47
- st.subheader("📌 Correlation Insights")
48
- try:
49
- corr = df[selected_columns[0]].corr(df[selected_columns[1]])
50
- st.write(f"**Correlation Coefficient:** {corr:.2f}")
51
- st.progress(abs(corr))
52
- st.caption("Absolute correlation strength")
53
- except:
54
- st.warning("Could not calculate correlation for selected columns")
55
-
56
- elif analysis_type == "3D Analysis":
57
- fig = generate_chart(df, "3D Scatter", x_col, y_col, z_col)
58
- st.plotly_chart(fig, use_container_width=True)
59
-
60
- st.subheader("📌 3D Analysis Insights")
61
  col1, col2, col3 = st.columns(3)
62
  with col1:
63
- st.metric("X Range", f"{df[x_col].min():.2f} - {df[x_col].max():.2f}")
64
  with col2:
65
- st.metric("Y Range", f"{df[y_col].min():.2f} - {df[y_col].max():.2f}")
66
  with col3:
67
- st.metric("Z Range", f"{df[z_col].min():.2f} - {df[z_col].max():.2f}")
68
-
69
- except Exception as e:
70
- st.error(f"Visualization error: {str(e)}")
71
-
72
- # ================== 🔹 MACHINE LEARNING MODEL FUNCTION ==================
73
- def run_automl(df):
74
- try:
75
- target = st.selectbox("Target Variable", df.columns)
76
- setup(df, target=target, session_id=42,
77
- feature_interaction=True,
78
- polynomial_features=True)
79
- best_model = compare_models(n_select=3)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
- # Visual Leaderboard
82
- results = pull()
83
- fig = px.bar(results, x='Model', y=['Accuracy', 'AUC'],
84
- barmode='group', template="plotly_dark",
85
- title="Model Performance Leaderboard")
86
- st.plotly_chart(fig, use_container_width=True)
87
-
88
- except Exception as e:
89
- st.error(f"AutoML failed: {str(e)}")
90
-
91
- # ================== 🔹 PREDICTIONS FUNCTION ==================
92
- def make_predictions(df):
93
- try:
 
 
 
 
 
 
 
 
 
 
 
94
  uploaded_file = st.file_uploader("Upload New Data for Prediction", type=["csv", "xlsx"])
 
95
  if uploaded_file:
96
  new_data = pd.read_csv(uploaded_file) if uploaded_file.name.endswith('.csv') else pd.read_excel(uploaded_file)
97
  st.write("📊 Preview of New Data:")
98
  st.dataframe(new_data.head())
99
 
100
- predictions = st.session_state.model.predict(new_data)
101
- proba = st.session_state.model.predict_proba(new_data) if hasattr(st.session_state.model, 'predict_proba') else None
102
-
103
- st.subheader("📢 Predictions:")
104
- result_df = pd.DataFrame({
105
- 'Prediction': predictions,
106
- 'Confidence': proba.max(axis=1) if proba is not None else [1.0]*len(predictions)
107
- })
108
- st.dataframe(result_df.style.background_gradient(cmap='Blues'))
109
-
110
- # Download predictions
111
- csv = result_df.to_csv(index=False).encode('utf-8')
112
- st.download_button(
113
- label="📥 Download Predictions",
114
- data=csv,
115
- file_name='predictions.csv',
116
- mime='text/csv'
117
- )
 
 
 
 
 
 
118
 
119
- except Exception as e:
120
- st.error(f"Prediction error: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
 
122
- # ================== 🔹 VISUALIZATION FUNCTION ==================
123
- def run_visualization(df):
124
- col1, col2 = st.columns([1, 3])
125
- with col1:
126
- if st.button("✨ Suggest Visualizations", help="Generate smart visualization recommendations"):
127
- try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  numeric_cols = df.select_dtypes(include=np.number).columns.tolist()
129
- cat_cols = df.select_dtypes(include=['object', 'category']).columns.tolist()
130
-
131
- if len(numeric_cols) >= 3:
132
- st.session_state.viz_type = "3D Scatter"
133
- elif len(cat_cols) > 0:
134
- st.session_state.viz_type = "Pie"
135
  else:
136
- st.session_state.viz_type = "Histogram"
137
-
138
- st.success(f"Recommended visualization type: {st.session_state.viz_type}")
139
- except Exception as e:
140
- st.error(f"Recommendation failed: {str(e)}")
141
-
142
- with st.expander("🎨 Custom Visualization", expanded=True):
143
- plot_options = ["3D Scatter", "Line", "Bar", "Pie", "Histogram", "Box", "Violin", "Heatmap"]
144
- plot_type = st.selectbox("Select Plot Type", plot_options,
145
- index=plot_options.index(st.session_state.viz_type) if 'viz_type' in st.session_state else 0)
146
-
147
- fig = None
148
- if plot_type == "3D Scatter":
149
- fig = create_3d_scatter(df)
150
- elif plot_type == "Line":
151
- fig = create_line_chart(df)
152
- elif plot_type == "Bar":
153
- fig = create_bar_chart(df)
154
- elif plot_type == "Pie":
155
- fig = create_pie_chart(df)
156
- elif plot_type == "Histogram":
157
- fig = create_histogram(df)
158
- elif plot_type == "Box":
159
- fig = create_box_plot(df)
160
- elif plot_type == "Violin":
161
- fig = create_violin_plot(df)
162
- elif plot_type == "Heatmap":
163
- fig = create_heatmap(df)
164
-
165
- # Plot Customization and Display
166
- if fig:
167
- st.plotly_chart(fig, use_container_width=True)
168
- plot_html = fig.to_html()
169
- st.download_button(
170
- label="📥 Download Plot",
171
- data=plot_html,
172
- file_name=f"{plot_type.replace(' ', '_')}_plot.html",
173
- mime="text/html"
174
- )
175
- else:
176
- st.warning("Please select a valid plot type")
177
-
178
- # ================== 🔹 FINAL APP STRUCTURE ==================
179
- def main():
180
- st.title("AI Data Studio")
181
-
182
- # Select Functionality
183
- choice = st.sidebar.selectbox("Select Feature", ["Exploration", "Machine Learning", "Predictions", "Visualization"])
184
-
185
- if choice == "Exploration":
186
- st.header("🔹 Data Exploration")
187
- # Add exploration functionality
188
-
189
- elif choice == "Machine Learning":
190
- st.header("🤖 Enterprise ML Studio")
191
- if st.session_state.cleaned_df is not None:
192
- df = st.session_state.cleaned_df
193
- run_automl(df)
194
-
195
- elif choice == "Predictions":
196
- st.header("🔮 Make Predictions on New Data")
197
- if st.session_state.get("model"):
198
- make_predictions(df)
199
- else:
200
- st.warning("⚠️ No trained model found. Please train a model first.")
201
-
202
- elif choice == "Visualization":
203
- st.header("📊 Advanced Visualization Lab")
204
- if st.session_state.cleaned_df is not None:
205
- df = st.session_state.cleaned_df
206
- run_visualization(df)
207
-
208
- if __name__ == '__main__':
209
- main()
 
1
  import streamlit as st
2
  import pandas as pd
3
+ import numpy as np
4
  import plotly.express as px
5
+ import seaborn as sns
6
+ import matplotlib.pyplot as plt
7
+ from io import StringIO
8
+ from sklearn.impute import KNNImputer, SimpleImputer
9
+ from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler, LabelEncoder, OneHotEncoder
10
+ from sklearn.decomposition import PCA
11
+ from sklearn.cluster import KMeans
12
+ from sklearn.model_selection import train_test_split
13
  from pycaret.classification import setup, compare_models, pull
14
+ from scipy.stats import zscore
15
+ import matplotlib
16
+ from sklearn.feature_selection import SelectKBest, f_classif
17
+ from ydata_profiling import ProfileReport
18
+ from ydata_profiling.config import Settings
19
+ from functools import lru_cache
20
+ # ================== 🔹 ENHANCED STYLING ==================
21
+ def load_custom_css():
22
+ st.markdown("""
23
+ <style>
24
+ /* 🌌 Cosmic Nebula Background */
25
+ body, .main {
26
+ background: radial-gradient(circle at top, #10002b 0%, #240046 50%, #3c096c 100%);
27
+ color: #ffffff;
28
+ font-family: 'Poppins', sans-serif;
29
+ }
30
+ /* 🌠 Animated Starfield Effect */
31
+ body::before {
32
+ content: "";
33
+ position: fixed;
34
+ top: 0;
35
+ left: 0;
36
+ width: 100%;
37
+ height: 100%;
38
+ background: url('https://source.unsplash.com/random/1600x900/?stars,galaxy,nebula') center/cover no-repeat;
39
+ opacity: 0.1;
40
+ z-index: -1;
41
+ }
42
+ /* 🪐 Glassmorphism Containers */
43
+ .stContainer, .stExpander, .stDataFrame {
44
+ background: rgba(255, 255, 255, 0.08) !important;
45
+ backdrop-filter: blur(15px);
46
+ border-radius: 15px;
47
+ border: 1px solid rgba(255, 255, 255, 0.12);
48
+ padding: 1.5rem;
49
+ box-shadow: 0 10px 30px rgba(255, 255, 255, 0.12);
50
+ }
51
+ /* 🔮 Cyberpunk Buttons */
52
+ .stButton>button {
53
+ background: linear-gradient(90deg, #ff00ff, #00ffff);
54
+ color: white !important;
55
+ border: none;
56
+ border-radius: 12px;
57
+ padding: 0.8rem 1.5rem;
58
+ font-weight: bold;
59
+ letter-spacing: 0.05rem;
60
+ transition: all 0.4s ease;
61
+ text-transform: uppercase;
62
+ width: 100%;
63
+ }
64
+ .stButton>button:hover {
65
+ transform: scale(1.05);
66
+ box-shadow: 0 0 20px rgba(0, 255, 255, 0.8);
67
+ }
68
+ /* 🎆 Neon Headers */
69
+ h1, h2, h3, h4, h5, h6 {
70
+ font-weight: bold;
71
+ text-transform: uppercase;
72
+ text-shadow: 0 0 10px rgba(0, 255, 255, 0.6);
73
+ color: #00ffff;
74
+ padding: 0.5rem 0;
75
+ }
76
+ /* 🔍 Interactive Inputs */
77
+ .stTextInput>div>div>input,
78
+ .stSelectbox>div>div>div,
79
+ .stSlider>div>div>div {
80
+ background: rgba(0, 0, 0, 0.5) !important;
81
+ border-radius: 10px !important;
82
+ padding: 0.75rem !important;
83
+ color: white !important;
84
+ border: 1px solid rgba(255, 255, 255, 0.3) !important;
85
+ transition: all 0.3s ease;
86
+ }
87
+ .stTextInput>div>div>input:focus,
88
+ .stSelectbox>div>div>div:hover {
89
+ border-color: #ff00ff !important;
90
+ box-shadow: 0 0 12px rgba(255, 0, 255, 0.6);
91
+ }
92
+ /* 🎭 Data Grid Styling */
93
+ [data-testid="stDataFrame"] {
94
+ border: 1px solid rgba(255, 255, 255, 0.2);
95
+ border-radius: 10px;
96
+ background: rgba(255, 255, 255, 0.05);
97
+ padding: 1rem;
98
+ color: white !important;
99
+ }
100
+ /* 📊 Graph Enhancements */
101
+ .stPlotlyChart, .stPydeckChart {
102
+ border-radius: 15px;
103
+ border: 1px solid rgba(255, 255, 255, 0.1);
104
+ padding: 1rem;
105
+ box-shadow: 0 8px 20px rgba(255, 255, 255, 0.15);
106
+ }
107
+ /* 🎛️ Consistent Spacing */
108
+ .stContainer > *,
109
+ .stExpander > * {
110
+ margin: 1rem 0;
111
+ }
112
+ /* 🚀 Futuristic Scrollbars */
113
+ ::-webkit-scrollbar {
114
+ width: 8px;
115
+ height: 8px;
116
+ }
117
+ ::-webkit-scrollbar-track {
118
+ background: rgba(25, 25, 45, 0.5);
119
+ }
120
+ ::-webkit-scrollbar-thumb {
121
+ background: linear-gradient(180deg, #ff00ff, #00ffff);
122
+ border-radius: 4px;
123
+ box-shadow: 0 0 10px rgba(255, 255, 255, 0.3);
124
+ }
125
+ /* ✨ Smooth Animations */
126
+ * {
127
+ transition: all 0.25s ease-in-out;
128
+ }
129
+ </style>
130
+ """, unsafe_allow_html=True)
131
 
132
+ load_custom_css()
 
 
 
 
 
 
 
 
 
 
 
 
 
133
 
 
 
 
 
134
 
135
+
136
+ # ================== 🔹 CACHED FUNCTIONS ==================
137
+ # ================== 🔹 CACHED FUNCTIONS ==================
138
@st.cache_data(ttl=3600)
def calculate_statistics(df, column):
    """Calculate and cache summary statistics for one column of ``df``.

    Args:
        df: DataFrame that contains ``column``.
        column: Label of the column to summarise.

    Returns:
        For a numeric column, a dict with the keys ``mean``, ``median``,
        ``std``, ``min`` and ``max``.  For any other dtype, a dict with
        ``unique_values`` (result of ``nunique()``) and ``most_common``
        (the first modal value, or ``None`` when the column has no
        non-null values to take a mode from).
    """
    series = df[column]

    if pd.api.types.is_numeric_dtype(series):
        return {
            "mean": series.mean(),
            "median": series.median(),
            "std": series.std(),
            "min": series.min(),
            "max": series.max(),
        }

    # mode() returns an empty Series for an empty / all-null column, so
    # indexing position 0 unconditionally would raise instead of reporting.
    modes = series.mode()
    return {
        "unique_values": series.nunique(),
        "most_common": modes.iloc[0] if not modes.empty else None,
    }
154
+
155
@st.cache_data(ttl=3600)
def generate_chart(df, chart_type, x_col, y_col=None, z_col=None):
    """Generate and cache a Plotly chart for the requested chart type.

    Args:
        df: DataFrame holding the data to plot.
        chart_type: One of ``"Histogram"``, ``"Bar Chart"``, ``"Box Plot"``,
            ``"Violin Plot"``, ``"Scatter Plot"``, ``"3D Scatter"`` or
            ``"Heatmap"``.
        x_col: Primary column.  Used as the single plotted variable for the
            histogram, bar, box and violin charts.
        y_col: Second column; used by the scatter, 3D scatter and heatmap
            charts.
        z_col: Third column; used by the 3D scatter chart only.

    Returns:
        The Plotly figure, or ``None`` when ``chart_type`` is not one of the
        supported names.
    """
    if chart_type == "Histogram":
        return px.histogram(df, x=x_col, nbins=30, title=f"Distribution of {x_col}",
                            color_discrete_sequence=['#00cc96'], template="plotly_dark")
    elif chart_type == "Bar Chart":
        # The EDA auto-detect step selects "Bar Chart" for non-numeric
        # columns; plot the frequency of each distinct value.  The counts are
        # put into fixed-name columns so a source column that happens to be
        # called "count" or "value" cannot collide with them.
        counts = df[x_col].value_counts()
        freq = pd.DataFrame({"value": counts.index, "count": counts.to_numpy()})
        return px.bar(freq, x="value", y="count",
                      labels={"value": x_col, "count": "Count"},
                      title=f"Value Counts of {x_col}",
                      color_discrete_sequence=['#636efa'], template="plotly_dark")
    elif chart_type == "Box Plot":
        return px.box(df, y=x_col, title=f"Box Plot of {x_col}",
                      color_discrete_sequence=['#ff7f0e'], template="plotly_dark")
    elif chart_type == "Violin Plot":
        return px.violin(df, y=x_col, title=f"Violin Plot of {x_col}",
                         color_discrete_sequence=['#9467bd'], template="plotly_dark")
    elif chart_type == "Scatter Plot":
        return px.scatter(df, x=x_col, y=y_col, title=f"{x_col} vs {y_col}",
                          color_discrete_sequence=['#1f77b4'], template="plotly_dark")
    elif chart_type == "3D Scatter":
        return px.scatter_3d(df, x=x_col, y=y_col, z=z_col,
                             title=f"3D Analysis: {x_col} vs {y_col} vs {z_col}",
                             color_discrete_sequence=['#2ca02c'], template="plotly_dark")
    elif chart_type == "Heatmap":
        # Pairwise correlation of just the two selected columns.
        corr_matrix = df[[x_col, y_col]].corr()
        return px.imshow(corr_matrix, text_auto=True, title="Correlation Heatmap",
                         color_continuous_scale='Viridis', template="plotly_dark")
    # Unrecognised chart types keep the historical behaviour of yielding None.
    return None
178
+
179
+ # ================== 🔹 LAZY-LOADING COMPONENTS ==================
180
def lazy_load_chart(df, chart_type, x_col, y_col=None, z_col=None):
    """Build a chart via ``generate_chart`` while showing a spinner.

    Args:
        df: DataFrame holding the data to plot.
        chart_type: Chart type name understood by ``generate_chart``.
        x_col: Primary column.
        y_col: Optional second column.
        z_col: Optional third column, forwarded so the "3D Scatter" chart
            type can also be requested through this wrapper.

    Returns:
        Whatever ``generate_chart`` returns for the given arguments.
    """
    with st.spinner(f"Generating {chart_type}..."):
        return generate_chart(df, chart_type, x_col, y_col, z_col)
184
+
185
def lazy_load_statistics(df, column):
    """Compute the statistics of ``column`` while a spinner is displayed.

    Delegates to ``calculate_statistics`` and returns its result unchanged.
    """
    spinner = st.spinner("Calculating statistics...")
    with spinner:
        stats = calculate_statistics(df, column)
    return stats
189
+
190
+
191
+ # ================== 🔹 SESSION STATE ==================
192
# Make sure every shared session-state slot exists, defaulting to None, so
# later code can read these keys without checking for their presence first.
for _state_key in ('df', 'cleaned_df', 'X_train', 'X_test',
                   'y_train', 'y_test', 'model'):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = None
206
+
207
+ # ================== 🔹 GLOBAL NAVIGATION ==================
208
# Sidebar navigation: the selected page name is stored in `choice`, which the
# page-dispatch chain that follows compares against.
st.sidebar.title("🚀 Nexus Analytics")
_PAGES = [
    "Home",
    "Data Cleaning",
    "EDA",
    "Train-Test Split",
    "Machine Learning",
    "Predictions",
    "Visualization",
]
choice = st.sidebar.radio("Go to", _PAGES)
211
+ if choice == "Home":
212
+ st.title("📂 Upload Your Dataset")
213
+
214
+ # Dataset Control Buttons
215
+ control_col1, control_col2 = st.columns([1, 2])
216
+ with control_col1:
217
+ if st.session_state.df is not None:
218
+ if st.button("🧹 Clear Dataset", help="Remove current dataset from memory"):
219
+ st.session_state.df = None
220
+ st.session_state.cleaned_df = None
221
+ st.success("Dataset cleared from memory!")
222
+
223
+ with control_col2:
224
+ replace_file = st.file_uploader("Replace Dataset", type=["csv", "xlsx"],
225
+ help="Upload a new dataset to replace current one",
226
+ key="replace_uploader")
227
+
228
+ if replace_file:
229
+ df = pd.read_csv(replace_file) if replace_file.name.endswith('.csv') else pd.read_excel(replace_file)
230
+ st.session_state.df = df
231
+ st.session_state.cleaned_df = df.copy()
232
+ st.success("✅ Dataset replaced successfully!")
233
+
234
+ # Main Dataset Upload
235
+ if st.session_state.df is None:
236
+ with st.container():
237
+ uploaded_file = st.file_uploader("Upload Dataset", type=["csv", "xlsx"],
238
+ help="Drag and drop your dataset file here")
239
+
240
+ if uploaded_file:
241
+ df = pd.read_csv(uploaded_file) if uploaded_file.name.endswith('.csv') else pd.read_excel(uploaded_file)
242
+ st.session_state.df = df
243
+ st.session_state.cleaned_df = df.copy()
244
+ st.success("✅ Data uploaded successfully!")
245
+
246
+ # Show dataset information if loaded
247
+ if st.session_state.df is not None:
248
+ df = st.session_state.df
249
+
250
+ # Dataset Overview Cards
251
+ with st.container():
252
+ col1, col2, col3 = st.columns(3)
253
  with col1:
254
+ with st.container():
255
+ st.markdown("### 📐 Dataset Shape")
256
+ st.markdown(f"**{df.shape[0]}** Rows | **{df.shape[1]}** Columns")
257
+
258
  with col2:
259
+ with st.container():
260
+ st.markdown("### ⚠️ Data Issues")
261
+ st.markdown(f"**{df.isnull().sum().sum()}** Missing Values | **{df.duplicated().sum()}** Duplicates")
262
+
263
+ with col3:
264
+ with st.container():
265
+ st.markdown("### 🧬 Data Types")
266
+ num_cols = len(df.select_dtypes(include=np.number).columns)
267
+ cat_cols = len(df.select_dtypes(include=['object']).columns)
268
+ st.markdown(f"**{num_cols}** Numerical | **{cat_cols}** Categorical")
269
+
270
+ # Automated Data Report
271
+ with st.expander("📊 Automated Data Report", expanded=True):
272
+ if st.button("✨ Generate Smart Report"):
273
+ with st.spinner("🔍 Analyzing dataset..."):
274
+ # Configure minimal report
275
+ config = Settings()
276
+ config.title = " "
277
+ config.variables.descriptions = False
278
+ config.show_variable_description = False
279
+ config.samples.head = 0
280
+ config.samples.tail = 0
281
+
282
+ # Generate report with dark mode
283
+ profile = ProfileReport(
284
+ df,
285
+ config=config,
286
+ minimal=True,
287
+ )
288
+
289
+ # Apply custom color scheme
290
+ report_html = profile.to_html()
291
+ report_html = report_html.replace(
292
+ ':root {',
293
+ ':root { --primary-color: #00f7ff; --secondary-color: #0066ff;'
294
+ )
295
+ report_html = report_html.replace('<h1', '<h1 style="display:none"')
296
+
297
+ st.components.v1.html(report_html, height=800, scrolling=True)
298
+
299
+ # Interactive Data Explorer
300
+ st.subheader("🔍 Data Explorer")
301
+
302
+ # Data Samples Tabs
303
+ with st.expander("📑 Data Samples", expanded=True):
304
+ sample_type = st.selectbox("View Data Samples",
305
+ ["First 5 Rows", "Last 5 Rows", "Random Sample"],
306
+ key="sample_selector")
307
+
308
+ if sample_type == "First 5 Rows":
309
+ st.dataframe(df.head().style.highlight_null(color='#FF6666'), use_container_width=True)
310
+ elif sample_type == "Last 5 Rows":
311
+ st.dataframe(df.tail().style.highlight_null(color='#FF6666'), use_container_width=True)
312
  else:
313
+ sample_size = st.slider("Sample Size", 5, min(100, len(df)), 10)
314
+ st.dataframe(df.sample(sample_size).style.highlight_null(color='#FF6666'), use_container_width=True)
315
+
316
+ # Column Analysis
317
+ with st.expander("📈 Column Insights", expanded=True):
318
+ col1, col2 = st.columns(2)
319
+ with col1:
320
+ selected_col = st.selectbox("Select Column", df.columns)
321
+
322
+ if pd.api.types.is_numeric_dtype(df[selected_col]):
323
+ fig = px.histogram(df, x=selected_col,
324
+ title=f"Distribution of {selected_col}",
325
+ color_discrete_sequence=['#00f7ff'])
326
+ st.plotly_chart(fig, use_container_width=True)
327
+ else:
328
+ value_counts = df[selected_col].value_counts().nlargest(10)
329
+ fig = px.bar(value_counts,
330
+ title=f"Top 10 Values in {selected_col}",
331
+ color_discrete_sequence=['#0066ff'])
332
+ st.plotly_chart(fig, use_container_width=True)
333
+
334
+ with col2:
335
+ st.markdown("#### Column Summary")
336
+ st.write(f"**Data Type:** {df[selected_col].dtype}")
337
+ st.write(f"**Unique Values:** {df[selected_col].nunique()}")
338
+
339
+ if pd.api.types.is_numeric_dtype(df[selected_col]):
340
+ st.write(f"**Min Value:** {df[selected_col].min():.2f}")
341
+ st.write(f"**Max Value:** {df[selected_col].max():.2f}")
342
+ st.write(f"**Mean Value:** {df[selected_col].mean():.2f}")
343
+ else:
344
+ st.write("**Most Common Value:**")
345
+ st.write(df[selected_col].mode()[0])
346
+
347
+ # Data Summary Tabs
348
+ tab1, tab2, tab3 = st.tabs(["📋 Full Summary", "📊 Statistics", "🧠 AI Insights"])
349
+ with tab1:
350
+ buffer = StringIO()
351
+ df.info(buf=buffer)
352
+ st.text(buffer.getvalue())
353
+
354
+ with tab2:
355
+ st.write(df.describe().style.background_gradient(cmap='Blues'))
356
+
357
+ with tab3:
358
+ st.markdown("### Automated Insights")
359
+ if st.button("🔮 Generate AI-Powered Insights"):
360
+ with st.spinner("🤖 Analyzing patterns..."):
361
+ profile = ProfileReport(df, minimal=True)
362
+ st.write(profile.to_html(), unsafe_allow_html=True)
363
+
364
+ # ================== 🔹 ENHANCED DATA CLEANING SECTION ==================
365
+ elif choice == "Data Cleaning":
366
+ st.header("🧼 Intelligent Data Wrangling")
367
+
368
+ if st.session_state.df is not None:
369
+ df = st.session_state.cleaned_df.copy()
370
+
371
+ # AI-Powered Cleaning Assistant
372
+ st.subheader("🤖 Smart Cleaning Advisor")
373
+ if st.button("Run Full Data Diagnosis", type="primary"):
374
+ with st.spinner("🚀 Performing multidimensional analysis..."):
375
+ try:
376
+ # Advanced data quality assessment
377
+ numeric_cols = df.select_dtypes(include=np.number).columns
378
+ diagnosis = pd.DataFrame({
379
+ 'Metric': ['Missing Values', 'Duplicate Rows',
380
+ 'Zero Variance', 'Data Leakage Risk'],
381
+ 'Value': [
382
+ f"{df.isnull().sum().sum()} ({df.isnull().mean().mean():.1%})",
383
+ df.duplicated().sum(),
384
+ df[numeric_cols].std()[df[numeric_cols].std() == 0].count(),
385
+ "High" if df.skew().abs().max() > 5 else "Low"
386
+ ],
387
+ 'Severity': ['Critical' if df.isnull().sum().sum() > 0 else 'OK',
388
+ 'Warning' if df.duplicated().sum() > 0 else 'OK',
389
+ 'Critical' if df[numeric_cols].std()[df[numeric_cols].std() == 0].count() > 0 else 'OK',
390
+ 'Warning' if df.skew().abs().max() > 5 else 'OK']
391
+ })
392
+
393
+ # Visualize data health
394
+ fig = px.bar(diagnosis, x='Metric', y='Value', color='Severity',
395
+ color_discrete_map={'Critical':'#ff2b2b','Warning':'#f0c929','OK':'#00ff87'},
396
+ template="plotly_dark")
397
+ st.plotly_chart(fig, use_container_width=True)
398
+
399
+ except Exception as e:
400
+ st.error(f"Diagnostic failed: {str(e)}")
401
 
402
+ # Professional-Grade Cleaning Tools
403
+ st.subheader("🔧 Enterprise Cleaning Toolkit")
404
+ tab1, tab2, tab3, tab4 = st.tabs(["🧩 Missing Data", "📏 Normalization", "📊 Outliers", "🔀 Encoding"])
405
+
406
+ with tab1:
407
+ cols = st.columns([1,3])
408
+ with cols[0]:
409
+ imp_method = st.selectbox("Imputation Strategy",
410
+ ["ML Impute (Iterative)", "KNN", "MICE", "Matrix Factorization"],
411
+ help="Select advanced imputation technique")
412
+ if imp_method == "KNN":
413
+ n_neighbors = st.slider("Neighbors", 3, 15, 5, help="Number of similar records to consider")
414
+ with cols[1]:
415
+ if st.button("Execute Smart Imputation", type="primary"):
416
+ with st.spinner(f"⚙️ Running {imp_method}..."):
417
+ # Advanced imputation logic
418
+ numeric_cols = df.select_dtypes(include=np.number).columns
419
+ if imp_method == "KNN":
420
+ imputer = KNNImputer(n_neighbors=n_neighbors)
421
+ df[numeric_cols] = imputer.fit_transform(df[numeric_cols])
422
+ else:
423
+ df[numeric_cols] = df[numeric_cols].fillna(df[numeric_cols].median())
424
+ st.session_state.cleaned_df = df
425
+ st.toast("Imputation complete!", icon="✅")
426
+
427
+ with tab2:
428
+ cols = st.columns([1,3])
429
+ with cols[0]:
430
+ scale_method = st.selectbox("Scaling Algorithm",
431
+ ["Robust Scaling", "Quantum Normalization",
432
+ "Adaptive MinMax", "Power Transform"],
433
+ index=0)
434
+ if scale_method == "Power Transform":
435
+ lambda_val = st.slider("Lambda Parameter", -3.0, 3.0, 0.0)
436
+ with cols[1]:
437
+ if st.button("Apply Feature Engineering", type="primary"):
438
+ with st.spinner("Transforming features..."):
439
+ # Advanced scaling logic
440
+ numeric_cols = df.select_dtypes(include=np.number).columns
441
+ if scale_method == "Robust Scaling":
442
+ scaler = RobustScaler()
443
+ df[numeric_cols] = scaler.fit_transform(df[numeric_cols])
444
+ st.session_state.cleaned_df = df
445
+ st.toast("Features transformed!", icon="✅")
446
+
447
+ # Real-time Data Diff Viewer
448
+ st.subheader("🔍 Version Comparison")
449
+ cols = st.columns(2)
450
+ with cols[0]:
451
+ st.write("Original Data Snapshot")
452
+ st.dataframe(st.session_state.df.head(3).style.highlight_null(color='#ff2b2b'))
453
+ with cols[1]:
454
+ st.write("Processed Version")
455
+ st.dataframe(df.head(3).style.highlight_null(color='#00ff87'))
456
+
457
+ # ================== 🔹 EDA SECTION ==================
458
+ elif choice == "EDA":
459
+ st.header("🔍 Advanced Exploratory Data Analysis")
460
+
461
+ if st.session_state.cleaned_df is not None:
462
+ df = st.session_state.cleaned_df
463
+
464
+ # ================== 🔹 USER INPUTS ==================
465
+ st.subheader("📊 Select Analysis Type")
466
+ analysis_type = st.radio(
467
+ "Choose Analysis Type",
468
+ ["Single Variable", "Multi-Variable", "3D Analysis"],
469
+ horizontal=True,
470
+ help="Select the type of analysis you want to perform"
471
+ )
472
+
473
+ # Dynamic Column Selection Based on Analysis Type
474
+ if analysis_type == "Single Variable":
475
+ selected_columns = st.multiselect(
476
+ "Select Columns for Analysis",
477
+ df.columns,
478
+ default=df.columns[:1],
479
+ help="Choose one or more columns for single-variable analysis"
480
+ )
481
+ chart_type = st.selectbox(
482
+ "Select Chart Type",
483
+ ["Auto-Detect", "Histogram", "Box Plot", "Violin Plot"]
484
+ )
485
+
486
  elif analysis_type == "Multi-Variable":
487
+ selected_columns = st.multiselect(
488
+ "Select Columns for Analysis",
489
+ df.columns,
490
+ default=df.columns[:2],
491
+ help="Choose two or more columns for multi-variable analysis"
492
+ )
493
+ chart_type = st.selectbox(
494
+ "Select Chart Type",
495
+ ["Auto-Detect", "Scatter Plot", "Heatmap", "Box Plot", "Violin Plot"]
496
+ )
497
+
498
+ else: # 3D Analysis
 
 
 
 
 
 
 
 
499
  col1, col2, col3 = st.columns(3)
500
  with col1:
501
+ x_col = st.selectbox("X Axis", df.columns)
502
  with col2:
503
+ y_col = st.selectbox("Y Axis", df.columns)
504
  with col3:
505
+ z_col = st.selectbox("Z Axis", df.columns)
506
+ chart_type = "3D Scatter"
507
+
508
+ # ================== 🔹 AUTO-PLOT BUTTON ==================
509
+ if st.button("✨ Generate Advanced Visualizations", type="primary"):
510
+ with st.spinner("🚀 Generating insights..."):
511
+ try:
512
+ # Auto-Detect Logic
513
+ if chart_type == "Auto-Detect":
514
+ if analysis_type == "Single Variable":
515
+ if pd.api.types.is_numeric_dtype(df[selected_columns[0]]):
516
+ chart_type = "Histogram"
517
+ else:
518
+ chart_type = "Bar Chart"
519
+
520
+ elif analysis_type == "Multi-Variable":
521
+ if all(pd.api.types.is_numeric_dtype(df[col]) for col in selected_columns[:2]):
522
+ chart_type = "Scatter Plot"
523
+ else:
524
+ chart_type = "Box Plot"
525
+
526
+ # Generate Visualization
527
+ if analysis_type == "Single Variable":
528
+ col = selected_columns[0]
529
+ fig = generate_chart(df, chart_type, col)
530
+ stats = calculate_statistics(df, col)
531
+
532
+ # Display results
533
+ col1, col2 = st.columns([2, 1])
534
+ with col1:
535
+ st.plotly_chart(fig, use_container_width=True)
536
+ with col2:
537
+ st.subheader("📌 Key Insights")
538
+ if pd.api.types.is_numeric_dtype(df[col]):
539
+ st.metric("Mean", f"{stats['mean']:.2f}")
540
+ st.metric("Median", f"{stats['median']:.2f}")
541
+ st.metric("Std Dev", f"{stats['std']:.2f}")
542
+ else:
543
+ st.metric("Unique Values", stats['unique_values'])
544
+ st.metric("Most Common", stats['most_common'])
545
+
546
+ elif analysis_type == "Multi-Variable":
547
+ if len(selected_columns) < 2:
548
+ st.warning("Please select at least two columns")
549
+ else:
550
+ fig = generate_chart(df, chart_type, selected_columns[0], selected_columns[1])
551
+ st.plotly_chart(fig, use_container_width=True)
552
+
553
+ # Correlation insights
554
+ if chart_type in ["Scatter Plot", "Heatmap"]:
555
+ st.subheader("📌 Correlation Insights")
556
+ try:
557
+ corr = df[selected_columns[0]].corr(df[selected_columns[1]])
558
+ st.write(f"**Correlation Coefficient:** {corr:.2f}")
559
+ st.progress(abs(corr))
560
+ st.caption("Absolute correlation strength")
561
+ except:
562
+ st.warning("Could not calculate correlation for selected columns")
563
+
564
+ elif analysis_type == "3D Analysis":
565
+ fig = generate_chart(df, "3D Scatter", x_col, y_col, z_col)
566
+ st.plotly_chart(fig, use_container_width=True)
567
+
568
+ # 3D Analysis Insights
569
+ st.subheader("📌 3D Analysis Insights")
570
+ col1, col2, col3 = st.columns(3)
571
+ with col1:
572
+ st.metric("X Range", f"{df[x_col].min():.2f} - {df[x_col].max():.2f}")
573
+ with col2:
574
+ st.metric("Y Range", f"{df[y_col].min():.2f} - {df[y_col].max():.2f}")
575
+ with col3:
576
+ st.metric("Z Range", f"{df[z_col].min():.2f} - {df[z_col].max():.2f}")
577
+
578
+ except Exception as e:
579
+ st.error(f"Visualization error: {str(e)}")
580
+ # ================== 🔹 PRODUCTION-GRADE ML SECTION ==================
581
+ elif choice == "Machine Learning":
582
+ st.header("🤖 Enterprise ML Studio")
583
+
584
+ if st.session_state.cleaned_df is not None:
585
+ df = st.session_state.cleaned_df
586
+
587
+ # Model Factory
588
+ st.subheader("🏭 Model Orchestration")
589
+ tabs = st.tabs(["AutoML", "Custom Training", "Model Registry"])
590
 
591
+ with tabs[0]:
592
+ if st.button("Launch Hyperparameter Optimization", type="primary"):
593
+ with st.spinner("⚡ Training 25 model variants..."):
594
+ try:
595
+ target = st.selectbox("Target Variable", df.columns)
596
+ setup(df, target=target, session_id=42,
597
+ feature_interaction=True,
598
+ polynomial_features=True)
599
+ best_model = compare_models(n_select=3)
600
+
601
+ # Visual Leaderboard
602
+ results = pull()
603
+ fig = px.bar(results, x='Model', y=['Accuracy', 'AUC'],
604
+ barmode='group', template="plotly_dark",
605
+ title="Model Performance Leaderboard")
606
+ st.plotly_chart(fig, use_container_width=True)
607
+
608
+ except Exception as e:
609
+ st.error(f"AutoML failed: {str(e)}")
610
+ # ================== 🔹 PREDICTIONS PAGE COMPLETION ==================
611
+ elif choice == "Predictions":
612
+ st.title("🔮 Make Predictions on New Data")
613
+
614
+ if st.session_state.get("model"):
615
  uploaded_file = st.file_uploader("Upload New Data for Prediction", type=["csv", "xlsx"])
616
+
617
  if uploaded_file:
618
  new_data = pd.read_csv(uploaded_file) if uploaded_file.name.endswith('.csv') else pd.read_excel(uploaded_file)
619
  st.write("📊 Preview of New Data:")
620
  st.dataframe(new_data.head())
621
 
622
+ try:
623
+ predictions = st.session_state.model.predict(new_data)
624
+ proba = st.session_state.model.predict_proba(new_data) if hasattr(st.session_state.model, 'predict_proba') else None
625
+
626
+ st.subheader("📢 Predictions:")
627
+ result_df = pd.DataFrame({
628
+ 'Prediction': predictions,
629
+ 'Confidence': proba.max(axis=1) if proba is not None else [1.0]*len(predictions)
630
+ })
631
+ st.dataframe(result_df.style.background_gradient(cmap='Blues'))
632
+
633
+ # Download predictions
634
+ csv = result_df.to_csv(index=False).encode('utf-8')
635
+ st.download_button(
636
+ label="📥 Download Predictions",
637
+ data=csv,
638
+ file_name='predictions.csv',
639
+ mime='text/csv'
640
+ )
641
+
642
+ except Exception as e:
643
+ st.error(f"Prediction error: {str(e)}")
644
+ else:
645
+ st.warning("⚠️ No trained model found. Please train a model first.")
646
 
647
+ # ================== 🔹 VISUALIZATION PAGE COMPLETION ==================
648
+ # ================== 🔹 VISUALIZATION PAGE COMPLETION ==================
649
+ elif choice == "Visualization":
650
+ st.header("📊 Advanced Visualization Lab")
651
+
652
+ if st.session_state.cleaned_df is not None:
653
+ df = st.session_state.cleaned_df
654
+
655
+ # Smart Visualization Assistant
656
+ col1, col2 = st.columns([1, 3])
657
+ with col1:
658
+ if st.button("✨ Suggest Visualizations", help="Generate smart visualization recommendations"):
659
+ with st.spinner("🎨 Generating recommendations..."):
660
+ try:
661
+ numeric_cols = df.select_dtypes(include=np.number).columns.tolist()
662
+ cat_cols = df.select_dtypes(include=['object', 'category']).columns.tolist()
663
+
664
+ # Auto-detect visualization types
665
+ if len(numeric_cols) >= 3:
666
+ st.session_state.viz_type = "3D Scatter"
667
+ elif len(cat_cols) > 0:
668
+ st.session_state.viz_type = "Pie"
669
+ else:
670
+ st.session_state.viz_type = "Histogram"
671
+
672
+ st.success(f"Recommended visualization type: {st.session_state.viz_type}")
673
+
674
+ except Exception as e:
675
+ st.error(f"Recommendation failed: {str(e)}")
676
 
677
+ # Manual Visualization Controls
678
+ with st.expander("🎨 Custom Visualization", expanded=True):
679
+ plot_options = ["3D Scatter", "Line", "Bar", "Pie", "Histogram", "Box", "Violin", "Heatmap"]
680
+ plot_type = st.selectbox("Select Plot Type", plot_options,
681
+ index=plot_options.index(st.session_state.viz_type) if 'viz_type' in st.session_state else 0)
682
+
683
+ # Dynamic Axis Selection
684
+ col1, col2, col3 = st.columns(3)
685
+ fig = None
686
+
687
+ # 3D Scatter Plot
688
+ if plot_type == "3D Scatter":
689
+ with col1:
690
+ x_axis = st.selectbox("X Axis", df.columns, index=0)
691
+ with col2:
692
+ y_axis = st.selectbox("Y Axis", df.columns, index=min(1, len(df.columns)-1))
693
+ with col3:
694
+ z_axis = st.selectbox("Z Axis", df.columns, index=min(2, len(df.columns)-1))
695
+ color_by = st.selectbox("Color By", [None] + df.columns.tolist())
696
+ fig = px.scatter_3d(df, x=x_axis, y=y_axis, z=z_axis, color=color_by,
697
+ color_continuous_scale=px.colors.cyclical.IceFire)
698
+
699
+ # Line Chart
700
+ elif plot_type == "Line":
701
+ with col1:
702
+ x_axis = st.selectbox("X Axis", df.columns, index=0)
703
+ with col2:
704
+ y_axis = st.selectbox("Y Axis", df.select_dtypes(include=np.number).columns.tolist())
705
+ with col3:
706
+ color_by = st.selectbox("Group By", [None] + df.columns.tolist())
707
+ fig = px.line(df, x=x_axis, y=y_axis, color=color_by,
708
+ line_group=color_by if color_by else None)
709
+
710
+ # Bar Chart
711
+ elif plot_type == "Bar":
712
+ with col1:
713
+ x_axis = st.selectbox("X Axis", df.columns, index=0)
714
+ with col2:
715
+ y_axis = st.selectbox("Y Axis", df.select_dtypes(include=np.number).columns.tolist())
716
+ with col3:
717
+ color_by = st.selectbox("Color By", [None] + df.columns.tolist())
718
+ fig = px.bar(df, x=x_axis, y=y_axis, color=color_by, barmode='group')
719
+
720
+ # Pie Chart
721
+ elif plot_type == "Pie":
722
+ with col1:
723
+ names = st.selectbox("Categories", df.select_dtypes(include=['object', 'category']).columns.tolist())
724
+ with col2:
725
+ values = st.selectbox("Values", df.select_dtypes(include=np.number).columns.tolist())
726
+ fig = px.pie(df, names=names, values=values, hole=0.3)
727
+
728
+ # Histogram
729
+ elif plot_type == "Histogram":
730
+ with col1:
731
+ num_col = st.selectbox("Numerical Column", df.select_dtypes(include=np.number).columns.tolist())
732
+ with col2:
733
+ color_by = st.selectbox("Split By", [None] + df.columns.tolist())
734
+ fig = px.histogram(df, x=num_col, color=color_by, marginal="rug",
735
+ nbins=st.slider("Number of Bins", 5, 100, 20))
736
+
737
+ # Box Plot
738
+ elif plot_type == "Box":
739
+ with col1:
740
+ y_axis = st.selectbox("Y Axis", df.select_dtypes(include=np.number).columns.tolist())
741
+ with col2:
742
+ x_axis = st.selectbox("X Axis (Optional)", [None] + df.columns.tolist())
743
+ fig = px.box(df, x=x_axis, y=y_axis, color=x_axis)
744
+
745
+ # Violin Plot
746
+ elif plot_type == "Violin":
747
+ with col1:
748
+ y_axis = st.selectbox("Y Axis", df.select_dtypes(include=np.number).columns.tolist())
749
+ with col2:
750
+ x_axis = st.selectbox("X Axis (Optional)", [None] + df.columns.tolist())
751
+ fig = px.violin(df, x=x_axis, y=y_axis, color=x_axis, box=True)
752
+
753
+ # Heatmap
754
+ elif plot_type == "Heatmap":
755
  numeric_cols = df.select_dtypes(include=np.number).columns.tolist()
756
+ selected_cols = st.multiselect("Select Numerical Columns", numeric_cols, default=numeric_cols[:5])
757
+ if len(selected_cols) >= 2:
758
+ corr_matrix = df[selected_cols].corr()
759
+ fig = px.imshow(corr_matrix, text_auto=True,
760
+ color_continuous_scale=px.colors.diverging.RdBu_r)
 
761
  else:
762
+ st.warning("Select at least 2 numerical columns for heatmap")
763
+
764
+ # Plot Customization
765
+ if fig:
766
+ with st.expander("🎭 Style Customization"):
767
+ col1, col2 = st.columns(2)
768
+ with col1:
769
+ color_theme = st.selectbox("Color Theme", px.colors.named_colorscales(),
770
+ index=px.colors.named_colorscales().index('Viridis'))
771
+ fig.update_layout(colorway=px.colors.sequential[color_theme])
772
+ with col2:
773
+ fig.update_layout(
774
+ template=st.selectbox("Theme Style", ["plotly", "plotly_dark", "ggplot2", "seaborn"]),
775
+ font_size=st.slider("Font Size", 10, 24, 14)
776
+ )
777
+
778
+ # Display Plot
779
+ st.plotly_chart(fig, use_container_width=True)
780
+
781
+ # Download Button
782
+ plot_html = fig.to_html()
783
+ st.download_button(
784
+ label="📥 Download Plot",
785
+ data=plot_html,
786
+ file_name=f"{plot_type.replace(' ', '_')}_plot.html",
787
+ mime="text/html"
788
+ )