Spaces:

CosmickVisions
/

Data-Vision

Sleeping

App Files Files Community

CosmickVisions committed on Feb 28

Commit

d44dfe0

verified ·

1 Parent(s): 00b2520

Update app.py

Browse files

Files changed (1) hide show

app.py +135 -133

app.py CHANGED Viewed

@@ -796,153 +796,155 @@ elif app_mode == "Predictions":
 elif app_mode == "Visualization Lab":
     st.title("🔬 Advanced Data Visualization and Clustering Lab")
-# Initialize session state for cleaned data
-if 'cleaned_data' not in st.session_state:
-    st.session_state.cleaned_data = None
-# Sample data upload (replace with your data loading logic)
-uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])
-if uploaded_file is not None:
-    try:
-        df = pd.read_csv(uploaded_file)
-        st.session_state.cleaned_data = df
-        st.success("Data loaded successfully!")
-    except Exception as e:
-        st.error(f"Error loading data: {e}")
-if st.session_state.cleaned_data is not None:
-    df = st.session_state.cleaned_data.copy()
-    # Visualization Type Selection
-    visualization_type = st.selectbox("Select Visualization Type", [
-        "Pair Plot", "Parallel Coordinates Plot", "Andrews Curves", "Pie Chart",
-        "Area Chart", "Density Contour", "Sunburst Chart", "Funnel Chart", "Clustering Analysis"
-    ])
-    if visualization_type == "Pair Plot":
-        st.subheader("Pair Plot")
-        cols_for_pairplot = st.multiselect("Select Columns for Pair Plot", df.select_dtypes(include=np.number).columns.tolist(), default=df.select_dtypes(include=np.number).columns.tolist()[:3])
-        if cols_for_pairplot:
-            fig = px.scatter_matrix(df, dimensions=cols_for_pairplot)
-            st.plotly_chart(fig, use_container_width=True)
-    elif visualization_type == "Parallel Coordinates Plot":
-        st.subheader("Parallel Coordinates Plot")
-        cols_for_parallel = st.multiselect("Select Columns for Parallel Coordinates", df.select_dtypes(include=np.number).columns.tolist(), default=df.select_dtypes(include=np.number).columns.tolist()[:5])
-        if cols_for_parallel:
-            fig = px.parallel_coordinates(df[cols_for_parallel], color=df[cols_for_parallel[0]] if cols_for_parallel else None)
-            st.plotly_chart(fig, use_container_width=True)
-    elif visualization_type == "Andrews Curves":
-        st.subheader("Andrews Curves")
-        cols_for_andrews = st.multiselect("Select Columns for Andrews Curves", df.select_dtypes(include=np.number).columns.tolist(), default=df.select_dtypes(include=np.number).columns.tolist()[:5])
-        if cols_for_andrews:
-            fig = px.andrews_curves(df[cols_for_andrews + [df.columns[0]]], class_column=df.columns[0])
-            st.plotly_chart(fig, use_container_width=True)
-    elif visualization_type == "Pie Chart":
-        st.subheader("Pie Chart")
-        col_for_pie = st.selectbox("Select Column for Pie Chart", df.columns)
-        fig = px.pie(df, names=col_for_pie)
-        st.plotly_chart(fig, use_container_width=True)
-    elif visualization_type == "Area Chart":
-        st.subheader("Area Chart")
-        cols_for_area = st.multiselect("Select Columns for Area Chart", df.select_dtypes(include=np.number).columns.tolist(), default=df.select_dtypes(include=np.number).columns.tolist()[:3])
-        if cols_for_area:
-            fig = px.area(df[cols_for_area])
             st.plotly_chart(fig, use_container_width=True)
-    elif visualization_type == "Density Contour":
-        st.subheader("Density Contour")
-        x_col = st.selectbox("Select X Column for Density Contour", df.select_dtypes(include=np.number).columns.tolist())
-        y_col = st.selectbox("Select Y Column for Density Contour", df.select_dtypes(include=np.number).columns.tolist())
-        fig = px.density_contour(df, x=x_col, y=y_col)
-        st.plotly_chart(fig, use_container_width=True)
-    elif visualization_type == "Sunburst Chart":
-        st.subheader("Sunburst Chart")
-        path_cols = st.multiselect("Select Path Columns for Sunburst Chart", df.columns)
-        if path_cols:
-            fig = px.sunburst(df, path=path_cols)
             st.plotly_chart(fig, use_container_width=True)
-    elif visualization_type == "Funnel Chart":
-        st.subheader("Funnel Chart")
-        x_col = st.selectbox("Select X Column for Funnel Chart (Values)", df.select_dtypes(include=np.number).columns.tolist())
-        y_col = st.selectbox("Select Y Column for Funnel Chart (Categories)", df.columns)
-        fig = px.funnel(df, x=x_col, y=y_col)
-        st.plotly_chart(fig, use_container_width=True)
-    elif visualization_type == "Clustering Analysis":
-        st.subheader("Clustering Analysis")
-        numerical_cols = df.select_dtypes(include=np.number).columns.tolist()
-        if not numerical_cols:
-            st.warning("No numerical columns found for clustering.")
-        else:
-            cluster_cols = st.multiselect("Select Columns for Clustering", numerical_cols, default=numerical_cols[:2] if len(numerical_cols) >= 2 else numerical_cols)
-            if cluster_cols:
-                try:
-                    scaler = StandardScaler()
-                    scaled_data = scaler.fit_transform(df[cluster_cols])
-                    n_clusters = st.slider("Number of Clusters", 2, 10, 3, help="Number of clusters to form.")
-                    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
-                    clusters = kmeans.fit_predict(scaled_data)
-                    df['Cluster'] = clusters
-                    if len(cluster_cols) == 2:
-                        fig = px.scatter(df, x=cluster_cols[0], y=cluster_cols[1], color='Cluster', title="K-Means Clustering")
-                        st.plotly_chart(fig, use_container_width=True)
-                    elif len(cluster_cols) == 3:
-                        fig = px.scatter_3d(df, x=cluster_cols[0], y=cluster_cols[1], z=cluster_cols[2], color='Cluster', title="K-Means Clustering (3D)")
-                        st.plotly_chart(fig, use_container_width=True)
-                    else:
-                        st.write("Clustering visualization is only supported for 2 or 3 selected columns.")
-                    st.success("Clustering applied successfully!")
-                except Exception as e:
-                    st.error(f"An error occurred during clustering: {e}")
-    #Add clustering performance in clustering analysis
-if len(cluster_cols) >= 2:  # Evaluate Silhouette Score
-    try:
-        silhouette_avg = silhouette_score(scaled_data, clusters)
-        st.write(f"Silhouette Score: {silhouette_avg:.4f}")
-    except:
-        st.write("Could not compute silhouette score")
-#Add dimensionality reduction option and 2d/3d plots
-    dimension_reduction = st.selectbox("Dimensionality Reduction", ["None", "PCA"])
-    if dimension_reduction == "PCA":
-        n_components = st.slider("Number of Components", 2, min(3, len(cluster_cols)), 2)
-        pca = PCA(n_components=n_components)
-        principal_components = pca.fit_transform(scaled_data)
-        pca_df = pd.DataFrame(data=principal_components, columns=[f'PC{i + 1}' for i in range(n_components)])
-        pca_df['Cluster'] = clusters  # Add Cluster
-    if len(cluster_cols) >= 2: #plotting section
-        fig = None #Initialize fig
-        if dimension_reduction == "None":
-            if len(cluster_cols) == 2:
-                fig = px.scatter(df, x=cluster_cols[0], y=cluster_cols[1], color='Cluster', title="K-Means Clustering")
-                st.plotly_chart(fig, use_container_width=True)
-            elif len(cluster_cols) == 3:
-                fig = px.scatter_3d(df, x=cluster_cols[0], y=cluster_cols[1], z=cluster_cols[2], color='Cluster', title="K-Means Clustering (3D)")
-                st.plotly_chart(fig, use_container_width=True)
             else:
-                st.write("Clustering visualization is only supported for 2 or 3 selected columns.")
-        elif dimension_reduction == "PCA":
-            if n_components == 2:
-                fig = px.scatter(pca_df, x='PC1', y='PC2', color='Cluster', title="K-Means Clustering (PCA - 2D)")
-                st.plotly_chart(fig, use_container_width=True)
-            elif n_components == 3:
-                fig = px.scatter_3d(pca_df, x='PC1', y='PC2', z='PC3', color='Cluster', title="K-Means Clustering (PCA - 3D)")
-                st.plotly_chart(fig, use_container_width=True)
-            else:
-                st.write("PCA visualization is only supported for 2 or 3 components.")
 elif app_mode == "Neural Network Studio":
     st.title("🧠 Neural Network Studio")

 elif app_mode == "Visualization Lab":
     st.title("🔬 Advanced Data Visualization and Clustering Lab")
+    # Make sure the shared 'cleaned_data' slot exists in session state before it is read below.
+    if 'cleaned_data' not in st.session_state:
+        st.session_state.cleaned_data = None
+    # Sample data upload (replace with your data loading logic).
+    # A successfully parsed upload overwrites whatever st.session_state.cleaned_data held before.
+    uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])
+    if uploaded_file is not None:
+        try:
+            df = pd.read_csv(uploaded_file)
+            st.session_state.cleaned_data = df
+            st.success("Data loaded successfully!")
+        except Exception as e:
+            # Any read/parse failure is shown in the UI; the previously stored data is left as it was.
+            st.error(f"Error loading data: {e}")
+    # Everything below runs only when a dataset is available in session state.
+    if st.session_state.cleaned_data is not None:
+        # Work on a copy so columns added further down (e.g. 'Cluster') do not modify the stored frame.
+        df = st.session_state.cleaned_data.copy()
+        # Visualization type selection: exactly one of the branches below runs per script execution.
+        visualization_type = st.selectbox("Select Visualization Type", [
+            "Pair Plot", "Parallel Coordinates Plot", "Andrews Curves", "Pie Chart",
+            "Area Chart", "Density Contour", "Sunburst Chart", "Funnel Chart", "Clustering Analysis"
+        ])
+        if visualization_type == "Pair Plot":
+            st.subheader("Pair Plot")
+            # Offer only numeric columns; the first three are preselected.
+            cols_for_pairplot = st.multiselect("Select Columns for Pair Plot", df.select_dtypes(include=np.number).columns.tolist(), default=df.select_dtypes(include=np.number).columns.tolist()[:3])
+            # Nothing is drawn while the selection is empty.
+            if cols_for_pairplot:
+                fig = px.scatter_matrix(df, dimensions=cols_for_pairplot)
+                st.plotly_chart(fig, use_container_width=True)
+        elif visualization_type == "Parallel Coordinates Plot":
+            st.subheader("Parallel Coordinates Plot")
+            # Offer only numeric columns; the first five are preselected.
+            cols_for_parallel = st.multiselect("Select Columns for Parallel Coordinates", df.select_dtypes(include=np.number).columns.tolist(), default=df.select_dtypes(include=np.number).columns.tolist()[:5])
+            if cols_for_parallel:
+                # Lines are colored by the values of the first selected column.
+                # NOTE(review): the inline "if cols_for_parallel else None" can never take its else
+                # branch here, because the enclosing "if" already requires a non-empty selection.
+                fig = px.parallel_coordinates(df[cols_for_parallel], color=df[cols_for_parallel[0]] if cols_for_parallel else None)
+                st.plotly_chart(fig, use_container_width=True)
+        elif visualization_type == "Andrews Curves":
+            st.subheader("Andrews Curves")
+            # Offer only numeric columns; the first five are preselected.
+            andrews_numeric_cols = df.select_dtypes(include=np.number).columns.tolist()
+            cols_for_andrews = st.multiselect("Select Columns for Andrews Curves", andrews_numeric_cols, default=andrews_numeric_cols[:5])
+            if cols_for_andrews:
+                # Plotly Express provides no andrews_curves() function, so the curves are
+                # evaluated here with NumPy and drawn with px.line instead.
+                # As before, the first column of the data is used as the class label.
+                andrews_class_col = df.columns[0]
+                # Rows with a missing value in any selected column cannot be projected; skip them.
+                andrews_complete_rows = df[cols_for_andrews].notna().all(axis=1)
+                andrews_features = df.loc[andrews_complete_rows, cols_for_andrews].to_numpy(dtype=float)
+                andrews_labels = df.loc[andrews_complete_rows, andrews_class_col].astype(str).to_numpy()
+                if andrews_features.shape[0] == 0:
+                    st.warning("No complete rows available for Andrews Curves.")
+                else:
+                    # Andrews curve of a row x: f(t) = x1/sqrt(2) + x2*sin(t) + x3*cos(t)
+                    #                                  + x4*sin(2t) + x5*cos(2t) + ..., for t in [-pi, pi].
+                    andrews_t = np.linspace(-np.pi, np.pi, 100)
+                    andrews_basis = np.vstack(
+                        [np.full(andrews_t.shape, 1 / np.sqrt(2))]
+                        + [
+                            np.sin(((k + 1) // 2) * andrews_t) if k % 2 else np.cos(((k + 1) // 2) * andrews_t)
+                            for k in range(1, len(cols_for_andrews))
+                        ]
+                    )
+                    # Shape (n_rows, n_samples): one sampled curve per data row.
+                    andrews_values = andrews_features @ andrews_basis
+                    andrews_row_count = andrews_values.shape[0]
+                    # Long format for Plotly Express: one record per (row, t) sample.
+                    andrews_df = pd.DataFrame({
+                        "t": np.tile(andrews_t, andrews_row_count),
+                        "f(t)": andrews_values.ravel(),
+                        "row": np.repeat(np.arange(andrews_row_count), andrews_t.size),
+                        "class": np.repeat(andrews_labels, andrews_t.size),
+                    })
+                    # line_group keeps every data row as its own curve; color groups curves by class.
+                    fig = px.line(andrews_df, x="t", y="f(t)", color="class", line_group="row", labels={"class": str(andrews_class_col)})
+                    st.plotly_chart(fig, use_container_width=True)
+        elif visualization_type == "Pie Chart":
+            st.subheader("Pie Chart")
+            # Any column may be chosen; its values are passed to px.pie as the slice names.
+            col_for_pie = st.selectbox("Select Column for Pie Chart", df.columns)
+            fig = px.pie(df, names=col_for_pie)
             st.plotly_chart(fig, use_container_width=True)
+        elif visualization_type == "Area Chart":
+            st.subheader("Area Chart")
+            # Offer only numeric columns; the first three are preselected.
+            cols_for_area = st.multiselect("Select Columns for Area Chart", df.select_dtypes(include=np.number).columns.tolist(), default=df.select_dtypes(include=np.number).columns.tolist()[:3])
+            # Nothing is drawn while the selection is empty.
+            if cols_for_area:
+                fig = px.area(df[cols_for_area])
+                st.plotly_chart(fig, use_container_width=True)
+        elif visualization_type == "Density Contour":
+            st.subheader("Density Contour")
+            # Both axes are picked from the numeric columns; the same column may be picked twice.
+            # NOTE(review): unlike "Clustering Analysis", this branch has no guard for data without
+            # numeric columns (empty option list) - confirm how st.selectbox/px behave in that case.
+            x_col = st.selectbox("Select X Column for Density Contour", df.select_dtypes(include=np.number).columns.tolist())
+            y_col = st.selectbox("Select Y Column for Density Contour", df.select_dtypes(include=np.number).columns.tolist())
+            fig = px.density_contour(df, x=x_col, y=y_col)
             st.plotly_chart(fig, use_container_width=True)
+        elif visualization_type == "Sunburst Chart":
+            st.subheader("Sunburst Chart")
+            # The selected columns, in selection order, are passed as the sunburst path; no default selection.
+            path_cols = st.multiselect("Select Path Columns for Sunburst Chart", df.columns)
+            if path_cols:
+                fig = px.sunburst(df, path=path_cols)
+                st.plotly_chart(fig, use_container_width=True)
+        elif visualization_type == "Funnel Chart":
+            st.subheader("Funnel Chart")
+            # x (values) must be numeric; y (categories) may be any column.
+            # NOTE(review): no guard for data without numeric columns here either - confirm behavior.
+            x_col = st.selectbox("Select X Column for Funnel Chart (Values)", df.select_dtypes(include=np.number).columns.tolist())
+            y_col = st.selectbox("Select Y Column for Funnel Chart (Categories)", df.columns)
+            fig = px.funnel(df, x=x_col, y=y_col)
+            st.plotly_chart(fig, use_container_width=True)
+        elif visualization_type == "Clustering Analysis":
+            st.subheader("Clustering Analysis")
+            # K-Means is run on numeric features only, so only numeric columns are offered.
+            numerical_cols = df.select_dtypes(include=np.number).columns.tolist()
+            if not numerical_cols:
+                st.warning("No numerical columns found for clustering.")
             else:
+                # Preselect the first two numeric columns (or the single one that exists).
+                cluster_cols = st.multiselect("Select Columns for Clustering", numerical_cols, default=numerical_cols[:2])
+                if cluster_cols:
+                    # Any failure while scaling, fitting or plotting is reported in the UI
+                    # by the handler at the end of this block instead of aborting the app.
+                    try:
+                        # Standardize the selected features before fitting K-Means.
+                        scaler = StandardScaler()
+                        scaled_data = scaler.fit_transform(df[cluster_cols])
+                        n_clusters = st.slider("Number of Clusters", 2, 10, 3, help="Number of clusters to form.")
+                        # Fixed random_state keeps the cluster assignment reproducible across reruns.
+                        kmeans = KMeans(n_clusters=n_clusters, random_state=42)
+                        clusters = kmeans.fit_predict(scaled_data)
+                        df['Cluster'] = clusters
+                        # Plot the clusters in the original feature space (2 or 3 columns only).
+                        if len(cluster_cols) == 2:
+                            fig = px.scatter(df, x=cluster_cols[0], y=cluster_cols[1], color='Cluster', title="K-Means Clustering")
+                            st.plotly_chart(fig, use_container_width=True)
+                        elif len(cluster_cols) == 3:
+                            fig = px.scatter_3d(df, x=cluster_cols[0], y=cluster_cols[1], z=cluster_cols[2], color='Cluster', title="K-Means Clustering (3D)")
+                            st.plotly_chart(fig, use_container_width=True)
+                        else:
+                            st.write("Clustering visualization is only supported for 2 or 3 selected columns.")
+                        st.success("Clustering applied successfully!")
+                        # Quality metric and PCA view are offered only for two or more features.
+                        if len(cluster_cols) >= 2:
+                            # Best effort: a failed score must not hide the plots. Catch Exception
+                            # rather than using a bare except, so BaseException subclasses
+                            # (e.g. KeyboardInterrupt) still propagate.
+                            try:
+                                silhouette_avg = silhouette_score(scaled_data, clusters)
+                                st.write(f"Silhouette Score: {silhouette_avg:.4f}")
+                            except Exception:
+                                st.write("Could not compute silhouette score")
+                            # "None" keeps only the feature-space plot drawn above;
+                            # "PCA" adds a plot of the principal components below it.
+                            dimension_reduction = st.selectbox("Dimensionality Reduction", ["None", "PCA"])
+                            if dimension_reduction == "PCA":
+                                max_components = min(3, len(cluster_cols))
+                                if max_components > 2:
+                                    n_components = st.slider("Number of Components", 2, max_components, 2)
+                                else:
+                                    # With exactly two features there is nothing to choose,
+                                    # so skip the degenerate 2..2 slider.
+                                    n_components = 2
+                                pca = PCA(n_components=n_components)
+                                principal_components = pca.fit_transform(scaled_data)
+                                pca_df = pd.DataFrame(data=principal_components, columns=[f'PC{i + 1}' for i in range(n_components)])
+                                pca_df['Cluster'] = clusters  # Color the components by cluster label
+                                # n_components is always 2 or 3 at this point.
+                                if n_components == 2:
+                                    fig = px.scatter(pca_df, x='PC1', y='PC2', color='Cluster', title="K-Means Clustering (PCA - 2D)")
+                                else:
+                                    fig = px.scatter_3d(pca_df, x='PC1', y='PC2', z='PC3', color='Cluster', title="K-Means Clustering (PCA - 3D)")
+                                st.plotly_chart(fig, use_container_width=True)
+                    except Exception as e:
+                        st.error(f"An error occurred during clustering: {e}")
 elif app_mode == "Neural Network Studio":
     st.title("🧠 Neural Network Studio")