CosmickVisions committed on
Commit
d44dfe0
·
verified ·
1 Parent(s): 00b2520

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +135 -133
app.py CHANGED
@@ -796,153 +796,155 @@ elif app_mode == "Predictions":
796
  elif app_mode == "Visualization Lab":
797
  st.title("🔬 Advanced Data Visualization and Clustering Lab")
798
 
799
- # Initialize session state for cleaned data
800
- if 'cleaned_data' not in st.session_state:
801
- st.session_state.cleaned_data = None
802
 
803
- # Sample data upload (replace with your data loading logic)
804
- uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])
805
- if uploaded_file is not None:
806
- try:
807
- df = pd.read_csv(uploaded_file)
808
- st.session_state.cleaned_data = df
809
- st.success("Data loaded successfully!")
810
- except Exception as e:
811
- st.error(f"Error loading data: {e}")
812
-
813
- if st.session_state.cleaned_data is not None:
814
- df = st.session_state.cleaned_data.copy()
815
-
816
- # Visualization Type Selection
817
- visualization_type = st.selectbox("Select Visualization Type", [
818
- "Pair Plot", "Parallel Coordinates Plot", "Andrews Curves", "Pie Chart",
819
- "Area Chart", "Density Contour", "Sunburst Chart", "Funnel Chart", "Clustering Analysis"
820
- ])
821
-
822
- if visualization_type == "Pair Plot":
823
- st.subheader("Pair Plot")
824
- cols_for_pairplot = st.multiselect("Select Columns for Pair Plot", df.select_dtypes(include=np.number).columns.tolist(), default=df.select_dtypes(include=np.number).columns.tolist()[:3])
825
- if cols_for_pairplot:
826
- fig = px.scatter_matrix(df, dimensions=cols_for_pairplot)
827
- st.plotly_chart(fig, use_container_width=True)
828
 
829
- elif visualization_type == "Parallel Coordinates Plot":
830
- st.subheader("Parallel Coordinates Plot")
831
- cols_for_parallel = st.multiselect("Select Columns for Parallel Coordinates", df.select_dtypes(include=np.number).columns.tolist(), default=df.select_dtypes(include=np.number).columns.tolist()[:5])
832
- if cols_for_parallel:
833
- fig = px.parallel_coordinates(df[cols_for_parallel], color=df[cols_for_parallel[0]] if cols_for_parallel else None)
834
- st.plotly_chart(fig, use_container_width=True)
835
 
836
- elif visualization_type == "Andrews Curves":
837
- st.subheader("Andrews Curves")
838
- cols_for_andrews = st.multiselect("Select Columns for Andrews Curves", df.select_dtypes(include=np.number).columns.tolist(), default=df.select_dtypes(include=np.number).columns.tolist()[:5])
839
- if cols_for_andrews:
840
- fig = px.andrews_curves(df[cols_for_andrews + [df.columns[0]]], class_column=df.columns[0])
841
- st.plotly_chart(fig, use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
842
 
843
- elif visualization_type == "Pie Chart":
844
- st.subheader("Pie Chart")
845
- col_for_pie = st.selectbox("Select Column for Pie Chart", df.columns)
846
- fig = px.pie(df, names=col_for_pie)
847
- st.plotly_chart(fig, use_container_width=True)
848
-
849
- elif visualization_type == "Area Chart":
850
- st.subheader("Area Chart")
851
- cols_for_area = st.multiselect("Select Columns for Area Chart", df.select_dtypes(include=np.number).columns.tolist(), default=df.select_dtypes(include=np.number).columns.tolist()[:3])
852
- if cols_for_area:
853
- fig = px.area(df[cols_for_area])
854
  st.plotly_chart(fig, use_container_width=True)
855
 
856
- elif visualization_type == "Density Contour":
857
- st.subheader("Density Contour")
858
- x_col = st.selectbox("Select X Column for Density Contour", df.select_dtypes(include=np.number).columns.tolist())
859
- y_col = st.selectbox("Select Y Column for Density Contour", df.select_dtypes(include=np.number).columns.tolist())
860
- fig = px.density_contour(df, x=x_col, y=y_col)
861
- st.plotly_chart(fig, use_container_width=True)
862
-
863
- elif visualization_type == "Sunburst Chart":
864
- st.subheader("Sunburst Chart")
865
- path_cols = st.multiselect("Select Path Columns for Sunburst Chart", df.columns)
866
- if path_cols:
867
- fig = px.sunburst(df, path=path_cols)
868
  st.plotly_chart(fig, use_container_width=True)
869
 
870
- elif visualization_type == "Funnel Chart":
871
- st.subheader("Funnel Chart")
872
- x_col = st.selectbox("Select X Column for Funnel Chart (Values)", df.select_dtypes(include=np.number).columns.tolist())
873
- y_col = st.selectbox("Select Y Column for Funnel Chart (Categories)", df.columns)
874
- fig = px.funnel(df, x=x_col, y=y_col)
875
- st.plotly_chart(fig, use_container_width=True)
876
 
877
- elif visualization_type == "Clustering Analysis":
878
- st.subheader("Clustering Analysis")
879
- numerical_cols = df.select_dtypes(include=np.number).columns.tolist()
 
 
 
880
 
881
- if not numerical_cols:
882
- st.warning("No numerical columns found for clustering.")
883
- else:
884
- cluster_cols = st.multiselect("Select Columns for Clustering", numerical_cols, default=numerical_cols[:2] if len(numerical_cols) >= 2 else numerical_cols)
885
 
886
- if cluster_cols:
887
- try:
888
- scaler = StandardScaler()
889
- scaled_data = scaler.fit_transform(df[cluster_cols])
890
- n_clusters = st.slider("Number of Clusters", 2, 10, 3, help="Number of clusters to form.")
891
- kmeans = KMeans(n_clusters=n_clusters, random_state=42)
892
- clusters = kmeans.fit_predict(scaled_data)
893
- df['Cluster'] = clusters
894
-
895
- if len(cluster_cols) == 2:
896
- fig = px.scatter(df, x=cluster_cols[0], y=cluster_cols[1], color='Cluster', title="K-Means Clustering")
897
- st.plotly_chart(fig, use_container_width=True)
898
- elif len(cluster_cols) == 3:
899
- fig = px.scatter_3d(df, x=cluster_cols[0], y=cluster_cols[1], z=cluster_cols[2], color='Cluster', title="K-Means Clustering (3D)")
900
- st.plotly_chart(fig, use_container_width=True)
901
- else:
902
- st.write("Clustering visualization is only supported for 2 or 3 selected columns.")
903
- st.success("Clustering applied successfully!")
904
- except Exception as e:
905
- st.error(f"An error occurred during clustering: {e}")
906
- #Add clustering performance in clustering analysis
907
- if len(cluster_cols) >= 2: # Evaluate Silhouette Score
908
- try:
909
- silhouette_avg = silhouette_score(scaled_data, clusters)
910
- st.write(f"Silhouette Score: {silhouette_avg:.4f}")
911
- except:
912
- st.write("Could not compute silhouette score")
913
-
914
- #Add dimensionality reduction option and 2d/3d plots
915
-
916
- dimension_reduction = st.selectbox("Dimensionality Reduction", ["None", "PCA"])
917
- if dimension_reduction == "PCA":
918
- n_components = st.slider("Number of Components", 2, min(3, len(cluster_cols)), 2)
919
- pca = PCA(n_components=n_components)
920
- principal_components = pca.fit_transform(scaled_data)
921
- pca_df = pd.DataFrame(data=principal_components, columns=[f'PC{i + 1}' for i in range(n_components)])
922
- pca_df['Cluster'] = clusters # Add Cluster
923
-
924
- if len(cluster_cols) >= 2: #plotting section
925
- fig = None #Initialize fig
926
- if dimension_reduction == "None":
927
- if len(cluster_cols) == 2:
928
- fig = px.scatter(df, x=cluster_cols[0], y=cluster_cols[1], color='Cluster', title="K-Means Clustering")
929
- st.plotly_chart(fig, use_container_width=True)
930
- elif len(cluster_cols) == 3:
931
- fig = px.scatter_3d(df, x=cluster_cols[0], y=cluster_cols[1], z=cluster_cols[2], color='Cluster', title="K-Means Clustering (3D)")
932
- st.plotly_chart(fig, use_container_width=True)
933
  else:
934
- st.write("Clustering visualization is only supported for 2 or 3 selected columns.")
935
 
936
- elif dimension_reduction == "PCA":
937
- if n_components == 2:
938
- fig = px.scatter(pca_df, x='PC1', y='PC2', color='Cluster', title="K-Means Clustering (PCA - 2D)")
939
- st.plotly_chart(fig, use_container_width=True)
940
- elif n_components == 3:
941
- fig = px.scatter_3d(pca_df, x='PC1', y='PC2', z='PC3', color='Cluster', title="K-Means Clustering (PCA - 3D)")
942
- st.plotly_chart(fig, use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
943
 
944
- else:
945
- st.write("PCA visualization is only supported for 2 or 3 components.")
946
 
947
  elif app_mode == "Neural Network Studio":
948
  st.title("🧠 Neural Network Studio")
 
796
  elif app_mode == "Visualization Lab":
797
  st.title("🔬 Advanced Data Visualization and Clustering Lab")
798
 
799
+ # Initialize session state for cleaned data
800
+ if 'cleaned_data' not in st.session_state:
801
+ st.session_state.cleaned_data = None
802
 
803
+ # Sample data upload (replace with your data loading logic)
804
+ uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])
805
+ if uploaded_file is not None:
806
+ try:
807
+ df = pd.read_csv(uploaded_file)
808
+ st.session_state.cleaned_data = df
809
+ st.success("Data loaded successfully!")
810
+ except Exception as e:
811
+ st.error(f"Error loading data: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
812
 
813
+ if st.session_state.cleaned_data is not None:
814
+ df = st.session_state.cleaned_data.copy()
 
 
 
 
815
 
816
+ # Visualization Type Selection
817
+ visualization_type = st.selectbox("Select Visualization Type", [
818
+ "Pair Plot", "Parallel Coordinates Plot", "Andrews Curves", "Pie Chart",
819
+ "Area Chart", "Density Contour", "Sunburst Chart", "Funnel Chart", "Clustering Analysis"
820
+ ])
821
+
822
+ if visualization_type == "Pair Plot":
823
+ st.subheader("Pair Plot")
824
+ cols_for_pairplot = st.multiselect("Select Columns for Pair Plot", df.select_dtypes(include=np.number).columns.tolist(), default=df.select_dtypes(include=np.number).columns.tolist()[:3])
825
+ if cols_for_pairplot:
826
+ fig = px.scatter_matrix(df, dimensions=cols_for_pairplot)
827
+ st.plotly_chart(fig, use_container_width=True)
828
+
829
+ elif visualization_type == "Parallel Coordinates Plot":
830
+ st.subheader("Parallel Coordinates Plot")
831
+ cols_for_parallel = st.multiselect("Select Columns for Parallel Coordinates", df.select_dtypes(include=np.number).columns.tolist(), default=df.select_dtypes(include=np.number).columns.tolist()[:5])
832
+ if cols_for_parallel:
833
+ fig = px.parallel_coordinates(df[cols_for_parallel], color=df[cols_for_parallel[0]] if cols_for_parallel else None)
834
+ st.plotly_chart(fig, use_container_width=True)
835
+
836
+ elif visualization_type == "Andrews Curves":
837
+ st.subheader("Andrews Curves")
838
+ cols_for_andrews = st.multiselect("Select Columns for Andrews Curves", df.select_dtypes(include=np.number).columns.tolist(), default=df.select_dtypes(include=np.number).columns.tolist()[:5])
839
+ if cols_for_andrews:
840
+ fig = px.andrews_curves(df[cols_for_andrews + [df.columns[0]]], class_column=df.columns[0])
841
+ st.plotly_chart(fig, use_container_width=True)
842
 
843
+ elif visualization_type == "Pie Chart":
844
+ st.subheader("Pie Chart")
845
+ col_for_pie = st.selectbox("Select Column for Pie Chart", df.columns)
846
+ fig = px.pie(df, names=col_for_pie)
 
 
 
 
 
 
 
847
  st.plotly_chart(fig, use_container_width=True)
848
 
849
+ elif visualization_type == "Area Chart":
850
+ st.subheader("Area Chart")
851
+ cols_for_area = st.multiselect("Select Columns for Area Chart", df.select_dtypes(include=np.number).columns.tolist(), default=df.select_dtypes(include=np.number).columns.tolist()[:3])
852
+ if cols_for_area:
853
+ fig = px.area(df[cols_for_area])
854
+ st.plotly_chart(fig, use_container_width=True)
855
+
856
+ elif visualization_type == "Density Contour":
857
+ st.subheader("Density Contour")
858
+ x_col = st.selectbox("Select X Column for Density Contour", df.select_dtypes(include=np.number).columns.tolist())
859
+ y_col = st.selectbox("Select Y Column for Density Contour", df.select_dtypes(include=np.number).columns.tolist())
860
+ fig = px.density_contour(df, x=x_col, y=y_col)
861
  st.plotly_chart(fig, use_container_width=True)
862
 
863
+ elif visualization_type == "Sunburst Chart":
864
+ st.subheader("Sunburst Chart")
865
+ path_cols = st.multiselect("Select Path Columns for Sunburst Chart", df.columns)
866
+ if path_cols:
867
+ fig = px.sunburst(df, path=path_cols)
868
+ st.plotly_chart(fig, use_container_width=True)
869
 
870
+ elif visualization_type == "Funnel Chart":
871
+ st.subheader("Funnel Chart")
872
+ x_col = st.selectbox("Select X Column for Funnel Chart (Values)", df.select_dtypes(include=np.number).columns.tolist())
873
+ y_col = st.selectbox("Select Y Column for Funnel Chart (Categories)", df.columns)
874
+ fig = px.funnel(df, x=x_col, y=y_col)
875
+ st.plotly_chart(fig, use_container_width=True)
876
 
877
+ elif visualization_type == "Clustering Analysis":
878
+ st.subheader("Clustering Analysis")
879
+ numerical_cols = df.select_dtypes(include=np.number).columns.tolist()
 
880
 
881
+ if not numerical_cols:
882
+ st.warning("No numerical columns found for clustering.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
883
  else:
884
+ cluster_cols = st.multiselect("Select Columns for Clustering", numerical_cols, default=numerical_cols[:2] if len(numerical_cols) >= 2 else numerical_cols)
885
 
886
+ if cluster_cols:
887
+ try:
888
+ scaler = StandardScaler()
889
+ scaled_data = scaler.fit_transform(df[cluster_cols])
890
+ n_clusters = st.slider("Number of Clusters", 2, 10, 3, help="Number of clusters to form.")
891
+ kmeans = KMeans(n_clusters=n_clusters, random_state=42)
892
+ clusters = kmeans.fit_predict(scaled_data)
893
+ df['Cluster'] = clusters
894
+
895
+ if len(cluster_cols) == 2:
896
+ fig = px.scatter(df, x=cluster_cols[0], y=cluster_cols[1], color='Cluster', title="K-Means Clustering")
897
+ st.plotly_chart(fig, use_container_width=True)
898
+ elif len(cluster_cols) == 3:
899
+ fig = px.scatter_3d(df, x=cluster_cols[0], y=cluster_cols[1], z=cluster_cols[2], color='Cluster', title="K-Means Clustering (3D)")
900
+ st.plotly_chart(fig, use_container_width=True)
901
+ else:
902
+ st.write("Clustering visualization is only supported for 2 or 3 selected columns.")
903
+ st.success("Clustering applied successfully!")
904
+
905
+ #Add clustering performance in clustering analysis
906
+ if len(cluster_cols) >= 2: # Evaluate Silhouette Score
907
+ try:
908
+ silhouette_avg = silhouette_score(scaled_data, clusters)
909
+ st.write(f"Silhouette Score: {silhouette_avg:.4f}")
910
+ except:
911
+ st.write("Could not compute silhouette score")
912
+
913
+ #Add dimensionality reduction option and 2d/3d plots
914
+
915
+ dimension_reduction = st.selectbox("Dimensionality Reduction", ["None", "PCA"])
916
+ if dimension_reduction == "PCA":
917
+ n_components = st.slider("Number of Components", 2, min(3, len(cluster_cols)), 2)
918
+ pca = PCA(n_components=n_components)
919
+ principal_components = pca.fit_transform(scaled_data)
920
+ pca_df = pd.DataFrame(data=principal_components, columns=[f'PC{i + 1}' for i in range(n_components)])
921
+ pca_df['Cluster'] = clusters # Add Cluster
922
+
923
+ if len(cluster_cols) >= 2: #plotting section
924
+ fig = None #Initialize fig
925
+ if dimension_reduction == "None":
926
+ if len(cluster_cols) == 2:
927
+ fig = px.scatter(df, x=cluster_cols[0], y=cluster_cols[1], color='Cluster', title="K-Means Clustering")
928
+ st.plotly_chart(fig, use_container_width=True)
929
+ elif len(cluster_cols) == 3:
930
+ fig = px.scatter_3d(df, x=cluster_cols[0], y=cluster_cols[1], z=cluster_cols[2], color='Cluster', title="K-Means Clustering (3D)")
931
+ st.plotly_chart(fig, use_container_width=True)
932
+ else:
933
+ st.write("Clustering visualization is only supported for 2 or 3 selected columns.")
934
+
935
+ elif dimension_reduction == "PCA":
936
+ if n_components == 2:
937
+ fig = px.scatter(pca_df, x='PC1', y='PC2', color='Cluster', title="K-Means Clustering (PCA - 2D)")
938
+ st.plotly_chart(fig, use_container_width=True)
939
+ elif n_components == 3:
940
+ fig = px.scatter_3d(pca_df, x='PC1', y='PC2', z='PC3', color='Cluster', title="K-Means Clustering (PCA - 3D)")
941
+ st.plotly_chart(fig, use_container_width=True)
942
+
943
+ else:
944
+ st.write("PCA visualization is only supported for 2 or 3 components.")
945
 
946
+ except Exception as e:
947
+ st.error(f"An error occurred during clustering: {e}")
948
 
949
  elif app_mode == "Neural Network Studio":
950
  st.title("🧠 Neural Network Studio")