Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -796,153 +796,155 @@ elif app_mode == "Predictions":
|
|
796 |
elif app_mode == "Visualization Lab":
|
797 |
st.title("🔬 Advanced Data Visualization and Clustering Lab")
|
798 |
|
799 |
-
# Initialize session state for cleaned data
|
800 |
-
if 'cleaned_data' not in st.session_state:
|
801 |
-
|
802 |
|
803 |
-
# Sample data upload (replace with your data loading logic)
|
804 |
-
uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])
|
805 |
-
if uploaded_file is not None:
|
806 |
-
|
807 |
-
|
808 |
-
|
809 |
-
|
810 |
-
|
811 |
-
|
812 |
-
|
813 |
-
if st.session_state.cleaned_data is not None:
|
814 |
-
df = st.session_state.cleaned_data.copy()
|
815 |
-
|
816 |
-
# Visualization Type Selection
|
817 |
-
visualization_type = st.selectbox("Select Visualization Type", [
|
818 |
-
"Pair Plot", "Parallel Coordinates Plot", "Andrews Curves", "Pie Chart",
|
819 |
-
"Area Chart", "Density Contour", "Sunburst Chart", "Funnel Chart", "Clustering Analysis"
|
820 |
-
])
|
821 |
-
|
822 |
-
if visualization_type == "Pair Plot":
|
823 |
-
st.subheader("Pair Plot")
|
824 |
-
cols_for_pairplot = st.multiselect("Select Columns for Pair Plot", df.select_dtypes(include=np.number).columns.tolist(), default=df.select_dtypes(include=np.number).columns.tolist()[:3])
|
825 |
-
if cols_for_pairplot:
|
826 |
-
fig = px.scatter_matrix(df, dimensions=cols_for_pairplot)
|
827 |
-
st.plotly_chart(fig, use_container_width=True)
|
828 |
|
829 |
-
|
830 |
-
st.
|
831 |
-
cols_for_parallel = st.multiselect("Select Columns for Parallel Coordinates", df.select_dtypes(include=np.number).columns.tolist(), default=df.select_dtypes(include=np.number).columns.tolist()[:5])
|
832 |
-
if cols_for_parallel:
|
833 |
-
fig = px.parallel_coordinates(df[cols_for_parallel], color=df[cols_for_parallel[0]] if cols_for_parallel else None)
|
834 |
-
st.plotly_chart(fig, use_container_width=True)
|
835 |
|
836 |
-
|
837 |
-
st.
|
838 |
-
|
839 |
-
|
840 |
-
|
841 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
842 |
|
843 |
-
|
844 |
-
|
845 |
-
|
846 |
-
|
847 |
-
st.plotly_chart(fig, use_container_width=True)
|
848 |
-
|
849 |
-
elif visualization_type == "Area Chart":
|
850 |
-
st.subheader("Area Chart")
|
851 |
-
cols_for_area = st.multiselect("Select Columns for Area Chart", df.select_dtypes(include=np.number).columns.tolist(), default=df.select_dtypes(include=np.number).columns.tolist()[:3])
|
852 |
-
if cols_for_area:
|
853 |
-
fig = px.area(df[cols_for_area])
|
854 |
st.plotly_chart(fig, use_container_width=True)
|
855 |
|
856 |
-
|
857 |
-
|
858 |
-
|
859 |
-
|
860 |
-
|
861 |
-
|
862 |
-
|
863 |
-
|
864 |
-
|
865 |
-
|
866 |
-
|
867 |
-
fig = px.
|
868 |
st.plotly_chart(fig, use_container_width=True)
|
869 |
|
870 |
-
|
871 |
-
|
872 |
-
|
873 |
-
|
874 |
-
|
875 |
-
|
876 |
|
877 |
-
|
878 |
-
|
879 |
-
|
|
|
|
|
|
|
880 |
|
881 |
-
|
882 |
-
st.
|
883 |
-
|
884 |
-
cluster_cols = st.multiselect("Select Columns for Clustering", numerical_cols, default=numerical_cols[:2] if len(numerical_cols) >= 2 else numerical_cols)
|
885 |
|
886 |
-
if
|
887 |
-
|
888 |
-
scaler = StandardScaler()
|
889 |
-
scaled_data = scaler.fit_transform(df[cluster_cols])
|
890 |
-
n_clusters = st.slider("Number of Clusters", 2, 10, 3, help="Number of clusters to form.")
|
891 |
-
kmeans = KMeans(n_clusters=n_clusters, random_state=42)
|
892 |
-
clusters = kmeans.fit_predict(scaled_data)
|
893 |
-
df['Cluster'] = clusters
|
894 |
-
|
895 |
-
if len(cluster_cols) == 2:
|
896 |
-
fig = px.scatter(df, x=cluster_cols[0], y=cluster_cols[1], color='Cluster', title="K-Means Clustering")
|
897 |
-
st.plotly_chart(fig, use_container_width=True)
|
898 |
-
elif len(cluster_cols) == 3:
|
899 |
-
fig = px.scatter_3d(df, x=cluster_cols[0], y=cluster_cols[1], z=cluster_cols[2], color='Cluster', title="K-Means Clustering (3D)")
|
900 |
-
st.plotly_chart(fig, use_container_width=True)
|
901 |
-
else:
|
902 |
-
st.write("Clustering visualization is only supported for 2 or 3 selected columns.")
|
903 |
-
st.success("Clustering applied successfully!")
|
904 |
-
except Exception as e:
|
905 |
-
st.error(f"An error occurred during clustering: {e}")
|
906 |
-
#Add clustering performance in clustering analysis
|
907 |
-
if len(cluster_cols) >= 2: # Evaluate Silhouette Score
|
908 |
-
try:
|
909 |
-
silhouette_avg = silhouette_score(scaled_data, clusters)
|
910 |
-
st.write(f"Silhouette Score: {silhouette_avg:.4f}")
|
911 |
-
except:
|
912 |
-
st.write("Could not compute silhouette score")
|
913 |
-
|
914 |
-
#Add dimensionality reduction option and 2d/3d plots
|
915 |
-
|
916 |
-
dimension_reduction = st.selectbox("Dimensionality Reduction", ["None", "PCA"])
|
917 |
-
if dimension_reduction == "PCA":
|
918 |
-
n_components = st.slider("Number of Components", 2, min(3, len(cluster_cols)), 2)
|
919 |
-
pca = PCA(n_components=n_components)
|
920 |
-
principal_components = pca.fit_transform(scaled_data)
|
921 |
-
pca_df = pd.DataFrame(data=principal_components, columns=[f'PC{i + 1}' for i in range(n_components)])
|
922 |
-
pca_df['Cluster'] = clusters # Add Cluster
|
923 |
-
|
924 |
-
if len(cluster_cols) >= 2: #plotting section
|
925 |
-
fig = None #Initialize fig
|
926 |
-
if dimension_reduction == "None":
|
927 |
-
if len(cluster_cols) == 2:
|
928 |
-
fig = px.scatter(df, x=cluster_cols[0], y=cluster_cols[1], color='Cluster', title="K-Means Clustering")
|
929 |
-
st.plotly_chart(fig, use_container_width=True)
|
930 |
-
elif len(cluster_cols) == 3:
|
931 |
-
fig = px.scatter_3d(df, x=cluster_cols[0], y=cluster_cols[1], z=cluster_cols[2], color='Cluster', title="K-Means Clustering (3D)")
|
932 |
-
st.plotly_chart(fig, use_container_width=True)
|
933 |
else:
|
934 |
-
st.
|
935 |
|
936 |
-
|
937 |
-
|
938 |
-
|
939 |
-
|
940 |
-
|
941 |
-
|
942 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
943 |
|
944 |
-
|
945 |
-
|
946 |
|
947 |
elif app_mode == "Neural Network Studio":
|
948 |
st.title("🧠 Neural Network Studio")
|
|
|
796 |
elif app_mode == "Visualization Lab":
|
797 |
st.title("🔬 Advanced Data Visualization and Clustering Lab")
|
798 |
|
799 |
+
# Visualization Lab: load a CSV into session state, then render the chart or
# the K-Means clustering analysis chosen by the user.

# Initialize session state for cleaned data
if 'cleaned_data' not in st.session_state:
    st.session_state.cleaned_data = None

# Sample data upload (replace with your data loading logic)
uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])
if uploaded_file is not None:
    try:
        df = pd.read_csv(uploaded_file)
    except Exception as e:
        # UI boundary: report any parser failure instead of crashing the page.
        st.error(f"Error loading data: {e}")
    else:
        st.session_state.cleaned_data = df
        st.success("Data loaded successfully!")

if st.session_state.cleaned_data is not None:
    # Work on a copy so the 'Cluster' column added below never leaks into
    # the frame kept in session state.
    df = st.session_state.cleaned_data.copy()
    # Computed once and shared by every branch that offers numeric columns.
    numerical_cols = df.select_dtypes(include=np.number).columns.tolist()

    # Visualization Type Selection
    visualization_type = st.selectbox("Select Visualization Type", [
        "Pair Plot", "Parallel Coordinates Plot", "Andrews Curves", "Pie Chart",
        "Area Chart", "Density Contour", "Sunburst Chart", "Funnel Chart", "Clustering Analysis"
    ])

    if visualization_type == "Pair Plot":
        st.subheader("Pair Plot")
        cols_for_pairplot = st.multiselect(
            "Select Columns for Pair Plot", numerical_cols, default=numerical_cols[:3]
        )
        if cols_for_pairplot:
            fig = px.scatter_matrix(df, dimensions=cols_for_pairplot)
            st.plotly_chart(fig, use_container_width=True)

    elif visualization_type == "Parallel Coordinates Plot":
        st.subheader("Parallel Coordinates Plot")
        cols_for_parallel = st.multiselect(
            "Select Columns for Parallel Coordinates", numerical_cols, default=numerical_cols[:5]
        )
        if cols_for_parallel:
            # Lines are coloured by the first selected column.
            fig = px.parallel_coordinates(df[cols_for_parallel], color=cols_for_parallel[0])
            st.plotly_chart(fig, use_container_width=True)

    elif visualization_type == "Andrews Curves":
        st.subheader("Andrews Curves")
        cols_for_andrews = st.multiselect(
            "Select Columns for Andrews Curves", numerical_cols, default=numerical_cols[:5]
        )
        if cols_for_andrews:
            # plotly.express has no andrews_curves function, so the curves are
            # computed here and drawn with px.line. For a row (x1, x2, x3, ...):
            #   f(t) = x1/sqrt(2) + x2*sin(t) + x3*cos(t) + x4*sin(2t) + x5*cos(2t) + ...
            # The first column of the frame is used as the class (colour) column.
            class_column = df.columns[0]
            complete_rows = df.dropna(subset=cols_for_andrews)
            features = complete_rows[cols_for_andrews].to_numpy(dtype=float)
            t = np.linspace(-np.pi, np.pi, 200)
            curves = np.outer(features[:, 0], np.full(t.size, 1 / np.sqrt(2)))
            for position in range(1, features.shape[1]):
                harmonic = (position + 1) // 2
                basis = np.sin(harmonic * t) if position % 2 else np.cos(harmonic * t)
                curves += np.outer(features[:, position], basis)
            # Long format: one record per (row, t) sample; curves.ravel() is
            # row-major, which matches the tile/repeat order used below.
            andrews_df = pd.DataFrame({
                "t": np.tile(t, len(features)),
                "f(t)": curves.ravel(),
                "row": np.repeat(np.arange(len(features)), t.size),
                "class": np.repeat(complete_rows[class_column].astype(str).to_numpy(), t.size),
            })
            fig = px.line(
                andrews_df, x="t", y="f(t)", line_group="row", color="class",
                labels={"class": str(class_column)},
            )
            st.plotly_chart(fig, use_container_width=True)

    elif visualization_type == "Pie Chart":
        st.subheader("Pie Chart")
        col_for_pie = st.selectbox("Select Column for Pie Chart", df.columns)
        fig = px.pie(df, names=col_for_pie)
        st.plotly_chart(fig, use_container_width=True)

    elif visualization_type == "Area Chart":
        st.subheader("Area Chart")
        cols_for_area = st.multiselect(
            "Select Columns for Area Chart", numerical_cols, default=numerical_cols[:3]
        )
        if cols_for_area:
            fig = px.area(df[cols_for_area])
            st.plotly_chart(fig, use_container_width=True)

    elif visualization_type == "Density Contour":
        st.subheader("Density Contour")
        x_col = st.selectbox("Select X Column for Density Contour", numerical_cols)
        y_col = st.selectbox("Select Y Column for Density Contour", numerical_cols)
        fig = px.density_contour(df, x=x_col, y=y_col)
        st.plotly_chart(fig, use_container_width=True)

    elif visualization_type == "Sunburst Chart":
        st.subheader("Sunburst Chart")
        path_cols = st.multiselect("Select Path Columns for Sunburst Chart", df.columns)
        if path_cols:
            fig = px.sunburst(df, path=path_cols)
            st.plotly_chart(fig, use_container_width=True)

    elif visualization_type == "Funnel Chart":
        st.subheader("Funnel Chart")
        x_col = st.selectbox("Select X Column for Funnel Chart (Values)", numerical_cols)
        y_col = st.selectbox("Select Y Column for Funnel Chart (Categories)", df.columns)
        fig = px.funnel(df, x=x_col, y=y_col)
        st.plotly_chart(fig, use_container_width=True)

    elif visualization_type == "Clustering Analysis":
        st.subheader("Clustering Analysis")

        if not numerical_cols:
            st.warning("No numerical columns found for clustering.")
        else:
            cluster_cols = st.multiselect(
                "Select Columns for Clustering", numerical_cols, default=numerical_cols[:2]
            )

            if cluster_cols:
                try:
                    # KMeans rejects NaN, so only fully populated rows are clustered.
                    rows_before = len(df)
                    df = df.dropna(subset=cluster_cols).copy()
                    rows_dropped = rows_before - len(df)
                    if rows_dropped:
                        st.info(f"Ignored {rows_dropped} rows with missing values in the selected columns.")

                    scaled_data = StandardScaler().fit_transform(df[cluster_cols])
                    n_clusters = st.slider("Number of Clusters", 2, 10, 3, help="Number of clusters to form.")
                    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
                    clusters = kmeans.fit_predict(scaled_data)
                    df['Cluster'] = clusters

                    # Plot the clusters in the original feature space, exactly once.
                    if len(cluster_cols) == 2:
                        fig = px.scatter(df, x=cluster_cols[0], y=cluster_cols[1], color='Cluster', title="K-Means Clustering")
                        st.plotly_chart(fig, use_container_width=True)
                    elif len(cluster_cols) == 3:
                        fig = px.scatter_3d(df, x=cluster_cols[0], y=cluster_cols[1], z=cluster_cols[2], color='Cluster', title="K-Means Clustering (3D)")
                        st.plotly_chart(fig, use_container_width=True)
                    else:
                        st.write("Clustering visualization is only supported for 2 or 3 selected columns.")
                    st.success("Clustering applied successfully!")

                    # Clustering quality: silhouette score on the scaled features.
                    if len(cluster_cols) >= 2:
                        try:
                            silhouette_avg = silhouette_score(scaled_data, clusters)
                        except ValueError:
                            # Raised by scikit-learn when the labelling is degenerate
                            # (e.g. every sample ended up in a single cluster).
                            st.write("Could not compute silhouette score")
                        else:
                            st.write(f"Silhouette Score: {silhouette_avg:.4f}")

                    # Optional PCA projection, shown in addition to the plot above.
                    dimension_reduction = st.selectbox("Dimensionality Reduction", ["None", "PCA"])
                    if dimension_reduction == "PCA":
                        if len(cluster_cols) < 2:
                            st.write("PCA requires at least 2 selected columns.")
                        else:
                            # Only offer the 2-vs-3 choice when 3 components are
                            # possible; a slider whose bounds coincide is not a
                            # valid range.
                            if len(cluster_cols) >= 3:
                                n_components = st.slider("Number of Components", 2, 3, 2)
                            else:
                                n_components = 2
                            pca = PCA(n_components=n_components)
                            principal_components = pca.fit_transform(scaled_data)
                            pca_df = pd.DataFrame(
                                data=principal_components,
                                columns=[f'PC{i + 1}' for i in range(n_components)],
                            )
                            pca_df['Cluster'] = clusters  # same row order as scaled_data

                            if n_components == 2:
                                fig = px.scatter(pca_df, x='PC1', y='PC2', color='Cluster', title="K-Means Clustering (PCA - 2D)")
                            else:
                                fig = px.scatter_3d(pca_df, x='PC1', y='PC2', z='PC3', color='Cluster', title="K-Means Clustering (PCA - 3D)")
                            st.plotly_chart(fig, use_container_width=True)

                except Exception as e:
                    # UI boundary: surface any scaling/clustering failure to the user.
                    st.error(f"An error occurred during clustering: {e}")
|
948 |
|
949 |
elif app_mode == "Neural Network Studio":
|
950 |
st.title("🧠 Neural Network Studio")
|