Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
from sklearn.preprocessing import StandardScaler, MinMaxScaler
|
5 |
+
from sklearn.decomposition import PCA
|
6 |
+
from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
|
7 |
+
from sklearn.metrics import silhouette_score
|
8 |
+
import joblib
|
9 |
+
import matplotlib.pyplot as plt
|
10 |
+
|
11 |
+
# Function to load and process data (including PCA)
|
12 |
+
def process_data(file, scaler_option):
    """Load the detectors report and project it onto two principal components.

    The CSV is read with pandas, restricted to the four RI feature columns,
    missing values are replaced by the mean of their own column, the result
    is scaled with the chosen scaler and finally reduced to two PCA
    components so it can be drawn on a 2-D scatter plot.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``.
        scaler_option: Either ``'StandardScaler'`` or ``'MinMaxScaler'``.

    Returns:
        The array returned by ``PCA(n_components=2).fit_transform`` on the
        scaled feature matrix.

    Raises:
        ValueError: If ``scaler_option`` is not one of the supported names.
        KeyError: If any of the required feature columns is absent.
    """
    features = ['RI4', 'RI5', 'RI7', 'RI9']
    supported_scalers = ('StandardScaler', 'MinMaxScaler')

    # Reject unknown options before doing any work; previously this fell
    # through both branches and crashed later with an unbound `scaler`.
    if scaler_option not in supported_scalers:
        raise ValueError(
            f"Unsupported scaler_option {scaler_option!r}; "
            f"expected one of {supported_scalers}"
        )

    df = pd.read_csv(file)

    missing = [name for name in features if name not in df.columns]
    if missing:
        raise KeyError(f"Required feature columns missing from file: {missing}")

    # Take the means over the selected columns only. Calling df.mean() on the
    # whole frame raises TypeError on recent pandas when the report also
    # contains non-numeric columns (ids, timestamps, ...).
    df_selected = df[features]
    df_selected = df_selected.fillna(df_selected.mean())

    # Apply chosen scaler
    if scaler_option == 'StandardScaler':
        scaler = StandardScaler()
    else:
        scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(df_selected)

    # PCA Transformation (2 components for visualization)
    pca = PCA(n_components=2)
    pca_data = pca.fit_transform(scaled_data)

    return pca_data
|
30 |
+
|
31 |
+
# Set up the Streamlit page
|
32 |
+
def _draw_clustering(axis, uploaded_model, points, params, title, missing_title):
    """Refit one uploaded estimator on *points* and scatter the labels on *axis*.

    Args:
        axis: Matplotlib axes to draw on.
        uploaded_model: Uploaded model file, or ``None`` if nothing was uploaded.
        points: 2-column array of PCA coordinates to cluster and plot.
        params: Keyword arguments forwarded to the estimator's ``set_params``.
        title: Axes title used when the model is available.
        missing_title: Axes title used when no model was uploaded.
    """
    if uploaded_model is None:
        axis.set_title(missing_title)
        return

    # SECURITY: joblib.load unpickles the upload, which can execute arbitrary
    # code. Only expose this app to users whose model files are trusted.
    model = joblib.load(uploaded_model)
    model.set_params(**params)
    labels = model.fit_predict(points)
    axis.scatter(points[:, 0], points[:, 1], c=labels, cmap='viridis')
    axis.set_title(title)


st.title("Clustering Analysis with K-means, Hierarchical, and DBSCAN Models")

# Upload the detectors report CSV file
data_file = st.file_uploader("Upload the detectors report file (.csv)", type="csv")

# Upload the models
kmeans_model = st.file_uploader("Upload the K-means model (.sav)", type="sav")
hierarchical_model = st.file_uploader("Upload the Hierarchical Clustering model (.sav)", type="sav")
dbscan_model = st.file_uploader("Upload the DBSCAN model (.sav)", type="sav")

# Parameter selection for K-means, Hierarchical Clustering, and DBSCAN
if data_file is not None:
    st.sidebar.header("Adjust Clustering Parameters")

    # Scaler selection
    scaler_option = st.sidebar.selectbox("Choose Scaler", ("StandardScaler", "MinMaxScaler"))

    # K-means parameters
    kmeans_clusters = st.sidebar.slider("K-means: Number of Clusters", min_value=2, max_value=10, value=3)

    # Hierarchical Clustering parameters
    hierarchical_clusters = st.sidebar.slider("Hierarchical: Number of Clusters", min_value=2, max_value=10, value=3)
    linkage = st.sidebar.selectbox("Hierarchical: Linkage Method", ["ward", "complete", "average", "single"])

    # DBSCAN parameters
    dbscan_eps = st.sidebar.number_input("DBSCAN: Epsilon", min_value=0.1, max_value=10.0, value=0.5, step=0.1)
    dbscan_min_samples = st.sidebar.slider("DBSCAN: Minimum Samples", min_value=1, max_value=20, value=5)

    # Load and process the data
    pca_data = process_data(data_file, scaler_option)

    # One row of three panels, one per clustering algorithm.
    # plt.subplots(1, 3) already returns a 1-D array of axes.
    fig, axes = plt.subplots(1, 3, figsize=(15, 5))

    # K-means Clustering
    _draw_clustering(
        axes[0],
        kmeans_model,
        pca_data,
        {"n_clusters": kmeans_clusters},
        f"K-means Clustering (n_clusters={kmeans_clusters})",
        "K-means Model Missing",
    )

    # Hierarchical Clustering
    _draw_clustering(
        axes[1],
        hierarchical_model,
        pca_data,
        {"n_clusters": hierarchical_clusters, "linkage": linkage},
        f"Hierarchical Clustering (n_clusters={hierarchical_clusters}, linkage={linkage})",
        "Hierarchical Model Missing",
    )

    # DBSCAN Clustering
    _draw_clustering(
        axes[2],
        dbscan_model,
        pca_data,
        {"eps": dbscan_eps, "min_samples": dbscan_min_samples},
        f"DBSCAN Clustering (eps={dbscan_eps}, min_samples={dbscan_min_samples})",
        "DBSCAN Model Missing",
    )

    # Display the plots
    st.pyplot(fig)
    # The script reruns on every widget change; close the figure so pyplot's
    # global registry does not keep one more figure alive per rerun.
    plt.close(fig)
else:
    st.info("Please upload the detectors report file to proceed.")
|