Spencer525 committed on
Commit
1487912
·
verified ·
1 Parent(s): c109aa4

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +100 -0
app.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ from sklearn.preprocessing import StandardScaler, MinMaxScaler
5
+ from sklearn.decomposition import PCA
6
+ from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
7
+ from sklearn.metrics import silhouette_score
8
+ import joblib
9
+ import matplotlib.pyplot as plt
10
+
11
+ # Function to load and process data (including PCA)
12
def process_data(file, scaler_option):
    """Load the detectors CSV, scale the RI features, and project them to 2-D.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``.
        scaler_option: Name of the scaler to apply, either
            ``'StandardScaler'`` or ``'MinMaxScaler'``.

    Returns:
        The result of a 2-component PCA ``fit_transform`` on the scaled
        features (one row per CSV row, two columns).

    Raises:
        ValueError: If ``scaler_option`` is not one of the supported names.
        KeyError: If the CSV lacks any of the required feature columns.
    """
    # Resolve the scaler first so an unsupported option fails with a clear
    # message instead of an UnboundLocalError after the file has been read.
    if scaler_option == 'StandardScaler':
        scaler = StandardScaler()
    elif scaler_option == 'MinMaxScaler':
        scaler = MinMaxScaler()
    else:
        raise ValueError(
            f"Unsupported scaler_option {scaler_option!r}; "
            "expected 'StandardScaler' or 'MinMaxScaler'."
        )

    df = pd.read_csv(file)
    features = ['RI4', 'RI5', 'RI7', 'RI9']
    df_selected = df[features]
    # Impute with the means of the selected columns only: taking the mean of
    # the whole frame breaks when the CSV also carries non-numeric columns.
    df_selected = df_selected.fillna(df_selected.mean())

    scaled_data = scaler.fit_transform(df_selected)

    # PCA Transformation (2 components for visualization)
    pca = PCA(n_components=2)
    pca_data = pca.fit_transform(scaled_data)

    return pca_data
30
+
31
+ # Set up the Streamlit page
32
st.title("Clustering Analysis with K-means, Hierarchical, and DBSCAN Models")

# Upload the detectors report CSV file
data_file = st.file_uploader("Upload the detectors report file (.csv)", type="csv")

# Upload the models.
# NOTE(security): the uploaded .sav files are deserialized with joblib.load
# below, which can execute arbitrary code. Only upload models you trust.
kmeans_model = st.file_uploader("Upload the K-means model (.sav)", type="sav")
hierarchical_model = st.file_uploader("Upload the Hierarchical Clustering model (.sav)", type="sav")
dbscan_model = st.file_uploader("Upload the DBSCAN model (.sav)", type="sav")

# Parameter selection for K-means, Hierarchical Clustering, and DBSCAN
if data_file is not None:
    st.sidebar.header("Adjust Clustering Parameters")

    # Scaler selection
    scaler_option = st.sidebar.selectbox("Choose Scaler", ("StandardScaler", "MinMaxScaler"))

    # K-means parameters
    kmeans_clusters = st.sidebar.slider("K-means: Number of Clusters", min_value=2, max_value=10, value=3)

    # Hierarchical Clustering parameters
    hierarchical_clusters = st.sidebar.slider("Hierarchical: Number of Clusters", min_value=2, max_value=10, value=3)
    linkage = st.sidebar.selectbox("Hierarchical: Linkage Method", ["ward", "complete", "average", "single"])

    # DBSCAN parameters
    dbscan_eps = st.sidebar.number_input("DBSCAN: Epsilon", min_value=0.1, max_value=10.0, value=0.5, step=0.1)
    dbscan_min_samples = st.sidebar.slider("DBSCAN: Minimum Samples", min_value=1, max_value=20, value=5)

    # Load and process the data
    pca_data = process_data(data_file, scaler_option)

    # Prepare the plot: one panel per clustering algorithm
    fig, ax = plt.subplots(1, 3, figsize=(15, 5))
    ax = ax.flatten()

    # Each entry describes one panel:
    # (uploaded model file, parameters to override, title, title when missing)
    panels = [
        (
            kmeans_model,
            {"n_clusters": kmeans_clusters},
            f"K-means Clustering (n_clusters={kmeans_clusters})",
            "K-means Model Missing",
        ),
        (
            hierarchical_model,
            {"n_clusters": hierarchical_clusters, "linkage": linkage},
            f"Hierarchical Clustering (n_clusters={hierarchical_clusters}, linkage={linkage})",
            "Hierarchical Model Missing",
        ),
        (
            dbscan_model,
            {"eps": dbscan_eps, "min_samples": dbscan_min_samples},
            f"DBSCAN Clustering (eps={dbscan_eps}, min_samples={dbscan_min_samples})",
            "DBSCAN Model Missing",
        ),
    ]

    for axis, (model_file, params, title, missing_title) in zip(ax, panels):
        if model_file is None:
            axis.set_title(missing_title)
            continue
        # The uploaded estimator is re-parameterized from the sidebar and
        # refit on the current PCA projection before plotting its labels.
        model = joblib.load(model_file)
        model.set_params(**params)
        labels = model.fit_predict(pca_data)
        axis.scatter(pca_data[:, 0], pca_data[:, 1], c=labels, cmap='viridis')
        axis.set_title(title)

    # Display the plots
    st.pyplot(fig)
    # Release the figure: Streamlit reruns this script on every interaction,
    # and pyplot keeps each unclosed figure alive in its global registry.
    plt.close(fig)
else:
    st.info("Please upload the detectors report file to proceed.")