"""Streamlit app: visualise saved clustering models on an uploaded CSV.

The uploaded data is standardised with the saved scaler, projected with the
saved PCA model, and then shown as a PC1/PC2 scatter plot coloured by the
labels of the clustering method selected in the UI (K-means, hierarchical
clustering, or DBSCAN).
"""
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import joblib
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans, DBSCAN
from scipy.cluster.hierarchy import fcluster, linkage

# Load the persisted preprocessing and clustering artifacts.
scaler = joblib.load('scaler.sav')
pca = joblib.load('pca_model.sav')
kmeans = joblib.load('kmeans_model.sav')
linked = joblib.load('hierarchical_model.sav')
dbscan = joblib.load('dbscan_model.sav')


def _show_cluster_scatter(pca_df, labels, subheader, title):
    """Render a PC1/PC2 scatter plot coloured by cluster label.

    Args:
        pca_df: DataFrame with 'PC1' and 'PC2' columns.
        labels: One cluster label per row of ``pca_df``.
        subheader: Text shown as the Streamlit subheader above the chart.
        title: Title drawn on the matplotlib axes.
    """
    st.subheader(subheader)
    fig, ax = plt.subplots()
    ax.scatter(pca_df['PC1'], pca_df['PC2'], c=labels, cmap='viridis')
    ax.set_title(title)
    ax.set_xlabel('PC1')
    ax.set_ylabel('PC2')
    st.pyplot(fig)
    # Streamlit reruns this script on every interaction and pyplot keeps each
    # figure alive until it is closed, so release it once it has been drawn.
    plt.close(fig)


# Title and introduction.
st.title("聚類分析 - KMeans, Hierarchical Clustering 和 DBSCAN")
st.write("上傳 CSV 文件並查看聚類結果")

# File upload.
uploaded_file = st.file_uploader("上傳 CSV 文件", type=["csv"])

if uploaded_file is not None:
    # Read the uploaded CSV; an empty file is reported instead of crashing.
    try:
        data = pd.read_csv(uploaded_file)
    except pd.errors.EmptyDataError:
        st.error("上傳的 CSV 文件是空的，請重新上傳。")
        st.stop()

    # Drop the 'Time' column when present; a file without it is passed
    # through unchanged rather than failing with a KeyError.
    numerical_data = data.drop(columns=['Time'], errors='ignore')

    # Standardise with the saved scaler, then project with the saved PCA.
    scaled_data = scaler.transform(numerical_data)
    pca_data = pca.transform(scaled_data)

    # DataFrame holding the principal components.
    pca_df = pd.DataFrame(pca_data, columns=['PC1', 'PC2'])

    # ================== Chart selection ==================
    chart_option = st.selectbox(
        "選擇要顯示的聚類結果圖表",
        ("K-means", "Hierarchical Clustering", "DBSCAN")
    )

    # ================== Show the selected chart ==================
    # Only the selected method is computed, so a rerun does not pay for the
    # two clusterings that are not displayed.
    if chart_option == "K-means":
        # Assign each uploaded row to a cluster of the saved K-means model.
        kmeans_labels = kmeans.predict(pca_df)
        _show_cluster_scatter(
            pca_df, kmeans_labels, "K-means_聚類結果", 'K-means Clustering'
        )
    elif chart_option == "Hierarchical Clustering":
        # The saved linkage matrix describes the rows it was built from, so
        # these labels only line up with the upload when the row counts match.
        hclust_labels = fcluster(linked, 3, criterion='maxclust')
        if len(hclust_labels) != len(pca_df):
            st.subheader("Hierarchical Clustering_階層式聚類結果")
            st.error(
                f"已保存的階層式聚類結果共有 {len(hclust_labels)} 筆，"
                f"與上傳資料的 {len(pca_df)} 筆不符，無法繪製圖表。"
            )
        else:
            _show_cluster_scatter(
                pca_df,
                hclust_labels,
                "Hierarchical Clustering_階層式聚類結果",
                'Hierarchical Clustering',
            )
    elif chart_option == "DBSCAN":
        # fit_predict re-fits the loaded DBSCAN estimator on the uploaded
        # data, so only the saved hyper-parameters are reused.
        dbscan_labels = dbscan.fit_predict(pca_df)
        _show_cluster_scatter(
            pca_df, dbscan_labels, "DBSCAN_聚類結果", 'DBSCAN Clustering'
        )