# app.py — commit 7eb6b34 (verified), "Update app.py", uploaded by JERNGOC
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from io import BytesIO
# Streamlit script: upload a CSV that contains a "target" column, fit three
# models on it, and show/download the per-feature importance each one reports.

# Let the user upload a CSV file.
uploaded_file = st.file_uploader("上傳一個 CSV 檔案", type="csv")

if uploaded_file is not None:
    # Read the uploaded CSV into a DataFrame.
    df = pd.read_csv(uploaded_file)

    # The analysis only runs when the data has a "target" column.
    if 'target' in df.columns:
        # Features are every column except "target"; "target" is the label.
        X = df.drop('target', axis=1)
        y = df['target']

        # Hold out 20% of the rows; the fixed seed keeps the split stable
        # across Streamlit reruns.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )

        # Standardize the features (the scaler is fitted on the training
        # split only). The scaled training data feeds the linear model below.
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        def calculate_importance():
            """Fit the three models and return their per-feature importances.

            Reads ``X_train``, ``X_train_scaled`` and ``y_train`` from the
            enclosing script scope.

            Returns:
                A tuple ``(lr_importance, cart_importance, rf_importance)``:
                the absolute linear-regression coefficients (fitted on the
                standardized features) and the ``feature_importances_`` of
                the decision tree and the random forest (both fitted on the
                unscaled features).
            """
            # NOTE(review): LinearRegression treats the target as a number
            # while the two tree models treat it as class labels —
            # presumably the target is a numerically encoded class; confirm.

            # Linear Regression: coefficient magnitude is used as importance.
            lr = LinearRegression()
            lr.fit(X_train_scaled, y_train)
            lr_importance = np.abs(lr.coef_)

            # CART
            cart = DecisionTreeClassifier(random_state=42)
            cart.fit(X_train, y_train)
            cart_importance = cart.feature_importances_

            # Random Forest
            rf = RandomForestClassifier(n_estimators=100, random_state=42)
            rf.fit(X_train, y_train)
            rf_importance = rf.feature_importances_

            return lr_importance, cart_importance, rf_importance

        # Build the feature-importance table, one column per model.
        lr_importance, cart_importance, rf_importance = calculate_importance()
        feature_importance = pd.DataFrame({
            'Feature': X.columns,
            'Linear Regression': lr_importance,
            'CART': cart_importance,
            'Random Forest': rf_importance
        })

        # Sort by the random-forest importance, largest first.
        feature_importance = feature_importance.sort_values('Random Forest', ascending=False)

        # Draw the correlation matrix of the uploaded data as a heatmap.
        st.write("### 相關矩陣")
        corr_matrix = df.corr()
        # Use an explicit Figure rather than the global pyplot state, and
        # close it after rendering so figures do not pile up across reruns.
        corr_fig, corr_ax = plt.subplots(figsize=(10, 8))
        sns.heatmap(
            corr_matrix,
            annot=True,
            cmap='coolwarm',
            fmt='.2f',
            linewidths=0.5,
            ax=corr_ax,
        )
        st.pyplot(corr_fig)
        plt.close(corr_fig)

        def plot_individual_model(model_name):
            """Render a bar chart of one model's feature importances.

            Args:
                model_name: Name of the ``feature_importance`` column to
                    plot; it is also used in the chart title.

            Reads ``feature_importance`` from the enclosing script scope.
            """
            fig, ax = plt.subplots(figsize=(10, 6))
            ax.bar(feature_importance['Feature'], feature_importance[model_name])
            ax.set_title(f'{model_name} Feature Importance')
            ax.set_xlabel('Features')
            ax.set_ylabel('Importance')
            # Tilt the feature names so that long labels do not overlap.
            plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
            st.pyplot(fig)
            # Release the figure; otherwise every rerun leaves it registered
            # with pyplot.
            plt.close(fig)

        # Streamlit UI
        st.write("### 特徵重要性分析")

        # Show the three models' feature-importance charts separately.
        st.write("#### Linear Regression")
        plot_individual_model('Linear Regression')
        st.write("#### CART (Decision Tree)")
        plot_individual_model('CART')
        st.write("#### Random Forest")
        plot_individual_model('Random Forest')

        # Show the importance table.
        st.write("### 特徵重要性數據表")
        st.dataframe(feature_importance)

        def to_excel(df):
            """Serialize ``df`` to an in-memory .xlsx file.

            Args:
                df: DataFrame to write; the index is not written.

            Returns:
                The bytes of a workbook with a single sheet named
                "Feature Importance".
            """
            output = BytesIO()
            # The context manager finalizes the workbook on exit and also
            # closes the writer if writing fails.
            with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
                df.to_excel(writer, index=False, sheet_name='Feature Importance')
            return output.getvalue()

        # Let the user download the importance table as an Excel file.
        excel_data = to_excel(feature_importance)
        st.download_button(label='下載特徵重要性數據為 Excel 檔案',
                           data=excel_data,
                           file_name='feature_importance.xlsx',
                           mime='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')
    else:
        st.error("上傳的檔案中找不到 'target' 欄位,請確認檔案格式。")