from io import BytesIO

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier


# Streamlit page: upload a CSV that contains a 'target' column, estimate
# per-feature importance with three models, plot the results and offer the
# importance table as an Excel download.
uploaded_file = st.file_uploader("上傳一個 CSV 檔案", type="csv")

if uploaded_file is not None:
    df = pd.read_csv(uploaded_file)

    if 'target' in df.columns:
        # Every column except 'target' is used as a feature.
        X = df.drop('target', axis=1)
        y = df['target']

        # Only the training split is used below; the split is kept (rather
        # than fitting on all rows) so the fitted models see the same rows
        # as before.
        X_train, _, y_train, _ = train_test_split(
            X, y, test_size=0.2, random_state=42
        )

        # Standardised features are used for the linear model only, so its
        # coefficient magnitudes are comparable across features.
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)

        def calculate_importance():
            """Fit the three models on the training split.

            Returns:
                A tuple ``(lr_importance, cart_importance, rf_importance)``:
                absolute linear-regression coefficients (fit on the scaled
                features) followed by the ``feature_importances_`` of a
                decision tree and of a random forest (fit on the unscaled
                features).
            """
            # NOTE(review): 'target' is used both as a regression target and
            # as class labels -- presumably a numerically encoded categorical
            # column; confirm against the expected input files.
            lr = LinearRegression()
            lr.fit(X_train_scaled, y_train)
            lr_importance = np.abs(lr.coef_)

            cart = DecisionTreeClassifier(random_state=42)
            cart.fit(X_train, y_train)
            cart_importance = cart.feature_importances_

            rf = RandomForestClassifier(n_estimators=100, random_state=42)
            rf.fit(X_train, y_train)
            rf_importance = rf.feature_importances_

            return lr_importance, cart_importance, rf_importance

        lr_importance, cart_importance, rf_importance = calculate_importance()
        feature_importance = pd.DataFrame({
            'Feature': X.columns,
            'Linear Regression': lr_importance,
            'CART': cart_importance,
            'Random Forest': rf_importance,
        })

        # Rank features by the random-forest score; all charts and the table
        # below share this ordering.
        feature_importance = feature_importance.sort_values(
            'Random Forest', ascending=False
        )

        st.write("### 相關矩陣")
        corr_matrix = df.corr()
        # Draw on an explicit Figure and close it after rendering: the script
        # reruns on every interaction, and figures left open in pyplot's
        # global state would pile up.
        fig, ax = plt.subplots(figsize=(10, 8))
        sns.heatmap(
            corr_matrix,
            annot=True,
            cmap='coolwarm',
            fmt='.2f',
            linewidths=0.5,
            ax=ax,
        )
        st.pyplot(fig)
        plt.close(fig)

        def plot_individual_model(model_name):
            """Render a bar chart of one model's feature importances.

            Args:
                model_name: Name of a score column in ``feature_importance``
                    ('Linear Regression', 'CART' or 'Random Forest').
            """
            fig, ax = plt.subplots(figsize=(10, 6))
            ax.bar(feature_importance['Feature'], feature_importance[model_name])
            ax.set_title(f'{model_name} Feature Importance')
            ax.set_xlabel('Features')
            ax.set_ylabel('Importance')
            plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
            st.pyplot(fig)
            # Release the figure so reruns do not accumulate open figures.
            plt.close(fig)

        st.write("### 特徵重要性分析")

        st.write("#### Linear Regression")
        plot_individual_model('Linear Regression')

        st.write("#### CART (Decision Tree)")
        plot_individual_model('CART')

        st.write("#### Random Forest")
        plot_individual_model('Random Forest')

        st.write("### 特徵重要性數據表")
        st.dataframe(feature_importance)

        def to_excel(df):
            """Serialise ``df`` to an in-memory .xlsx workbook.

            Args:
                df: DataFrame to write (without its index) to a sheet named
                    'Feature Importance'.

            Returns:
                The workbook contents as ``bytes``.
            """
            output = BytesIO()
            # The context manager finalises the workbook on exit;
            # ``ExcelWriter.save()`` no longer exists in pandas 2.x.
            with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
                df.to_excel(writer, index=False, sheet_name='Feature Importance')
            return output.getvalue()

        excel_data = to_excel(feature_importance)
        st.download_button(
            label='下載特徵重要性數據為 Excel 檔案',
            data=excel_data,
            file_name='feature_importance.xlsx',
            mime='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        )

    else:
        st.error("上傳的檔案中找不到 'target' 欄位,請確認檔案格式。")