"""Streamlit dashboard for exploring Taiwanese corporate ESG open data.

The script downloads one of several CSV datasets, previews it, and lets the
user draw a pie chart, a bar chart and a K-means scatter plot for the numeric
columns, each with a PNG download link.
"""

import base64
import html
import io
import time
from io import BytesIO
from pathlib import Path

import matplotlib as mpl
import matplotlib.font_manager as fm
import pandas as pd
import plotly.express as px
import requests
import streamlit as st
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Page configuration (the title contains Chinese text and emoji).
# NOTE: this stays ahead of every other Streamlit call in the script.
st.set_page_config(page_title="🌳台灣中小企業ESG數據分析與揭露儀表板🌲", page_icon=":chart_with_upwards_trend:", layout="wide")

# Seconds to wait on any HTTP request before giving up.
REQUEST_TIMEOUT = 30

# Dataset label -> CSV download URL.
urls = {
    "溫室氣體": "https://mopsfin.twse.com.tw/opendata/t187ap46_L_1.csv",
    "能源": "https://mopsfin.twse.com.tw/opendata/t187ap46_O_2.csv",
    "董事會揭露": "https://mopsfin.twse.com.tw/opendata/t187ap46_L_6.csv",
}

# Custom font used so matplotlib can render Chinese characters.
font_url = "https://drive.google.com/uc?id=1eGAsTN1HBpJAkeVM57_C7ccp7hbgSz3_&export=download"
font_path = Path("TaipeiSansTCBeta-Regular.ttf")


@st.cache_data
def load_data(url: str) -> pd.DataFrame:
    """Download the CSV at *url* and return it as a DataFrame.

    The response body is decoded as UTF-8 and missing values are replaced
    with 0.

    Raises:
        requests.RequestException: if the request times out, fails, or the
            server answers with an HTTP error status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Fail loudly instead of trying to parse an error page as CSV.
    response.raise_for_status()
    response.encoding = 'utf-8'
    # response.text is already decoded, so read_csv needs no encoding hint.
    df = pd.read_csv(io.StringIO(response.text))
    return df.fillna(0)


def get_image_download_link(fig, filename: str, text: str) -> str:
    """Return an HTML anchor that downloads *fig* rendered as a PNG.

    Args:
        fig: a figure object exposing ``write_image`` (the Plotly figures
            created below).
        filename: file name suggested to the browser for the download.
        text: visible link text.

    Returns:
        An ``<a>`` element whose ``href`` is a base64 ``data:`` URI holding
        the PNG bytes.
    """
    buf = BytesIO()
    fig.write_image(buf, format="png")
    b64 = base64.b64encode(buf.getvalue()).decode()
    # Escape the caller-supplied pieces so they cannot break out of the markup.
    safe_name = html.escape(filename, quote=True)
    safe_text = html.escape(text)
    return f'<a href="data:image/png;base64,{b64}" download="{safe_name}">{safe_text}</a>'


@st.cache_resource
def _setup_chinese_font() -> bool:
    """Download (once) and register the custom font with matplotlib.

    Returns True when the font was registered, False when the download or
    registration failed. The result is cached for the lifetime of the
    process, so the font is not fetched again on every rerun.
    """
    try:
        if not font_path.exists():
            font_response = requests.get(font_url, timeout=REQUEST_TIMEOUT)
            font_response.raise_for_status()
            font_path.write_bytes(font_response.content)
        fm.fontManager.addfont(str(font_path))
    except (requests.RequestException, OSError, RuntimeError):
        # NOTE(review): RuntimeError is assumed to be what matplotlib raises
        # for an unreadable font file - confirm. The font is optional for
        # this page (all charts are Plotly), so a failure must not crash it.
        return False
    mpl.rc('font', family='Taipei Sans TC Beta')
    return True


# ---------------------------------------------------------------------------
# Streamlit application
# ---------------------------------------------------------------------------
st.title("台灣企業ESG數據分析與揭露")
st.subheader("以溫室氣體 X 再生能源 X 董事會資訊: https://www.tejwin.com/insight/carbon-footprint-verification/")
st.subheader("ESG投資: https://www.fhtrust.com.tw/ESG/operating")

# Let the user pick which dataset to display.
dataset_choice = st.selectbox("選擇要顯示的數據集", list(urls.keys()))

# Load the chosen dataset; show a readable error instead of a traceback.
try:
    selected_df = load_data(urls[dataset_choice])
except requests.RequestException as exc:
    st.error(f"無法下載數據集:{exc}")
    st.stop()

# Preview of the downloaded data.
st.write("### 爬取的資料預覽")
st.dataframe(selected_df.head())

# Keep numeric columns only, excluding '出表日期' and '報告年度'.
excluded_columns = {'出表日期', '報告年度'}
numeric_columns = [
    col
    for col in selected_df.select_dtypes(include=['float64', 'int64']).columns
    if col not in excluded_columns
]

if not numeric_columns:
    # Nothing can be plotted or clustered without a numeric column.
    st.warning("此數據集沒有可用於繪圖的數值欄位。")
    st.stop()

# Let the user pick the column to chart.
column_choice = st.selectbox("選擇欄位來繪製圖表", numeric_columns)

# A button is True only during the rerun caused by its own click. Remember
# which selection the charts were generated for, so that widgets inside the
# chart area (such as the K-means button) still find the area rendered on the
# rerun they trigger.
current_selection = (dataset_choice, column_choice)
if st.button("生成圖表"):
    st.session_state["charts_for"] = current_selection
    # Cosmetic progress bar, shown only in response to the click itself.
    progress_bar = st.progress(0)
    for i in range(100):
        time.sleep(0.01)
        progress_bar.progress(i + 1)

if st.session_state.get("charts_for") == current_selection:
    # NOTE(review): the charts below assume the dataset has a '公司名稱'
    # column - confirm for every URL in `urls`.
    tab1, tab2, tab3 = st.tabs(["圓餅圖", "長條圖", "K-means分析"])

    with tab1:
        # Pie chart of the chosen column per company.
        fig_pie = px.pie(
            selected_df,
            names='公司名稱',
            values=column_choice,
            title=f"{dataset_choice} - {column_choice} 圓餅圖",
            color_discrete_sequence=px.colors.qualitative.Pastel,
        )
        fig_pie.update_traces(textposition='inside', textinfo='percent+label')
        fig_pie.update_layout(
            font=dict(size=12),
            legend=dict(
                orientation="h",
                yanchor="top",
                y=-0.3,
                xanchor="center",
                x=0.5,
            ),
            height=700,
            margin=dict(t=50, b=50, l=50, r=50),
        )
        st.plotly_chart(fig_pie, use_container_width=True)

        # Download link for the pie chart.
        st.markdown(get_image_download_link(fig_pie, "pie_chart.png", "下載圓餅圖"), unsafe_allow_html=True)

    with tab2:
        # Bar chart of the chosen column per company.
        fig_bar = px.bar(
            selected_df,
            x='公司名稱',
            y=column_choice,
            title=f"{dataset_choice} - {column_choice} 長條圖",
            color='公司名稱',
            color_discrete_sequence=px.colors.qualitative.Pastel,
        )
        fig_bar.update_layout(
            xaxis_title="企業",
            yaxis_title=column_choice,
            font=dict(size=12),
            xaxis_tickangle=-45,
            showlegend=False,
            height=600,
        )
        st.plotly_chart(fig_bar, use_container_width=True)

        # Download link for the bar chart.
        st.markdown(get_image_download_link(fig_bar, "bar_chart.png", "下載長條圖"), unsafe_allow_html=True)

    with tab3:
        # K-means analysis is offered for every dataset.
        st.subheader(f"{dataset_choice}數據的K-means分析")

        # Features used for clustering.
        cluster_features = st.multiselect("選擇用於聚類的特徵", numeric_columns, default=numeric_columns[:2])

        # Number of clusters.
        n_clusters = st.slider("選擇聚類數量", min_value=2, max_value=10, value=3)

        if st.button("執行K-means分析"):
            if len(cluster_features) < 2:
                st.warning("請至少選擇兩個特徵進行聚類分析。")
            elif len(selected_df) < n_clusters:
                # KMeans cannot form more clusters than there are samples.
                st.warning("資料筆數少於聚類數量,請減少聚類數量。")
            else:
                # Standardise the features so each contributes equally.
                X = selected_df[cluster_features]
                X_scaled = StandardScaler().fit_transform(X)

                # Run K-means with a fixed seed for repeatable results.
                kmeans = KMeans(n_clusters=n_clusters, random_state=42)
                clusters = kmeans.fit_predict(X_scaled)

                # Attach the labels to a new frame rather than mutating the
                # loaded one.
                clustered_df = selected_df.assign(Cluster=clusters)

                # Scatter plot of the first two selected features.
                fig_scatter = px.scatter(
                    clustered_df,
                    x=cluster_features[0],
                    y=cluster_features[1],
                    color='Cluster',
                    hover_data=['公司名稱'],
                    title=f"{dataset_choice}數據的K-means聚類 ({cluster_features[0]} vs {cluster_features[1]})",
                )
                st.plotly_chart(fig_scatter, use_container_width=True)

                # Download link for the scatter plot.
                st.markdown(
                    get_image_download_link(fig_scatter, "kmeans_scatter.png", "下載K-means散點圖"),
                    unsafe_allow_html=True,
                )

                # Mean of each selected feature per cluster.
                st.subheader("聚類特徵")
                cluster_stats = clustered_df.groupby('Cluster')[cluster_features].mean()
                st.dataframe(cluster_stats)

    st.success("圖表生成完成!您可以使用下載按鈕保存圖表,然後列印。")

# Download and register the custom font for Chinese characters (best effort).
_setup_chinese_font()