Spaces:

Roberta2024
/

Taiwan_ESG_Analysis2024

Sleeping

App Files Files Community

Roberta2024 committed on Aug 16, 2024

Commit

217f8d7

verified ·

1 Parent(s): 5585581

Update app.py

Browse files

Files changed (1) hide show

app.py +142 -98

app.py CHANGED Viewed

@@ -1,115 +1,159 @@
 import pandas as pd
 import requests
 import plotly.express as px
-import streamlit as st
 import time
-from io import StringIO
-# Download and set custom font (unchanged)
-font_url = "https://drive.google.com/uc?id=1eGAsTN1HBpJAkeVM57_C7ccp7hbgSz3_&export=download"
-font_response = requests.get(font_url)
-with open("TaipeiSansTCBeta-Regular.ttf", "wb") as font_file:
-    font_file.write(font_response.content)
-# Function to download and load CSV into a DataFrame
 @st.cache_data
-def download_csv(url):
     response = requests.get(url)
-    response.encoding = 'utf-8'  # Set the encoding to UTF-8
-    df = pd.read_csv(StringIO(response.text), encoding='utf-8')
-    df = df.fillna(0)  # Fill missing values with 0
     return df
-# URLs to download data from (unchanged)
-urls = {
-    "溫室氣體": "https://mopsfin.twse.com.tw/opendata/t187ap46_L_1.csv",
-    "再生能源": "https://mopsfin.twse.com.tw/opendata/t187ap46_O_2.csv",
-    "董事會": "https://mopsfin.twse.com.tw/opendata/t187ap46_L_6.csv"
-}
-# Set page config for a wider layout
-st.set_page_config(layout="wide", page_title="ESG Data Visualization")
-# Streamlit app title with custom styling
-st.markdown("""
-    <style>
-    .big-font {
-        font-size:50px !important;
-        color: #1E88E5;
-        font-family: 'Taipei Sans TC Beta';
-    }
-    </style>
-    """, unsafe_allow_html=True)
-st.markdown('<p class="big-font">ESG 數據視覺化</p>', unsafe_allow_html=True)
-# Sidebar for data selection
-st.sidebar.header("設置")
-selected_option = st.sidebar.selectbox("選擇資料類型", options=list(urls.keys()))
-# Button to load data and analyze
-if st.sidebar.button("載入並分析數據"):
-    with st.spinner('資料載入中...'):
-        # Add a progress bar
-        progress_bar = st.progress(0)
-        for i in range(100):
-            time.sleep(0.01)
-            progress_bar.progress(i + 1)
-        df = download_csv(urls[selected_option])
-    # Displaying the DataFrame
-    st.header(f"{selected_option} 資料")
-    st.dataframe(df)
-    # Data summary
-    st.subheader("資料摘要")
-    st.write(df.describe())
-    # Automatically select columns for visualization
-    company_column = df.columns[0]  # Assume the first column is the company name
-    numeric_columns = df.select_dtypes(include=['int64', 'float64']).columns
-    if len(numeric_columns) > 0:
-        y_column = st.sidebar.selectbox("選擇數值欄位", options=numeric_columns)
-        # Visualization
-        st.subheader("資料視覺化")
-        # Pie Chart
-        fig_pie = px.pie(df, names=company_column, values=y_column, title=f"{selected_option} 分佈 (圓餅圖)")
         fig_pie.update_layout(
-            font_family="Taipei Sans TC Beta",
-            title_font_size=24,
-            legend_title_font_size=14,
-            legend_font_size=12
         )
         st.plotly_chart(fig_pie, use_container_width=True)
-        # Bar Chart
-        fig_bar = px.bar(df, x=company_column, y=y_column, title=f"{selected_option} 比較 (柱狀圖)")
         fig_bar.update_layout(
-            font_family="Taipei Sans TC Beta",
-            title_font_size=24,
-            xaxis_title="公司名稱",
-            yaxis_title=y_column,
-            legend_title_font_size=14,
-            legend_font_size=12
         )
-        fig_bar.update_xaxes(tickangle=45)
         st.plotly_chart(fig_bar, use_container_width=True)
-    else:
-        st.warning("沒有足夠的數值型欄位來創建圖表。請選擇另一個數據集。")
-    # Add download button for the data
-    csv = df.to_csv(index=False, encoding='utf-8-sig')  # Use UTF-8 with BOM for Excel compatibility
-    st.download_button(
-        label="下載資料為 CSV",
-        data=csv,
-        file_name=f"{selected_option}_data.csv",
-        mime="text/csv",
-    )
-else:
-    st.info("請在左側選單選擇資料類型，然後點擊「載入並分析數據」按鈕來查看數據和圖表。")
-# Footer
-st.markdown("---")
-st.markdown("使用 Streamlit 創建 ❤️")

+import streamlit as st
 import pandas as pd
 import requests
 import plotly.express as px
+import matplotlib.font_manager as fm
+import matplotlib as mpl
+import io
 import time
+from sklearn.cluster import KMeans
+from sklearn.preprocessing import StandardScaler
+# Configure the browser page title, page icon, and wide layout
+st.set_page_config(page_title="🌳台灣中小企業ESG數據分析與揭露儀表板🌲", page_icon=":chart_with_upwards_trend:", layout="wide")
+# Map each dataset label shown in the UI to the CSV URL it is loaded from
+urls = {
+    "溫室氣體": "https://mopsfin.twse.com.tw/opendata/t187ap46_L_1.csv",
+    "能源": "https://mopsfin.twse.com.tw/opendata/t187ap46_O_2.csv",
+    "董事會揭露": "https://mopsfin.twse.com.tw/opendata/t187ap46_L_6.csv"
+}
+# Download a CSV file and load it into a DataFrame
 @st.cache_data
+def load_data(url):
+    """Download the CSV at *url* and return it as a DataFrame.
+
+    The response body is decoded as UTF-8 and missing values are replaced
+    with 0. The result is cached by ``st.cache_data``.
+
+    Raises:
+        requests.RequestException: if the request times out, fails, or the
+            server answers with an HTTP error status.
+    """
+    # A timeout keeps a stalled server from blocking the app indefinitely.
+    response = requests.get(url, timeout=30)
+    # Fail loudly instead of parsing an HTTP error page as CSV.
+    response.raise_for_status()
+    # Force UTF-8 decoding of the body before it is read as text.
+    response.encoding = 'utf-8'
+    # The text is already decoded, so read_csv needs no encoding argument.
+    df = pd.read_csv(io.StringIO(response.text))
+    return df.fillna(0)
+# Streamlit application: page heading and reference links
+st.title("台灣企業ESG數據分析與揭露")
+st.subheader("以溫室氣體 X 再生能源 X 董事會資訊: https://www.tejwin.com/insight/carbon-footprint-verification/")
+st.subheader("ESG投資: https://www.fhtrust.com.tw/ESG/operating")
+# Let the user pick one of the configured datasets, then load it
+dataset_choice = st.selectbox("選擇要顯示的數據集", list(urls))
+selected_df = load_data(urls[dataset_choice])
+# Preview the first rows of the downloaded data
+st.write("### 爬取的資料預覽")
+st.dataframe(selected_df.head())
+# Keep the float64/int64 columns, leaving out '出表日期' and '報告年度'
+excluded_columns = ('出表日期', '報告年度')
+numeric_columns = [
+    col
+    for col in selected_df.select_dtypes(include=['float64', 'int64']).columns
+    if col not in excluded_columns
+]
+# Let the user pick the column to plot
+column_choice = st.selectbox("選擇欄位來繪製圖表", numeric_columns)
+# Button that triggers chart generation
+if st.button("生成圖表"):
+    # st.button is True only for the rerun caused by the click, and every
+    # later widget interaction reruns the script. Persist the request so the
+    # multiselect/slider in the K-means tab do not make the charts disappear.
+    st.session_state["charts_requested"] = True
+    # Animate a progress bar (100 steps of 0.01 s each)
+    progress_bar = st.progress(0)
+    for i in range(100):
+        time.sleep(0.01)
+        progress_bar.progress(i + 1)
+charts_requested = st.session_state.get("charts_requested", False)
+if charts_requested and column_choice is None:
+    # The column selectbox returns None when there are no numeric columns to
+    # offer; plotting would fail, so explain the situation instead.
+    st.warning("沒有足夠的數值型欄位來創建圖表。請選擇另一個數據集。")
+elif charts_requested:
+    # Lay the charts out in tabs
+    tab1, tab2, tab3 = st.tabs(["圓餅圖", "長條圖", "K-means分析"])
+    with tab1:
+        # Pie chart of the chosen column, one slice per company
+        fig_pie = px.pie(
+            selected_df,
+            names='公司名稱',
+            values=column_choice,
+            title=f"{dataset_choice} - {column_choice} 圓餅圖",
+            color_discrete_sequence=px.colors.qualitative.Pastel
+        )
+        fig_pie.update_traces(textposition='inside', textinfo='percent+label')
         fig_pie.update_layout(
+            font=dict(size=12),
+            legend=dict(
+                orientation="h",
+                yanchor="top",
+                y=-0.3,
+                xanchor="center",
+                x=0.5
+            ),
+            height=700,
+            margin=dict(t=50, b=50, l=50, r=50)
         )
         st.plotly_chart(fig_pie, use_container_width=True)
+    with tab2:
+        # Bar chart of the chosen column, one bar per company
+        fig_bar = px.bar(
+            selected_df,
+            x='公司名稱',
+            y=column_choice,
+            title=f"{dataset_choice} - {column_choice} 長條圖",
+            color='公司名稱',
+            color_discrete_sequence=px.colors.qualitative.Pastel
+        )
         fig_bar.update_layout(
+            xaxis_title="企業",
+            yaxis_title=column_choice,
+            font=dict(size=12),
+            xaxis_tickangle=-45,
+            showlegend=False,
+            height=600
         )
         st.plotly_chart(fig_bar, use_container_width=True)
+    with tab3:
+        if dataset_choice == "溫室氣體":
+            # K-means analysis of the greenhouse-gas dataset
+            st.subheader("溫室氣體數據的K-means分析")
+            # Features used for clustering (first two numeric columns by default)
+            cluster_features = st.multiselect("選擇用於聚類的特徵", numeric_columns, default=numeric_columns[:2])
+            if len(cluster_features) >= 2:
+                # Standardize the selected features before clustering
+                X = selected_df[cluster_features]
+                scaler = StandardScaler()
+                X_scaled = scaler.fit_transform(X)
+                # Run K-means with the user-selected number of clusters
+                n_clusters = st.slider("選擇聚類數量", min_value=2, max_value=10, value=3)
+                kmeans = KMeans(n_clusters=n_clusters, random_state=42)
+                clusters = kmeans.fit_predict(X_scaled)
+                # Attach the labels to a copy rather than mutating selected_df.
+                # They are stored as strings so plotly treats them as
+                # categories (discrete colors) instead of a continuous scale.
+                clustered_df = selected_df.assign(Cluster=clusters.astype(str))
+                # Scatter plot of the first two selected features, colored by cluster
+                fig_scatter = px.scatter(
+                    clustered_df,
+                    x=cluster_features[0],
+                    y=cluster_features[1],
+                    color='Cluster',
+                    hover_data=['公司名稱'],
+                    title=f"溫室氣體數據的K-means聚類 ({cluster_features[0]} vs {cluster_features[1]})"
+                )
+                st.plotly_chart(fig_scatter, use_container_width=True)
+                # Mean of each selected feature per cluster
+                st.subheader("聚類特徵")
+                cluster_stats = clustered_df.groupby('Cluster')[cluster_features].mean()
+                st.dataframe(cluster_stats)
+            else:
+                st.warning("請至少選擇兩個特徵進行聚類分析。")
+        else:
+            st.info("K-means分析僅適用於溫室氣體數據集。")
+    st.success("圖表生成完成！")
+# Download and register a custom font for displaying Chinese characters
+font_url = "https://drive.google.com/uc?id=1eGAsTN1HBpJAkeVM57_C7ccp7hbgSz3_&export=download"
+@st.cache_resource
+def _install_font(url, path="TaipeiSansTCBeta-Regular.ttf"):
+    """Download the font at *url* to *path* and register it with matplotlib.
+
+    Wrapped in ``st.cache_resource`` so the download and registration are
+    not repeated on every script rerun.
+
+    Returns:
+        The path the font file was written to.
+
+    Raises:
+        requests.RequestException: if the download times out, fails, or
+            returns an HTTP error status.
+    """
+    # Timeout and status check: never hang on, or save, a failed response.
+    font_response = requests.get(url, timeout=30)
+    font_response.raise_for_status()
+    with open(path, "wb") as font_file:
+        font_file.write(font_response.content)
+    # Make the font known to matplotlib and select it as the default family.
+    fm.fontManager.addfont(path)
+    mpl.rc('font', family='Taipei Sans TC Beta')
+    return path
+try:
+    _install_font(font_url)
+except requests.RequestException as exc:
+    # Report a failed font download instead of taking the whole page down.
+    st.warning(f"無法下載自定義字體：{exc}")