Roberta2024 committed on
Commit
217f8d7
·
verified ·
1 Parent(s): 5585581

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +142 -98
app.py CHANGED
@@ -1,115 +1,159 @@
 
1
  import pandas as pd
2
  import requests
3
  import plotly.express as px
4
- import streamlit as st
 
 
5
  import time
6
- from io import StringIO
 
7
 
8
- # Download and set custom font (unchanged)
9
- font_url = "https://drive.google.com/uc?id=1eGAsTN1HBpJAkeVM57_C7ccp7hbgSz3_&export=download"
10
- font_response = requests.get(font_url)
11
- with open("TaipeiSansTCBeta-Regular.ttf", "wb") as font_file:
12
- font_file.write(font_response.content)
 
 
 
 
13
 
14
- # Function to download and load CSV into a DataFrame
15
  @st.cache_data
16
- def download_csv(url):
17
  response = requests.get(url)
18
- response.encoding = 'utf-8' # Set the encoding to UTF-8
19
- df = pd.read_csv(StringIO(response.text), encoding='utf-8')
20
- df = df.fillna(0) # Fill missing values with 0
21
  return df
22
 
23
- # URLs to download data from (unchanged)
24
- urls = {
25
- "溫室氣體": "https://mopsfin.twse.com.tw/opendata/t187ap46_L_1.csv",
26
- "再生能源": "https://mopsfin.twse.com.tw/opendata/t187ap46_O_2.csv",
27
- "董事會": "https://mopsfin.twse.com.tw/opendata/t187ap46_L_6.csv"
28
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
- # Set page config for a wider layout
31
- st.set_page_config(layout="wide", page_title="ESG Data Visualization")
32
-
33
- # Streamlit app title with custom styling
34
- st.markdown("""
35
- <style>
36
- .big-font {
37
- font-size:50px !important;
38
- color: #1E88E5;
39
- font-family: 'Taipei Sans TC Beta';
40
- }
41
- </style>
42
- """, unsafe_allow_html=True)
43
- st.markdown('<p class="big-font">ESG 數據視覺化</p>', unsafe_allow_html=True)
44
-
45
- # Sidebar for data selection
46
- st.sidebar.header("設置")
47
- selected_option = st.sidebar.selectbox("選擇資料類型", options=list(urls.keys()))
48
-
49
- # Button to load data and analyze
50
- if st.sidebar.button("載入並分析數據"):
51
- with st.spinner('資料載入中...'):
52
- # Add a progress bar
53
- progress_bar = st.progress(0)
54
- for i in range(100):
55
- time.sleep(0.01)
56
- progress_bar.progress(i + 1)
57
-
58
- df = download_csv(urls[selected_option])
59
-
60
- # Displaying the DataFrame
61
- st.header(f"{selected_option} 資料")
62
- st.dataframe(df)
63
-
64
- # Data summary
65
- st.subheader("資料摘要")
66
- st.write(df.describe())
67
-
68
- # Automatically select columns for visualization
69
- company_column = df.columns[0] # Assume the first column is the company name
70
- numeric_columns = df.select_dtypes(include=['int64', 'float64']).columns
71
- if len(numeric_columns) > 0:
72
- y_column = st.sidebar.selectbox("選擇數值欄位", options=numeric_columns)
73
-
74
- # Visualization
75
- st.subheader("資料視覺化")
76
-
77
- # Pie Chart
78
- fig_pie = px.pie(df, names=company_column, values=y_column, title=f"{selected_option} 分佈 (圓餅圖)")
79
  fig_pie.update_layout(
80
- font_family="Taipei Sans TC Beta",
81
- title_font_size=24,
82
- legend_title_font_size=14,
83
- legend_font_size=12
 
 
 
 
 
 
84
  )
85
  st.plotly_chart(fig_pie, use_container_width=True)
86
-
87
- # Bar Chart
88
- fig_bar = px.bar(df, x=company_column, y=y_column, title=f"{selected_option} 比較 (柱狀圖)")
 
 
 
 
 
 
 
 
89
  fig_bar.update_layout(
90
- font_family="Taipei Sans TC Beta",
91
- title_font_size=24,
92
- xaxis_title="公司名稱",
93
- yaxis_title=y_column,
94
- legend_title_font_size=14,
95
- legend_font_size=12
96
  )
97
- fig_bar.update_xaxes(tickangle=45)
98
  st.plotly_chart(fig_bar, use_container_width=True)
99
- else:
100
- st.warning("沒有足夠的數值型欄位來創建圖表。請選擇另一個數據集。")
101
-
102
- # Add download button for the data
103
- csv = df.to_csv(index=False, encoding='utf-8-sig') # Use UTF-8 with BOM for Excel compatibility
104
- st.download_button(
105
- label="下載資料為 CSV",
106
- data=csv,
107
- file_name=f"{selected_option}_data.csv",
108
- mime="text/csv",
109
- )
110
- else:
111
- st.info("請在左側選單選擇資料類型,然後點擊「載入並分析數據」按鈕來查看數據和圖表。")
112
-
113
- # Footer
114
- st.markdown("---")
115
- st.markdown("使用 Streamlit 創建 ❤️")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
  import pandas as pd
3
  import requests
4
  import plotly.express as px
5
+ import matplotlib.font_manager as fm
6
+ import matplotlib as mpl
7
+ import io
8
  import time
9
+ from sklearn.cluster import KMeans
10
+ from sklearn.preprocessing import StandardScaler
11
 
12
# Configure the Streamlit page: browser-tab title, icon and wide layout.
st.set_page_config(page_title="🌳台灣中小企業ESG數據分析與揭露儀表板🌲", page_icon=":chart_with_upwards_trend:", layout="wide")

# CSV endpoints, keyed by the dataset label offered in the dataset selectbox.
urls = {
    "溫室氣體": "https://mopsfin.twse.com.tw/opendata/t187ap46_L_1.csv",
    "能源": "https://mopsfin.twse.com.tw/opendata/t187ap46_O_2.csv",
    "董事會揭露": "https://mopsfin.twse.com.tw/opendata/t187ap46_L_6.csv"
}
21
 
22
# Download a CSV file and load it into a DataFrame.
@st.cache_data
def load_data(url):
    """Fetch the CSV at ``url`` and return it as a DataFrame.

    The response body is decoded as UTF-8 and every missing value is
    replaced with 0. The function is wrapped in ``st.cache_data``.

    Args:
        url: Address of the CSV file to download.

    Returns:
        The parsed ``pandas.DataFrame`` with missing values filled with 0.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with a 4xx/5xx status.
    """
    # A timeout keeps a stalled server from hanging the whole app run.
    response = requests.get(url, timeout=30)
    # Fail loudly on HTTP errors instead of parsing (and caching) an error
    # page as if it were CSV data.
    response.raise_for_status()
    response.encoding = 'utf-8'
    df = pd.read_csv(io.StringIO(response.text), encoding='utf-8')
    return df.fillna(0)
30
 
31
# Streamlit application: page header, dataset/column pickers and chart tabs.
st.title("台灣企業ESG數據分析與揭露")
st.subheader("以溫室氣體 X 再生能源 X 董事會資訊: https://www.tejwin.com/insight/carbon-footprint-verification/")
st.subheader("ESG投資: https://www.fhtrust.com.tw/ESG/operating")

# Let the user pick which dataset to display.
dataset_choice = st.selectbox("選擇要顯示的數據集", list(urls.keys()))

# Load the selected dataset.
selected_df = load_data(urls[dataset_choice])

# Show a preview of the downloaded data.
st.write("### 爬取的資料預覽")
st.dataframe(selected_df.head())

# Keep only numeric columns, excluding '出表日期' and '報告年度'.
numeric_columns = selected_df.select_dtypes(include=['float64', 'int64']).columns
numeric_columns = [col for col in numeric_columns if col not in ['出表日期', '報告年度']]

# Let the user pick the column to plot.
column_choice = st.selectbox("選擇欄位來繪製圖表", numeric_columns)


def _render_pie(df, dataset_name, column):
    """Draw the pie chart of ``column`` per company."""
    fig_pie = px.pie(
        df,
        names='公司名稱',
        values=column,
        title=f"{dataset_name} - {column} 圓餅圖",
        color_discrete_sequence=px.colors.qualitative.Pastel
    )
    fig_pie.update_traces(textposition='inside', textinfo='percent+label')
    fig_pie.update_layout(
        font=dict(size=12),
        legend=dict(
            orientation="h",
            yanchor="top",
            y=-0.3,
            xanchor="center",
            x=0.5
        ),
        height=700,
        margin=dict(t=50, b=50, l=50, r=50)
    )
    st.plotly_chart(fig_pie, use_container_width=True)


def _render_bar(df, dataset_name, column):
    """Draw the bar chart of ``column`` per company."""
    fig_bar = px.bar(
        df,
        x='公司名稱',
        y=column,
        title=f"{dataset_name} - {column} 長條圖",
        color='公司名稱',
        color_discrete_sequence=px.colors.qualitative.Pastel
    )
    fig_bar.update_layout(
        xaxis_title="企業",
        yaxis_title=column,
        font=dict(size=12),
        xaxis_tickangle=-45,
        showlegend=False,
        height=600
    )
    st.plotly_chart(fig_bar, use_container_width=True)


def _render_kmeans(df, feature_options):
    """Run K-means on user-selected features and show the clusters."""
    st.subheader("溫室氣體數據的K-means分析")

    # Features used for clustering; the first two numeric columns by default.
    cluster_features = st.multiselect("選擇用於聚類的特徵", feature_options, default=feature_options[:2])
    if len(cluster_features) < 2:
        st.warning("請至少選擇兩個特徵進行聚類分析。")
        return

    n_clusters = st.slider("選擇聚類數量", min_value=2, max_value=10, value=3)
    # KMeans raises when asked for more clusters than there are rows.
    if len(df) < n_clusters:
        st.warning("資料筆數少於聚類數量,請減少聚類數量。")
        return

    # Standardise the features so each contributes equally to the distance.
    X_scaled = StandardScaler().fit_transform(df[cluster_features])
    # n_init is pinned so results do not depend on the installed
    # scikit-learn version's default.
    kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
    clusters = kmeans.fit_predict(X_scaled)

    # Attach the labels to a copy rather than mutating the loaded DataFrame.
    clustered = df.assign(Cluster=clusters)

    fig_scatter = px.scatter(
        clustered,
        x=cluster_features[0],
        y=cluster_features[1],
        color='Cluster',
        hover_data=['公司名稱'],
        title=f"溫室氣體數據的K-means聚類 ({cluster_features[0]} vs {cluster_features[1]})"
    )
    st.plotly_chart(fig_scatter, use_container_width=True)

    # Mean of each selected feature per cluster.
    st.subheader("聚類特徵")
    cluster_stats = clustered.groupby('Cluster')[cluster_features].mean()
    st.dataframe(cluster_stats)


if not numeric_columns:
    # Nothing numeric to plot: column_choice is None in this case.
    st.warning("沒有足夠的數值型欄位來創建圖表。請選擇另一個數據集。")
elif '公司名稱' not in selected_df.columns:
    # Every chart below groups or labels by the company-name column.
    st.error("資料集中缺少「公司名稱」欄位,無法繪製圖表。")
else:
    current_request = (dataset_choice, column_choice)

    # Button that triggers chart generation.
    if st.button("生成圖表"):
        # Show a short progress animation.
        progress_bar = st.progress(0)
        for i in range(100):
            time.sleep(0.01)
            progress_bar.progress(i + 1)
        # st.button is True only for the single rerun caused by the click.
        # Remember the request so the charts survive the reruns triggered
        # by the K-means widgets inside the tabs.
        st.session_state["chart_request"] = current_request

    # Charts stay visible until the dataset or column selection changes.
    if st.session_state.get("chart_request") == current_request:
        tab1, tab2, tab3 = st.tabs(["圓餅圖", "長條圖", "K-means分析"])

        with tab1:
            _render_pie(selected_df, dataset_choice, column_choice)

        with tab2:
            _render_bar(selected_df, dataset_choice, column_choice)

        with tab3:
            if dataset_choice == "溫室氣體":
                _render_kmeans(selected_df, numeric_columns)
            else:
                st.info("K-means分析僅適用於溫室氣體數據集。")

        st.success("圖表生成完成!")
152
+
153
# Download and register a custom font so matplotlib can display Chinese characters.
font_url = "https://drive.google.com/uc?id=1eGAsTN1HBpJAkeVM57_C7ccp7hbgSz3_&export=download"


@st.cache_resource
def _register_chinese_font(url, font_path="TaipeiSansTCBeta-Regular.ttf"):
    """Ensure the font file exists locally and register it with matplotlib.

    Wrapped in ``st.cache_resource`` so the download and registration are
    not repeated on every Streamlit rerun.

    Args:
        url: Address the font file is downloaded from.
        font_path: Local file the font is stored in.

    Returns:
        The local font path as a string.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with a 4xx/5xx status.
    """
    from pathlib import Path

    target = Path(font_path)
    # Reuse a previously downloaded file instead of fetching it again.
    if not target.exists():
        font_response = requests.get(url, timeout=30)
        # Do not write an HTTP error page to disk as if it were a font.
        font_response.raise_for_status()
        target.write_bytes(font_response.content)
    fm.fontManager.addfont(str(target))
    mpl.rc('font', family='Taipei Sans TC Beta')
    return str(target)


_register_chinese_font(font_url)