# Page header captured together with the file (not part of the program):
# Spaces: Sleeping | File size: 9,484 Bytes | ee00e6a a5e171c
# (line-number gutter 1-239 omitted)
import streamlit as st
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
# Load data function
def load_data(uploaded_file):
    """Read an uploaded CSV into a DataFrame with text identifier columns.

    Args:
        uploaded_file: A path or file-like object accepted by
            ``pd.read_csv``, or ``None`` when nothing has been uploaded.

    Returns:
        The parsed DataFrame with missing cells filled, or ``None`` (after
        showing a Streamlit warning) when ``uploaded_file`` is ``None``.
    """
    if uploaded_file is None:
        st.warning("請上傳檔案。")
        return None

    id_columns = ('出表日期', '公司代號')

    # Parse the identifier columns as text from the start. Letting pandas
    # infer a numeric dtype first drops leading zeros ("0050" -> "50"), and a
    # single missing code turns the whole column into floats ("1101.0"),
    # which later breaks equality lookups against the user-typed code.
    df = pd.read_csv(uploaded_file, dtype={col: str for col in id_columns})

    # Missing identifiers become the text "0"; every other column keeps the
    # numeric 0 placeholder.
    fill_values = {
        col: ('0' if col in id_columns else 0) for col in df.columns
    }
    df = df.fillna(fill_values)

    for col in id_columns:
        if col in df.columns:
            df[col] = df[col].astype(str)
    return df
# Merge dataframes
def merge_dataframes(df1, df2, on_columns):
    """Outer-join two DataFrames on ``on_columns`` with keys compared as text.

    Args:
        df1: Left DataFrame, or ``None``.
        df2: Right DataFrame, or ``None``.
        on_columns: List of column names to join on.

    Returns:
        The outer-merged DataFrame, or ``None`` if either input is ``None``.
        The inputs themselves are left unmodified.
    """
    if df1 is None or df2 is None:
        return None

    # Only keys present on both sides are cast; pd.merge is still called with
    # the full on_columns list, so a key absent from either frame is reported
    # by pandas rather than silently dropped.
    shared_keys = [
        col for col in on_columns
        if col in df1.columns and col in df2.columns
    ]
    as_text = {col: str for col in shared_keys}

    # astype returns new frames, so the caller's DataFrames keep their
    # original dtypes instead of being rewritten as a side effect.
    left = df1.astype(as_text)
    right = df2.astype(as_text)
    return pd.merge(left, right, on=on_columns, how="outer")
# Filter dataframe
def filter_dataframe(df, prefix):
    """Return the rows of ``df`` whose '公司代號' text starts with ``prefix``."""
    codes_as_text = df['公司代號'].astype(str)
    matches_prefix = codes_as_text.str.startswith(prefix)
    return df[matches_prefix]
# Get specific company data
def get_specific_company(df, company_code):
    """Return the rows of ``df`` whose '公司代號' equals ``company_code``."""
    is_requested_company = df['公司代號'].eq(company_code)
    return df.loc[is_requested_company]
# Plot radar chart
def plot_radar_chart(avg_values, specific_company_values, categories, prefix, specific_company_name):
    """Render a two-trace radar chart: prefix-group averages vs. one company.

    Args:
        avg_values: Radial values for the group trace.
        specific_company_values: Radial values for the company trace.
        categories: Axis labels shared by both traces.
        prefix: Code prefix shown in the group trace's legend label.
        specific_company_name: Legend label for the company trace.
    """
    # (radial values, legend label) in drawing order: group first, company second.
    traces = (
        (avg_values, f"股號前兩位『{prefix}』的族群"),
        (specific_company_values, f'{specific_company_name}'),
    )

    radar = go.Figure()
    for radii, legend_label in traces:
        radar.add_trace(
            go.Scatterpolar(
                r=radii,
                theta=categories,
                fill='toself',
                name=legend_label,
            )
        )

    radial_axis = {'visible': True, 'range': [0, 100]}
    radar.update_layout(
        polar={'radialaxis': radial_axis},
        showlegend=True,
        title="董事會和投資人溝通指標比較",
    )
    st.plotly_chart(radar)
# Plot emission chart
def plot_emission_chart(filtered_df, avg_emissions, prefix):
    """Render grouped emission bars per company with a dashed mean line per scope.

    Args:
        filtered_df: Rows to plot; read for '公司名稱' and the three scope columns.
        avg_emissions: Mapping-like object indexed by scope column name,
            giving the value drawn as that scope's horizontal line.
        prefix: Code prefix shown in the chart title.
    """
    # Each emission scope column is paired with the colour used for both its
    # bars and its mean line.
    scope_colors = (
        ('範疇一排放量(噸CO2e)', 'blue'),
        ('範疇二排放量(噸CO2e)', 'green'),
        ('範疇三排放量(噸CO2e)', 'red'),
    )
    company_names = filtered_df['公司名稱']
    row_count = len(filtered_df)

    chart = go.Figure()
    for scope, color in scope_colors:
        chart.add_trace(
            go.Bar(
                x=company_names,
                y=filtered_df[scope],
                name=scope,
                marker_color=color,
            )
        )
        # Repeat the mean once per company so the line spans the whole x axis.
        chart.add_trace(
            go.Scatter(
                x=company_names,
                y=[avg_emissions[scope]] * row_count,
                mode='lines',
                line={'color': color, 'dash': 'dash'},
                name=f'{scope}平均值',
            )
        )

    chart.update_layout(
        title=f"代號前兩位『{prefix}』的族群 - 各範疇排放量",
        barmode='group',
        xaxis_title="公司名稱",
        yaxis_title="排放量(噸CO2e)",
    )
    st.plotly_chart(chart)
# Plot energy usage
def plot_energy_usage(filtered_df, avg_energy_usage):
    """Render a bar chart of renewable-energy usage with a dashed group-mean line.

    Args:
        filtered_df: Rows to plot; read for '公司名稱' and
            '使用率(再生能源/總能源)'.
        avg_energy_usage: Value drawn as the horizontal mean line.
    """
    chart = px.bar(
        filtered_df,
        x='公司名稱',
        y='使用率(再生能源/總能源)',
        title="再生能源使用率",
    )

    # One copy of the mean per row so the line covers every bar.
    mean_line = go.Scatter(
        x=filtered_df['公司名稱'],
        y=[avg_energy_usage] * len(filtered_df),
        mode='lines',
        line={'color': 'red', 'dash': 'dash'},
        name='群體平均值',
    )
    chart.add_trace(mean_line)

    chart.update_layout(
        xaxis_title="公司名稱",
        yaxis_title="再生能源使用率 (%)",
    )
    st.plotly_chart(chart)
# Plot waste management box plots
def plot_waste_management(df_group, df_specific, company_name):
    """Render one box plot per waste metric and mark one company's value on each.

    Args:
        df_group: Rows forming the comparison group (box plot data).
        df_specific: Rows for the highlighted company; only the first row is
            used, and no marker is drawn when it is empty.
        company_name: Name used in the title, legend and hover text.
    """
    waste_metrics = (
        '有害廢棄物量-數據(公噸)',
        '非有害廢棄物量-數據(公噸)',
        '總重量(有害+非有害)-數據(公噸)',
        '廢棄物密集度-密集度(公噸/單位)',
    )
    has_company_row = not df_specific.empty

    chart = go.Figure()
    for metric in waste_metrics:
        group_label = f'母群體-{metric}'
        chart.add_trace(
            go.Box(
                y=df_group[metric],
                name=group_label,
                boxmean=True,
                boxpoints='outliers',
            )
        )
        if not has_company_row:
            continue

        # The marker uses the box's own name as its x value so it is drawn
        # on top of that box.
        company_value = df_specific[metric].values[0]
        chart.add_trace(
            go.Scatter(
                y=[company_value],
                x=[group_label],
                mode='markers',
                name=f'{company_name}-{metric}',
                marker={'color': 'red', 'size': 11, 'symbol': 'star'},
                showlegend=True,
                hovertext=f'公司名稱: {company_name}, 值: {company_value}',
            )
        )

    chart.update_layout(
        title=f"廢棄物統計數據箱型圖 (包含指定公司名稱 {company_name} 數據)",
        yaxis_title="數值 (公噸)",
        xaxis_title="廢棄物項目",
        boxmode='group',
    )
    st.plotly_chart(chart)
# Main function update
def main():
    """Build the dashboard: upload widgets, data loading, and three analyses.

    The sidebar collects five CSV uploads, a company-code prefix and a single
    company code. Each analysis section is rendered only when the data and
    inputs it needs are present.
    """
    st.title("公司數據分析儀表板")

    # File upload
    st.sidebar.header("上傳 CSV 檔案")
    investor_file = st.sidebar.file_uploader("上傳 投資人溝通.csv", type=["csv"])
    board_file = st.sidebar.file_uploader("上傳 董事會.csv", type=["csv"])
    emission_file = st.sidebar.file_uploader("上傳 溫室氣體排放.csv", type=["csv"])
    energy_file = st.sidebar.file_uploader("上傳 能源管理.csv", type=["csv"])
    waste_file = st.sidebar.file_uploader("上傳 廢棄物管理.csv", type=["csv"])

    # Load data
    investor_df = load_data(investor_file)
    board_df = load_data(board_file)
    emission_df = load_data(emission_file)
    energy_df = load_data(energy_file)
    waste_df = load_data(waste_file)

    # Merge data
    merge_keys = ["公司代號", "公司名稱", "出表日期", "報告年度"]
    merged_df1 = merge_dataframes(investor_df, board_df, merge_keys)
    merged_df2 = merge_dataframes(emission_df, energy_df, merge_keys)

    # User input
    prefix = st.sidebar.text_input("輸入公司代號前兩位")
    specific_company_code = st.sidebar.text_input("輸入四位數字公司代號")

    # The waste and governance charts both compare one company against its
    # prefix group, so both need the prefix and the company code. Requiring
    # the code here avoids a "not found" warning for an empty code.
    if waste_df is not None and prefix and specific_company_code:
        _render_waste_section(waste_df, prefix, specific_company_code)

    if merged_df1 is not None and prefix and specific_company_code:
        _render_governance_section(merged_df1, prefix, specific_company_code)

    # The group-level emission/energy charts only need the prefix; the
    # per-company comparison inside is skipped when no code was entered.
    if merged_df2 is not None and prefix:
        _render_emission_energy_section(merged_df2, prefix, specific_company_code)


def _percent_to_float(series):
    """Remove '%' characters from a column and cast the result to float."""
    return series.replace({'%': ''}, regex=True).astype(float)


def _render_waste_section(waste_df, prefix, specific_company_code):
    """Draw the waste box plots for the prefix group and the chosen company."""
    code_text = waste_df['公司代號'].astype(str)
    # Compare the first two characters directly instead of storing a helper
    # column on the uploaded frame.
    df_group = waste_df[code_text.str[:2] == prefix]
    df_specific = waste_df[waste_df['公司代號'] == specific_company_code]

    if df_specific.empty:
        st.warning(f"找不到公司代號為 {specific_company_code} 的廢棄物管理數據")
        return

    company_name = df_specific['公司名稱'].values[0]
    plot_waste_management(df_group, df_specific, company_name)


def _render_governance_section(merged_df, prefix, specific_company_code):
    """Draw the board / investor-communication radar chart for one company."""
    columns_of_interest = [
        '董事出席董事會出席率',
        '董事進修時數符合進修要點比率',
        '公司年度召開法說會次數(次)',
    ]
    # The two ratio columns may carry '%' characters; strip them before averaging.
    for col in ['董事出席董事會出席率', '董事進修時數符合進修要點比率']:
        merged_df[col] = _percent_to_float(merged_df[col])

    group_df = filter_dataframe(merged_df, prefix)
    avg_values = group_df[columns_of_interest].mean()

    company_df = get_specific_company(merged_df, specific_company_code)
    if company_df.empty:
        st.warning(f"找不到公司代號 {specific_company_code} 的資料")
        return

    specific_company_name = company_df['公司名稱'].iloc[0]
    specific_company_values = company_df[columns_of_interest].iloc[0]
    plot_radar_chart(
        avg_values,
        specific_company_values,
        ['董事出席率', '董事進修時數符合比率', '年度法說會次數'],
        prefix,
        specific_company_name,
    )


def _render_emission_energy_section(merged_df, prefix, specific_company_code):
    """Draw the group emission/energy charts and the optional company table."""
    emission_columns = [
        '範疇一排放量(噸CO2e)',
        '範疇二排放量(噸CO2e)',
        '範疇三排放量(噸CO2e)',
    ]
    energy_column = '使用率(再生能源/總能源)'
    merged_df[energy_column] = _percent_to_float(merged_df[energy_column])

    group_df = filter_dataframe(merged_df, prefix)
    if group_df.empty:
        st.warning(f"找不到前兩碼為 {prefix} 的公司數據")
        return

    avg_emissions = group_df[emission_columns].mean()
    plot_emission_chart(group_df, avg_emissions, prefix)
    avg_energy_usage = group_df[energy_column].mean()
    plot_energy_usage(group_df, avg_energy_usage)

    # No company code entered: there is nothing to compare, and warning about
    # a blank code would be misleading.
    if not specific_company_code:
        return

    company_df = get_specific_company(merged_df, specific_company_code)
    if company_df.empty:
        st.warning(f"找不到公司代號 {specific_company_code} 的能源管理數據")
        return

    specific_energy_usage = company_df[energy_column].iloc[0]
    comparison_df = pd.DataFrame({
        '公司名稱': [company_df['公司名稱'].iloc[0], f"{prefix} 母群體平均"],
        '再生能源使用率 (%)': [specific_energy_usage, avg_energy_usage],
    })
    st.write("\n再生能源使用率比較表格:")
    st.write(comparison_df)
# Run the dashboard only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
# https://drive.google.com/drive/folders/1uZnryIluMn-bszuHsFMbLecbL6Vq3HyI?usp=sharing