Spaces:
Sleeping
Sleeping
import streamlit as st | |
import pandas as pd | |
import plotly.graph_objects as go | |
import plotly.express as px | |
# Load data function | |
def load_data(uploaded_file): | |
if uploaded_file is not None: | |
df = pd.read_csv(uploaded_file) | |
df.fillna(0, inplace=True) | |
if '出表日期' in df.columns: | |
df['出表日期'] = df['出表日期'].astype(str) | |
if '公司代號' in df.columns: | |
df['公司代號'] = df['公司代號'].astype(str) | |
return df | |
else: | |
st.warning("請上傳檔案。") | |
return None | |
# Merge dataframes | |
def merge_dataframes(df1, df2, on_columns): | |
if df1 is None or df2 is None: | |
return None | |
for col in on_columns: | |
if col in df1.columns and col in df2.columns: | |
df1[col] = df1[col].astype(str) | |
df2[col] = df2[col].astype(str) | |
return pd.merge(df1, df2, on=on_columns, how="outer") | |
# Filter dataframe | |
def filter_dataframe(df, prefix): | |
return df[df['公司代號'].astype(str).str.startswith(prefix)] | |
# Get specific company data | |
def get_specific_company(df, company_code): | |
return df[df['公司代號'] == company_code] | |
# Plot radar chart | |
def plot_radar_chart(avg_values, specific_company_values, categories, prefix, specific_company_name): | |
fig = go.Figure() | |
fig.add_trace(go.Scatterpolar( | |
r=avg_values, | |
theta=categories, | |
fill='toself', | |
name=f"股號前兩位『{prefix}』的族群" | |
)) | |
fig.add_trace(go.Scatterpolar( | |
r=specific_company_values, | |
theta=categories, | |
fill='toself', | |
name=f'{specific_company_name}' | |
)) | |
fig.update_layout( | |
polar=dict(radialaxis=dict(visible=True, range=[0, 100])), | |
showlegend=True, | |
title="董事會和投資人溝通指標比較" | |
) | |
st.plotly_chart(fig) | |
# Plot emission chart | |
def plot_emission_chart(filtered_df, avg_emissions, prefix): | |
emission_columns = ['範疇一排放量(噸CO2e)', '範疇二排放量(噸CO2e)', '範疇三排放量(噸CO2e)'] | |
fig = go.Figure() | |
for scope, color in zip(emission_columns, ['blue', 'green', 'red']): | |
fig.add_trace(go.Bar( | |
x=filtered_df['公司名稱'], | |
y=filtered_df[scope], | |
name=scope, | |
marker_color=color | |
)) | |
fig.add_trace(go.Scatter( | |
x=filtered_df['公司名稱'], | |
y=[avg_emissions[scope]] * len(filtered_df), | |
mode='lines', | |
line=dict(color=color, dash='dash'), | |
name=f'{scope}平均值' | |
)) | |
fig.update_layout( | |
title=f"代號前兩位『{prefix}』的族群 - 各範疇排放量", | |
barmode='group', | |
xaxis_title="公司名稱", | |
yaxis_title="排放量(噸CO2e)" | |
) | |
st.plotly_chart(fig) | |
# Plot energy usage | |
def plot_energy_usage(filtered_df, avg_energy_usage): | |
fig_energy = px.bar(filtered_df, x='公司名稱', y='使用率(再生能源/總能源)', title="再生能源使用率") | |
fig_energy.add_trace(go.Scatter( | |
x=filtered_df['公司名稱'], | |
y=[avg_energy_usage] * len(filtered_df), | |
mode='lines', | |
line=dict(color='red', dash='dash'), | |
name='群體平均值' | |
)) | |
fig_energy.update_layout( | |
yaxis_title="再生能源使用率 (%)", | |
xaxis_title="公司名稱" | |
) | |
st.plotly_chart(fig_energy) | |
# Plot waste management box plots | |
def plot_waste_management(df_group, df_specific, company_name): | |
columns_to_analyze = ['有害廢棄物量-數據(公噸)', '非有害廢棄物量-數據(公噸)', '總重量(有害+非有害)-數據(公噸)', '廢棄物密集度-密集度(公噸/單位)'] | |
fig = go.Figure() | |
# Loop through the columns and plot box plot for each column | |
for col in columns_to_analyze: | |
fig.add_trace(go.Box(y=df_group[col], name=f'母群體-{col}', boxmean=True, boxpoints='outliers')) | |
if not df_specific.empty: | |
specific_value = df_specific[col].values[0] | |
# Highlight specific company's value | |
fig.add_trace(go.Scatter( | |
y=[specific_value], | |
x=[f'母群體-{col}'], | |
mode='markers', | |
name=f'{company_name}-{col}', | |
marker=dict(color='red', size=11, symbol='star'), | |
showlegend=True, | |
hovertext=f'公司名稱: {company_name}, 值: {specific_value}' | |
)) | |
# Update layout | |
fig.update_layout( | |
title=f"廢棄物統計數據箱型圖 (包含指定公司名稱 {company_name} 數據)", | |
yaxis_title="數值 (公噸)", | |
xaxis_title="廢棄物項目", | |
boxmode='group' | |
) | |
# Display the plot in Streamlit | |
st.plotly_chart(fig) | |
# Main function update | |
def main(): | |
st.title("公司數據分析儀表板") | |
# File upload | |
st.sidebar.header("上傳 CSV 檔案") | |
investor_file = st.sidebar.file_uploader("上傳 投資人溝通.csv", type=["csv"]) | |
board_file = st.sidebar.file_uploader("上傳 董事會.csv", type=["csv"]) | |
emission_file = st.sidebar.file_uploader("上傳 溫室氣體排放.csv", type=["csv"]) | |
energy_file = st.sidebar.file_uploader("上傳 能源管理.csv", type=["csv"]) | |
waste_file = st.sidebar.file_uploader("上傳 廢棄物管理.csv", type=["csv"]) | |
# Load data | |
investor_df = load_data(investor_file) | |
board_df = load_data(board_file) | |
emission_df = load_data(emission_file) | |
energy_df = load_data(energy_file) | |
waste_df = load_data(waste_file) | |
# Merge data | |
merged_df1 = merge_dataframes(investor_df, board_df, ["公司代號", "公司名稱", "出表日期", "報告年度"]) | |
merged_df2 = merge_dataframes(emission_df, energy_df, ["公司代號", "公司名稱", "出表日期", "報告年度"]) | |
# User input | |
prefix = st.sidebar.text_input("輸入公司代號前兩位") | |
specific_company_code = st.sidebar.text_input("輸入四位數字公司代號") | |
# Waste management analysis | |
if waste_df is not None and prefix: | |
waste_df['公司代號前兩位'] = waste_df['公司代號'].astype(str).str[:2] | |
df_group = waste_df[waste_df['公司代號前兩位'] == prefix] | |
df_specific = waste_df[waste_df['公司代號'] == specific_company_code] | |
if not df_specific.empty: | |
company_name = df_specific['公司名稱'].values[0] | |
plot_waste_management(df_group, df_specific, company_name) | |
else: | |
st.warning(f"找不到公司代號為 {specific_company_code} 的廢棄物管理數據") | |
# Handle 投資人溝通和董事會資料 | |
if merged_df1 is not None and prefix and specific_company_code: | |
columns_of_interest = ['董事出席董事會出席率', '董事進修時數符合進修要點比率', '公司年度召開法說會次數(次)'] | |
for col in ['董事出席董事會出席率', '董事進修時數符合進修要點比率']: | |
merged_df1[col] = merged_df1[col].replace({'%': ''}, regex=True).astype(float) | |
filtered_df1 = filter_dataframe(merged_df1, prefix) | |
avg_values = filtered_df1[columns_of_interest].mean() | |
specific_company_df1 = get_specific_company(merged_df1, specific_company_code) | |
if not specific_company_df1.empty: | |
specific_company_name = specific_company_df1['公司名稱'].iloc[0] | |
specific_company_values = specific_company_df1[columns_of_interest].iloc[0] | |
plot_radar_chart(avg_values, specific_company_values, ['董事出席率', '董事進修時數符合比率', '年度法說會次數'], prefix, specific_company_name) | |
else: | |
st.warning(f"找不到公司代號 {specific_company_code} 的資料") | |
# Handle 溫室氣體排放和能源管理資料 | |
if merged_df2 is not None and prefix: | |
emission_columns = ['範疇一排放量(噸CO2e)', '範疇二排放量(噸CO2e)', '範疇三排放量(噸CO2e)'] | |
energy_column = '使用率(再生能源/總能源)' | |
merged_df2[energy_column] = merged_df2[energy_column].replace({'%': ''}, regex=True).astype(float) | |
filtered_df2 = filter_dataframe(merged_df2, prefix) | |
specific_company_df2 = get_specific_company(merged_df2, specific_company_code) | |
if not filtered_df2.empty: | |
avg_emissions = filtered_df2[emission_columns].mean() | |
plot_emission_chart(filtered_df2, avg_emissions, prefix) | |
avg_energy_usage = filtered_df2[energy_column].mean() | |
plot_energy_usage(filtered_df2, avg_energy_usage) | |
if not specific_company_df2.empty: | |
specific_energy_usage = specific_company_df2[energy_column].iloc[0] | |
comparison_data = { | |
'公司名稱': [specific_company_df2['公司名稱'].iloc[0], f"{prefix} 母群體平均"], | |
'再生能源使用率 (%)': [specific_energy_usage, avg_energy_usage] | |
} | |
comparison_df = pd.DataFrame(comparison_data) | |
st.write("\n再生能源使用率比較表格:") | |
st.write(comparison_df) | |
else: | |
st.warning(f"找不到公司代號 {specific_company_code} 的能源管理數據") | |
else: | |
st.warning(f"找不到前兩碼為 {prefix} 的公司數據") | |
if __name__ == "__main__": | |
main() | |
# https://drive.google.com/drive/folders/1uZnryIluMn-bszuHsFMbLecbL6Vq3HyI?usp=sharing |