"""Streamlit dashboard comparing company EBITDA with monetized GHG emissions.

The script loads a company-level CSV, lets the user pick companies or sectors
in the sidebar, and renders a relative bar chart (EBITDA above the axis,
monetized emissions below it), a correlation figure, and a data table.
"""

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import streamlit as st

# Set page config (kept ahead of every other Streamlit call, as in the original).
st.set_page_config(layout="wide", page_title="GHG Emissions Analysis")

DATA_URL = (
    "https://raw.githubusercontent.com/danielrosehill/"
    "GHG-Emissions-Data-Pipeline/refs/heads/main/company_data.csv"
)

SCOPE_COLUMNS = ['scope_1_emissions', 'scope_2_emissions', 'scope_3_emissions']

# NOTE(review): presumably a monetary value per unit of emissions (e.g. USD per
# tonne CO2e) -- the source of 236 is not visible in this file; confirm.
EMISSIONS_PRICE_FACTOR = 236

# The original inline comment describes this division as "Convert to billions".
# NOTE(review): that only holds for a specific input unit -- confirm against
# the units used in the CSV.
MONETIZED_SCALE_DIVISOR = 1000

# Maps raw column names to the headings shown in the data table.
DISPLAY_COLUMNS = {
    'company_name': 'Company',
    'sector': 'Sector',
    'ebitda_2022': 'EBITDA (Billions USD)',
    'monetized_emissions': 'Monetized Emissions (Billions USD)',
}


# Load data
@st.cache_data
def load_data():
    """Download the company CSV and derive the emissions columns.

    The three scope columns and ``ebitda_2022`` are coerced to numeric;
    values that cannot be parsed (and missing values) become 0.

    Returns:
        pandas.DataFrame: the CSV contents plus ``total_emissions`` (sum of
        the three scopes) and ``monetized_emissions`` (total scaled by
        ``EMISSIONS_PRICE_FACTOR / MONETIZED_SCALE_DIVISOR``, rounded to two
        decimals).
    """
    df = pd.read_csv(DATA_URL)

    # Calculate total emissions and handle NaN values.
    for column in SCOPE_COLUMNS:
        df[column] = pd.to_numeric(df[column], errors='coerce').fillna(0)
    df['total_emissions'] = (
        df['scope_1_emissions'] + df['scope_2_emissions'] + df['scope_3_emissions']
    )

    monetized = (df['total_emissions'] * EMISSIONS_PRICE_FACTOR) / MONETIZED_SCALE_DIVISOR
    df['monetized_emissions'] = monetized.round(2)

    # Convert EBITDA to numeric and handle NaN values.
    df['ebitda_2022'] = pd.to_numeric(df['ebitda_2022'], errors='coerce').fillna(0)

    return df


df = load_data()

# Sidebar
st.sidebar.title("Selection Options")
selection_mode = st.sidebar.radio("Selection Mode", ["Individual Companies", "By Sector"])

if selection_mode == "Individual Companies":
    selected_companies = st.sidebar.multiselect(
        "Select Companies (max 5)",
        options=df['company_name'].dropna().unique(),
        max_selections=5,
    )
    filtered_df = df[df['company_name'].isin(selected_companies)]
else:
    selected_sectors = st.sidebar.multiselect(
        "Select Sectors",
        options=df['sector'].dropna().unique(),
    )
    filtered_df = df[df['sector'].isin(selected_sectors)]

# Main content
st.title("Greenhouse Gas Emissions vs Financial Performance")

if not filtered_df.empty:
    # Create visualization
    fig = go.Figure()

    # Add EBITDA bars
    fig.add_trace(go.Bar(
        x=filtered_df['company_name'],
        y=filtered_df['ebitda_2022'],
        name='EBITDA',
        marker_color='green',
    ))

    # Add monetized emissions bars; negated so they hang below the axis.
    fig.add_trace(go.Bar(
        x=filtered_df['company_name'],
        y=-filtered_df['monetized_emissions'],
        name='Monetized Emissions',
        marker_color='red',
    ))

    fig.update_layout(
        barmode='relative',
        title='EBITDA vs Monetized Emissions (Billions USD)',
        yaxis_title='Billions USD',
        height=600,
        showlegend=True,
        xaxis_tickangle=-45,
    )

    st.plotly_chart(fig, use_container_width=True)

    # Calculate correlation
    # NOTE(review): load_data() already replaces missing values with 0, so this
    # dropna() removes nothing and zero-filled rows take part in the
    # correlation. Confirm whether those rows should be excluded instead.
    valid_data = filtered_df[['ebitda_2022', 'monetized_emissions']].dropna()
    if len(valid_data) > 1:  # Need at least 2 points for correlation
        if valid_data.nunique().min() > 1:
            correlation = np.corrcoef(
                valid_data['ebitda_2022'], valid_data['monetized_emissions']
            )[0, 1]
            st.write(f"Correlation between EBITDA and Monetized Emissions: {correlation:.2f}")
        else:
            # A constant series has zero variance, for which np.corrcoef
            # yields NaN and emits a RuntimeWarning; report that explicitly.
            st.write(
                "Correlation between EBITDA and Monetized Emissions: "
                "not defined (one of the series has no variation)"
            )

    # Display data table
    st.subheader("Data Table")
    # rename() returns a new frame, so the filtered slice is never mutated.
    display_df = filtered_df[list(DISPLAY_COLUMNS)].rename(columns=DISPLAY_COLUMNS)
    st.dataframe(display_df)
else:
    st.write("Please select companies or sectors to visualize data")