# danielrosehill's picture
# updated
# 995d03d
import streamlit as st
import pandas as pd
import plotly.graph_objects as go
import numpy as np
# Page setup: wide layout and the page title passed to Streamlit.
st.set_page_config(
    page_title="GHG Emissions Analysis",
    layout="wide",
)
# Data loading (result is memoized by Streamlit's data cache)
@st.cache_data
def load_data():
    """Read the company CSV and add the derived emissions columns.

    The three scope columns and ``ebitda_2022`` are coerced to numbers;
    anything that cannot be parsed (or is missing) becomes 0.  Two columns
    are added: ``total_emissions`` (sum of scopes 1-3) and
    ``monetized_emissions`` (total * 236 / 1000, rounded to 2 decimals).

    Returns:
        The loaded ``pandas.DataFrame`` with the columns described above.
    """
    source_url = "https://raw.githubusercontent.com/danielrosehill/GHG-Emissions-Data-Pipeline/refs/heads/main/company_data.csv"
    df = pd.read_csv(source_url)

    # Coerce every numeric input column the same way: unparseable -> NaN -> 0.
    for column in ('scope_1_emissions', 'scope_2_emissions', 'scope_3_emissions'):
        df[column] = pd.to_numeric(df[column], errors='coerce').fillna(0)

    df['total_emissions'] = (
        df['scope_1_emissions'] + df['scope_2_emissions'] + df['scope_3_emissions']
    )

    # NOTE(review): 236 looks like a per-unit monetization rate and the /1000
    # rescales the result to billions (per the original comment) - confirm
    # the units against the dataset.
    monetized = (df['total_emissions'] * 236) / 1000
    df['monetized_emissions'] = monetized.round(2)

    # EBITDA gets the same numeric coercion, with missing values set to 0.
    df['ebitda_2022'] = pd.to_numeric(df['ebitda_2022'], errors='coerce').fillna(0)
    return df
# Load the dataset once for the rest of the script.
df = load_data()

# Sidebar: the user filters either by individual company or by sector.
st.sidebar.title("Selection Options")
selection_mode = st.sidebar.radio(
    "Selection Mode",
    ["Individual Companies", "By Sector"],
)

if selection_mode != "Individual Companies":
    # Sector mode: keep every row whose sector was picked.
    sector_options = df['sector'].dropna().unique()
    selected_sector = st.sidebar.multiselect(
        "Select Sectors",
        options=sector_options,
    )
    row_mask = df['sector'].isin(selected_sector)
    filtered_df = df[row_mask]
else:
    # Company mode: the widget itself caps the selection at five names.
    company_options = df['company_name'].dropna().unique()
    selected_companies = st.sidebar.multiselect(
        "Select Companies (max 5)",
        options=company_options,
        max_selections=5,
    )
    row_mask = df['company_name'].isin(selected_companies)
    filtered_df = df[row_mask]
# Main content: chart, correlation and data table for the current selection.
st.title("Greenhouse Gas Emissions vs Financial Performance")

if not filtered_df.empty:
    # Diverging bar chart: EBITDA is drawn upwards and monetized emissions
    # are negated so they are drawn downwards from the same baseline.
    fig = go.Figure()

    # EBITDA bars
    fig.add_trace(go.Bar(
        x=filtered_df['company_name'],
        y=filtered_df['ebitda_2022'],
        name='EBITDA',
        marker_color='green',
    ))

    # Monetized emissions bars (sign flipped on purpose, see above)
    fig.add_trace(go.Bar(
        x=filtered_df['company_name'],
        y=-filtered_df['monetized_emissions'],
        name='Monetized Emissions',
        marker_color='red',
    ))

    fig.update_layout(
        barmode='relative',
        title='EBITDA vs Monetized Emissions (Billions USD)',
        yaxis_title='Billions USD',
        height=600,
        showlegend=True,
        xaxis_tickangle=-45,
    )
    st.plotly_chart(fig, use_container_width=True)

    # Pearson correlation between the two plotted series.
    valid_data = filtered_df[['ebitda_2022', 'monetized_emissions']].dropna()
    if len(valid_data) > 1:  # Need at least 2 points for correlation
        # The coefficient divides by each series' standard deviation, so a
        # constant column (e.g. all zeros) would yield NaN plus a NumPy
        # RuntimeWarning.  Report that case explicitly instead of "nan".
        if (valid_data.nunique() > 1).all():
            correlation = np.corrcoef(
                valid_data['ebitda_2022'],
                valid_data['monetized_emissions'],
            )[0, 1]
            st.write(f"Correlation between EBITDA and Monetized Emissions: {correlation:.2f}")
        else:
            st.write(
                "Correlation between EBITDA and Monetized Emissions: "
                "not defined (one of the series is constant)"
            )

    # Data table with human-readable column headers.  rename() returns a new
    # frame, so the selection taken from filtered_df is never mutated.
    st.subheader("Data Table")
    display_df = filtered_df[
        ['company_name', 'sector', 'ebitda_2022', 'monetized_emissions']
    ].rename(columns={
        'company_name': 'Company',
        'sector': 'Sector',
        'ebitda_2022': 'EBITDA (Billions USD)',
        'monetized_emissions': 'Monetized Emissions (Billions USD)',
    })
    st.dataframe(display_df)
else:
    # Nothing selected yet (or the selection matched no rows).
    st.write("Please select companies or sectors to visualize data")