File size: 3,380 Bytes
2aacdd5
 
995d03d
 
2aacdd5
995d03d
 
2aacdd5
995d03d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2aacdd5
995d03d
2aacdd5
995d03d
 
 
2aacdd5
995d03d
 
 
 
 
 
 
 
 
 
 
 
 
2aacdd5
995d03d
 
2aacdd5
995d03d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import streamlit as st
import pandas as pd
import plotly.graph_objects as go
import numpy as np

# Configure the page before any other Streamlit call: title and "wide" layout
st.set_page_config(
    page_title="GHG Emissions Analysis",
    layout="wide",
)

# Load data
@st.cache_data
def load_data():
    """Fetch the company CSV and derive the emissions columns used by the app.

    The three scope emission columns and ``ebitda_2022`` are coerced to
    numeric; entries that are missing or cannot be parsed become 0. Two
    columns are added: ``total_emissions`` (sum of the three scope columns)
    and ``monetized_emissions`` (``total_emissions * 236 / 1000``, rounded
    to two decimals).

    Returns:
        The prepared ``pandas.DataFrame``.
    """
    source_url = "https://raw.githubusercontent.com/danielrosehill/GHG-Emissions-Data-Pipeline/refs/heads/main/company_data.csv"
    frame = pd.read_csv(source_url)

    # Coerce every scope column to numbers; unparseable/missing values -> 0
    for scope_col in ('scope_1_emissions', 'scope_2_emissions', 'scope_3_emissions'):
        frame[scope_col] = pd.to_numeric(frame[scope_col], errors='coerce').fillna(0)

    frame['total_emissions'] = (
        frame['scope_1_emissions']
        + frame['scope_2_emissions']
        + frame['scope_3_emissions']
    )

    # Multiply by 236 and divide by 1000 (the original code describes this as
    # a conversion to billions), then keep two decimals.
    scaled_emissions = (frame['total_emissions'] * 236) / 1000
    frame['monetized_emissions'] = scaled_emissions.round(2)

    # EBITDA gets the same numeric coercion, with gaps replaced by 0
    frame['ebitda_2022'] = pd.to_numeric(frame['ebitda_2022'], errors='coerce').fillna(0)

    return frame

df = load_data()

# Sidebar: pick the filtering mode, then the values to filter on
st.sidebar.title("Selection Options")
selection_mode = st.sidebar.radio("Selection Mode", ["Individual Companies", "By Sector"])

if selection_mode == "Individual Companies":
    # Company mode: the multiselect is capped at five entries
    filter_column = 'company_name'
    chosen_values = st.sidebar.multiselect(
        "Select Companies (max 5)",
        options=df['company_name'].dropna().unique(),
        max_selections=5
    )
else:
    # Sector mode: any number of sectors may be chosen
    filter_column = 'sector'
    chosen_values = st.sidebar.multiselect(
        "Select Sectors",
        options=df['sector'].dropna().unique()
    )

# Keep only the rows whose company/sector is among the chosen values
filtered_df = df[df[filter_column].isin(chosen_values)]

# Main content
st.title("Greenhouse Gas Emissions vs Financial Performance")

if not filtered_df.empty:
    # Create visualization: EBITDA is plotted as-is and monetized emissions
    # are negated, so the two bars for a company point in opposite directions.
    fig = go.Figure()

    # Add EBITDA bars
    fig.add_trace(go.Bar(
        x=filtered_df['company_name'],
        y=filtered_df['ebitda_2022'],
        name='EBITDA',
        marker_color='green'
    ))

    # Add monetized emissions bars (negated values)
    fig.add_trace(go.Bar(
        x=filtered_df['company_name'],
        y=-filtered_df['monetized_emissions'],
        name='Monetized Emissions',
        marker_color='red'
    ))

    fig.update_layout(
        barmode='relative',
        title='EBITDA vs Monetized Emissions (Billions USD)',
        yaxis_title='Billions USD',
        height=600,
        showlegend=True,
        xaxis_tickangle=-45
    )

    st.plotly_chart(fig, use_container_width=True)

    # Calculate correlation
    valid_data = filtered_df[['ebitda_2022', 'monetized_emissions']].dropna()
    if len(valid_data) > 1:  # Need at least 2 points for correlation
        ebitda_values = valid_data['ebitda_2022']
        emission_values = valid_data['monetized_emissions']
        # The correlation coefficient is undefined when either series is
        # constant (zero variance): np.corrcoef would emit a RuntimeWarning
        # and return NaN, which would be shown to the user as "nan".
        if ebitda_values.nunique() > 1 and emission_values.nunique() > 1:
            correlation = np.corrcoef(ebitda_values, emission_values)[0, 1]
            st.write(f"Correlation between EBITDA and Monetized Emissions: {correlation:.2f}")
        else:
            st.write(
                "Correlation between EBITDA and Monetized Emissions: "
                "not defined (one of the series has no variation)"
            )

    # Display data table
    st.subheader("Data Table")
    # rename() returns a new frame, so the header change never touches
    # filtered_df or relies on mutating a column-subset in place.
    display_df = filtered_df[['company_name', 'sector', 'ebitda_2022', 'monetized_emissions']].rename(
        columns={
            'company_name': 'Company',
            'sector': 'Sector',
            'ebitda_2022': 'EBITDA (Billions USD)',
            'monetized_emissions': 'Monetized Emissions (Billions USD)',
        }
    )
    st.dataframe(display_df)
else:
    st.write("Please select companies or sectors to visualize data")