File size: 6,085 Bytes
393f86d
 
32ac110
 
393f86d
 
db8cc8e
 
 
32ac110
 
 
db8cc8e
32ac110
 
 
9ca002e
32ac110
 
 
 
 
db8cc8e
393f86d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
795ccdc
393f86d
 
 
 
248e2bb
795ccdc
393f86d
 
795ccdc
 
f924cbe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
795ccdc
 
 
 
32ac110
 
393f86d
248e2bb
 
795ccdc
 
393f86d
 
 
 
 
 
 
 
248e2bb
 
 
393f86d
 
248e2bb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import streamlit as st
import pandas as pd
import os, csv
from huggingface_hub import hf_hub_download, HfApi

HF_TOKEN = os.getenv('HUGGING_FACE_HUB_TOKEN')

CACHED_FILE_PATH = hf_hub_download(repo_id="sasha/co2_submissions", filename="dynamic_emissions.csv", repo_type="dataset")

api = HfApi()

def write_to_csv(hardware, training_time, provider, carbon_intensity, dynamic_emissions):
    """Append one submission row to the local CSV copy and upload it to the Hub.

    The five arguments are written, in this order, as a single CSV row at the
    end of ``CACHED_FILE_PATH``. The whole file is then uploaded as
    ``dynamic_emissions.csv`` to the ``sasha/co2_submissions`` dataset repo.

    Args:
        hardware: Name of the selected GPU.
        training_time: Total number of GPU hours entered by the user.
        provider: Selected provider label.
        carbon_intensity: Carbon intensity used for the estimate.
        dynamic_emissions: Computed dynamic emissions value.
    """
    row = [hardware, training_time, provider, carbon_intensity, dynamic_emissions]
    # newline='' lets the csv module control line endings itself.
    with open(CACHED_FILE_PATH, 'a', newline='', encoding='utf-8') as f:
        csv.writer(f).writerow(row)
    api.upload_file(
        path_or_fileobj=CACHED_FILE_PATH,
        path_in_repo="dynamic_emissions.csv",
        repo_id="sasha/co2_submissions",
        repo_type="dataset",
        # Authenticate explicitly with the token read from the environment;
        # when it is None, huggingface_hub falls back to its default lookup.
        token=HF_TOKEN,
    )



# Page-level settings: browser tab title and a full-width layout.
# NOTE(review): Streamlit has historically required this to run before any
# other st.* call — keep it ahead of the UI code.
st.set_page_config(page_title="AI Carbon Calculator", layout="wide")

# CSV tables hosted in the mlco2/impact GitHub repository.
tdp_url = "https://raw.githubusercontent.com/mlco2/impact/master/data/gpus.csv"
compute_url = "https://raw.githubusercontent.com/mlco2/impact/master/data/impact.csv"
electricity_url = "https://raw.githubusercontent.com/mlco2/impact/master/data/2021-10-27yearly_averages.csv"


def _gviz_csv_url(sheet_id, sheet_name):
    """Build the ``tqx=out:csv`` URL for one tab of a Google Sheets document.

    ``sheet_name`` is inserted verbatim, so it must already be URL-encoded.
    """
    return f"https://docs.google.com/spreadsheets/d/{sheet_id}/gviz/tq?tqx=out:csv&sheet={sheet_name}"


# Both tabs below live in the same spreadsheet (identical sheet ids).
server_sheet_id = "1DqYgQnEDLQVQm5acMAhLgHLD8xXCG9BIrk-_Nv6jF3k"
server_sheet_name = "Server%20Carbon%20Footprint"
server_url = _gviz_csv_url(server_sheet_id, server_sheet_name)

embodied_gpu_sheet_id = "1DqYgQnEDLQVQm5acMAhLgHLD8xXCG9BIrk-_Nv6jF3k"
embodied_gpu_sheet_name = "Scope%203%20Ratios"
embodied_gpu_url = _gviz_csv_url(embodied_gpu_sheet_id, embodied_gpu_sheet_name)

# Reference tables: per-GPU TDP and per-provider/region carbon intensity.
TDP = pd.read_csv(tdp_url)

instances = pd.read_csv(compute_url)

# Extra choice for users who are not on one of the listed providers. The
# spelling matches the help text shown under the provider selectbox, and the
# same constant is used for the comparison further down so the two cannot
# drift apart.
LOCAL_PROVIDER = 'Local/Private Infrastructure'
providers = [p.upper() for p in instances['provider'].unique().tolist()]
providers.append(LOCAL_PROVIDER)

# Divisor that turns kilograms of CO2eq into the "miles driven" comparison.
kg_per_mile = 0.348

# Loaded here but not used by the dynamic-emissions calculator in this section.
electricity = pd.read_csv(electricity_url)
servers = pd.read_csv(server_url)
embodied_gpu = pd.read_csv(embodied_gpu_url)

#st.image('images/MIT_carbon_image_narrow.png', use_column_width=True, caption = 'Image credit: ')
st.title("AI Carbon Calculator")

st.markdown('## Estimate your model\'s CO2 carbon footprint!')

st.markdown('##### You can use this tool to calculate different aspects of your model\'s carbon footprint.')
st.markdown('##### Share your data to help us get a better idea of AI model\'s carbon emissions.')

st.markdown('### Dynamic Emissions')
st.markdown('##### These are the carbon emissions produced by generating the electricity necessary for powering model training')
with st.expander("Calculate the dynamic emissions of your model"):
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        hardware = st.selectbox('GPU used', TDP['name'].tolist())
        # TDP (in watts, per the column name) of the selected GPU.
        gpu_tdp = TDP['tdp_watts'][TDP['name'] == hardware].tolist()[0]
        st.markdown("Different GPUs have different TDP (Thermal Design Power), which impacts how much energy you use.")
    with col2:
        # Negative GPU hours are meaningless and would yield negative emissions.
        training_time = st.number_input('Total number of GPU hours', min_value=0.0)
        st.markdown('This is calculated by multiplying the number of GPUs you used by the training time: '
                    'i.e. if you used 100 GPUs for 10 hours, this is equal to 100x10 = 1,000 GPU hours.')
    with col3:
        provider = st.selectbox('Provider used', providers)
        st.markdown('If you can\'t find your provider here, select "Local/Private Infrastructure".')
    with col4:
        if provider != LOCAL_PROVIDER:
            # The option labels were upper-cased for display; the table is
            # matched against the lower-cased provider name.
            provider_rows = instances[instances['provider'] == provider.lower()]
            region = st.selectbox('Region used', provider_rows['region'].unique().tolist())
            # NOTE(review): 'impact' is presumably in grams of CO2 per kWh, like
            # the manual input below — confirm against the data source.
            carbon_intensity = provider_rows['impact'][provider_rows['region'] == region].tolist()[0]
        else:
            carbon_intensity = st.number_input(
                'Carbon intensity of your energy grid, in grams of CO2 per kWh',
                min_value=0.0,
            )
            st.markdown('You can consult a resource like the [IEA](https://www.iea.org/countries) or '
                        ' [Electricity Map](https://app.electricitymaps.com/) to get this information.')
    # watts * hours * (g / kWh) / 1e6 -> kilograms, rounded to a whole number.
    dynamic_emissions = round(gpu_tdp * training_time * carbon_intensity/1000000)
    st.metric(label="Dynamic emissions", value=str(dynamic_emissions)+' kilograms of CO2eq')
    miles_driven = round(dynamic_emissions/kg_per_mile, 1)
    st.markdown('This is roughly equivalent to ' + str(miles_driven) + ' miles driven in an average US car'
                ' produced in 2021. [(Source: energy.gov)](https://www.energy.gov/eere/vehicles/articles/fotw-1223-january-31-2022-average-carbon-dioxide-emissions-2021-model-year)')
    # The callback receives the values computed in this run via `args`.
    st.button(
        label="Anonymously share my data",
        help="Share the data from your model anonymously for research purposes!",
        on_click=write_to_csv,
        args=(hardware, training_time, provider, carbon_intensity, dynamic_emissions),
    )

# Idle Emissions section: rendered once, with a space between the two halves
# of the implicitly concatenated description so it reads "infrastructure used".
st.markdown('### Idle Emissions')
st.markdown('##### These are the emissions produced by generating the electricity needed to power the rest of the infrastructure'
            ' used for model training -- the datacenter, network, heating/cooling, storage, etc.')
with st.expander("Calculate the idle emissions of your model"):
    st.markdown('Do you know what the PUE (Power Usage Effectiveness) of your infrastructure is?')



# Embodied Emissions section: heading and descriptive text only.
st.markdown('### Embodied Emissions')
st.markdown('Choose your hardware, runtime and cloud provider/physical infrastructure to estimate the carbon impact of your research.')

# Collapsible methodology notes; elements are added through the expander
# object directly rather than a `with` block.
methodology = st.expander("More information about our Methodology")
methodology.markdown(
    'Building on the work of the [ML CO2 Calculator](https://mlco2.github.io/impact/), this tool allows you to consider'
    ' other aspects of your model\'s carbon footprint based on the LCA methodology.'
)
methodology.image(
    'images/LCA_CO2.png',
    caption='The LCA methodology - the parts in green are those we focus on.',
)