Spaces:
Running
Running
File size: 5,908 Bytes
4b722ec ac20456 4b722ec ac20456 4b722ec ac20456 4b722ec ac20456 4b722ec ac20456 4b722ec ac20456 4b722ec ac20456 4b722ec ac20456 4b722ec ac20456 4b722ec ac20456 4b722ec ac20456 4b722ec ac20456 4b722ec ac20456 4b722ec ac20456 4b722ec ac20456 4b722ec ac20456 4b722ec ac20456 4b722ec ac20456 4b722ec ac20456 4b722ec ac20456 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
import sys
import os
from typing import Optional, Dict, Any
import pandas as pd
import logging
from dotenv import load_dotenv
# Call python-dotenv's loader before anything else in this module runs.
load_dotenv()
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Configure the root logger at DEBUG level, requesting UTF-8 encoding.
logging.basicConfig(encoding='utf-8', level=logging.DEBUG)
# Absolute path of the directory that contains this file.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
# Append the parent of SCRIPT_DIR to the import search path
# (presumably so sibling project packages can be imported -- TODO confirm).
sys.path.append(os.path.dirname(SCRIPT_DIR))
def get_emission_scores(emissions_df: pd.DataFrame, starting_point: str, destination: str, ):
    """
    Returns the emission score for the connection with least co2e between two cities.

    Only the first row matching ``city_1 == starting_point`` and
    ``city_2 == destination`` is scored. The transport mode with the lowest
    CO2e value (NaN values are skipped) is selected, and the score is a
    weighted sum of that mode's travel time in hours, its CO2e and its cost.

    :param emissions_df: frame with the columns ``city_1``, ``city_2``,
        ``{fly,drive,train}_co2e_kg``, ``{fly,drive,train}_cost_EUR``,
        ``fly_time_hrs``, ``drive_time_hrs`` and ``train_time_mins``.
        It is never modified.
    :param starting_point: value looked up in ``city_1``.
    :param destination: value looked up in ``city_2``.
    :return: ``(emission_score, mode_prefix)`` where ``mode_prefix`` is one of
        ``"fly"``, ``"drive"``, ``"train"``; ``(0, None)`` when the connection
        is missing or has no CO2e value for any mode.
    """
    co2e_columns = ['fly_co2e_kg', 'drive_co2e_kg', 'train_co2e_kg']

    route_mask = (emissions_df["city_1"] == starting_point) & (emissions_df["city_2"] == destination)
    routes = emissions_df.loc[route_mask]
    if routes.empty:
        logger.info(f"Connection not found between {starting_point} and {destination}")
        return 0, None

    # Work on the first matching row only and read values out of it instead of
    # writing helper columns onto the filtered frame (which is a copy of a
    # slice and triggers pandas' chained-assignment warning).
    co2e = routes[co2e_columns].iloc[0]
    if co2e.isna().all():
        # Without any CO2e value there is no "least emitting" mode to pick.
        logger.info(f"No co2e data for connection between {starting_point} and {destination}")
        return 0, None

    min_co2e = co2e.min()
    # Column names look like "<mode>_co2e_kg"; the mode is the first token.
    mode_prefix = co2e.idxmin().split("_")[0]
    min_cost = routes[mode_prefix + "_cost_EUR"].iloc[0]

    # Train durations are stored in minutes, the other modes in hours.
    if mode_prefix == "train":
        min_travel_time = routes[mode_prefix + "_time_mins"].iloc[0] / 60
    else:
        min_travel_time = routes[mode_prefix + "_time_hrs"].iloc[0]

    emission_score = 0.352 * min_travel_time + 0.218 * min_co2e + 0.431 * min_cost
    return emission_score, mode_prefix
def _check_city_present(df: pd.DataFrame, starting_point: Optional[str] = None, destination: str = "",
                        category: str = "popularity"):
    """
    Tell whether ``df`` holds a row for the requested lookup.

    For the ``"emissions"`` category a row must match both ``city_1`` (the
    starting point) and ``city_2`` (the destination). For every other
    category only the ``city`` column is compared with the destination.

    Args:
        - df: pd.DataFrame to search
        - starting_point: str (default: None), only used for "emissions"
        - destination: str (default: "")
        - category: str (default: "popularity")

    Returns:
        True if at least one matching row exists, otherwise False.
    """
    if category == "emissions":
        connection_rows = (df['city_1'] == starting_point) & (df['city_2'] == destination)
        return bool(connection_rows.any())

    city_rows = df['city'] == destination
    return bool(city_rows.any())
def get_scores(df: pd.DataFrame, starting_point: Optional[str] = None, destination="",
month: Optional[str] = None, category: str = "popularity"):
"""
Returns the seasonality/popularity score for a particular destination.
Seasonality is calculated for a particular month, while popularity is year-round.
If no month is provided then
the best month, i.e. month of lowest seasonality is returned.
Args:
- destination: str
- month: str (default: None)
- category: str (default: "popularity")
"""
# Check if city is present in dataframe
if not _check_city_present(df, starting_point, destination, category):
logger.info(f"{destination} does not have {category} data")
return None, None
match category:
case "popularity":
return df[df['city'] == destination]['weighted_pop_score'].item()
case "seasonality":
dest_df = df.loc[df['city'] == destination]
if month:
m = month.capitalize()[:3]
else:
dest_df['lowest_col'] = dest_df.loc[:, dest_df.columns != 'city'].idxmin(axis="columns")
m = dest_df[dest_df['city'] == destination]['lowest_col'].item()
return m, dest_df[dest_df['city'] == destination][m].item()
case "emissions":
emissions = get_emission_scores(df, starting_point, destination)
return emissions
def compute_sfairness_score(data: list[pd.DataFrame],
                            starting_point: str, destination: str,
                            month: Optional[str] = None) -> dict[str, Any] | dict[str, None]:
    """
    Returns the s-fairness score for a particular destination city and (optional) month.

    The score is a weighted sum of the emission, popularity and seasonality
    scores, rounded to three decimals. A missing emission score counts as 0.
    If the destination doesn't have a popularity or a seasonality score, every
    value of the returned dict is None.

    Args:
        - data: list[pd.DataFrame] in the order [popularity, seasonality, emissions]
        - starting_point: str
        - destination: str
        - month: str (default: None)

    Returns:
        dict with the keys 'month', 'mode' ('fly', 'drive' or 'train') and
        's-fairness'.
    """
    popularity_score = get_scores(df=data[0],
                                  starting_point=None,
                                  destination=destination, month=None, category="popularity")
    # get_scores reports missing data as the pair (None, None), even for the
    # scalar popularity score. Without this normalisation the tuple passes the
    # `is not None` check and the multiplication below raises TypeError.
    if isinstance(popularity_score, tuple):
        popularity_score = None

    month, seasonality_score = get_scores(df=data[1],
                                          starting_point=None, destination=destination,
                                          month=month, category="seasonality")
    emission_score, mode = get_scores(df=data[2],
                                      starting_point=starting_point, destination=destination,
                                      category="emissions")
    if emission_score is None:
        emission_score = 0

    # TODO: RECHECK whether a partial score should be returned when only one of
    # popularity / seasonality is available; for now both are required.
    if seasonality_score is None or popularity_score is None:
        return {
            'month': None,
            'mode': None,  # 'fly', 'drive', 'train'
            's-fairness': None
        }

    s_fairness = round(0.281 * emission_score + 0.334 * popularity_score + 0.385 * seasonality_score, 3)
    return {
        'month': month,
        'mode': mode,  # 'fly', 'drive', 'train'
        's-fairness': s_fairness
    }
def test():
    """
    Smoke test: load the three data sets and print two example s-fairness scores.

    NOTE(review): `load_data` is neither defined nor imported in the visible
    part of this file -- confirm where it is expected to come from.
    """
    # Order matters: compute_sfairness_score expects
    # [popularity, seasonality, emissions].
    data = [load_data(category) for category in ("popularity", "seasonality", "emissions")]

    examples = (
        {"starting_point": "Munich", "destination": "Dijon"},
        {"starting_point": "Munich", "destination": "Strasbourg", "month": "Dec"},
    )
    for example in examples:
        print(compute_sfairness_score(data=data, **example))
# Run the smoke test only when this file is executed as a script, not on import.
if __name__ == "__main__":
    test()
|