File size: 5,908 Bytes
4b722ec
 
ac20456
 
4b722ec
 
ac20456
4b722ec
ac20456
4b722ec
 
 
 
 
 
ac20456
4b722ec
 
ac20456
 
 
 
 
4b722ec
ac20456
 
 
 
 
 
 
 
 
 
 
4b722ec
ac20456
 
 
4b722ec
 
ac20456
 
 
 
 
 
 
 
 
 
4b722ec
 
ac20456
 
4b722ec
 
ac20456
 
 
4b722ec
 
 
 
 
ac20456
4b722ec
 
 
ac20456
 
 
4b722ec
 
ac20456
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4b722ec
 
 
 
 
 
ac20456
 
4b722ec
 
 
 
ac20456
 
 
 
 
 
 
 
 
 
 
4b722ec
 
ac20456
 
4b722ec
 
ac20456
4b722ec
 
 
 
 
 
 
 
 
 
 
ac20456
4b722ec
 
 
 
ac20456
 
 
 
 
 
 
 
 
4b722ec
ac20456
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import sys
import os
from typing import Optional, Dict, Any

import pandas as pd
import logging
from dotenv import load_dotenv

# Load environment variables through python-dotenv before anything else runs.
load_dotenv()
# Module-level logger. NOTE: basicConfig configures the *root* logger (DEBUG level,
# UTF-8 encoding) as a side effect of importing this module.
logger = logging.getLogger(__name__)
logging.basicConfig(encoding='utf-8', level=logging.DEBUG)

# Absolute directory containing this file; its parent directory is appended to
# sys.path below — presumably so sibling packages can be imported; confirm which
# imports rely on this.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.dirname(SCRIPT_DIR))

def get_emission_scores(emissions_df: pd.DataFrame, starting_point: str, destination: str, ):
    """

    Returns the emission score for the connection with least co2e between two cities.
    :param emissions_df:
    :param starting_point:
    :param destination:
    :return:
    """
    df = emissions_df.loc[(emissions_df["city_1"] == starting_point) & (emissions_df["city_2"] == destination)]
    if len(df) == 0:
        logger.info(f"Connection not found between {starting_point} and {destination}")
        return 0, None
    df.loc[:, 'min_co2e'] = df[['fly_co2e_kg', 'drive_co2e_kg', 'train_co2e_kg']].min(axis=1)
    df.loc[:, 'min_co2e_colname'] = df[['fly_co2e_kg', 'drive_co2e_kg', 'train_co2e_kg']].idxmin(axis=1)
    min_co2e = df.min_co2e.values[0]
    mode_prefix = (df.min_co2e_colname.values[0]).split("_")[0]
    min_cost = df[mode_prefix + "_cost_EUR"].values[0]
    if mode_prefix == "train":
        min_travel_time = df[mode_prefix + "_time_mins"].values[0] / 60
    else:
        min_travel_time = df[mode_prefix + "_time_hrs"].values[0]
    emission_score = 0.352 * min_travel_time + 0.218 * min_co2e + 0.431 * min_cost
    return emission_score, mode_prefix


def _check_city_present(df: pd.DataFrame, starting_point: Optional[str] = None, destination: str = "",
                        category: str = "popularity"):
    """
    Tell whether ``df`` holds data for the requested destination.

    For ``category == "emissions"`` a row must match both ``city_1 == starting_point``
    and ``city_2 == destination``; for every other category a row with
    ``city == destination`` is enough.

    :return: ``True`` when at least one matching row exists, ``False`` otherwise.
    """
    if category == "emissions":
        route_mask = (df['city_1'] == starting_point) & (df['city_2'] == destination)
        return bool(route_mask.any())

    matching_rows = df[df['city'] == destination]
    return len(matching_rows) > 0


def get_scores(df: pd.DataFrame, starting_point: Optional[str] = None, destination="",
               month: Optional[str] = None, category: str = "popularity"):
    """
    
    Returns the seasonality/popularity score for a particular destination.
    Seasonality is calculated for a particular month, while popularity is year-round.
    If no month is provided then
    the best month, i.e. month of lowest seasonality is returned.

    Args:
        - destination: str
        - month: str (default: None)
        - category: str (default: "popularity")
    
    """

    # Check if city is present in dataframe
    if not _check_city_present(df, starting_point, destination, category):
        logger.info(f"{destination} does not have {category} data")
        return None, None

    match category:
        case "popularity":
            return df[df['city'] == destination]['weighted_pop_score'].item()
        case "seasonality":
            dest_df = df.loc[df['city'] == destination]
            if month:
                m = month.capitalize()[:3]
            else:
                dest_df['lowest_col'] = dest_df.loc[:, dest_df.columns != 'city'].idxmin(axis="columns")
                m = dest_df[dest_df['city'] == destination]['lowest_col'].item()
            return m, dest_df[dest_df['city'] == destination][m].item()
        case "emissions":
            emissions = get_emission_scores(df, starting_point, destination)
            return emissions


def compute_sfairness_score(data: list[pd.DataFrame],
                            starting_point: str, destination: str,
                            month: Optional[str] = None) -> dict[str, Any] | dict[str, None]:
    """

    Returns the s-fairness score for a particular destination city and (optional) month.

    The score is a weighted sum of the emission, popularity and seasonality
    scores, rounded to 3 decimals. If the destination doesn't have a popularity
    or a seasonality score, every value of the returned dict is None. A missing
    emission score is treated as 0 (and the mode is then None).

    Args:
        - data: list[pd.DataFrame], in the order [popularity, seasonality, emissions]
        - starting_point: str
        - destination: str
        - month: str (default: None)

    Returns:
        dict with the keys 'month', 'mode' ('fly', 'drive', 'train' or None)
        and 's-fairness'.

    """
    popularity_score = get_scores(df=data[0],
                                  starting_point=None,
                                  destination=destination, month=None, category="popularity")
    # get_scores signals "city not found" with the tuple (None, None) for every
    # category, whereas a found popularity is a bare scalar. Without this
    # normalisation the `is None` guard below lets the tuple through and the
    # weighted sum raises TypeError.
    if isinstance(popularity_score, tuple):
        popularity_score = None

    month, seasonality_score = get_scores(df=data[1],
                                          starting_point=None, destination=destination,
                                          month=month, category="seasonality")

    emission_score, mode = get_scores(df=data[2],
                                      starting_point=starting_point, destination=destination, category="emissions")
    if emission_score is None:
        emission_score = 0

    # Partial scores (only one of popularity/seasonality available) are not
    # computed; the caller gets an all-None result instead.
    if seasonality_score is None or popularity_score is None:
        return {
            'month': None,
            'mode': None,  # 'fly', 'drive', 'train'
            's-fairness': None
        }

    s_fairness = round(0.281 * emission_score + 0.334 * popularity_score + 0.385 * seasonality_score, 3)
    return {
        'month': month,
        'mode': mode,  # 'fly', 'drive', 'train'
        's-fairness': s_fairness
    }


def test():
    """
    Smoke-run ``compute_sfairness_score`` for two sample trips and print the results.

    NOTE(review): ``load_data`` is neither defined nor imported in the visible
    part of this file — confirm where it is expected to come from.
    """
    data = [load_data(category) for category in ("popularity", "seasonality", "emissions")]
    sample_trips = (
        {"starting_point": "Munich", "destination": "Dijon"},
        {"starting_point": "Munich", "destination": "Strasbourg", "month": "Dec"},
    )
    for trip in sample_trips:
        print(compute_sfairness_score(data=data, **trip))


# Run the smoke test only when this file is executed directly, not on import.
if __name__ == "__main__":
    test()