Spaces:

ashmib
/

green-city-finder

Running

File size: 5,908 Bytes

import sys
import os
from typing import Optional, Dict, Any

import pandas as pd
import logging
from dotenv import load_dotenv

# Module-level setup. Order matters: these statements run on import.
# load_dotenv comes from python-dotenv; presumably it loads variables from a
# local .env file into the environment -- confirm against the project setup.
load_dotenv()
# Logger named after this module; used by the scoring functions below.
logger = logging.getLogger(__name__)
# Configure the root logger at DEBUG level. The `encoding` keyword of
# basicConfig is only accepted on Python 3.9+.
logging.basicConfig(encoding='utf-8', level=logging.DEBUG)

# Absolute directory of this file; its parent directory is appended to
# sys.path so modules located one level above this file become importable.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.dirname(SCRIPT_DIR))

def get_emission_scores(emissions_df: pd.DataFrame, starting_point: str, destination: str, ):
    """
    Return the emission score of the lowest-CO2e connection between two cities.

    The first row whose ``city_1`` equals ``starting_point`` and whose
    ``city_2`` equals ``destination`` is used. Among the three travel modes
    (fly, drive, train) the one with the smallest CO2e value is selected
    (missing values are skipped), and the score is the weighted sum
    ``0.352 * travel_time_hours + 0.218 * co2e + 0.431 * cost``.

    The input frame is never modified.

    :param emissions_df: connection table; must provide ``city_1``, ``city_2``,
        ``{fly,drive,train}_co2e_kg``, ``{fly,drive,train}_cost_EUR``,
        ``fly_time_hrs``, ``drive_time_hrs`` and ``train_time_mins``.
    :param starting_point: value matched against ``city_1``.
    :param destination: value matched against ``city_2``.
    :return: ``(emission_score, mode)`` with ``mode`` one of ``"fly"``,
        ``"drive"`` or ``"train"``; ``(0, None)`` when the pair is not in the
        table or none of its modes has a CO2e value.
    """
    co2e_columns = ['fly_co2e_kg', 'drive_co2e_kg', 'train_co2e_kg']

    is_connection = (emissions_df["city_1"] == starting_point) & (emissions_df["city_2"] == destination)
    connections = emissions_df.loc[is_connection]
    if connections.empty:
        logger.info(f"Connection not found between {starting_point} and {destination}")
        return 0, None

    # Read the first match as a Series instead of writing helper columns onto
    # the filtered slice: that avoids SettingWithCopyWarning and extra copies.
    co2e = connections[co2e_columns].iloc[0]
    if co2e.isna().all():
        # Without any CO2e value there is no "least emitting" mode to pick.
        logger.info(f"No emission data available between {starting_point} and {destination}")
        return 0, None

    min_co2e = co2e.min()
    # Column names look like "<mode>_co2e_kg"; the prefix identifies the mode.
    mode_prefix = co2e.idxmin().split("_")[0]
    min_cost = connections[mode_prefix + "_cost_EUR"].iloc[0]

    # Train durations are stored in minutes, the other modes in hours.
    if mode_prefix == "train":
        min_travel_time = connections[mode_prefix + "_time_mins"].iloc[0] / 60
    else:
        min_travel_time = connections[mode_prefix + "_time_hrs"].iloc[0]

    emission_score = 0.352 * min_travel_time + 0.218 * min_co2e + 0.431 * min_cost
    return emission_score, mode_prefix


def _check_city_present(df: pd.DataFrame, starting_point: Optional[str] = None, destination: str = "",
                        category: str = "popularity"):
    if category == "emissions":
        if not ((df['city_1'] == starting_point) & (df['city_2'] == destination)).any():
            return False
        else:
            return True
    if not len(df[df['city'] == destination]):
        return False
    return True


def get_scores(df: pd.DataFrame, starting_point: Optional[str] = None, destination="",
               month: Optional[str] = None, category: str = "popularity"):
    """
    
    Returns the seasonality/popularity score for a particular destination.
    Seasonality is calculated for a particular month, while popularity is year-round.
    If no month is provided then
    the best month, i.e. month of lowest seasonality is returned.

    Args:
        - destination: str
        - month: str (default: None)
        - category: str (default: "popularity")
    
    """

    # Check if city is present in dataframe
    if not _check_city_present(df, starting_point, destination, category):
        logger.info(f"{destination} does not have {category} data")
        return None, None

    match category:
        case "popularity":
            return df[df['city'] == destination]['weighted_pop_score'].item()
        case "seasonality":
            dest_df = df.loc[df['city'] == destination]
            if month:
                m = month.capitalize()[:3]
            else:
                dest_df['lowest_col'] = dest_df.loc[:, dest_df.columns != 'city'].idxmin(axis="columns")
                m = dest_df[dest_df['city'] == destination]['lowest_col'].item()
            return m, dest_df[dest_df['city'] == destination][m].item()
        case "emissions":
            emissions = get_emission_scores(df, starting_point, destination)
            return emissions


def compute_sfairness_score(data: list[pd.DataFrame],
                            starting_point: str, destination: str,
                            month: Optional[str] = None) -> dict[str, Any] | dict[str, None]:
    """

    Returns the s-fairness score for a particular destination city and (optional) month.
    The score is ``0.281 * emission + 0.334 * popularity + 0.385 * seasonality``,
    rounded to three decimals. A missing emission score counts as 0. If the
    destination doesn't have popularity or seasonality scores, every value of
    the returned dict is None.

    Args:
        - data: list[pd.DataFrame], in the order [popularity, seasonality, emissions]
        - starting_point: str
        - destination: str
        - month: str (default: None); when omitted the month with the lowest
          seasonality is used

    Returns:
        - dict with the keys 'month', 'mode' ('fly', 'drive', 'train' or None)
          and 's-fairness'

    """
    popularity_score = get_scores(df=data[0],
                                  starting_point=None,
                                  destination=destination, month=None, category="popularity")
    # get_scores may signal a miss as the tuple (None, None); treat that as
    # "no popularity data" rather than letting the tuple reach the arithmetic.
    if isinstance(popularity_score, tuple):
        popularity_score = None

    best_month, seasonality_score = get_scores(df=data[1],
                                               starting_point=None, destination=destination,
                                               month=month, category="seasonality")

    emission_score, mode = get_scores(df=data[2],
                                      starting_point=starting_point, destination=destination, category="emissions")
    if emission_score is None:
        emission_score = 0

    # NOTE(review): partial scores (only popularity or only seasonality
    # available) are intentionally not computed; both must be present.
    if seasonality_score is None or popularity_score is None:
        return {
            'month': None,
            'mode': None,  # 'fly', 'drive', 'train'
            's-fairness': None
        }

    s_fairness = round(0.281 * emission_score + 0.334 * popularity_score + 0.385 * seasonality_score, 3)
    return {
        'month': best_month,
        'mode': mode,  # 'fly', 'drive', 'train'
        's-fairness': s_fairness
    }


def test():
    """
    Smoke-run ``compute_sfairness_score`` for two sample trips and print the results.

    NOTE(review): ``load_data`` is not defined or imported in the visible part
    of this file -- confirm where it is expected to come from.
    """
    data = [load_data(name) for name in ("popularity", "seasonality", "emissions")]
    sample_queries = (
        {"starting_point": "Munich", "destination": "Dijon"},
        {"starting_point": "Munich", "destination": "Strasbourg", "month": "Dec"},
    )
    for query in sample_queries:
        print(compute_sfairness_score(data=data, **query))


# Run the sample queries only when this file is executed directly as a script.
if __name__ == "__main__":
    test()