import sys import os from typing import Optional, Dict, Any import pandas as pd import logging from dotenv import load_dotenv load_dotenv() logger = logging.getLogger(__name__) logging.basicConfig(encoding='utf-8', level=logging.DEBUG) SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) sys.path.append(os.path.dirname(SCRIPT_DIR)) def get_emission_scores(emissions_df: pd.DataFrame, starting_point: str, destination: str, ): """ Returns the emission score for the connection with least co2e between two cities. :param emissions_df: :param starting_point: :param destination: :return: """ df = emissions_df.loc[(emissions_df["city_1"] == starting_point) & (emissions_df["city_2"] == destination)] if len(df) == 0: logger.info(f"Connection not found between {starting_point} and {destination}") return 0, None df.loc[:, 'min_co2e'] = df[['fly_co2e_kg', 'drive_co2e_kg', 'train_co2e_kg']].min(axis=1) df.loc[:, 'min_co2e_colname'] = df[['fly_co2e_kg', 'drive_co2e_kg', 'train_co2e_kg']].idxmin(axis=1) min_co2e = df.min_co2e.values[0] mode_prefix = (df.min_co2e_colname.values[0]).split("_")[0] min_cost = df[mode_prefix + "_cost_EUR"].values[0] if mode_prefix == "train": min_travel_time = df[mode_prefix + "_time_mins"].values[0] / 60 else: min_travel_time = df[mode_prefix + "_time_hrs"].values[0] emission_score = 0.352 * min_travel_time + 0.218 * min_co2e + 0.431 * min_cost return emission_score, mode_prefix def _check_city_present(df: pd.DataFrame, starting_point: Optional[str] = None, destination: str = "", category: str = "popularity"): if category == "emissions": if not ((df['city_1'] == starting_point) & (df['city_2'] == destination)).any(): return False else: return True if not len(df[df['city'] == destination]): return False return True def get_scores(df: pd.DataFrame, starting_point: Optional[str] = None, destination="", month: Optional[str] = None, category: str = "popularity"): """ Returns the seasonality/popularity score for a particular destination. Seasonality is calculated for a particular month, while popularity is year-round. If no month is provided then the best month, i.e. month of lowest seasonality is returned. Args: - destination: str - month: str (default: None) - category: str (default: "popularity") """ # Check if city is present in dataframe if not _check_city_present(df, starting_point, destination, category): logger.info(f"{destination} does not have {category} data") return None, None match category: case "popularity": return df[df['city'] == destination]['weighted_pop_score'].item() case "seasonality": dest_df = df.loc[df['city'] == destination] if month: m = month.capitalize()[:3] else: dest_df['lowest_col'] = dest_df.loc[:, dest_df.columns != 'city'].idxmin(axis="columns") m = dest_df[dest_df['city'] == destination]['lowest_col'].item() return m, dest_df[dest_df['city'] == destination][m].item() case "emissions": emissions = get_emission_scores(df, starting_point, destination) return emissions def compute_sfairness_score(data: list[pd.DataFrame], starting_point: str, destination: str, month: Optional[str] = None) -> dict[str, Any] | dict[str, None]: """ Returns the s-fairness score for a particular destination city and (optional) month. If the destination doesn't have popularity or seasonality scores, then the function returns None. Args: - data: list[pd.DataFrame] - starting_point: str - destination: str - month: str (default: None) """ popularity_score = get_scores(df=data[0], starting_point=None, destination=destination, month=None, category="popularity") month, seasonality_score = get_scores(df=data[1], starting_point=None, destination=destination, month=month, category="seasonality") emission_score, mode = get_scores(df=data[2], starting_point=starting_point, destination=destination, category="emissions") if emission_score is None: emission_score = 0 # RECHECK if seasonality_score is not None and popularity_score is not None: s_fairness = round(0.281 * emission_score + 0.334 * popularity_score + 0.385 * seasonality_score, 3) return { 'month': month, 'mode': mode, # 'fly', 'drive', 'train' 's-fairness': s_fairness } # elif popularity is not None: # => seasonality is None # s_fairness = 0.281 * emissions + 0.334 * popularity # elif seasonality[1] is not None: # => popularity is None # s_fairness = 0.281 * emissions + 0.385 * seasonality[1] # else: # => both are non # s_fairness = 0.281 * emissions else: return { 'month': None, 'mode': None, # 'fly', 'drive', 'train' 's-fairness': None } def test(): popularity_data = load_data("popularity") seasonality_data = load_data("seasonality") emissions_data = load_data("emissions") data = [popularity_data, seasonality_data, emissions_data] print(compute_sfairness_score(data=data, starting_point="Munich", destination="Dijon")) print(compute_sfairness_score(data=data, starting_point="Munich", destination="Strasbourg", month="Dec")) if __name__ == "__main__": test()