Spaces:

ashmib
/

green-city-finder

Running

green-city-finder / src / sustainability / s_fairness.py

Ashmi Banerjee

updates to the s-fairness calculation and refactoring code duplication

ac20456 2 months ago

5.91 kB

	import sys
	import os
	from typing import Optional, Dict, Any

	import pandas as pd
	import logging
	from dotenv import load_dotenv

	# Import-time setup. The statements below run as side effects of importing this module.
	# NOTE(review): no environment variable is read in the visible part of this file;
	# presumably load_dotenv() is kept for code elsewhere in the project — confirm.
	load_dotenv()
	# Module-level logger, named after this module.
	logger = logging.getLogger(__name__)
	# NOTE(review): this configures the root logger at DEBUG level as soon as the module is imported.
	logging.basicConfig(encoding='utf-8', level=logging.DEBUG)

	# Directory that contains this file.
	SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
	# Append the parent of SCRIPT_DIR to the import search path
	# (presumably so sibling packages can be imported — confirm against the project layout).
	sys.path.append(os.path.dirname(SCRIPT_DIR))

	def get_emission_scores(emissions_df: pd.DataFrame, starting_point: str, destination: str, ):
	    """
	    Returns the emission score for the connection with least co2e between two cities.

	    Only the first row matching ``city_1 == starting_point`` and
	    ``city_2 == destination`` is scored. The transport mode with the smallest
	    ``<mode>_co2e_kg`` value (missing values are ignored) is selected, and its
	    travel time, co2e and cost are combined into a weighted sum.

	    :param emissions_df: frame with the columns ``city_1``, ``city_2``,
	        ``{fly,drive,train}_co2e_kg``, ``{fly,drive,train}_cost_EUR``,
	        ``fly_time_hrs``, ``drive_time_hrs`` and ``train_time_mins``.
	        The frame is not modified.
	    :param starting_point: value looked up in ``city_1``.
	    :param destination: value looked up in ``city_2``.
	    :return: ``(emission_score, mode)`` where ``mode`` is ``'fly'``, ``'drive'``
	        or ``'train'``; ``(0, None)`` when no connection row exists or the row
	        has no co2e value at all.
	    """
	    co2e_columns = ['fly_co2e_kg', 'drive_co2e_kg', 'train_co2e_kg']
	    route_mask = (emissions_df["city_1"] == starting_point) & (emissions_df["city_2"] == destination)
	    matches = emissions_df.loc[route_mask]
	    if matches.empty:
	        logger.info(f"Connection not found between {starting_point} and {destination}")
	        return 0, None

	    # Read the first matching row directly instead of writing helper columns
	    # into the filtered slice (which triggers pandas' SettingWithCopyWarning).
	    co2e = matches[co2e_columns].iloc[0].astype(float).dropna()
	    if co2e.empty:
	        # No usable co2e value: idxmin would fail, so report "no score" the
	        # same way a missing connection is reported.
	        logger.info(f"No co2e data for the connection between {starting_point} and {destination}")
	        return 0, None

	    best_column = co2e.idxmin()
	    min_co2e = co2e[best_column]
	    # Column names look like "<mode>_co2e_kg"; the prefix selects the matching
	    # cost and time columns.
	    mode_prefix = best_column.split("_")[0]
	    min_cost = matches[mode_prefix + "_cost_EUR"].iloc[0]
	    if mode_prefix == "train":
	        # Train durations are stored in minutes; the other modes use hours.
	        min_travel_time = matches[mode_prefix + "_time_mins"].iloc[0] / 60
	    else:
	        min_travel_time = matches[mode_prefix + "_time_hrs"].iloc[0]
	    emission_score = 0.352 * min_travel_time + 0.218 * min_co2e + 0.431 * min_cost
	    return emission_score, mode_prefix


	def _check_city_present(df: pd.DataFrame, starting_point: Optional[str] = None, destination: str = "",
	category: str = "popularity"):
	if category == "emissions":
	if not ((df['city_1'] == starting_point) & (df['city_2'] == destination)).any():
	return False
	else:
	return True
	if not len(df[df['city'] == destination]):
	return False
	return True


	def get_scores(df: pd.DataFrame, starting_point: Optional[str] = None, destination="",
	month: Optional[str] = None, category: str = "popularity"):
	"""

	Returns the seasonality/popularity score for a particular destination.
	Seasonality is calculated for a particular month, while popularity is year-round.
	If no month is provided then
	the best month, i.e. month of lowest seasonality is returned.

	Args:
	- destination: str
	- month: str (default: None)
	- category: str (default: "popularity")

	"""

	# Check if city is present in dataframe
	if not _check_city_present(df, starting_point, destination, category):
	logger.info(f"{destination} does not have {category} data")
	return None, None

	match category:
	case "popularity":
	return df[df['city'] == destination]['weighted_pop_score'].item()
	case "seasonality":
	dest_df = df.loc[df['city'] == destination]
	if month:
	m = month.capitalize()[:3]
	else:
	dest_df['lowest_col'] = dest_df.loc[:, dest_df.columns != 'city'].idxmin(axis="columns")
	m = dest_df[dest_df['city'] == destination]['lowest_col'].item()
	return m, dest_df[dest_df['city'] == destination][m].item()
	case "emissions":
	emissions = get_emission_scores(df, starting_point, destination)
	return emissions


	def compute_sfairness_score(data: list[pd.DataFrame],
	                            starting_point: str, destination: str,
	                            month: Optional[str] = None) -> dict[str, Any] | dict[str, None]:
	    """
	    Returns the s-fairness score for a particular destination city and (optional) month.

	    The score is a weighted sum of the emission, popularity and seasonality
	    scores, rounded to three decimals. If the destination doesn't have
	    popularity or seasonality scores, every value of the returned dict is None.
	    A missing emission score is counted as 0.

	    Args:
	        - data: list[pd.DataFrame], in the order [popularity, seasonality, emissions]
	        - starting_point: str
	        - destination: str
	        - month: str (default: None); when omitted, the month of lowest
	          seasonality reported by ``get_scores`` is used

	    Returns:
	        dict with the keys 'month', 'mode' ('fly', 'drive' or 'train', or None
	        when no connection was found) and 's-fairness'.
	    """
	    popularity_score = get_scores(df=data[0],
	                                  starting_point=None,
	                                  destination=destination, month=None, category="popularity")
	    # get_scores reports "no data" as a (None, None) pair for every category,
	    # while a found popularity is a single value. Collapse the pair to None so
	    # the availability check below does not mistake it for a score.
	    if isinstance(popularity_score, tuple):
	        popularity_score = None

	    month, seasonality_score = get_scores(df=data[1],
	                                          starting_point=None, destination=destination,
	                                          month=month, category="seasonality")

	    emission_score, mode = get_scores(df=data[2],
	                                      starting_point=starting_point, destination=destination,
	                                      category="emissions")
	    if emission_score is None:
	        emission_score = 0

	    if popularity_score is None or seasonality_score is None:
	        return {
	            'month': None,
	            'mode': None,  # 'fly', 'drive', 'train'
	            's-fairness': None
	        }

	    s_fairness = round(0.281 * emission_score + 0.334 * popularity_score + 0.385 * seasonality_score, 3)
	    return {
	        'month': month,
	        'mode': mode,  # 'fly', 'drive', 'train'
	        's-fairness': s_fairness
	    }


	def test():
	    """
	    Manual smoke check: loads the three datasets and prints the s-fairness
	    result for two sample trips starting in Munich.
	    """
	    # NOTE(review): `load_data` is neither defined nor imported in the visible
	    # part of this file -- confirm where it is expected to come from.
	    data = [load_data(name) for name in ("popularity", "seasonality", "emissions")]
	    sample_trips = (
	        {"starting_point": "Munich", "destination": "Dijon"},
	        {"starting_point": "Munich", "destination": "Strasbourg", "month": "Dec"},
	    )
	    for trip in sample_trips:
	        print(compute_sfairness_score(data=data, **trip))


	# Run the manual smoke check only when this file is executed as a script.
	if __name__ == "__main__":
	    test()