"""Helpers for extracting bed information from text and model predictions.

Also contains small utilities for memory logging, rounding and language
guessing used alongside the bed-extraction helpers.
"""

import logging.config
import re
from typing import List

import psutil
from langdetect import LangDetectException, detect

from mappingservice.constants import ENGLISH_KEYWORDS, SPANISH_KEYWORDS
from mappingservice.models import BedData, BedType

logging.basicConfig(
    level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)

# "<count> <bed type> bed(s)/cama(s)"; the count is optional and the bed type
# may carry a Spanish "cama" prefix.
_BED_PATTERN = re.compile(
    r"(\d+)?\s*\b(king|queen|double|single|twin|bunk|sofa|rollaway|futon|"
    r"cama\s*king|cama\s*queen|cama\s*double|cama\s*single|cama\s*twin|"
    r"cama\s*bunk|cama\s*sofa|cama\s*rollaway|cama\s*futon)\b\s*(beds?|camas?)",
    re.IGNORECASE,
)

# "<bed type> or <bed type>" alternatives.
_OR_PATTERN = re.compile(
    r"\b(king|queen|double|single|twin)\s+or\s+"
    r"(king|queen|double|single|twin)\b",
    re.IGNORECASE,
)

# Leading "cama" (plus optional whitespace) captured by _BED_PATTERN.
_CAMA_PREFIX = re.compile(r"^cama\s*")

_BED_NUMBER_PATTERN = re.compile(
    r"(\d+|\bone\b|\btwo\b|\bthree\b)\s*bed", re.IGNORECASE
)
_WORD_TO_NUMBER = {"one": 1, "two": 2, "three": 3}


def log_memory_usage():
    """Log the current process's RSS and VMS memory figures at INFO level."""
    memory_info = psutil.Process().memory_info()
    # Use the module logger, like every other function in this file.
    logger.info(
        "Memory usage: RSS=%s, VMS=%s", memory_info.rss, memory_info.vms
    )


def safe_round(value, decimals=0):
    """Round ``value`` to ``decimals`` places, or return 0 if it is not a number.

    Args:
        value: Value to round; only ``int`` and ``float`` instances are rounded.
        decimals: Number of decimal places passed to ``round``.

    Returns:
        The rounded value, or ``0`` for any non-numeric input.
    """
    if isinstance(value, int | float):
        return round(value, decimals)
    return 0


def process_predictions(predictions, score_key="score", label_key="label"):
    """Normalise raw predictions into ``{"label", "score"}`` dictionaries.

    Entries that are not dictionaries, or whose score is not greater than
    zero, are dropped. Scores are rounded to three decimals and a missing
    label is replaced by ``"No data"``.

    Args:
        predictions: Iterable of prediction entries.
        score_key: Key under which each entry stores its score.
        label_key: Key under which each entry stores its label.

    Returns:
        A list of normalised dictionaries, or ``None`` when nothing is kept.
    """
    processed = [
        {
            "label": pred.get(label_key, "No data"),
            "score": round(pred.get(score_key, 0), 3),
        }
        for pred in predictions
        if isinstance(pred, dict) and pred.get(score_key, 0) > 0
    ]
    return processed or None


def parse_model_output(predictions) -> List[BedData]:
    """Convert model predictions into ``BedData`` entries.

    A prediction is kept only when its ``score`` exceeds 0.5, its ``count``
    is positive and its ``type`` is a ``BedType`` member name.

    Args:
        predictions: Expected to be a list of dictionaries with ``type``,
            ``count`` and ``score`` keys.

    Returns:
        The accepted ``BedData`` entries; an empty list when ``predictions``
        is not a list of dictionaries.
    """
    bed_data_list = []
    threshold = 0.5

    is_list_of_dicts = isinstance(predictions, list) and all(
        isinstance(pred, dict) for pred in predictions
    )
    if not is_list_of_dicts:
        return bed_data_list

    for prediction in predictions:
        # Named bed_type_name (not ``type``) to avoid shadowing the builtin.
        bed_type_name = prediction.get("type")
        count = prediction.get("count", 0)
        score = prediction.get("score", 0)

        if score > threshold and count > 0:
            if bed_type_name in BedType._member_names_:
                bed_data_list.append(
                    BedData(type=BedType[bed_type_name], count=count)
                )
            else:
                logger.debug("Unsupported bed type: %s", bed_type_name)

    return bed_data_list


def _resolve_bed_type(raw_name):
    """Map a captured bed-type name to a ``BedType`` member, or ``None``."""
    name = raw_name.strip().lower()
    if name in BedType._member_names_:
        return BedType[name]

    # The bed pattern also captures Spanish forms such as "cama king"; the
    # "cama" prefix has to be removed before the name can match a member.
    bare_name = _CAMA_PREFIX.sub("", name)
    if bare_name != name and bare_name in BedType._member_names_:
        return BedType[bare_name]
    return None


def get_bed_predictions(description: str) -> List[BedData]:
    """Extract bed types and counts from a free-text description.

    Two passes are made over the lower-cased description: one for
    "<count> <type> bed(s)/cama(s)" phrases (count defaults to 1) and one
    for "<type> or <type>" alternatives, each of which adds 1 to its type.

    Args:
        description: Text to scan.

    Returns:
        One ``BedData`` per recognised bed type; an empty list (and a
        warning in the log) when nothing is recognised.
    """
    logger.debug("Extracting bed predictions from description: %s", description)
    description = description.lower()

    # NOTE: an earlier "default bed type" pattern is currently disabled:
    # default_bed_type_pattern = re.compile(
    #     r"\b(double|twin|king|queen|single|doble|individual|gemela|matrimonial)\b\s+"
    #     r"\b(room|apartment|suite|habitacion|apartamento)\b", re.IGNORECASE)
    # default_matches = default_bed_type_pattern.findall(description)

    matches = _BED_PATTERN.findall(description)
    or_matches = _OR_PATTERN.findall(description)

    bed_type_counts = {}

    for count_text, bed_type_name, _noun in matches:
        count = int(count_text) if count_text else 1
        bed_type = _resolve_bed_type(bed_type_name)
        if bed_type:
            # NOTE(review): a repeated type overwrites the earlier count
            # rather than adding to it - confirm this is intended.
            bed_type_counts[bed_type] = count

    for alternatives in or_matches:
        for bed_type_name in alternatives:
            bed_type = _resolve_bed_type(bed_type_name)
            if bed_type:
                # NOTE(review): this adds to a count the first pass may have
                # already set for the same phrase - confirm this is intended.
                bed_type_counts[bed_type] = bed_type_counts.get(bed_type, 0) + 1

    bed_data_list = [
        BedData(type=bed_type, count=count)
        for bed_type, count in bed_type_counts.items()
    ]

    if not bed_data_list:
        logger.warning("No valid bed data found from extracted information.")

    return bed_data_list


def extract_bed_numbers(description: str) -> List[int]:
    """Return every number that directly precedes "bed" in ``description``.

    Digits and the words "one", "two" and "three" are recognised.

    Args:
        description: Text to scan.

    Returns:
        The numbers found, in order of appearance.
    """
    return [
        int(_WORD_TO_NUMBER.get(token.lower(), token))
        for token in _BED_NUMBER_PATTERN.findall(description)
    ]


def validate_bed_data(beds: List[BedData]) -> List[BedData]:
    """Keep only beds that have a truthy type and a positive count.

    Args:
        beds: Bed entries to filter.

    Returns:
        The entries that passed; an INFO message is logged when none did.
    """
    valid_beds = [bed for bed in beds if bed.type and bed.count > 0]

    if not valid_beds:
        logger.info("No valid beds found in %s", beds)

    return valid_beds


def is_list_of_lists(variable):
    """Return True when ``variable`` is a list whose items are all lists.

    An empty list also returns True, since it has no non-list items.
    """
    return isinstance(variable, list) and all(
        isinstance(item, list) for item in variable
    )


def predict_language(text) -> str | None:
    """Guess whether ``text`` is Spanish ("es") or English ("en").

    Keyword lists are checked first, Spanish before English, using substring
    matching on the lower-cased text. If no keyword matches, ``langdetect``
    is consulted and its answer is accepted only if it is "en" or "es".

    Args:
        text: Text to classify.

    Returns:
        "es", "en", or ``None`` when the language cannot be determined.
    """
    text_lower = text.lower()

    if any(keyword in text_lower for keyword in SPANISH_KEYWORDS):
        return "es"
    if any(keyword in text_lower for keyword in ENGLISH_KEYWORDS):
        return "en"

    try:
        language = detect(text)
    except LangDetectException:
        # Detection failed; treated the same as an unsupported language.
        return None

    return language if language in {"en", "es"} else None