#!/usr/bin/env python3
"""Language-detection checks for accommodation/room descriptions.

Each description is first passed to ``predict_language``; when that returns
a falsy value, the Hugging Face text-classification pipeline built by the
``classifier`` fixture supplies the label instead.
"""
import pytest
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

from mappingservice.utils import predict_language

# (description, expected language label) pairs. Exact duplicates have been
# removed: re-running an identical input adds runtime but no coverage.
LANGUAGE_CASES = (
    ("Habitacion estandar con bano", "es"),
    ("apartamento de lujo con vistas al mar", "es"),
    ("casa ejecutiva", "es"),
    ("villa doble", "es"),
    ("estudio de una habitacion de lujo", "es"),
    ("chalet premier con dos habitaciones", "es"),
    ("casa de la playa premium con bano compartido", "es"),
    ("estudio familiar grande", "es"),
    ("suite familiar junior", "en"),
    ("bungalow tradicional sin bano", "es"),
    ("superior room 1 king superior room 1 king cupola or courtyard view french style 36sqm 385sq", "en"),  # noqa: E501
    ("habitacion matrimonial adaptada discapacitados", "es"),
    ("privilege room twin for 2 adults 0 children and 0 infants", "en"),
    ("deluxe room double for 2 adults 0 children and 0 infants", "en"),
    ("premier palace double room", "en"),
    ("double single use deluxe", "en"),
    ("double room queen bed superior", "en"),
    ("double guest room", "en"),
    ("single room for 1 adults 0 children and 0 infants", "en"),
    ("twin premium room incl evening tasting welcome gift comp wifi 28 sqm espresso fridge bathrobe", "en"),  # noqa: E501
    ("superior quadruple room", "en"),
    ("superior one bedroom apartment x2013 2 adults", "en"),
    ("comfort double", "en"),
    ("1 king bed suite nonsmoking", "en"),
    ("junior suite 1 king bed nonsmoking", "en"),
    ("family room superior", "en"),
)


@pytest.fixture
def classifier():
    """Return a text-classification pipeline for the language-detection model.

    The model and tokenizer are loaded from
    ``papluca/xlm-roberta-base-language-detection``. The pipeline is placed
    on CUDA device 0 when ``torch.cuda.is_available()`` is true, otherwise
    ``device=-1`` is passed.
    """
    model_path = "papluca/xlm-roberta-base-language-detection"

    model = AutoModelForSequenceClassification.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    classification = pipeline(
        "text-classification",
        model=model,
        tokenizer=tokenizer,
        framework="pt",
        device=0 if torch.cuda.is_available() else -1,
    )

    return classification


def test_model_predictions(classifier):
    """Check the predicted language label for every case in LANGUAGE_CASES.

    All cases are evaluated before asserting, so one failing run reports
    every mismatching description instead of stopping at the first one.
    """
    failures = []

    for description, expected_label in LANGUAGE_CASES:
        # First, try to predict based on keywords
        predicted_label = predict_language(description)

        # If no prediction was made, fallback to model prediction
        if not predicted_label:
            print(f"Fallback to model prediction for '{description}'")
            result = classifier(description)
            predicted_label = result[0]["label"]

        if predicted_label != expected_label:
            failures.append(
                f"Incorrect prediction for '{description}': "
                f"expected '{expected_label}', obtained '{predicted_label}'"
            )

    # One assertion at the end keeps pass/fail identical to the per-case
    # asserts while surfacing the full list of mismatches on failure.
    assert not failures, "\n".join(failures)