map-room / tests /test_langdetect.py
Calin Rada
init
f006f31 unverified
raw
history blame
4.62 kB
#!/usr/bin/env python3
import pytest
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
from mappingservice.utils import predict_language
@pytest.fixture
def classifier():
    """Return a Hugging Face text-classification pipeline for language detection.

    The model and tokenizer are both loaded from the
    ``papluca/xlm-roberta-base-language-detection`` checkpoint and wrapped in
    a PyTorch (``framework="pt"``) pipeline. The pipeline is placed on device
    ``0`` when ``torch.cuda.is_available()`` is true, otherwise on ``-1``.
    """
    checkpoint = "papluca/xlm-roberta-base-language-detection"
    language_model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    language_tokenizer = AutoTokenizer.from_pretrained(checkpoint)

    # Use the first GPU when one is present; -1 is passed otherwise.
    if torch.cuda.is_available():
        device_index = 0
    else:
        device_index = -1

    return pipeline(
        "text-classification",
        model=language_model,
        tokenizer=language_tokenizer,
        framework="pt",
        device=device_index,
    )
def test_model_predictions(classifier):
    """Check the predicted language for a set of room descriptions.

    Each description is first passed to ``predict_language``. When that
    returns a falsy value, the ``classifier`` pipeline is called instead and
    the ``label`` of its first result is used as the prediction.

    Every case is evaluated before asserting, so one run reports all
    mismatching descriptions rather than stopping at the first.

    Args:
        classifier: Callable pipeline provided by the ``classifier`` fixture;
            called with a description, its first result's ``"label"`` is read.
    """
    # (description, expected language code) pairs; each description is
    # listed once.
    test_data = [
        ('Habitacion estandar con bano', 'es'),
        ('apartamento de lujo con vistas al mar', 'es'),
        ('casa ejecutiva', 'es'),
        ('villa doble', 'es'),
        ('estudio de una habitacion de lujo', 'es'),
        ('chalet premier con dos habitaciones', 'es'),
        ('casa de la playa premium con bano compartido', 'es'),
        ('estudio familiar grande', 'es'),
        ('suite familiar junior', 'en'),
        ('bungalow tradicional sin bano', 'es'),
        ('superior room 1 king superior room 1 king cupola or courtyard view french style 36sqm 385sq', 'en'),  # noqa: E501
        ('habitacion matrimonial adaptada discapacitados', 'es'),
        ('privilege room twin for 2 adults 0 children and 0 infants', 'en'),
        ('deluxe room double for 2 adults 0 children and 0 infants', 'en'),
        ('premier palace double room', 'en'),
        ('double single use deluxe', 'en'),
        ('double room queen bed superior', 'en'),
        ('double guest room', 'en'),
        ('single room for 1 adults 0 children and 0 infants', 'en'),
        ('twin premium room incl evening tasting welcome gift comp wifi 28 sqm espresso fridge bathrobe', 'en'),  # noqa: E501
        ('superior quadruple room', 'en'),
        ('superior one bedroom apartment x2013 2 adults', 'en'),
        ('comfort double', 'en'),
        ('1 king bed suite nonsmoking', 'en'),
        ('junior suite 1 king bed nonsmoking', 'en'),
        ('family room superior', 'en'),
    ]

    failures = []
    for description, expected_label in test_data:
        # First, try to predict based on keywords
        predicted_label = predict_language(description)

        # If no prediction was made, fallback to model prediction
        if not predicted_label:
            print(f"Fallback to model prediction for '{description}'")
            result = classifier(description)
            predicted_label = result[0]["label"]

        if predicted_label != expected_label:
            failures.append(
                f"Incorrect prediction for '{description}': expected '{expected_label}', obtained '{predicted_label}'"  # noqa: E501
            )

    # Assert once at the end so every mismatch appears in the failure output.
    assert not failures, "\n".join(failures)