Spaces:

travelgate
/

map-room

Paused

map-room / tests /test_langdetect.py

Calin Rada

init

f006f31 unverified 4 months ago

4.62 kB

	#!/usr/bin/env python3

	import pytest
	import torch
	from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

	from mappingservice.utils import predict_language


	@pytest.fixture
	def classifier():
	    """Build the text-classification pipeline used as the model fallback.

	    Loads the model and tokenizer of the
	    ``papluca/xlm-roberta-base-language-detection`` checkpoint and wraps
	    them in a PyTorch ``text-classification`` pipeline.

	    Returns:
	        The pipeline object; the test calls it with a single string.
	    """
	    checkpoint = "papluca/xlm-roberta-base-language-detection"

	    # Device index 0 is requested when CUDA is available; -1 is passed
	    # otherwise (the CPU setting per the transformers pipeline docs).
	    device_index = 0 if torch.cuda.is_available() else -1

	    return pipeline(
	        "text-classification",
	        model=AutoModelForSequenceClassification.from_pretrained(checkpoint),
	        tokenizer=AutoTokenizer.from_pretrained(checkpoint),
	        framework="pt",
	        device=device_index,
	    )


	def test_model_predictions(classifier):
	    """Check the predicted language of a set of room descriptions.

	    Each description is first passed to ``predict_language``. When that
	    returns a falsy value, the ``classifier`` pipeline is called instead
	    and the ``"label"`` of its first result is used.

	    Every case is evaluated before the test asserts, and all mismatches
	    are reported together, so one wrong prediction does not hide the
	    others.

	    Args:
	        classifier: The pipeline provided by the ``classifier`` fixture.
	    """
	    # (description, expected language label) pairs. Each description is
	    # listed once; repeated rows would only re-run identical inputs.
	    test_data = [
	        ('Habitacion estandar con bano', 'es'),
	        ('apartamento de lujo con vistas al mar', 'es'),
	        ('casa ejecutiva', 'es'),
	        ('villa doble', 'es'),
	        ('estudio de una habitacion de lujo', 'es'),
	        ('chalet premier con dos habitaciones', 'es'),
	        ('casa de la playa premium con bano compartido', 'es'),
	        ('estudio familiar grande', 'es'),
	        # NOTE(review): Spanish-looking word order but expected 'en';
	        # confirm this expectation is intentional.
	        ('suite familiar junior', 'en'),
	        ('bungalow tradicional sin bano', 'es'),
	        ('superior room 1 king superior room 1 king cupola or courtyard view french style 36sqm 385sq', 'en'),  # noqa: E501
	        ('habitacion matrimonial adaptada discapacitados', 'es'),
	        ('privilege room twin for 2 adults 0 children and 0 infants', 'en'),
	        ('deluxe room double for 2 adults 0 children and 0 infants', 'en'),
	        ('premier palace double room', 'en'),
	        ('double single use deluxe', 'en'),
	        ('double room queen bed superior', 'en'),
	        ('double guest room', 'en'),
	        ('single room for 1 adults 0 children and 0 infants', 'en'),
	        ('twin premium room incl evening tasting welcome gift comp wifi 28 sqm espresso fridge bathrobe', 'en'),  # noqa: E501
	        ('superior quadruple room', 'en'),
	        ('superior one bedroom apartment x2013 2 adults', 'en'),
	        ('comfort double', 'en'),
	        ('1 king bed suite nonsmoking', 'en'),
	        ('junior suite 1 king bed nonsmoking', 'en'),
	        ('family room superior', 'en'),
	    ]

	    failures = []
	    for description, expected_label in test_data:
	        # First, try the predict_language helper.
	        predicted_label = predict_language(description)

	        # If it produced no prediction, fall back to the model.
	        if not predicted_label:
	            print(f"Fallback to model prediction for '{description}'")
	            predicted_label = classifier(description)[0]["label"]

	        if predicted_label != expected_label:
	            failures.append(
	                f"Incorrect prediction for '{description}': "
	                f"expected '{expected_label}', obtained '{predicted_label}'"
	            )

	    # One assertion at the end so the report lists every wrong prediction.
	    assert not failures, "\n".join(failures)