Calin Rada commited on
Commit
f006f31
0 Parent(s):
Files changed (46) hide show
  1. .dockerignore +9 -0
  2. .env.dist +10 -0
  3. .github/CODEOWNERS +1 -0
  4. .github/dependabot.yml +18 -0
  5. .github/workflows/deploy-docker.yml +9 -0
  6. .github/workflows/pr-test.yml +41 -0
  7. .gitignore +165 -0
  8. .pre-commit-config.yaml +25 -0
  9. .vscode/extensions.json +5 -0
  10. .vscode/launch.json +27 -0
  11. .vscode/settings.json +16 -0
  12. Dockerfile +27 -0
  13. README.md +91 -0
  14. benchmark/__init__.py +0 -0
  15. benchmark/locustfile.py +35 -0
  16. docker-compose.yml +27 -0
  17. logging.conf +20 -0
  18. mappingservice/__init__.py +0 -0
  19. mappingservice/config.py +41 -0
  20. mappingservice/config_consul.py +97 -0
  21. mappingservice/constants.py +56 -0
  22. mappingservice/dependencies.py +27 -0
  23. mappingservice/main.py +117 -0
  24. mappingservice/models.py +105 -0
  25. mappingservice/ms/__init__.py +0 -0
  26. mappingservice/ms/ml_models/__init__.py +0 -0
  27. mappingservice/ms/ml_models/base.py +66 -0
  28. mappingservice/ms/ml_models/bed_type.py +25 -0
  29. mappingservice/ms/ml_models/environment.py +30 -0
  30. mappingservice/ms/ml_models/room_category.py +23 -0
  31. mappingservice/ms/ml_models/room_features.py +19 -0
  32. mappingservice/ms/ml_models/room_type.py +23 -0
  33. mappingservice/ms/ml_models/room_view.py +18 -0
  34. mappingservice/ms/model_loader.py +73 -0
  35. mappingservice/routers/__init__.py +0 -0
  36. mappingservice/routers/admin.py +13 -0
  37. mappingservice/routers/room.py +185 -0
  38. mappingservice/utils.py +154 -0
  39. poetry.lock +0 -0
  40. pyproject.toml +138 -0
  41. tests/__init__.py +0 -0
  42. tests/test_langdetect.py +79 -0
  43. tests/test_roomcategory.py +75 -0
  44. tests/test_roomenvironment.py +77 -0
  45. tests/test_roomtype.py +71 -0
  46. tests/test_roomview.py +89 -0
.dockerignore ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ .vscode
2
+ .idea
3
+ .cache
4
+ .git
5
+ .github
6
+ .ruff_cache
7
+ .pre-commit-config.yaml
8
+ .env
9
+ act
.env.dist ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ API_KEY=your_api_key
2
+ HUGGINGFACE_ACCESS_TOKEN=yours_hf_token
3
+
4
+ TGX_CONSUL_AUTH_USER=
5
+ TGX_CONSUL_AUTH_PASS=
6
+ TGX_CONSUL_TOKEN=
7
+ TGX_CONSUL_NODES=tgx
8
+
9
+ HF_HUB_CACHE = /home/.cache/huggingface
10
+
.github/CODEOWNERS ADDED
@@ -0,0 +1 @@
 
 
1
+ * @Travelgate/eng-architecture
.github/dependabot.yml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # To get started with Dependabot version updates, you'll need to specify which
2
+ # package ecosystems to update and where the package manifests are located.
3
+ # Please see the documentation for all configuration options:
4
+ # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
5
+
6
+ version: 2
7
+ updates:
8
+ # Maintain dependencies for GitHub Actions
9
+ - package-ecosystem: "github-actions"
10
+ # Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.)
11
+ directory: "/"
12
+ schedule:
13
+ interval: "weekly"
14
+ # Maintain dependencies for Python
15
+ - package-ecosystem: "pip"
16
+ directory: "/" # Location of package manifests
17
+ schedule:
18
+ interval: "weekly"
.github/workflows/deploy-docker.yml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ name: Build and Deploy to Registry
2
+ on:
3
+ workflow_dispatch
4
+ jobs:
5
+ call-upload-to-registry:
6
+ uses: Travelgate/ci-workflows/.github/workflows/upload-to-registry.yml@main
7
+ permissions:
8
+ contents: write
9
+ secrets: inherit
.github/workflows/pr-test.yml ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: PR Python Poetry
2
+ on:
3
+ pull_request:
4
+ branches: [ "main" ]
5
+ jobs:
6
+ build:
7
+ runs-on:
8
+ labels: ["docker"]
9
+ container:
10
+ image: python:3.11.8
11
+ steps:
12
+ - uses: actions/checkout@v4
13
+ with:
14
+ fetch-depth: '0'
15
+ - name: Install poetry
16
+ run: |
17
+ python -m pip install --upgrade pip
18
+ pip install poetry huggingface_hub[cli]
19
+ - name: Login to huggingface
20
+ env:
21
+ HUGGINGFACE_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN }}
22
+ run: |
23
+ huggingface-cli login --token=${HUGGINGFACE_TOKEN}
24
+ - name: Install dependencies
25
+ run: |
26
+ poetry update
27
+ poetry install
28
+ - name: Lint with Ruff
29
+ run: |
30
+ pip install ruff
31
+ ruff check . --output-format=github
32
+ continue-on-error: true
33
+ - name: Test with pytest
34
+ env:
35
+ APP_ENV: "dev"
36
+ HUGGINGFACE_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN }}
37
+ HUGGINGFACE_ACCESS_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN }}
38
+ API_KEY: ${{ secrets.API_KEY }}
39
+ HF_HUB_CACHE: "/home/.cache/huggingface"
40
+ run: poetry run pytest --cov=.
41
+ continue-on-error: false
.gitignore ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
159
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
+ .idea/
161
+
162
+ .env
163
+ consul.env
164
+ .DS_Store
165
+ act
.pre-commit-config.yaml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # See https://pre-commit.com for more information
2
+ # See https://pre-commit.com/hooks.html for more hooks
3
+ default_language_version:
4
+ python: python3.11
5
+ repos:
6
+ - repo: https://github.com/pre-commit/pre-commit-hooks
7
+ rev: v4.6.0
8
+ hooks:
9
+ - id: check-added-large-files
10
+ - id: check-toml
11
+ - id: check-yaml
12
+ args:
13
+ - --unsafe
14
+ - id: end-of-file-fixer
15
+ - id: trailing-whitespace
16
+ - repo: https://github.com/charliermarsh/ruff-pre-commit
17
+ rev: v0.4.1
18
+ hooks:
19
+ - id: ruff
20
+ args:
21
+ - --fix
22
+ - id: ruff-format
23
+ ci:
24
+ autofix_commit_msg: 🎨 [pre-commit.ci] Auto format from pre-commit.com hooks
25
+ autoupdate_commit_msg: ⬆ [pre-commit.ci] pre-commit autoupdate
.vscode/extensions.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ // See http://go.microsoft.com/fwlink/?LinkId=827846
3
+ // for the documentation about the extensions.json format
4
+ "recommendations": ["ms-python.python", "charliermarsh.ruff"]
5
+ }
.vscode/launch.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "0.2.0",
3
+ "configurations": [
4
+ {
5
+ "name": "Python: FastAPI",
6
+ "type": "debugpy",
7
+ "request": "launch",
8
+ "module": "uvicorn",
9
+ "args": [
10
+ "mappingservice.main:app",
11
+ "--reload"
12
+ ],
13
+ "justMyCode": false
14
+ },
15
+ {
16
+ "name": "Debug Tests",
17
+ "type": "debugpy",
18
+ "justMyCode": false,
19
+ "request": "launch",
20
+ "module": "pytest",
21
+ "console": "integratedTerminal",
22
+ "env": {
23
+ "PYTEST_ADDOPTS": "--no-cov"
24
+ },
25
+ }
26
+ ]
27
+ }
.vscode/settings.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "python.testing.pytestArgs": [
3
+ "tests"
4
+ ],
5
+ "python.testing.unittestEnabled": false,
6
+ "python.testing.pytestEnabled": true,
7
+
8
+ "[python]": {
9
+ "editor.formatOnSave": true,
10
+ "editor.codeActionsOnSave": {
11
+ "source.fixAll": "explicit",
12
+ "source.organizeImports": "explicit"
13
+ },
14
+ "editor.defaultFormatter": "charliermarsh.ruff"
15
+ }
16
+ }
Dockerfile ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM huggingface/transformers-pytorch-gpu as base
2
+ RUN pip install fastapi uvicorn pydantic pydantic-settings langdetect accelerate optimum pytest pytest-cov
3
+ RUN useradd -m -u 1000 user
4
+ WORKDIR /code
5
+
6
+ COPY --chown=user . /code
7
+ COPY logging.conf /code
8
+
9
+ ARG APP_ENV
10
+ ENV APP_ENV=${APP_ENV}
11
+ # Conditional commands based on APP_ENV
12
+ RUN if [ "$APP_ENV" = "dev" ]; then \
13
+ echo "Force dev dependencies"; \
14
+ pip install pytest pytest-cov; \
15
+ fi
16
+
17
+ RUN mkdir -p /home/.cache/huggingface
18
+ RUN mkdir -p /.cache
19
+ RUN chmod -R 777 /home/.cache/huggingface
20
+ RUN chmod -R 777 /.cache
21
+ RUN mkdir -p /code/room_environment-classifier_onxx
22
+ RUN chmod -R 777 /code/room_environment-classifier_onxx
23
+
24
+ # Remove once settings are fetched from Consul
25
+ # COPY .env /code
26
+
27
+ CMD ["uvicorn", "mappingservice.main:app","--host", "0.0.0.0", "--port", "80"]
README.md ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Environment
2
+ Set environment variables in .env file or via shell:
3
+
4
+ ```bash
5
+ export API_KEY="APIKEY TO USE THIS API"
6
+ export HUGGINGFACE_ACCESS_TOKEN="HF READ ACCESS TOKEN"
7
+ ```
8
+
9
+ ## Consul environment
10
+ To use the environment variables from Consul, set variables in consul.env file or via shell:
11
+
12
+ ```bash
13
+ TGX_CONSUL_AUTH_USER="CONSUL AUTH USER"
14
+ TGX_CONSUL_AUTH_PASS="CONSUL AUTH PASSWORD"
15
+ TGX_CONSUL_TOKEN="CONSUL TOKEN"
16
+ TGX_CONSUL_NODES="CONSUL_URLS_SEPARATED_BY_;"
17
+ ```
18
+
19
+ # Development
20
+ Dependencies and run is managed by Poetry
21
+
22
+ ## Dependencies
23
+ Use Poetry to Update and install dependencies:
24
+
25
+ ```bash
26
+ poetry update
27
+ poetry install
28
+ ```
29
+
30
+ ## Build
31
+
32
+ ### Using docker compose
33
+ ```bash
34
+ docker compose build --no-cache
35
+ docker compose up -d
36
+ ```
37
+
38
+ *NOTE* Docker compose is set to run on port 8080, change the port in docker-compose.yml if needed.
39
+
40
+ ### Docker API
41
+ Check if docker daemon is running:
42
+
43
+ ```bash
44
+ sudo systemctl start docker
45
+ ```
46
+
47
+ Build docker:
48
+
49
+ ```bash
50
+ docker build -t mappingservice .
51
+ ```
52
+
53
+ ## Run
54
+
55
+
56
+ ### Console
57
+
58
+ ```bash
59
+ poetry run uvicorn mappingservice.main:app --reload
60
+ ```
61
+
62
+ Site runs at http://127.0.0.1:8000/ml-api
63
+
64
+
65
+ ### Docker
66
+ ```bash
67
+ docker run -d --name mycontainer -p 80:80 mappingservice
68
+ ```
69
+ ##### Run
70
+
71
+ Send Consul variables on Docker run
72
+
73
+ ###### API
74
+ ```bash
75
+ docker run -e TGX_CONSUL_NODES='URL_NODES' -e TGX_CONSUL_TOKEN='TOKEN' -e TGX_CONSUL_AUTH_USER='USER' -e TGX_CONSUL_AUTH_PASS='PWD' -d --name mycontainer -p 80:80 mappingservice
76
+ ```
77
+
78
+ ### Benchmark
79
+ Use Poetry to Update and install Locust dependencies:
80
+
81
+ ```bash
82
+ poetry update --only benchmark
83
+ poetry install --only benchmark
84
+ ```
85
+
86
+ Run Locust in terminal
87
+ ```bash
88
+ locust -f ./benchmark/
89
+ ```
90
+ Site runs at http://localhost:8089/
91
+
benchmark/__init__.py ADDED
File without changes
benchmark/locustfile.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

from dotenv import load_dotenv
from locust import HttpUser, between, task

# Load API_KEY (and friends) from a local .env file so the benchmark can
# authenticate against the service without hard-coded secrets.
load_dotenv()


class ApiUser(HttpUser):
    """Locust user that hammers the combined room-prediction endpoint."""

    # docker-compose exposes the API on port 8080 under the /ml-api root path.
    host = 'http://127.0.0.1:8080/ml-api'
    # Each simulated user waits 1-2 seconds between tasks.
    wait_time = between(1, 2)

    @task
    def predict_all(self):
        """POST a representative payload and record success/failure."""
        headers = {'Api-key': os.getenv('API_KEY')}
        payload = {
            "room_description": "room with sea view",
            "beds": [
                {"type": "double", "count": 2}
            ],
        }
        # `json=` serializes the payload and sets the Content-Type header,
        # replacing the previous manual json.dumps() + header handling.
        with self.client.post(
            "/predict/room/predict/all",
            headers=headers,
            json=payload,
            catch_response=True,
        ) as response:
            if response.status_code != 200:
                response.failure(
                    f"Failed with {response.status_code}: {response.text}"
                )
            else:
                response.success()
docker-compose.yml ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ services:
2
+ app:
3
+ image: &app mrml-room-api
4
+ build:
5
+ context: .
6
+ dockerfile: Dockerfile
7
+ network: host
8
+ args:
9
+ tag: local
10
+ hostname: app_local
11
+ user: "1000:1000"
12
+ ports:
13
+ - 8080:80
14
+ env_file:
15
+ - .env
16
+ volumes:
17
+ - .:/code
18
+ networks:
19
+ - webnet
20
+
21
+ networks:
22
+ webnet:
23
+ driver: bridge
24
+ # Uncomment ipam if OpenVPN is causing troubles
25
+ # ipam:
26
+ # config:
27
+ # - subnet: 172.16.57.0/24
logging.conf ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [loggers]
2
+ keys=root
3
+
4
+ [logger_root]
5
+ level=INFO
6
+ handlers=stream_handler
7
+
8
+ [handlers]
9
+ keys=stream_handler
10
+
11
+ [formatters]
12
+ keys=formatter
13
+
14
+ [handler_stream_handler]
15
+ class=StreamHandler
16
+ formatter=formatter
17
+ args=(sys.stdout,)
18
+
19
+ [formatter_formatter]
20
+ format=%(asctime)s %(name)-12s %(levelname)-8s %(message)s
mappingservice/__init__.py ADDED
File without changes
mappingservice/config.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ from pydantic import SecretStr
4
+ from pydantic_settings import (
5
+ BaseSettings,
6
+ PydanticBaseSettingsSource,
7
+ SettingsConfigDict,
8
+ )
9
+
10
+ from mappingservice.config_consul import ConsulConfigSettingsSource
11
+
12
+
13
class Settings(BaseSettings):
    """Application settings, read from env vars, `.env` and (in prod) Consul."""

    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")

    api_key: SecretStr
    huggingface_access_token: SecretStr
    hf_hub_cache: str
    app_env: str = "prod"

    @classmethod
    def settings_customise_sources(
        cls,
        settings_cls,
        init_settings: PydanticBaseSettingsSource,
        env_settings: PydanticBaseSettingsSource,
        dotenv_settings: PydanticBaseSettingsSource,
        file_secret_settings: PydanticBaseSettingsSource,
    ):
        """Order the settings sources; Consul comes last and only in prod."""
        consul_source = ConsulConfigSettingsSource(settings_cls)
        base_sources = (
            init_settings,
            env_settings,
            dotenv_settings,
            file_secret_settings,
        )

        # In dev, skip Consul entirely and rely on local env/.env values.
        if os.getenv("APP_ENV", "prod").lower() == "dev":
            return base_sources
        return base_sources + (consul_source,)
mappingservice/config_consul.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import logging.config
3
+ import random
4
+ from typing import Any
5
+
6
+ import requests
7
+ from pydantic import HttpUrl, SecretStr
8
+ from pydantic.fields import FieldInfo
9
+ from pydantic_settings import (
10
+ BaseSettings,
11
+ PydanticBaseSettingsSource,
12
+ SettingsConfigDict,
13
+ )
14
+ from requests.exceptions import HTTPError, RequestException, Timeout
15
+
16
+ logging.config.fileConfig("logging.conf", disable_existing_loggers=False)
17
+ logger = logging.getLogger(__name__)
18
+ logger.info("Logger Configured: config_consul")
19
+
20
+
21
class ConsulSettings(BaseSettings):
    """Credentials and node list used to reach the Consul cluster.

    Values come from the environment, falling back to a `consul.env` file.
    """

    model_config = SettingsConfigDict(env_file="consul.env", env_file_encoding="utf-8")
    # Semicolon-separated list of Consul node URLs; one is picked at random.
    tgx_consul_nodes: str
    tgx_consul_auth_user: str
    tgx_consul_auth_pass: SecretStr
    tgx_consul_token: SecretStr
27
+
28
+
29
class ConsulClient:
    """Thin HTTP client for reading key/value pairs from Consul."""

    def __init__(self, settings: ConsulSettings):
        """Pick one Consul node at random and prepare auth material."""
        logger.info(
            f"Consul init:[{settings.tgx_consul_nodes}], "
            f"user:[{settings.tgx_consul_auth_user}]"
        )

        # Randomly pick one of the configured nodes (semicolon-separated)
        # for naive client-side load balancing.
        consul_nodes = settings.tgx_consul_nodes.split(";")
        consul_url: str = random.choice(consul_nodes)

        self.headers = {
            "X-Consul-Token": f"{settings.tgx_consul_token.get_secret_value()}"
        }
        self.auth = (
            settings.tgx_consul_auth_user,
            settings.tgx_consul_auth_pass.get_secret_value(),
        )
        self.url = consul_url
        self.timeout = 20  # seconds, per request

    def get_key_value(self, key: str, dc: str) -> Any:
        """Fetch *key* from the Consul KV store in datacenter *dc*.

        Returns the decoded JSON body. Raises requests' HTTPError on a
        non-2xx response (and Timeout/RequestException on transport errors).
        """
        url = f"{self.url}/v1/kv/{key}?dc={dc}&raw=false"
        logger.info(f"Downloading keys from Consul: [{url}, {key}]")
        response = requests.get(
            url=url, headers=self.headers, auth=self.auth, timeout=self.timeout
        )
        response.raise_for_status()
        return response.json()
58
+
59
+
60
class ConsulConfigSettingsSource(PydanticBaseSettingsSource):
    """Pydantic settings source that pulls the whole config dict from Consul KV."""

    def get_field_value(
        self, field: FieldInfo, field_name: str
    ):
        # Unused: __call__ returns the complete settings mapping at once.
        pass

    def prepare_field_value(
        self, field_name: str, field: FieldInfo, value: Any, value_is_complex: bool
    ) -> Any:
        # Unused: values are handed back verbatim from Consul.
        pass

    def __call__(self):
        """
        Customize settings source from Consul.
        Needs url and secrets to access consul to retrieve settings.
        """
        CONSUL_KEY = "mapping/apps/room/ml-api/config"

        consul_settings = ConsulSettings()
        consul_cli = ConsulClient(consul_settings)

        logger.info(f"Getting consul key value: [{CONSUL_KEY}]")
        try:
            ret_json = consul_cli.get_key_value(CONSUL_KEY, "gc-we1d")
        except (Timeout, HTTPError, RequestException) as err:
            # Fail open: missing Consul config falls back to the other sources.
            logger.warning(f"Error get from Consul {err}")
            return {}

        if not ret_json:
            # Previously logger.warn(...) received a stray positional arg
            # ("gc-we1d") with no %-placeholder, which breaks log formatting;
            # the datacenter is now folded into the message itself.
            logger.warning(f"Consul key not found: [{CONSUL_KEY}] (dc=gc-we1d)")
            return {}

        logger.info("Consul key read successfully")
        return ret_json
mappingservice/constants.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Languages the service serves. "na" is a placeholder key used only by the
# language-detection model entry below, not a real language code.
AVAILABLE_LANGUAGES = ["en", "es", "na"]  # na is a placeholder for the language detection model # noqa: E501

# Fallback label/score returned when a model yields no usable prediction.
DEFAULT_LABEL = "No data"

DEFAULT_SCORE = 0.0

# Hugging Face model ids per prediction task, keyed by language code.
# "classification" selects which transformers pipeline type to instantiate.
MODEL_NAMES = {
    "bed_type": {
        "classification": "text-classification",
        "en": "travelgate/bed_type_model-classifier",
        "es": "travelgate/bed_type_model-classifier",
    },
    "room_category": {
        "classification": "text-classification",
        "en": "travelgate/room_category-classifier",
        "es": "travelgate/room_category-classifier",
    },
    "environment": {
        "classification": "text-classification",
        "en": "travelgate/room_environment-classifier",
        "es": "travelgate/room_environment-classifier",
    },
    "room_features": {
        "classification": "token-classification",
        "en": "travelgate/feature_predicted_en",
        "es": "travelgate/feature_predicted_es",
    },
    "room_type": {
        "classification": "text-classification",
        "en": "travelgate/room_type-classifier",
        "es": "travelgate/room_type-classifier"
    },
    "room_view": {
        "classification": "token-classification",
        "en": "travelgate/view_predicted_en",
        "es": "travelgate/view_predicted_es"
    },
    "lang_detect": {
        "classification": "text-classification",
        "na": "papluca/xlm-roberta-base-language-detection",  # use na as a placeholder
    }
}

# Accent-free Spanish hospitality keywords, presumably used as a cheap
# language hint before invoking the detection model — confirm in utils.py.
SPANISH_KEYWORDS = [
    "habitacion", "cama", "bano", "aire acondicionado", "television", "desayuno",
    "vista", "servicio", "caja fuerte", "ducha", "sabanas", "toallas",
    "limpieza", "recepcion", "piscina", "gimnasio", "ascensor", "estacionamiento",
    "secador de pelo", "armario", "escritorio", "telefono", "decoracion", "silla",
    "mesa", "luz", "ventana", "apartamento", "casa", "vistas", "presidencial",
    "estandar", "estudio", "terraza", "doble"
]

# NOTE(review): this is a set while SPANISH_KEYWORDS is a list; fine for
# membership tests, but confirm nothing relies on ordering or indexing.
ENGLISH_KEYWORDS = {
    "apartment", "room", "house", "views", "presidential", "studio", "suite",
    "standard", "deluxe"
}
mappingservice/dependencies.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging.config
import secrets
from functools import lru_cache

from fastapi import HTTPException, Security, status
from fastapi.security.api_key import APIKeyHeader

from mappingservice import config

logging.config.fileConfig("logging.conf", disable_existing_loggers=False)
logger = logging.getLogger(__name__)


@lru_cache
def get_settings() -> config.Settings:
    """Build the application Settings once and memoize them."""
    return config.Settings()


# Resolved once at import time; incoming requests are checked against it.
API_KEY = get_settings().api_key.get_secret_value()
api_key_header_auth = APIKeyHeader(name="Api-key", auto_error=True)
# Shared model cache, populated by the lifespan handler in main.py.
mc = {}


def get_api_key(api_key_header: str = Security(api_key_header_auth)):
    """FastAPI dependency: reject requests whose Api-key header is wrong.

    Uses a constant-time comparison (instead of `!=`) so the key cannot be
    probed via response-timing differences.

    Raises:
        HTTPException: 401 when the supplied key does not match.
    """
    if not secrets.compare_digest(
        api_key_header.encode("utf-8"), API_KEY.encode("utf-8")
    ):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid API Key"
        )
mappingservice/main.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging.config
2
+ import time
3
+ from contextlib import asynccontextmanager
4
+
5
+ import psutil
6
+ from fastapi import Depends, FastAPI, Request
7
+ from fastapi.responses import JSONResponse
8
+
9
+ import mappingservice.dependencies as deps
10
+ from mappingservice.constants import AVAILABLE_LANGUAGES, MODEL_NAMES
11
+ from mappingservice.dependencies import get_api_key, get_settings
12
+ from mappingservice.ms.model_loader import ModelLoader
13
+ from mappingservice.routers import admin, room
14
+ from mappingservice.utils import log_memory_usage, predict_language
15
+
16
+ logging.config.fileConfig("logging.conf", disable_existing_loggers=False)
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
+ @asynccontextmanager
21
+ async def lifespan(application: FastAPI):
22
+ # Load the ML models
23
+ settings = get_settings()
24
+ initial_memory = psutil.Process().memory_info().rss
25
+
26
+ start_time = time.time()
27
+
28
+ ml_model = ModelLoader(settings, MODEL_NAMES)
29
+ for k in MODEL_NAMES:
30
+ logger.info(f"Loading model: {k}...")
31
+ for lang in AVAILABLE_LANGUAGES:
32
+ model_pipeline = ml_model.get_model(k, lang)
33
+ try:
34
+ deps.mc[k][lang] = model_pipeline
35
+ except KeyError:
36
+ deps.mc[k] = {}
37
+ deps.mc[k][lang] = model_pipeline
38
+
39
+ elapsed_time = time.time() - start_time
40
+ num_cores = psutil.cpu_count(logical=True)
41
+ total_ram = psutil.virtual_memory().total / (1024 ** 3) # Convert to GB
42
+ final_memory = psutil.Process().memory_info().rss
43
+ total_memory_used = (final_memory - initial_memory) / (1024 ** 3) # Convert to GB
44
+
45
+ logger.info("*" * 60) # ASCII Art
46
+ logger.info(f"* Number of Cores: {num_cores}")
47
+ logger.info(f"* Total RAM: {total_ram:.2f} GB")
48
+
49
+ logger.info(
50
+ f"* AI Models loaded in {elapsed_time:.2f} seconds, "
51
+ f"using {total_memory_used:.2f} GB"
52
+ )
53
+ logger.info("*" * 60) # ASCII Art
54
+
55
+ yield
56
+ # Clean up the ML models and release the resources
57
+ deps.mc.clear()
58
+
59
+
60
+ app = FastAPI(root_path="/ml-api", lifespan=lifespan)
61
+
62
+
63
+ @app.middleware("http")
64
+ async def handle_exceptions(request: Request, call_next):
65
+ try:
66
+ response = await call_next(request)
67
+
68
+ return response
69
+ except Exception as e:
70
+ logging.error(f"Unhandled error: {e}")
71
+ logging.exception(e)
72
+ log_memory_usage()
73
+
74
+ return JSONResponse(status_code=500, content={
75
+ "message": "Internal Server Error"
76
+ })
77
+ finally:
78
+ log_memory_usage()
79
+
80
+
81
@app.middleware("http")
async def detect_language(request: Request, call_next):
    """Attach the detected language of `room_description` to request.state.

    The description is taken from the query string first, then from the
    JSON body. Requests without a description pass through untouched
    (request.state.predicted_language is not set, matching prior behavior).
    """
    room_description = request.query_params.get("room_description", "")

    if not room_description:
        try:
            body = await request.json()
            room_description = body.get("room_description", "")
        except ValueError:
            # Body absent or not JSON: nothing to inspect.
            pass

    # Bug fix: this early return previously lived inside the `except`
    # branch, so a *valid* JSON body without a room_description fell
    # through and ran language detection on an empty string.
    if not room_description:
        return await call_next(request)

    language = "en"
    try:
        language = predict_language(room_description)

        if language not in AVAILABLE_LANGUAGES:
            logger.error(f"Unsupported language for room description: {room_description}. Falling back to model prediction.")  # noqa: E501

            language = deps.mc['lang_detect']['na'].predict(room_description)[0][0]['label']  # noqa: E501
            if language not in AVAILABLE_LANGUAGES:
                logger.error(f"Unsupported language for room description using model prediction: {room_description}. Falling back to English.")  # noqa: E501
                language = "en"
    except Exception as e:
        # Best-effort: detection failure falls back to the "en" default.
        logger.error(f"Error detecting language: {e}")

    request.state.predicted_language = language

    return await call_next(request)
114
+
115
+
116
+ app.include_router(room.router, dependencies=[Depends(get_api_key)])
117
+ app.include_router(admin.router)
mappingservice/models.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging.config
2
+ from enum import Enum
3
+ from typing import List, Union
4
+
5
+ from pydantic import BaseModel, validator
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+
10
class PredictionResponse(BaseModel):
    """A single model prediction: class label plus confidence score."""

    label: str
    score: float
13
+
14
+
15
class BedType(str, Enum):
    """Canonical bed types; `none` (empty string) marks an unknown type."""

    none = ""
    single = "single"
    double = "double"
    queen = "queen"
    king = "king"
    california_king = "california_king"
    bunk = "bunk"
    sofa = "sofa"
    rollaway = "rollaway"
    futon = "futon"
26
+
27
+
28
class BedData(BaseModel):
    """A bed entry: canonical bed type plus how many of that bed exist.

    NOTE(review): uses pydantic v1-style @validator; consider migrating to
    `field_validator` when fully on pydantic v2 — confirm installed version.
    """

    type: Union[BedType, None] = None
    count: Union[int, None] = None

    @validator("count", pre=True, always=True)
    def validate_count(cls, v):
        # Only strictly positive counts are meaningful; anything else → None.
        return v if v and v > 0 else None

    @validator("type", pre=True, always=True)
    def validate_type(cls, v):
        # Coerce free-form strings to BedType; unrecognized values become None.
        try:
            return BedType(v.lower()) if v else None
        except ValueError:
            return None

    @classmethod
    def create_valid_bed(cls, type: Union[str, None], count: Union[int, None]):
        # Factory: returns a BedData only when both type and count are valid;
        # logs and returns None otherwise.
        if type and count and count > 0:
            try:
                type_enum = BedType(type.lower())
                return cls(type=type_enum, count=count)
            except ValueError:
                logger.error(f"Invalid bed type: {type}")
        return None

    def __init__(self, **data):
        """Normalize common Spanish/English synonyms before validation."""
        # Maps lowercase, space-stripped synonyms to canonical BedType values.
        variations = {
            "individual": BedType.single,
            "camaindividual": BedType.single,
            "doble": BedType.double,
            "camadoble": BedType.double,
            "reina": BedType.queen,
            "queenbed": BedType.queen,
            "rey": BedType.king,
            "californiaking": BedType.california_king,
            "litera": BedType.bunk,
            "sofá": BedType.sofa,
            "sofacama": BedType.sofa,
            "plegable": BedType.rollaway,
            "futón": BedType.futon,
            "twin": BedType.single,
            "twinbed": BedType.single,
            "singlebed": BedType.single,
            "doublebed": BedType.double,
            "largedouble": BedType.queen,
            "extralargedouble": BedType.king,
            "bunkbed": BedType.bunk,
            "couch": BedType.sofa,
            "airmattress": BedType.futon,
            "floormattress": BedType.futon,
        }

        bed_type = data.get("type")
        if bed_type:
            # Strip spaces and lowercase so "Queen Bed" matches "queenbed".
            normalized_bed_type = bed_type.replace(" ", "").lower()
            data["type"] = variations.get(normalized_bed_type, bed_type)
        super().__init__(**data)
85
+
86
+
87
class RoomData(BaseModel):
    """Request payload for room prediction endpoints."""

    # Free-text room description; language is detected by middleware upstream.
    room_description: str
    # NOTE(review): a single BedData here, while Predictions.beds is a list
    # and the benchmark client posts a list of beds — confirm intended shape.
    beds: Union[BedData, None]
90
+
91
+
92
class Predictions(BaseModel):
    """Aggregated output of every room model for one description."""

    type: PredictionResponse
    category: PredictionResponse
    environment: List[PredictionResponse]
    feature: List[PredictionResponse]
    view: List[PredictionResponse]
    # Language code the middleware/model settled on (e.g. "en", "es").
    language_detected: str
    beds: List[BedData]
100
+
101
+
102
+
103
class AllPredictionsResponse(BaseModel):
    """Top-level response wrapper for the predict-all endpoint."""

    predictions: Predictions
105
+
mappingservice/ms/__init__.py ADDED
File without changes
mappingservice/ms/ml_models/__init__.py ADDED
File without changes
mappingservice/ms/ml_models/base.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
+
4
class Base:
    """Shared helpers for the room ML model wrappers."""

    CONFIDENCE_THRESHOLD = 0.95

    def predict(self, data, pipeline, language):
        """Subclasses must implement the actual prediction."""
        raise NotImplementedError()

    @staticmethod
    def process_ner_results(ner_results, confidence_threshold):
        """Merge word-piece NER hits into whole entities with averaged scores.

        Results below ``confidence_threshold`` are dropped; "##" continuation
        pieces are glued onto the previous token (their score is ignored).
        """
        entities = []
        group = []
        score_sum = 0

        def flush():
            # Emit the pending group (if any) with its averaged score.
            nonlocal group, score_sum
            if group:
                entities.append({
                    "word": " ".join(group),
                    "score": score_sum / len(group),
                })
                group = []
                score_sum = 0

        for hit in ner_results:
            if hit["score"] < confidence_threshold:
                continue
            word = hit["word"]
            if word.startswith("##"):
                # Word-piece continuation: append to the last token in place.
                if group:
                    group[-1] += word[2:]
                continue
            flush()
            group.append(word)
            score_sum += hit["score"]

        flush()
        return entities

    @staticmethod
    def preprocess_data(data, language):
        """Clean a description before NER.

        For Spanish input a small English->Spanish lexicon substitution is
        applied first; then 1-2 character tokens and standalone numbers are
        removed and whitespace is collapsed.
        """
        translations = {
            "deluxe": "de lujo",
            "twin": "individual",
            "size": "tamaño",
            "premium": "calidad",
            "double": "doble",
            "room": "habitacion",
        }

        if language == "es":
            for source_word, replacement in translations.items():
                data = data.replace(source_word, replacement)

        data = re.sub(r'\b\w{1,2}\b', '', data)   # drop 1-2 character tokens
        data = re.sub(r'\b\d+\b', '', data)       # drop standalone numbers
        # Collapse repeated whitespace and trim the ends.
        return re.sub(r'\s+', ' ', data).strip()
mappingservice/ms/ml_models/bed_type.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
+ from .base import Base
4
+
5
+
6
class BedType(Base):
    """Bed-type wrapper: delegates to the pipeline and parses bed counts."""

    def predict(self, data, pipeline, language):
        """Return the pipeline's top prediction for the description."""
        return pipeline.predict(data)[0][0]

    @staticmethod
    def extract_bed_numbers(description):
        """Extract bed counts (digits or small en/es number words) in order.

        Unknown number words fall back to a count of 1.
        """
        word_to_number = {"one": 1, "two": 2, "three": 3, "uno": 1, "dos": 2, "tres": 3}
        token_re = re.compile(
            r"\b(\d+|\bone\b|\btwo\b|\bthree\b|\buno\b|\bdos\b|\btres\b)\b",
            re.IGNORECASE,
        )
        counts = []
        for token in token_re.findall(description):
            if token.isdigit():
                counts.append(int(token))
            else:
                counts.append(word_to_number.get(token.lower(), 1))
        return counts
mappingservice/ms/ml_models/environment.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+
3
+ from .base import Base
4
+
5
+ logger = logging.getLogger(__name__)
6
+
7
+
8
class Environment(Base):
    """Multi-label room-environment classifier wrapper."""

    CONFIDENCE_THRESHOLD = 0.5

    def predict(self, data, pipeline, language):
        """Return every label scoring above the confidence threshold."""
        # Normalise "connecting" to "connected" before classification.
        normalized = data.replace("connecting", "connected")
        candidates = pipeline.predict(normalized)[0]

        predicted_results = [
            {
                "label": candidate["label"],
                "score": candidate["score"],
                "language_detected": language,
            }
            for candidate in candidates
            if candidate["score"] > self.CONFIDENCE_THRESHOLD
        ]

        logger.info(f"Predicted Labels and Scores: {predicted_results}")
        return predicted_results
mappingservice/ms/ml_models/room_category.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .base import Base
2
+
3
+
4
class RoomCategory(Base):
    """Room-category classifier; falls back to "default" below the threshold."""

    CONFIDENCE_THRESHOLD = 0.7

    def predict(self, data: str, pipeline, language) -> dict:
        """Return ``{language_detected, label, score}``.

        The label is "default" when no prediction clears the threshold;
        the score then reflects the (rejected) top prediction, or 0 when
        the pipeline returned nothing.
        """
        predictions = pipeline.predict(data)
        top = predictions[0][0] if predictions else None

        if top and top["score"] > self.CONFIDENCE_THRESHOLD:
            return {
                "language_detected": language,
                "label": top["label"],
                "score": top["score"],
            }
        return {
            "language_detected": language,
            "label": "default",
            "score": top["score"] if top else 0,
        }
mappingservice/ms/ml_models/room_features.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .base import Base
2
+
3
+
4
class RoomFeatures(Base):
    """NER wrapper extracting room features from a description."""

    CONFIDENCE_THRESHOLD = 0.5

    def predict(self, data, pipeline, language):
        """Return the language plus merged NER feature entities."""
        cleaned = self.preprocess_data(data, language)
        ner_hits = pipeline.predict(cleaned)
        merged = self.process_ner_results(ner_hits, self.CONFIDENCE_THRESHOLD)
        return {
            "language_detected": language,
            "features": merged,
        }
19
+
mappingservice/ms/ml_models/room_type.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .base import Base
2
+
3
+
4
class RoomType(Base):
    """Room-type classifier; falls back to "default" below the threshold."""

    CONFIDENCE_THRESHOLD = 0.7

    def predict(self, data: str, pipeline, language) -> dict:
        """Return ``{language_detected, label, score}``.

        The label is "default" when no prediction clears the threshold;
        the score then reflects the (rejected) top prediction, or 0 when
        the pipeline returned nothing.
        """
        predictions = pipeline.predict(data)
        top = predictions[0][0] if predictions else None

        if top and top["score"] > self.CONFIDENCE_THRESHOLD:
            return {
                "language_detected": language,
                "label": top["label"],
                "score": top["score"],
            }
        return {
            "language_detected": language,
            "label": "default",
            "score": top["score"] if top else 0,
        }
mappingservice/ms/ml_models/room_view.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .base import Base
2
+
3
+
4
class RoomView(Base):
    """NER wrapper detecting view mentions in a description."""

    CONFIDENCE_THRESHOLD = 0.6

    def predict(self, data, pipeline, language):
        """Return the language, whether any view was found, and the entities."""
        cleaned = self.preprocess_data(data, language)
        hits = pipeline.predict(cleaned)
        return {
            "language_detected": language,
            "has_views": bool(hits),
            "views": self.process_ner_results(hits, self.CONFIDENCE_THRESHOLD),
        }
mappingservice/ms/model_loader.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import time
3
+
4
+ import torch
5
+ from optimum.bettertransformer import BetterTransformer
6
+ from transformers import (
7
+ AutoModelForSequenceClassification,
8
+ AutoModelForTokenClassification,
9
+ AutoTokenizer,
10
+ pipeline,
11
+ )
12
+
13
+ from mappingservice.config import Settings
14
+
15
+
16
class ModelLoader:
    """Loads and caches Hugging Face pipelines per (logical model, language).

    ``model_names`` maps a logical name (e.g. "room_type") to a dict holding
    a "classification" task plus language -> HF repo id entries.
    """

    def __init__(self, settings: Settings, model_names):
        self.settings = settings
        self.model_names = model_names
        # Cache keyed by *logical* model name, then language.
        self.model_cache = {}
        self.token = self.settings.huggingface_access_token.get_secret_value()
        self.classification = "token-classification"

    def load_model(self, model_name):
        """Load model + tokenizer for a HF repo and apply BetterTransformer.

        The model class is chosen from the current ``self.classification``.
        """
        start_time = time.time()
        if self.classification == "token-classification":
            model = AutoModelForTokenClassification.from_pretrained(model_name, token=self.token)  # noqa: E501
        else:
            model = AutoModelForSequenceClassification.from_pretrained(model_name, token=self.token)  # noqa: E501

        tokenizer = AutoTokenizer.from_pretrained(model_name, token=self.token)
        model = BetterTransformer.transform(model)
        end_time = time.time()
        logging.info(f"Model {model_name} loaded in {end_time - start_time:.2f} seconds.")  # noqa: E501

        return model, tokenizer

    def get_model(self, model_name, language):
        """Return a cached pipeline for (model_name, language), building it on demand.

        Returns None for an unknown model or an unsupported language.

        Bug fixes vs. the previous version:
        - the cache was read under the logical model name but *written*
          under the resolved HF repo id, so lookups never hit;
        - the per-model cache dict was overwritten on every load, evicting
          other languages — it is now merged with ``setdefault``;
        - an unknown ``model_name`` no longer raises AttributeError.
        """
        cached = self.model_cache.get(model_name) or {}
        if language in cached:
            logging.info(f"Using cached model for language: {language}")
            return cached[language]

        model_config = self.model_names.get(model_name)
        if not model_config:
            logging.warning(f"Unknown model: {model_name}")
            return None

        self.classification = model_config.get("classification")

        repo_id = model_config.get(language)
        if not repo_id:
            logging.warning(f"Unsupported language: {language}")
            return None

        model, tokenizer = self.load_model(repo_id)

        pipeline_kwargs = {
            'task': self.classification,
            'model': model,
            'tokenizer': tokenizer,
            'token': self.token,
            'device': 0 if torch.cuda.is_available() else -1,
        }

        if self.classification == "token-classification":
            pipeline_kwargs['framework'] = 'pt'

        if self.classification == "text-classification":
            pipeline_kwargs['top_k'] = 1

        model_pipeline = pipeline(**pipeline_kwargs)

        # Cache under the logical name and merge so sibling languages survive.
        self.model_cache.setdefault(model_name, {})[language] = model_pipeline

        return model_pipeline
mappingservice/routers/__init__.py ADDED
File without changes
mappingservice/routers/admin.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter
2
+
3
# Router for operational endpoints (no ML models involved).
router = APIRouter(tags=["admin"])


@router.get("/health")
async def health():
    """Health check: returns a static OK payload while the process is up."""
    return {"health": "OK"}


@router.get("/status")
async def status():
    """Status check: currently static; mirrors /health."""
    return {"status": "OK"}
mappingservice/routers/room.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import time
3
+ from concurrent.futures import ThreadPoolExecutor
4
+
5
+ from fastapi import APIRouter, Query, Request
6
+
7
+ from mappingservice.constants import DEFAULT_LABEL, DEFAULT_SCORE
8
+ from mappingservice.dependencies import (
9
+ mc,
10
+ )
11
+ from mappingservice.models import (
12
+ AllPredictionsResponse,
13
+ PredictionResponse,
14
+ Predictions,
15
+ RoomData,
16
+ )
17
+ from mappingservice.ms.ml_models.bed_type import BedType as BedTypeModel
18
+ from mappingservice.ms.ml_models.environment import Environment
19
+ from mappingservice.ms.ml_models.room_category import RoomCategory
20
+ from mappingservice.ms.ml_models.room_features import RoomFeatures
21
+ from mappingservice.ms.ml_models.room_type import RoomType
22
+ from mappingservice.ms.ml_models.room_view import RoomView
23
+ from mappingservice.utils import (
24
+ get_bed_predictions,
25
+ process_predictions,
26
+ safe_round,
27
+ )
28
+
29
+ logging.basicConfig(
30
+ level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s"
31
+ )
32
+ logger = logging.getLogger(__name__)
33
+
34
+ router = APIRouter(
35
+ prefix="/predict/room", tags=["room"], responses={404: {"description": "Not found"}}
36
+ )
37
+
38
+
39
def get_room_type_prediction(room_description: str, language: str = "en"):
    """Run the room-type classifier for the given language.

    Bug fix: this previously instantiated the bed-type model while feeding it
    the room-type pipeline; it now uses ``RoomType`` (threshold + "default"
    fallback), matching the /type endpoint and the other helpers here.
    """
    pipeline = mc['room_type'][language]
    model = RoomType()
    return {"msg": model.predict(room_description, pipeline, language)}
43
+
44
+
45
def get_view_prediction(room_description: str, language: str = "en"):
    """Run the room-view NER model for the given language."""
    view_model = RoomView()
    view_pipeline = mc['room_view'][language]
    result = view_model.predict(room_description, view_pipeline, language)
    return {"view_prediction": result}
49
+
50
+
51
def get_room_category_prediction(room_description: str, language: str = "en"):
    """Run the room-category classifier for the given language."""
    category_model = RoomCategory()
    category_pipeline = mc['room_category'][language]
    return {"msg": category_model.predict(room_description, category_pipeline, language)}
55
+
56
+
57
def get_feature_prediction(room_description: str, language: str = "en"):
    """Run the room-features NER model for the given language."""
    features_model = RoomFeatures()
    features_pipeline = mc['room_features'][language]
    result = features_model.predict(room_description, features_pipeline, language)
    return {"feature_prediction": result}
61
+
62
+
63
def get_room_environment_prediction(room_description: str, language: str = "en"):
    """Run the room-environment classifier for the given language."""
    environment_model = Environment()
    environment_pipeline = mc['environment'][language]
    return {"msg": environment_model.predict(room_description, environment_pipeline, language)}
67
+
68
+
69
@router.post("/predict/beds")
async def predict_beds(request: Request, room_description: str = Query(...)):  # noqa: E501
    """Predict bed information for a room description.

    Uses the language set upstream on ``request.state.predicted_language``.
    """
    language = request.state.predicted_language
    pipeline = mc['bed_type'][language]
    model = BedTypeModel()
    prediction = model.predict(room_description, pipeline, language)

    return prediction
77
+
78
+
79
@router.get("/type")
async def predict_room_type_endpoint(request: Request, room_description: str = Query(...)):  # noqa: E501
    """Predict the room type using the language set upstream on request.state."""
    language = request.state.predicted_language
    pipeline = mc['room_type'][language]
    model = RoomType()
    prediction = model.predict(room_description, pipeline, language)

    return prediction
87
+
88
+
89
@router.get("/category")
async def predict_room_category_endpoint(request: Request, room_description: str = Query(...)):  # noqa: E501
    """Predict the room category.

    Consistency fix: use the detected language and the ``RoomCategory``
    wrapper (threshold + "default" fallback) instead of calling the raw
    English pipeline directly — mirroring the /type endpoint.
    """
    language = request.state.predicted_language
    pipeline = mc['room_category'][language]
    model = RoomCategory()
    return model.predict(room_description, pipeline, language)
93
+
94
+
95
@router.get("/environment")
async def predict_room_environment_endpoint(request: Request, room_description: str = Query(...)):  # noqa: E501
    """Predict room-environment labels.

    Consistency fix: use the detected language and the ``Environment``
    wrapper (thresholding, label formatting) instead of calling the raw
    English pipeline directly — mirroring the /type endpoint.
    """
    language = request.state.predicted_language
    pipeline = mc['environment'][language]
    model = Environment()
    return model.predict(room_description, pipeline, language)
99
+
100
+
101
@router.get("/view")
async def predict_view_endpoint(request: Request, room_description: str = Query(...)):  # noqa: E501
    """Predict view mentions in the description.

    Consistency fix: use the detected language and the ``RoomView`` wrapper
    (preprocessing + entity merging) instead of calling the raw English
    pipeline directly — mirroring the /type endpoint.
    """
    language = request.state.predicted_language
    pipeline = mc['room_view'][language]
    model = RoomView()
    return model.predict(room_description, pipeline, language)
105
+
106
+
107
@router.get("/feature")
async def predict_feature_endpoint(request: Request, room_description: str = Query(...)):  # noqa: E501
    """Predict room features in the description.

    Consistency fix: use the detected language and the ``RoomFeatures``
    wrapper (preprocessing + entity merging) instead of calling the raw
    English pipeline directly — mirroring the /type endpoint.
    """
    language = request.state.predicted_language
    pipeline = mc['room_features'][language]
    model = RoomFeatures()
    return model.predict(room_description, pipeline, language)
111
+
112
+
113
@router.post("/predict/all", response_model=AllPredictionsResponse)
async def predict_all(request: Request, room_data: RoomData):
    """Run every room model concurrently and aggregate the results.

    Fixes over the previous version:
    - drop the redundant re-parse of the request body (FastAPI has already
      validated ``room_data``);
    - ``room_data.beds`` may be None, a single entry, or a list of entries;
      previously a None value (or a single pydantic model) broke the bed
      filtering loop.
    """
    start_time = time.time()
    language = request.state.predicted_language

    # Each model call is blocking; fan the five models out across threads.
    with ThreadPoolExecutor() as executor:
        type_future = executor.submit(
            get_room_type_prediction, room_data.room_description, language
        )
        category_future = executor.submit(
            get_room_category_prediction, room_data.room_description, language
        )
        environment_future = executor.submit(
            get_room_environment_prediction, room_data.room_description, language
        )
        feature_future = executor.submit(
            get_feature_prediction, room_data.room_description, language
        )
        view_future = executor.submit(
            get_view_prediction, room_data.room_description, language
        )

        type_pred = type_future.result()["msg"]
        category_pred = category_future.result()["msg"]
        environment_pred_results = environment_future.result()["msg"]
        feature_pred_results = feature_future.result()["feature_prediction"]
        view_pred_results = view_future.result()["view_prediction"]

    # Normalise the optional beds field to a list of provided entries.
    if room_data.beds is None:
        provided_beds = []
    elif isinstance(room_data.beds, list):
        provided_beds = room_data.beds
    else:
        provided_beds = [room_data.beds]

    bed_predictions = [
        bed_data for bed_data in provided_beds
        if bed_data.type is not None and bed_data.count is not None
    ]

    if not bed_predictions:
        logger.debug("No bed data provided or valid; extracting from description.")
        extracted_beds = get_bed_predictions(room_data.room_description)
        if extracted_beds:
            bed_predictions.extend(extracted_beds)

    end_time = time.time()
    total_time = end_time - start_time
    logger.info(f"Total processing time: {total_time:.3f} seconds")

    # Single-label results: fall back to defaults when a key is missing.
    formatted_predictions = {
        "type": {
            "label": type_pred.get("label", DEFAULT_LABEL),
            "score": safe_round(type_pred.get("score", DEFAULT_SCORE), 3),
        },
        "category": {
            "label": category_pred.get("label", DEFAULT_LABEL),
            "score": safe_round(category_pred.get("score", DEFAULT_SCORE), 3),
        },
    }

    env_preds = process_predictions(environment_pred_results)
    feat_preds = process_predictions(
        feature_pred_results.get("features", []), label_key="word"
    )
    view_preds = process_predictions(
        view_pred_results.get("views", []), label_key="word"
    )

    predictions = Predictions(
        type=PredictionResponse(**formatted_predictions["type"]),
        category=PredictionResponse(**formatted_predictions["category"]),
        environment=[PredictionResponse(**pred) for pred in env_preds] if env_preds else [],  # noqa: E501
        feature=[PredictionResponse(**pred) for pred in feat_preds] if feat_preds else [],  # noqa: E501
        view=[PredictionResponse(**pred) for pred in view_preds] if view_preds else [],
        language_detected=language,
        beds=bed_predictions,
    )

    return AllPredictionsResponse(predictions=predictions)
mappingservice/utils.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging.config
2
+ import re
3
+ from typing import List
4
+
5
+ import psutil
6
+ from langdetect import LangDetectException, detect
7
+
8
+ from mappingservice.constants import ENGLISH_KEYWORDS, SPANISH_KEYWORDS
9
+ from mappingservice.models import BedData, BedType
10
+
11
+ logging.basicConfig(
12
+ level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s"
13
+ )
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
def log_memory_usage():
    """Log the current process's RSS and VMS memory usage (bytes)."""
    process = psutil.Process()
    memory_info = process.memory_info()
    logging.info(f"Memory usage: RSS={memory_info.rss}, VMS={memory_info.vms}")
21
+
22
+
23
+ def safe_round(value, decimals=0):
24
+ return round(value, decimals) if isinstance(value, int | float) else 0
25
+
26
+
27
def process_predictions(predictions, score_key="score", label_key="label"):
    """Keep dict predictions with a positive score, rounding scores to 3 dp.

    Returns None (not an empty list) when nothing qualifies, so callers can
    treat "no predictions above zero" as missing data.
    """
    kept = []
    for pred in predictions:
        if not isinstance(pred, dict):
            continue
        score = pred.get(score_key, 0)
        if score > 0:
            kept.append({"label": pred.get(label_key, "No data"), "score": round(score, 3)})
    return kept or None
32
+
33
+
34
def parse_model_output(predictions):
    """Convert raw model predictions into ``BedData`` entries.

    Only a list of dicts is accepted; each entry must have score > 0.5, a
    positive count, and a bed type matching a ``BedType`` member name.
    Non-qualifying bed types are logged at debug level.
    """
    threshold = 0.5
    parsed = []

    if not isinstance(predictions, list):
        return parsed
    if not all(isinstance(pred, dict) for pred in predictions):
        return parsed

    for prediction in predictions:
        bed_type_name = prediction.get("type")
        bed_count = prediction.get("count", 0)
        confidence = prediction.get("score", 0)

        if confidence <= threshold or bed_count <= 0:
            continue
        if bed_type_name in BedType._member_names_:
            parsed.append(BedData(type=BedType[bed_type_name], count=bed_count))
        else:
            logger.debug(f"Unsupported bed type: {bed_type_name}")

    return parsed
56
+
57
+
58
def get_bed_predictions(description: str):
    """Extract bed types and counts from a free-text room description.

    Two regex passes over the lower-cased text:
    - ``bed_pattern`` matches "<n> <type> bed(s)/cama(s)" phrases; the count
      defaults to 1 when no number precedes the type;
    - ``or_pattern`` matches "<type> or <type>" alternatives, counting each
      mentioned type once (accumulating).

    NOTE(review): in the plain-phrase pass a repeated type *overwrites* the
    previous count instead of accumulating — confirm this is intended.
    Returns a list of ``BedData`` (possibly empty).
    """
    logger.debug(f"Extracting bed predictions from description: {description}")

    description = description.lower()

    bed_pattern = re.compile(
        r"(\d+)?\s*\b(king|queen|double|single|twin|bunk|sofa|rollaway|futon|"
        r"cama\s*king|cama\s*queen|cama\s*double|cama\s*single|cama\s*twin|"
        r"cama\s*bunk|cama\s*sofa|cama\s*rollaway|cama\s*futon)\b\s*(beds?|camas?)", re.IGNORECASE)  # noqa E501

    or_pattern = re.compile(
        r"\b(king|queen|double|single|twin)\s+or\s+(king|queen|double|single|twin)\b", re.IGNORECASE)  # noqa E501

    # default_bed_type_pattern = re.compile(
    #     r"\b(double|twin|king|queen|single|doble|individual|gemela|matrimonial)\b\s+"
    #     r"\b(room|apartment|suite|habitacion|apartamento)\b", re.IGNORECASE)

    matches = bed_pattern.findall(description)
    or_matches = or_pattern.findall(description)
    # default_matches = default_bed_type_pattern.findall(description)

    bed_data_list = []
    bed_type_counts = {}

    for match in matches:
        # match = (optional count, bed-type text, trailing "bed(s)"/"cama(s)").
        count = int(match[0]) if match[0] else 1
        bed_type_name = match[1]
        normalized_bed_type = bed_type_name.strip().lower()
        # "cama <type>" variants contain a space and never match a member
        # name, so they are silently dropped here — TODO confirm.
        bed_type = BedType[normalized_bed_type] if normalized_bed_type in BedType._member_names_ else None  # noqa: E501

        if bed_type:
            bed_type_counts[bed_type] = count

    for match in or_matches:
        for bed_type_name in match:
            normalized_bed_type = bed_type_name.strip().lower()
            bed_type = BedType[normalized_bed_type] if normalized_bed_type in BedType._member_names_ else None  # noqa: E501
            if bed_type:
                bed_type_counts[bed_type] = bed_type_counts.get(bed_type, 0) + 1

    for bed_type, count in bed_type_counts.items():
        bed_data_list.append(BedData(type=bed_type, count=count))

    if not bed_data_list:
        logger.warning("No valid bed data found from extracted information.")

    return bed_data_list
107
+
108
def extract_bed_numbers(description: str):
    """Return the numbers preceding the word "bed" (digits or one/two/three)."""
    word_to_number = {"one": 1, "two": 2, "three": 3}
    bed_number_pattern = re.compile(
        r"(\d+|\bone\b|\btwo\b|\bthree\b)\s*bed", re.IGNORECASE
    )
    return [
        int(word_to_number.get(token.lower(), token))
        for token in bed_number_pattern.findall(description)
    ]
121
+
122
+
123
def validate_bed_data(beds: List[BedData]) -> List[BedData]:
    """Filter out beds lacking a type or a positive count; log when none remain."""
    valid_beds = []
    for bed in beds:
        if bed.type and bed.count > 0:
            valid_beds.append(bed)
    if not valid_beds:
        logger.info(f"No valid beds found in {beds}")
    return valid_beds
128
+
129
+
130
def is_list_of_lists(variable):
    """Return True when ``variable`` is a list whose elements are all lists."""
    return isinstance(variable, list) and all(
        isinstance(item, list) for item in variable
    )
134
+
135
+
136
def predict_language(text):
    """Best-effort language detection limited to English/Spanish.

    Keyword lists take precedence (Spanish checked first), then langdetect
    is consulted; returns "es", "en", or None when neither applies.
    """
    text_lower = text.lower()

    # Spanish keywords win over English ones when both appear.
    for keyword in SPANISH_KEYWORDS:
        if keyword in text_lower:
            return "es"

    for keyword in ENGLISH_KEYWORDS:
        if keyword in text_lower:
            return "en"

    try:
        # langdetect may raise on empty/ambiguous input; treat as unknown.
        language = detect(text)
        if language in {"en", "es"}:
            return language
    except LangDetectException:
        pass

    return None
poetry.lock ADDED
The diff for this file is too large to render. See raw diff
 
pyproject.toml ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [tool.poetry]
2
+ name = "mappingservice"
3
+ version = "0.1.0"
4
+ description = ""
5
+ authors = ["oscar <[email protected]>"]
6
+ readme = "README.md"
7
+
8
+ [tool.poetry.dependencies]
9
+ python = "^3.11"
10
+ psutil = "^5.9.8"
11
+ fastapi = "^0.111.0"
12
+ uvicorn = {extras = ["standard"], version = "^0.29.0"}
13
+ pydantic = {version = "^2.7.1", extras = ["email"]}
14
+ pydantic-settings = "^2.2.1"
15
+ transformers = "^4.40.2"
16
+ torch = {version = "^2.3.0", source = "pytorch-cpu"}
17
+ langdetect = "^1.0.9"
18
+ accelerate = "^0.30.1"
19
+ optimum = "^1.19.2"
20
+ onnx = "^1.16.0"
21
+ onnxruntime = "^1.18.0"
22
+
23
+ [tool.poetry.group.dev.dependencies]
24
+ pytest = "^8.2.0"
25
+ pytest-cov="^5.0.0"
26
+ pre-commit="^3.7.0"
27
+ ruff="^0.4.4"
28
+
29
+ [tool.poetry.group.benchmark.dependencies]
30
+ locust = "^2.27.0"
31
+
32
+ [tool.pytest.ini_options]
33
+ addopts = "-vv -rA --disable-warnings --cov=mappingservice --cov-report term-missing"
34
+ testpaths = [
35
+ "tests",
36
+ ]
37
+ asyncio_mode = "auto"
38
+ pythonpath = [
39
+ "."
40
+ ]
41
+
42
+ [tool.ruff]
43
+ # Same as Black.
44
+ line-length = 88
45
+ indent-width = 4
46
+
47
+ # Assume Python 3.11
48
+ target-version = "py311"
49
+
50
+ # Exclude a variety of commonly ignored directories.
51
+ lint.exclude = [
52
+ ".bzr",
53
+ ".direnv",
54
+ ".eggs",
55
+ ".git",
56
+ ".git-rewrite",
57
+ ".hg",
58
+ ".ipynb_checkpoints",
59
+ ".mypy_cache",
60
+ ".nox",
61
+ ".pants.d",
62
+ ".pyenv",
63
+ ".pytest_cache",
64
+ ".pytype",
65
+ ".ruff_cache",
66
+ ".svn",
67
+ ".tox",
68
+ ".venv",
69
+ ".vscode",
70
+ "__pypackages__",
71
+ "_build",
72
+ "buck-out",
73
+ "build",
74
+ "dist",
75
+ "node_modules",
76
+ "site-packages",
77
+ "venv",
78
+ ]
79
+
80
+ lint.select = [
81
+ "E", # pycodestyle errors
82
+ "W", # pycodestyle warnings
83
+ "F", # pyflakes
84
+ "I", # isort
85
+ "C", # flake8-comprehensions
86
+ "B", # flake8-bugbear
87
+ "UP", # pyupgrade
88
+ ]
89
+ lint.ignore = [
90
+ "B008", # do not perform function calls in argument defaults
91
+ "C901", # too complex
92
+ "W191", # indentation contains tabs
93
+ "E711", # `None` should be `cond is None`
94
+ ]
95
+
96
+ [tool.ruff.format]
97
+ # Like Black, use double quotes for strings.
98
+ quote-style = "double"
99
+
100
+ # Like Black, indent with spaces, rather than tabs.
101
+ indent-style = "space"
102
+
103
+ # Like Black, respect magic trailing commas.
104
+ skip-magic-trailing-comma = false
105
+
106
+ # Like Black, automatically detect the appropriate line ending.
107
+ line-ending = "auto"
108
+
109
+ # Enable auto-formatting of code examples in docstrings. Markdown,
110
+ # reStructuredText code/literal blocks and doctests are all supported.
111
+ #
112
+ # This is currently disabled by default, but it is planned for this
113
+ # to be opt-out in the future.
114
+ docstring-code-format = false
115
+
116
+ # Set the line length limit used when formatting code snippets in
117
+ # docstrings.
118
+ #
119
+ # This only has an effect when the `docstring-code-format` setting is
120
+ # enabled.
121
+ docstring-code-line-length = "dynamic"
122
+
123
+ [tool.ruff.lint.per-file-ignores]
124
+ "__init__.py" = ["F401"]
125
+ "**/{tests,docs,tools}/*" = ["E402"]
126
+
127
+ [tool.ruff.lint.pyupgrade]
128
+ # Preserve types, even if a file imports `from __future__ import annotations`.
129
+ keep-runtime-typing = true
130
+
131
+ [build-system]
132
+ requires = ["poetry-core"]
133
+ build-backend = "poetry.core.masonry.api"
134
+
135
+ [[tool.poetry.source]]
136
+ name = "pytorch-cpu"
137
+ url = "https://download.pytorch.org/whl/cpu"
138
+ priority = "supplemental"
tests/__init__.py ADDED
File without changes
tests/test_langdetect.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+
3
+ import pytest
4
+ import torch
5
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
6
+
7
+ from mappingservice.utils import predict_language
8
+
9
+
10
@pytest.fixture
def classifier():
    """Text-classification pipeline used as fallback language detector."""
    model_path = "papluca/xlm-roberta-base-language-detection"
    model = AutoModelForSequenceClassification.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    classification = pipeline(
        "text-classification",
        model=model,
        tokenizer=tokenizer,
        framework="pt",
        device=0 if torch.cuda.is_available() else -1,
    )

    return classification
24
+
25
+
26
def test_model_predictions(classifier):
    """Keyword-based detection first, model fallback second, per test case."""
    test_data = [
        {'input': 'Habitacion estandar con bano', 'expected_response': 'es'},
        {'input': 'apartamento de lujo con vistas al mar', 'expected_response': 'es'},  # noqa: E501
        {'input': 'casa ejecutiva', 'expected_response': 'es'},
        {'input': 'villa doble', 'expected_response': 'es'},
        {'input': 'estudio de una habitacion de lujo', 'expected_response': 'es'},
        {'input': 'chalet premier con dos habitaciones', 'expected_response': 'es'},
        {'input': 'casa de la playa premium con bano compartido', 'expected_response': 'es'},  # noqa: E501
        {'input': 'estudio familiar grande', 'expected_response': 'es'},
        {'input': 'suite familiar junior', 'expected_response': 'en'},
        {'input': 'bungalow tradicional sin bano', 'expected_response': 'es'},
        {'input': 'superior room 1 king superior room 1 king cupola or courtyard view french style 36sqm 385sq', 'expected_response': 'en'},  # noqa: E501
        {'input': 'habitacion matrimonial adaptada discapacitados', 'expected_response': 'es'},  # noqa: E501
        {'input': 'privilege room twin for 2 adults 0 children and 0 infants', 'expected_response': 'en'},  # noqa: E501
        {'input': 'deluxe room double for 2 adults 0 children and 0 infants', 'expected_response': 'en'},  # noqa: E501
        {'input': 'premier palace double room', 'expected_response': 'en'},
        {'input': 'double single use deluxe', 'expected_response': 'en'},
        {'input': 'double room queen bed superior', 'expected_response': 'en'},
        {'input': 'double guest room', 'expected_response': 'en'},
        {'input': 'single room for 1 adults 0 children and 0 infants', 'expected_response': 'en'},  # noqa: E501
        {'input': 'twin premium room incl evening tasting welcome gift comp wifi 28 sqm espresso fridge bathrobe', 'expected_response': 'en'},  # noqa: E501
        {'input': 'superior quadruple room', 'expected_response': 'en'},
        {'input': 'superior one bedroom apartment x2013 2 adults', 'expected_response': 'en'},  # noqa: E501
        {'input': 'deluxe room double for 2 adults 0 children and 0 infants', 'expected_response': 'en'},  # noqa: E501
        {'input': 'premier palace double room', 'expected_response': 'en'},
        {'input': 'double single use deluxe', 'expected_response': 'en'},
        {'input': 'double room queen bed superior', 'expected_response': 'en'},
        {'input': 'double guest room', 'expected_response': 'en'},
        {'input': 'single room for 1 adults 0 children and 0 infants', 'expected_response': 'en'},  # noqa: E501
        {'input': 'twin premium room incl evening tasting welcome gift comp wifi 28 sqm espresso fridge bathrobe', 'expected_response': 'en'},  # noqa: E501
        {'input': 'superior quadruple room', 'expected_response': 'en'},
        {'input': 'superior one bedroom apartment x2013 2 adults', 'expected_response': 'en'},  # noqa: E501
        {'input': 'comfort double', 'expected_response': 'en'},
        {'input': '1 king bed suite nonsmoking', 'expected_response': 'en'},
        {'input': 'junior suite 1 king bed nonsmoking', 'expected_response': 'en'},
        {'input': 'family room superior', 'expected_response': 'en'}
    ]

    for test_case in test_data:
        description = test_case["input"]
        expected_label = test_case["expected_response"]
        # First, try to predict based on keywords
        predicted_label = predict_language(description)

        # If no prediction was made, fallback to model prediction
        if not predicted_label:
            print(f"Fallback to model prediction for '{description}'")
            result = classifier(description)
            predicted_label = result[0]["label"]

        assert (
            predicted_label == expected_label
        ), f"Incorrect prediction for '{description}': expected '{expected_label}', obtained '{predicted_label}'"  # noqa: E501
tests/test_roomcategory.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+
3
+ import pytest
4
+ import torch
5
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
6
+
7
+
8
@pytest.fixture
def classifier():
    """Room-category text-classification pipeline; re-raises load failures."""
    model_path = "travelgate/room_category-classifier"
    try:
        model = AutoModelForSequenceClassification.from_pretrained(model_path)
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        classification = pipeline(
            "text-classification",
            model=model,
            tokenizer=tokenizer,
            framework="pt",
            device=0 if torch.cuda.is_available() else -1,
        )
        print("Modelo y tokenizer cargados exitosamente.")
    except Exception as e:
        print(f"Error al cargar el modelo o tokenizer: {e}")
        raise e

    return classification
27
+
28
+
29
def test_model_predictions(classifier):
    """Check the category label predicted for each room description."""
    test_data = [
        {'input': 'comfort double', 'expected_response': 'comfort'},
        {'input': 'full size bed 1 big bed', 'expected_response': 'default'},
        {'input': '1 king bed suite nonsmoking', 'expected_response': 'default'},
        {'input': 'junior suite 1 king bed nonsmoking', 'expected_response': 'junior'},
        {'input': 'superior sea view', 'expected_response': 'superior'},
        {'input': 'family room superior', 'expected_response': 'superior'},
        {'input': '1 king standard', 'expected_response': 'standard'},
        {'input': 'superior king balcon vista al mar', 'expected_response': 'superior'},
        {'input': 'standard double room king bed accessible', 'expected_response': 'standard'},  # noqa: E501
        {'input': 'sagamore garden view suite standard', 'expected_response': 'standard'},  # noqa: E501
        {'input': 'standard 1 king bed courtyard view non smoking', 'expected_response': 'standard'},  # noqa: E501
        {'input': 'suite double for 2 adults 0 children and 0 infants', 'expected_response': 'default'},  # noqa: E501
        {'input': 'one bedroom superior king suite with ocean view non smoking', 'expected_response': 'superior'},  # noqa: E501
        {'input': 'omnia deluxe triple for 2 adults 0 children and 0 infants', 'expected_response': 'deluxe'},  # noqa: E501
        {'input': 'king premium room incl evening tasting welcome gift comp wifi 27 30 sqm espresso fridge bathrobe', 'expected_response': 'premium'},  # noqa: E501
        {'input': 'standard twin double room single use', 'expected_response': 'standard'},  # noqa: E501
        {'input': 'habitacion 2 camas deluxe', 'expected_response': 'deluxe'},
        {'input': 'standard 2 double beds courtyard view non smoking', 'expected_response': 'standard'},  # noqa: E501
        {'input': 'habitacion estandar con bano', 'expected_response': 'standard'},
        {'input': 'superior room 1 king superior room 1 king cupola or courtyard view french style 36sqm 385sq', 'expected_response': 'superior'},  # noqa: E501
        {'input': 'habitacion estandar matrimonial adaptada discapacitados', 'expected_response': 'standard'},  # noqa: E501
        {'input': 'privilege twin for 2 adults 0 children and 0 infants', 'expected_response': 'privilege'},  # noqa: E501
        {'input': 'classic single for 1 adults 0 children and 0 infants', 'expected_response': 'classic'},  # noqa: E501
        {'input': 'deluxe room double for 2 adults 0 children and 0 infants', 'expected_response': 'deluxe'},  # noqa: E501
        {'input': 'gallery two queens', 'expected_response': 'default'},
        {'input': 'premier palace double room', 'expected_response': 'premier'},
        {'input': 'double single use deluxe', 'expected_response': 'deluxe'},
        {'input': 'double room queen bed superior', 'expected_response': 'superior'},
        {'input': 'double guest standard room', 'expected_response': 'standard'},
        {'input': '2 queen beds disney view non smoking', 'expected_response': 'default'},  # noqa: E501
        {'input': 'standard single room for 1 adults 0 children and 0 infants', 'expected_response': 'standard'},  # noqa: E501
        {'input': 'twin premium room incl evening tasting welcome gift comp wifi 28 sqm espresso fridge bathrobe', 'expected_response': 'premium'},  # noqa: E501
        {'input': 'superior quadruple room', 'expected_response': 'superior'},
        {'input': 'superior one bedroom apartment x2013 2 adults', 'expected_response': 'superior'}  # noqa: E501
    ]

    for test_case in test_data:
        description = test_case["input"]
        expected_category = test_case["expected_response"]
        result = classifier(description)[0]
        predicted_category = result["label"]

        assert (
            predicted_category == expected_category
        ), f"Incorrect prediction for '{description}': expected '{expected_category}', obtained '{predicted_category}'"  # noqa: E501
tests/test_roomenvironment.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+
3
+ import pytest
4
+ import torch
5
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
6
+
7
+
8
@pytest.fixture
def classifier():
    """Build a text-classification pipeline for the room-environment model.

    Any download/load failure propagates directly to pytest, which reports
    it with a full traceback — no need to print-and-reraise here.  This
    also keeps the fixture consistent with the sibling fixtures in
    tests/test_roomtype.py.
    """
    model_path = "travelgate/room_environment-classifier"
    model = AutoModelForSequenceClassification.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    # device=0 selects the first GPU when available; -1 means CPU in the
    # transformers pipeline API.
    return pipeline(
        "text-classification",
        model=model,
        tokenizer=tokenizer,
        framework="pt",
        device=0 if torch.cuda.is_available() else -1,
    )
27
+
28
+
29
def test_model_predictions(classifier):
    """Check the room-environment classifier on a fixed set of descriptions.

    Each case pairs a free-text room description with the environment label
    the model is expected to predict.
    """
    test_data = [
        {'input': 'comfort double with shared bed', 'expected_response': 'shared bed'},
        {'input': 'habitacion con banno compartido', 'expected_response': 'shared bathroom'},  # noqa: E501
        {'input': 'habitacion con bano compartido', 'expected_response': 'shared bathroom'},  # noqa: E501
        {'input': 'full size bed 1 big bed with shared bathroom', 'expected_response': 'shared bathroom'},  # noqa: E501
        {'input': '1 king bed suite nonsmoking shared bathroom', 'expected_response': 'shared bathroom'},  # noqa: E501
        {'input': 'junior suite 1 king bed nonsmoking shared bed', 'expected_response': 'shared bed'},  # noqa: E501
        {'input': 'superior sea view connected room', 'expected_response': 'connected'},
        {'input': 'twin room with two shared beds', 'expected_response': 'shared bed'},
        {'input': 'double room with connected rooms available', 'expected_response': 'connected'},  # noqa: E501
        {'input': 'two queen beds with shared bathroom access', 'expected_response': 'shared bathroom'},  # noqa: E501
        {'input': 'suite with king bed and connected room option', 'expected_response': 'connected'},  # noqa: E501
        {'input': 'shared dormitory room mixed gender', 'expected_response': 'shared room'},  # noqa: E501
        {'input': 'private room with shared bathroom facilities', 'expected_response': 'shared bathroom'},  # noqa: E501
        {'input': 'family suite with connected rooms', 'expected_response': 'connected'},  # noqa: E501
        {'input': 'bunk bed in mixed shared room', 'expected_response': 'shared room'},
        {'input': 'deluxe queen room with two queen beds shared bathroom', 'expected_response': 'shared bathroom'},  # noqa: E501
        {'input': 'hostel dorm bed shared room', 'expected_response': 'shared room'},
        {'input': 'economy double room with shared bathroom outside the room', 'expected_response': 'shared bathroom'},  # noqa: E501
        {'input': 'luxury suite with sea view and private connected rooms', 'expected_response': 'connected'},  # noqa: E501
        {'input': 'single bed in co-ed shared hostel room', 'expected_response': 'shared room'},  # noqa: E501
        {'input': 'executive suite with optional connecting room', 'expected_response': 'connected'},  # noqa: E501
        {'input': 'standard twin room shared beds', 'expected_response': 'shared bed'},
        {'input': 'cozy single room with access to shared bathroom', 'expected_response': 'shared bathroom'},  # noqa: E501
        {'input': 'spacious family room with interconnecting doors', 'expected_response': 'connected'},  # noqa: E501
        {'input': 'shared female dormitory for 4 guests', 'expected_response': 'shared room'},  # noqa: E501
        {'input': 'luxury double room with shared bed arrangements', 'expected_response': 'shared bed'},  # noqa: E501
        {'input': 'master suite with a shared bedroom balcony and sea view', 'expected_response': 'shared room'},  # noqa: E501
        {'input': 'budget twin room with shared bath', 'expected_response': 'shared bathroom'},  # noqa: E501
        {'input': 'two-bedroom apartment with connecting options', 'expected_response': 'connected'},  # noqa: E501
        {'input': 'en-suite room with shared bathroom', 'expected_response': 'shared bathroom'},  # noqa: E501
        {'input': 'single guest room with shared bed', 'expected_response': 'shared bed'},  # noqa: E501
        {'input': 'shared room in a modern loft city center', 'expected_response': 'shared room'},  # noqa: E501
        {'input': 'double bed in a shared room apartment', 'expected_response': 'shared room'},  # noqa: E501
        {'input': 'studio with shared bath', 'expected_response': 'shared bathroom'},
        {'input': 'penthouse suite connected rooms available upon request', 'expected_response': 'connected'},  # noqa: E501
        {'input': "backpacker's special one bed in a shared room of six", 'expected_response': 'shared room'}  # noqa: E501
    ]

    for case in test_data:
        description = case["input"]
        expected_environment = case["expected_response"]
        # The pipeline returns a list of {label, score} dicts; only the top
        # label matters here.
        predicted_environment = classifier(description)[0]["label"]

        assert predicted_environment == expected_environment, (
            f"Incorrect prediction for '{description}': expected '{expected_environment}', obtained '{predicted_environment}'"  # noqa: E501
        )
tests/test_roomtype.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+
3
+ import pytest
4
+ import torch
5
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
6
+
7
+
8
@pytest.fixture
def classifier():
    """Return a text-classification pipeline backed by the room_type model."""
    model_path = "travelgate/room_type-classifier"
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForSequenceClassification.from_pretrained(model_path)
    # Prefer the first GPU when one is present; -1 means CPU in the
    # transformers pipeline API.
    device = 0 if torch.cuda.is_available() else -1
    return pipeline(
        "text-classification",
        model=model,
        tokenizer=tokenizer,
        framework="pt",
        device=device,
    )
22
+
23
+
24
def test_model_predictions(classifier):
    """Check the room-type classifier on a fixed set of descriptions.

    Each case pairs a free-text room description with the accommodation-type
    label the model is expected to predict.  Nine cases that appeared twice
    verbatim in an earlier revision have been deduplicated — rerunning the
    identical assertion added no coverage.
    """
    test_data = [
        {'input': 'Habitacion estandar con bano', 'expected_response': 'room'},
        {'input': 'apartamento de lujo con vistas al mar', 'expected_response': 'apartment'},  # noqa: E501
        {'input': 'casa ejecutiva', 'expected_response': 'house'},
        {'input': 'villa doble', 'expected_response': 'villa'},
        {'input': 'estudio de una habitacion de lujo', 'expected_response': 'studio'},
        {'input': 'chalet premier con dos habitaciones', 'expected_response': 'chalet'},
        {'input': 'casa de la playa premium con bano compartido', 'expected_response': 'beach house'},  # noqa: E501
        {'input': 'estudio familiar grande', 'expected_response': 'family studio'},
        {'input': 'suite familiar junior', 'expected_response': 'family suite'},
        {'input': 'bungalow tradicional sin bano', 'expected_response': 'bungalow'},
        {'input': 'superior room 1 king superior room 1 king cupola or courtyard view french style 36sqm 385sq', 'expected_response': 'room'},  # noqa: E501
        {'input': 'habitacion matrimonial adaptada discapacitados', 'expected_response': 'room'},  # noqa: E501
        {'input': 'privilege room twin for 2 adults 0 children and 0 infants', 'expected_response': 'room'},  # noqa: E501
        {'input': 'deluxe room double for 2 adults 0 children and 0 infants', 'expected_response': 'room'},  # noqa: E501
        {'input': 'premier palace double room', 'expected_response': 'room'},
        {'input': 'double single use deluxe', 'expected_response': 'default'},
        {'input': 'double room queen bed superior', 'expected_response': 'room'},
        {'input': 'double guest room', 'expected_response': 'room'},
        {'input': 'single room for 1 adults 0 children and 0 infants', 'expected_response': 'room'},  # noqa: E501
        {'input': 'twin premium room incl evening tasting welcome gift comp wifi 28 sqm espresso fridge bathrobe', 'expected_response': 'room'},  # noqa: E501
        {'input': 'superior quadruple room', 'expected_response': 'room'},
        {'input': 'superior one bedroom apartment x2013 2 adults', 'expected_response': 'apartment'},  # noqa: E501
        {'input': 'comfort double', 'expected_response': 'default'},
        {'input': '1 king bed suite nonsmoking', 'expected_response': 'suite'},
        {'input': 'junior suite 1 king bed nonsmoking', 'expected_response': 'suite'},
        {'input': 'family room superior', 'expected_response': 'room'}
    ]

    for test_case in test_data:
        description = test_case["input"]
        expected_label = test_case["expected_response"]
        # The pipeline returns a list of {label, score} dicts; only the top
        # label matters here.
        predicted_label = classifier(description)[0]["label"]

        assert predicted_label == expected_label, (
            f"Incorrect prediction for '{description}': expected '{expected_label}', obtained '{predicted_label}'"  # noqa: E501
        )
tests/test_roomview.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ import pytest
3
+ from langdetect import detect_langs
4
+
5
+ import mappingservice.dependencies as deps
6
+ from mappingservice.config import Settings
7
+ from mappingservice.constants import AVAILABLE_LANGUAGES, MODEL_NAMES
8
+ from mappingservice.ms.model_loader import ModelLoader
9
+ from mappingservice.utils import predict_language
10
+
11
# Module-level singletons shared by every test in this file: one Settings
# instance and one ModelLoader over all known model names.
# NOTE(review): these run at import time, so model-loader construction cost
# is paid during test collection — TODO confirm whether deferring this into
# the fixture would be preferable.
settings = Settings()
ml_model = ModelLoader(settings, MODEL_NAMES)
13
+
14
+
15
@pytest.fixture
def classifier():
    """Preload the room_view model for every supported language.

    Fills the shared ``deps.mc`` model cache with one pipeline per language,
    then returns an accessor mapping a language code to its pipeline.
    """
    for lang in AVAILABLE_LANGUAGES:
        model_pipeline = ml_model.get_model('room_view', lang)
        # setdefault replaces the previous try/except-KeyError dance for
        # initializing the nested dict: one call, same result.
        deps.mc.setdefault('room_view', {})[lang] = model_pipeline

    def get_model(language):
        return deps.mc['room_view'][language]

    return get_model
30
+
31
+
32
def test_model_predictions(classifier):
    """Check the room-view NER model on descriptions in several languages.

    For each description, detect its language, fetch the matching pipeline
    from the fixture, and assert the expected view token appears among the
    predicted entities.
    """
    test_data = [
        {'input': 'studio with ruins view', 'expected_response': 'ruins'},
        {'input': 'suite with pool view', 'expected_response': 'pool'},
        {'input': 'executive room garden view', 'expected_response': 'garden'},
        {'input': 'studio two bedroom vineyard view 4 guests wine country charm', 'expected_response': 'vineyard'},  # noqa: E501
        {'input': 'superior suite city view', 'expected_response': 'city'},
        {'input': 'room with a balcony and harbour views', 'expected_response': 'harbour'},  # noqa: E501
        {'input': 'junior studio stars views', 'expected_response': 'stars'},
        {'input': 'room park views', 'expected_response': 'park'},
        {'input': 'loft two bedroom and marina view 4 adults coastal luxury', 'expected_response': 'marina'},  # noqa: E501
        {'input': 'house sea view', 'expected_response': 'sea'},
        {'input': 'villa hill views', 'expected_response': 'hill'},
        {'input': 'room park view', 'expected_response': 'park'},
        {'input': 'townhouse one bedroom and park view 2 adults urban charm', 'expected_response': 'park'},  # noqa: E501
        {'input': 'suite garden view', 'expected_response': 'garden'},
        {'input': 'twin room ocean view with balcony', 'expected_response': 'ocean'},
        {'input': 'residencia con vistas al mar', 'expected_response': 'mar'},
        {'input': 'habitacion con vistas a la piscina', 'expected_response': 'piscina'},
        {'input': 'habitacion ejecutiva con vistas al jardin', 'expected_response': 'jardin'},  # noqa: E501
        # {'input': 'habitacion con vistas al oceano', 'expected_response': 'oceano'},  # noqa: E501
        {'input': 'suite superior con vistas a la ciudad', 'expected_response': 'ciudad'},  # noqa: E501
        # {'input': 'habitacion con balcon con vistas a la bahia', 'expected_response': 'bahia'},  # noqa: E501
        {'input': 'estudio junior con vistas a las estrellas', 'expected_response': 'estrellas'},  # noqa: E501
        {'input': 'habitacion con vistas al parque', 'expected_response': 'parque'},
        {'input': 'habitacion con vistas a la ciudad', 'expected_response': 'ciudad'},
        {'input': 'casa con vista al mar', 'expected_response': 'mar'},
        {'input': 'corner suite pool view', 'expected_response': 'pool'},
        {'input': 'estudio con vista al parque', 'expected_response': 'parque'},
        {'input': 'estudio con vista al jardin', 'expected_response': 'jardin'},
        {'input': 'residencia con vista al amazonas', 'expected_response': 'amazonas'},
        {'input': 'habitacion con vista a la montana', 'expected_response': 'montana'},
        {'input': 'residencia moderno con vista al park', 'expected_response': 'park'},
        {'input': 'dormitorio con vistas al lago', 'expected_response': 'lago'},
        # {'input': 'dormitorio con vistas a la bahia', 'expected_response': 'bahia'},
        {'input': 'classic ocean view', 'expected_response': 'ocean'}
    ]

    # NOTE(review): this test case will always pass until we solve the
    # mystery with the model data — TODO confirm and tighten.
    print("Test data length:", len(test_data))

    for test_case in test_data:
        description = test_case["input"]
        expected_view = test_case["expected_response"]

        # Take the first langdetect candidate we actually support; the old
        # code reassigned `language` from a list to a string mid-loop, then
        # compared the leftover list against AVAILABLE_LANGUAGES on a miss.
        language = next(
            (cand.lang for cand in detect_langs(description)
             if cand.lang in AVAILABLE_LANGUAGES),
            None,
        )
        if language is None:
            # Fall back to the in-house predictor when langdetect offers no
            # supported candidate.
            language = predict_language(description)

        cls = classifier(language)
        processed_results = cls.predict(description)

        predicted_views = [entity["word"] for entity in processed_results]
        assert expected_view in predicted_views, f"Incorrect prediction for '{description}' using '{language}' language: expected '{expected_view}', obtained '{predicted_views}'"  # noqa: E501