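"""FastAPI entry point for the mapping service.

Loads the ML model pipelines at startup via the lifespan handler, installs
error-handling and language-detection middleware, and mounts the routers.
"""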
import logging.config
import time
from contextlib import asynccontextmanager

import psutil
from fastapi import Depends, FastAPI, Request
from fastapi.responses import JSONResponse

import mappingservice.dependencies as deps
from mappingservice.constants import AVAILABLE_LANGUAGES, MODEL_NAMES
from mappingservice.dependencies import get_api_key, get_settings
from mappingservice.ms.model_loader import ModelLoader
from mappingservice.routers import admin, room
from mappingservice.utils import log_memory_usage, predict_language

logging.config.fileConfig("logging.conf", disable_existing_loggers=False)
logger = logging.getLogger(__name__)


@asynccontextmanager
async def lifespan(application: FastAPI):
    # Load the ML models
    settings = get_settings()
    initial_memory = psutil.Process().memory_info().rss
    start_time = time.time()
    ml_model = ModelLoader(settings, MODEL_NAMES)

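    # deps.mc is the shared in-process model cache, keyed as
    # deps.mc[model_name][language] -> loaded pipeline.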
    for k in MODEL_NAMES:
        logger.info(f"Loading model: {k}...")
        for lang in AVAILABLE_LANGUAGES:
            model_pipeline = ml_model.get_model(k, lang)
            deps.mc.setdefault(k, {})[lang] = model_pipeline

    elapsed_time = time.time() - start_time
    num_cores = psutil.cpu_count(logical=True)
    total_ram = psutil.virtual_memory().total / (1024 ** 3)  # Convert to GB
    final_memory = psutil.Process().memory_info().rss
    total_memory_used = (final_memory - initial_memory) / (1024 ** 3)  # Convert to GB

    # Startup summary banner
    logger.info("*" * 60)
    logger.info(f"* Number of Cores: {num_cores}")
    logger.info(f"* Total RAM: {total_ram:.2f} GB")
    logger.info(
        f"* AI Models loaded in {elapsed_time:.2f} seconds, "
        f"using {total_memory_used:.2f} GB"
    )
    logger.info("*" * 60)

    yield

    # Clean up the ML models and release the resources
    deps.mc.clear()


app = FastAPI(root_path="/ml-api", lifespan=lifespan)


@app.middleware("http")
async def handle_exceptions(request: Request, call_next):
    try:
        return await call_next(request)
    except Exception as e:
        # logger.exception logs the message and the traceback in one call.
        logger.exception(f"Unhandled error: {e}")
        return JSONResponse(
            status_code=500, content={"message": "Internal Server Error"}
        )
    finally:
        # Runs on both the success and the error path.
        log_memory_usage()


@app.middleware("http")
async def detect_language(request: Request, call_next):
    room_description = request.query_params.get("room_description", "")
    language = "en"

    if not room_description:
        try:
            body = await request.json()
            room_description = body.get("room_description", "")
        except ValueError:
            # The body is not valid JSON; fall through to the check below.
            pass

    # If the room description is still empty, continue with the request as
    # the language detection is not required
    if not room_description:
        return await call_next(request)

    try:
        language = predict_language(room_description)
        if language not in AVAILABLE_LANGUAGES:
            logger.error(f"Unsupported language for room description: {room_description}. Falling back to model prediction.")  # noqa: E501
            # 'na' is assumed to be the language-agnostic cache entry for the
            # detector model.
            language = deps.mc['lang_detect']['na'].predict(room_description)[0][0]['label']  # noqa: E501
            if language not in AVAILABLE_LANGUAGES:
                logger.error(f"Unsupported language for room description using model prediction: {room_description}. Falling back to English.")  # noqa: E501
                language = "en"
    except Exception as e:
        logger.error(f"Error detecting language: {e}")

    request.state.predicted_language = language
    return await call_next(request)
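

# Room endpoints require a valid API key; the admin router is mounted
# without that dependency.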
app.include_router(room.router, dependencies=[Depends(get_api_key)])
app.include_router(admin.router)