from typing import Dict, Any
from diffusers import AutoPipelineForText2Image
import torch
from PIL import Image
import base64
from io import BytesIO


class EndpointHandler:
    """Hugging Face Inference Endpoints handler: text prompt -> base64 PNG.

    Loads the FLUX.1-dev text-to-image pipeline with custom LoRA weights once
    at startup, then serves inference requests through ``__call__``.
    """

    def __init__(self, path: str = ""):
        """
        Initialize the handler, loading the model and LoRA weights.

        Args:
            path: Model directory provided by Hugging Face Inference Endpoints.
                Unused here — the base model and LoRA weights are fetched from
                the Hub by repo id instead.
        """
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        # fp16 is only safe/beneficial on GPU; fall back to fp32 on CPU.
        self.pipeline = AutoPipelineForText2Image.from_pretrained(
            'black-forest-labs/FLUX.1-dev',
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
        ).to(self.device)

        # Load LoRA weights on top of the base pipeline.
        lora_weights_path = 'krtk00/pan_crd_lora_v2'
        self.pipeline.load_lora_weights(lora_weights_path, weight_name='lora.safetensors')

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Handle one inference request.

        Args:
            data: Request payload. Expected keys:
                - "inputs": the text prompt (required, non-empty string).
                - "parameters": optional dict of extra keyword arguments
                  forwarded verbatim to the pipeline (e.g.
                  ``num_inference_steps``, ``guidance_scale``), following the
                  Inference Endpoints request convention.

        Returns:
            ``{"image": <base64-encoded PNG string>}``.

        Raises:
            ValueError: If no prompt is provided, or it is not a string.
        """
        # Preprocess input: validate the prompt early with a clear error
        # instead of failing deep inside the pipeline.
        prompt = data.get("inputs", None)
        if not prompt:
            raise ValueError("No prompt provided in the input")
        if not isinstance(prompt, str):
            raise ValueError("Prompt must be a string")

        # Optional generation parameters (backward-compatible: defaults to {}).
        parameters = data.get("parameters") or {}

        # Run inference. inference_mode is a stricter, slightly faster
        # variant of no_grad for pure-inference code paths.
        with torch.inference_mode():
            images = self.pipeline(prompt, **parameters).images

        # Postprocess output: encode the first generated image as base64 PNG
        # so it can be returned in a JSON response.
        pil_image = images[0]  # pipeline returns a list; one image by default
        buffered = BytesIO()
        pil_image.save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")

        # Return result
        return {"image": img_str}