from typing import Dict, Any
from diffusers import AutoPipelineForText2Image
import torch
from PIL import Image
import base64
from io import BytesIO


class EndpointHandler:
    """Hugging Face Inference Endpoints handler: text prompt -> base64 PNG.

    Loads the FLUX.1-dev text-to-image pipeline with custom LoRA weights once
    at startup, then serves inference requests through ``__call__``.
    """

    def __init__(self, path: str = ""):
        """
        Initialize the handler, loading the model and LoRA weights.

        Args:
            path: Model directory provided by Hugging Face Inference Endpoints.
                Unused here — the base model and LoRA weights are fetched from
                the Hub by repo id instead.
        """
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        # fp16 is only safe/beneficial on GPU; fall back to fp32 on CPU.
        self.pipeline = AutoPipelineForText2Image.from_pretrained(
            'black-forest-labs/FLUX.1-dev',
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
        ).to(self.device)

        # Load LoRA weights on top of the base pipeline.
        lora_weights_path = 'krtk00/pan_crd_lora_v2'
        self.pipeline.load_lora_weights(lora_weights_path, weight_name='lora.safetensors')

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Handle one inference request.

        Args:
            data: Request payload. Expected keys:
                - "inputs": the text prompt (required, non-empty string).
                - "parameters": optional dict of extra keyword arguments
                  forwarded verbatim to the pipeline (e.g.
                  ``num_inference_steps``, ``guidance_scale``), following the
                  Inference Endpoints request convention.

        Returns:
            ``{"image": <base64-encoded PNG string>}``.

        Raises:
            ValueError: If no prompt is provided, or it is not a string.
        """
        # Preprocess input: validate the prompt early with a clear error
        # instead of failing deep inside the pipeline.
        prompt = data.get("inputs", None)
        if not prompt:
            raise ValueError("No prompt provided in the input")
        if not isinstance(prompt, str):
            raise ValueError("Prompt must be a string")

        # Optional generation parameters (backward-compatible: defaults to {}).
        parameters = data.get("parameters") or {}

        # Run inference. inference_mode is a stricter, slightly faster
        # variant of no_grad for pure-inference code paths.
        with torch.inference_mode():
            images = self.pipeline(prompt, **parameters).images

        # Postprocess output: encode the first generated image as base64 PNG
        # so it can be returned in a JSON response.
        pil_image = images[0]  # pipeline returns a list; one image by default
        buffered = BytesIO()
        pil_image.save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")

        # Return result
        return {"image": img_str}