from io import BytesIO
import base64

import torch
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from PIL import Image
from diffusers import (
    ControlNetModel,
    StableDiffusionXLControlNetPipeline,
    AutoencoderKL,
    EulerAncestralDiscreteScheduler,
)

# Initialize FastAPI app
app = FastAPI()

# Pick the device first so the dtype can match it: fp16 is only
# well-supported on GPU, so fall back to fp32 on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32

# Load Hugging Face pipeline components. Components that live in
# subfolders of a Hub repo are loaded with the `subfolder` argument
# (a repo id itself cannot contain extra path segments).
model_id = "fyp1/sketchToImage"
controlnet = ControlNetModel.from_pretrained(
    model_id, subfolder="controlnet", torch_dtype=dtype
)
# The fp16-safe VAE avoids the NaN/black-image artifacts the stock
# SDXL VAE can produce in half precision.
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix", torch_dtype=dtype
)
scheduler = EulerAncestralDiscreteScheduler.from_pretrained(
    model_id, subfolder="scheduler"
)
pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    scheduler=scheduler,
    torch_dtype=dtype,
).to(device)


class GenerateRequest(BaseModel):
    prompt: str
    negative_prompt: str
    sketch: str  # Base64-encoded sketch image


# A plain `def` endpoint: FastAPI runs sync handlers in a thread pool,
# so the long-running GPU call does not block the event loop the way
# it would inside an `async def` handler.
@app.post("/generate")
def generate_image(data: GenerateRequest):
    try:
        # Decode and preprocess the sketch image. The pipeline's
        # control-image processor converts inputs to RGB internally,
        # so a grayscale ("L") sketch is accepted as-is.
        sketch_bytes = base64.b64decode(data.sketch)
        sketch_image = Image.open(BytesIO(sketch_bytes)).convert("L")  # Convert to grayscale
        sketch_image = sketch_image.resize((1024, 1024))

        # Generate the image using the pipeline
        with torch.no_grad():
            images = pipe(
                prompt=data.prompt,
                negative_prompt=data.negative_prompt,
                image=sketch_image,
                controlnet_conditioning_scale=1.0,
                width=1024,
                height=1024,
                num_inference_steps=30,
            ).images

        # Convert output image to Base64 for the JSON response
        buffered = BytesIO()
        images[0].save(buffered, format="PNG")
        image_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")

        return {"image": image_base64}
    except Exception as e:
        # Surface failures as a proper HTTP error rather than a 200
        # response carrying an "error" key.
        raise HTTPException(status_code=500, detail=str(e))
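
# ---------------------------------------------------------------------
# Example client (a minimal sketch, not part of the service itself).
# It assumes the server is running locally on port 8000 (e.g. via
# `uvicorn main:app --port 8000`, where `main` is this module's name),
# that a file named "sketch.png" exists, and that the `requests`
# package is installed; adjust those details for your setup.
#
# import base64
# import requests
#
# with open("sketch.png", "rb") as f:
#     sketch_b64 = base64.b64encode(f.read()).decode("utf-8")
#
# resp = requests.post(
#     "http://127.0.0.1:8000/generate",
#     json={
#         "prompt": "a cozy cabin in the woods, photorealistic",
#         "negative_prompt": "blurry, low quality",
#         "sketch": sketch_b64,
#     },
#     timeout=300,  # SDXL generation can take a while per request
# )
# resp.raise_for_status()
#
# # The endpoint returns {"image": "<base64 PNG>"}; decode and save it.
# with open("output.png", "wb") as out:
#     out.write(base64.b64decode(resp.json()["image"]))
# ---------------------------------------------------------------------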