Spaces: Running on Zero
File size: 2,579 Bytes
import spaces
import gradio as gr
import torch
from diffusers import StableDiffusion3Pipeline
from huggingface_hub import snapshot_download, login
from transformers import pipeline
from PIL import Image
import os
# Retrieve the API token from the environment variable
huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
if huggingface_token is None:
    raise ValueError("HUGGINGFACE_TOKEN environment variable is not set.")
# Log in to Hugging Face
login(token=huggingface_token)
# Check if CUDA is available
device = "cuda" if torch.cuda.is_available() else "cpu"
# Ensure GPU is available
if device == "cuda":
    print("CUDA is available. Using GPU.")
else:
    print("CUDA is not available. Using CPU.")
# Download and load the Stable Diffusion model
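# Note: stabilityai/stable-diffusion-3-medium is a gated repo, so the token must
# belong to an account that has accepted the model's license terms.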
model_path = snapshot_download(
    repo_id="stabilityai/stable-diffusion-3-medium",
    revision="refs/pr/26",
    repo_type="model",
    ignore_patterns=["*.md", "*.gitattributes"],
    local_dir="stable-diffusion-3-medium",
    token=huggingface_token
)
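# Passing text_encoder_3=None / tokenizer_3=None skips SD3's large T5-XXL text
# encoder, trading some prompt fidelity for a much smaller memory footprint.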
image_gen = StableDiffusion3Pipeline.from_pretrained(
    model_path,
    text_encoder_3=None,
    tokenizer_3=None,
    torch_dtype=torch.float16
)
image_gen = image_gen.to(device)
# Load the image-to-text pipeline
caption_image = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large", device=device)
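# On ZeroGPU Spaces, the @spaces.GPU decorator requests GPU hardware only for the
# duration of each call to the decorated function.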
@spaces.GPU(enable_queue=True)
def generate_image_from_caption(image, num_inference_steps=50, guidance_scale=7.5):
    # Generate the caption
    caption = caption_image(image)[0]['generated_text']
    print("Generated Caption:", caption)
    # Generate the image from the caption
    result = image_gen(
        prompt=caption,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
        negative_prompt="blurred, ugly, watermark, low resolution, blurry",
        height=512,
        width=512
    )
    # Get the generated image
    generated_image = result.images[0]
    return generated_image
# Create the Gradio interface
iface = gr.Interface(
    fn=generate_image_from_caption,
    inputs=[
        gr.Image(type="pil", label="Upload an image"),
        gr.Slider(label="Number of inference steps", minimum=1, maximum=100, value=50),
        gr.Slider(label="Guidance scale", minimum=1.0, maximum=20.0, value=7.5)
    ],
    outputs=gr.Image(label="Generated Image"),
    title="Image-to-Image Generator using Caption",
    description="Upload an image to generate a caption, and then use the caption as a prompt to generate a new image using Stable Diffusion."
)
# Launch the Gradio app
iface.launch()
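# To run this Space locally, export HUGGINGFACE_TOKEN in the environment first,
# since the script reads it via os.getenv() before logging in.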