Stable-Diffusion-3.5-Serverless

Runtime error

File size: 7,622 Bytes

9b5f4e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bfc38ae
9b5f4e1
bfc38ae
 
 
 
9b5f4e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bfc38ae
9b5f4e1
bfc38ae
9b5f4e1
 
 
 
 
 
 
 
 
 
bfc38ae
 
9b5f4e1
 
bfc38ae
 
9b5f4e1
 
 
bfc38ae
9b5f4e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bfc38ae
 
9b5f4e1
 
bfc38ae
9b5f4e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bfc38ae
 
52bd1b1
bfc38ae
9b5f4e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bfc38ae
 
 
 
9b5f4e1
 
 
bfc38ae
 
9b5f4e1
 
 
ae7beff

import gradio as gr
import requests
import io
import random
import os
import time
from PIL import Image
from deep_translator import GoogleTranslator
import json
from gradio_client import Client

# Project by Nymbo

# Hugging Face Inference API endpoint for the Stable Diffusion 3.5 Large model.
API_URL = "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-3.5-large"
# Access token read from the environment; None when HF_READ_TOKEN is not set.
API_TOKEN = os.getenv("HF_READ_TOKEN")
headers = {"Authorization": f"Bearer {API_TOKEN}"}
# Timeout, in seconds, passed to requests.post() for each generation request.
timeout = 100

# Initialize the prompt enhancer client.
# Client() connects to the remote Space while it is being constructed, so an
# unreachable or sleeping Space would raise here and abort the import of this
# module, taking the whole app down. Prompt enhancement is optional, so keep
# the app alive and leave the client unset instead: enhance_prompt() treats
# any failure of the enhancer call as "use the original prompt".
try:
    prompt_enhancer = Client("K00B404/mistral-nemo-prompt-enhancer")
except Exception as e:
    print(f"Error initializing prompt enhancer: {e}")
    prompt_enhancer = None

def enhance_prompt(prompt, enable_enhancement=True):
    """Enhance the given prompt using the Mistral Nemo prompt enhancer.

    Args:
        prompt: The prompt text to enhance.
        enable_enhancement: When false, the prompt is returned unchanged and
            the remote enhancer is not contacted.

    Returns:
        The enhanced prompt, or the original ``prompt`` when enhancement is
        disabled, the enhancer call fails, or the enhancer returns nothing
        usable (a non-string or blank result).
    """
    if not enable_enhancement:
        print(f'\033[1mPrompt enhancement disabled, using original prompt:\033[0m {prompt}')
        return prompt

    system_message = (
        "You are an expert at writing detailed, high-quality image generation prompts. "
        "Enhance the given prompt by adding relevant artistic details, style elements, "
        "and quality descriptors. Keep the original intent but make it more elaborate "
        "and specific."
    )

    # Enhancement is best-effort: any failure of the remote call (network
    # error, unavailable client, API change) must not block image generation.
    try:
        enhanced = prompt_enhancer.predict(
            message=prompt,
            system_message=system_message,
            max_tokens=512,
            temperature=0.7,
            top_p=0.95,
            api_name="/chat"
        )
    except Exception as e:
        print(f"Error enhancing prompt: {e}")
        return prompt  # Fall back to original prompt if enhancement fails

    # A blank or non-text reply would otherwise replace the user's prompt with
    # nothing, so treat it like a failed enhancement.
    if not isinstance(enhanced, str) or not enhanced.strip():
        print(f"Error enhancing prompt: enhancer returned no usable text ({enhanced!r})")
        return prompt

    print(f'\033[1mOriginal prompt:\033[0m {prompt}')
    print(f'\033[1mEnhanced prompt:\033[0m {enhanced}')
    return enhanced

# Function to query the API and return the generated image
def query(prompt, is_negative=False, steps=35, cfg_scale=7, sampler="DPM++ 2M Karras", seed=-1, strength=0.7, width=1024, height=1024, enable_enhancement=True):
    """Generate an image for ``prompt`` through the Hugging Face Inference API.

    The prompt is translated to English, optionally enhanced with
    ``enhance_prompt``, suffixed with a few quality keywords and sent to
    ``API_URL``.

    Args:
        prompt: User prompt; may be written in any language.
        is_negative: Negative prompt text. The UI passes the contents of the
            negative-prompt textbox here; any non-string or blank value means
            "no negative prompt".
        steps: Number of inference steps.
        cfg_scale: Guidance scale.
        sampler: Accepted for interface compatibility; not sent to the API.
        seed: Generation seed; ``-1`` picks a random seed.
        strength: Accepted for interface compatibility; not sent to the API.
        width: Output width in pixels.
        height: Output height in pixels.
        enable_enhancement: Whether to run the prompt enhancer.

    Returns:
        ``(image, enhanced_prompt)`` on success, where ``image`` is a PIL
        image. ``(None, None)`` when the prompt is empty or blank, or when the
        response body cannot be opened as an image.

    Raises:
        gr.Error: If the request cannot be completed or the API answers with
            a non-200 status code.
    """
    # Nothing to generate for a missing or whitespace-only prompt.
    if prompt is None or not str(prompt).strip():
        return None, None

    key = random.randint(0, 999)

    # Only send an Authorization header when a token is configured; a literal
    # "Bearer None" header is never valid.
    api_token = os.getenv("HF_READ_TOKEN")
    request_headers = {"Authorization": f"Bearer {api_token}"} if api_token else {}

    # Translate the prompt to English if necessary. The source language is
    # auto-detected so prompts in any language are handled, and a translator
    # failure falls back to the untranslated prompt instead of aborting.
    try:
        translated = GoogleTranslator(source='auto', target='en').translate(prompt)
    except Exception as e:
        print(f"Error translating prompt: {e}")
        translated = None
    if translated:
        prompt = translated
    print(f'\033[1mGeneration {key} translation:\033[0m {prompt}')

    # Enhance the prompt using the Mistral Nemo model if enabled
    enhanced_prompt = enhance_prompt(prompt, enable_enhancement)

    # Add some extra flair to the prompt
    final_prompt = f"{enhanced_prompt} | ultra detail, ultra elaboration, ultra quality, perfect."
    print(f'\033[1mGeneration {key} final prompt:\033[0m {final_prompt}')

    # Prepare the payload for the API call. Generation options go under
    # "parameters" using the names defined by the text-to-image task of the
    # Inference API. Slider values may arrive as floats, so they are
    # normalised to the types the API expects.
    parameters = {
        "num_inference_steps": int(steps),
        "guidance_scale": float(cfg_scale),
        "seed": int(seed) if seed != -1 else random.randint(1, 1000000000),
        "width": int(width),
        "height": int(height),
    }
    if isinstance(is_negative, str) and is_negative.strip():
        parameters["negative_prompt"] = is_negative
    payload = {"inputs": final_prompt, "parameters": parameters}

    # Send the request to the API and handle the response
    try:
        response = requests.post(API_URL, headers=request_headers, json=payload, timeout=timeout)
    except requests.exceptions.RequestException as e:
        # Timeouts and connection failures become a readable UI error.
        print(f"Error: Request to the inference API failed: {e}")
        raise gr.Error(f"Request to the inference API failed: {e}") from e

    if response.status_code != 200:
        print(f"Error: Failed to get image. Response status: {response.status_code}")
        print(f"Response content: {response.text}")
        if response.status_code == 503:
            raise gr.Error(f"{response.status_code} : The model is being loaded")
        raise gr.Error(f"{response.status_code}")

    try:
        # Convert the response content into an image
        image = Image.open(io.BytesIO(response.content))
    except Exception as e:
        print(f"Error when trying to open the image: {e}")
        return None, None

    print(f'\033[1mGeneration {key} completed!\033[0m ({final_prompt})')
    return image, enhanced_prompt

# CSS to style the app: caps the element with id "app-container" at 800px
# wide and centres it horizontally via automatic left/right margins.
css = """
#app-container {
    max-width: 800px;
    margin-left: auto;
    margin-right: auto;
}
"""

# Build the Gradio UI with Blocks.
# Layout, top to bottom: title, prompt box, collapsed "Advanced Settings"
# accordion, generate button, image output, enhanced-prompt output.
with gr.Blocks(theme='Nymbo/Nymbo_Theme', css=css) as app:
    # Add a title to the app
    gr.HTML("<center><h1>Stable Diffusion 3.5 Large with Prompt Enhancement</h1></center>")
    
    # Container for all the UI elements; its elem_id is the one targeted by
    # the `css` string passed to gr.Blocks above.
    with gr.Column(elem_id="app-container"):
        # Add a text input for the main prompt
        with gr.Row():
            with gr.Column(elem_id="prompt-container"):
                with gr.Row():
                    text_prompt = gr.Textbox(
                        label="Prompt", 
                        placeholder="Enter a prompt here - it will be automatically enhanced for better results", 
                        lines=2, 
                        elem_id="prompt-text-input"
                    )
                
                # Accordion for advanced settings (collapsed by default)
                with gr.Row():
                    with gr.Accordion("Advanced Settings", open=False):
                        # Toggles the prompt-enhancement step; on by default.
                        enable_enhancement = gr.Checkbox(
                            label="Enable Prompt Enhancement",
                            value=True
                        )
                        # Pre-filled with a default list of unwanted traits.
                        negative_prompt = gr.Textbox(
                            label="Negative Prompt", 
                            placeholder="What should not be in the image", 
                            value="(deformed, distorted, disfigured), poorly drawn, bad anatomy, wrong anatomy, extra limb, missing limb, floating limbs, (mutated hands and fingers), disconnected limbs, mutation, mutated, ugly, disgusting, blurry, amputation, misspellings, typos", 
                            lines=3, 
                            elem_id="negative-prompt-text-input"
                        )
                        # Output dimensions: 64-1216 in steps of 32, default 1024x1024.
                        with gr.Row():
                            width = gr.Slider(label="Width", value=1024, minimum=64, maximum=1216, step=32)
                            height = gr.Slider(label="Height", value=1024, minimum=64, maximum=1216, step=32)
                        steps = gr.Slider(label="Sampling steps", value=35, minimum=1, maximum=100, step=1)
                        cfg = gr.Slider(label="CFG Scale", value=7, minimum=1, maximum=20, step=1)
                        strength = gr.Slider(label="Strength", value=0.7, minimum=0, maximum=1, step=0.001)
                        # A seed of -1 makes query() pick a random seed.
                        seed = gr.Slider(label="Seed", value=-1, minimum=-1, maximum=1000000000, step=1)
                        method = gr.Radio(
                            label="Sampling method", 
                            value="DPM++ 2M Karras", 
                            choices=["DPM++ 2M Karras", "DPM++ SDE Karras", "Euler", "Euler a", "Heun", "DDIM"]
                        )

        # Add a button to trigger the image generation
        with gr.Row():
            text_button = gr.Button("Generate Enhanced Image", variant='primary', elem_id="gen-button")
        
        # Image output area to display the generated image
        with gr.Row():
            image_output = gr.Image(type="pil", label="Image Output", elem_id="gallery")
        
        # Text output area to display the enhanced prompt
        with gr.Row():
            prompt_output = gr.Textbox(label="Enhanced Prompt", elem_id="prompt-output")
        
        # Bind the button to the query function with all inputs.
        # The inputs are listed in the order of query()'s parameters: the
        # negative-prompt textbox is the second input and so corresponds to
        # query's `is_negative` parameter, and the sampling-method radio to
        # `sampler`. The two outputs receive the (image, enhanced_prompt)
        # pair that query() returns.
        text_button.click(
            query, 
            inputs=[text_prompt, negative_prompt, steps, cfg, method, seed, strength, width, height, enable_enhancement], 
            outputs=[image_output, prompt_output]
        )

# Launch the Gradio app. This runs at module level, so the server starts as
# soon as the file is executed. show_api and share are passed explicitly
# (API docs shown, no public share link - per Gradio's launch() parameters).
app.launch(show_api=True, share=False)