# K00B404's picture
# Update app.py
# 52bd1b1 verified
import gradio as gr
import requests
import io
import random
import os
import time
from PIL import Image
from deep_translator import GoogleTranslator
import json
from gradio_client import Client
# Project by Nymbo
# Inference endpoint that query() posts generation requests to.
API_URL = "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-3.5-large"
# Access token taken from the HF_READ_TOKEN environment variable; None when unset.
API_TOKEN = os.getenv("HF_READ_TOKEN")
# Bearer-auth header built once at import time from the token above.
# NOTE: query() builds its own header per request rather than using this one.
headers = {"Authorization": f"Bearer {API_TOKEN}"}
# Timeout handed to requests.post() in query() (requests treats it as seconds).
timeout = 100
# Initialize the prompt enhancer client
# Created at import time; enhance_prompt() calls its "/chat" endpoint.
prompt_enhancer = Client("K00B404/mistral-nemo-prompt-enhancer")
def enhance_prompt(prompt, enable_enhancement=True):
    """Return a more detailed version of ``prompt`` for image generation.

    The prompt is sent to the module-level ``prompt_enhancer`` client
    (``/chat`` endpoint) together with a fixed system message. Enhancement is
    best-effort: the original prompt is returned unchanged when enhancement
    is disabled, when the remote call raises, or when the reply is not a
    non-blank string.

    Args:
        prompt: Text to enhance.
        enable_enhancement: When false, skip the remote call entirely.

    Returns:
        The enhanced prompt, or ``prompt`` itself on any fallback path.
    """
    if not enable_enhancement:
        print(f'\033[1mPrompt enhancement disabled, using original prompt:\033[0m {prompt}')
        return prompt

    system_message = (
        "You are an expert at writing detailed, high-quality image generation prompts. "
        "Enhance the given prompt by adding relevant artistic details, style elements, and quality descriptors. "
        "Keep the original intent but make it more elaborate and specific."
    )

    try:
        enhanced = prompt_enhancer.predict(
            message=prompt,
            system_message=system_message,
            max_tokens=512,
            temperature=0.7,
            top_p=0.95,
            api_name="/chat",
        )
    except Exception as e:
        # Deliberately broad: a failing enhancer must never block generation.
        print(f"Error enhancing prompt: {e}")
        return prompt  # Fall back to original prompt if enhancement fails

    # A blank or non-text reply would produce a degenerate final prompt, so
    # treat it the same way as a failed call.
    if not isinstance(enhanced, str) or not enhanced.strip():
        print(f"Error enhancing prompt: enhancer returned an unusable result: {enhanced!r}")
        return prompt

    print(f'\033[1mOriginal prompt:\033[0m {prompt}')
    print(f'\033[1mEnhanced prompt:\033[0m {enhanced}')
    return enhanced
# Generate an image for a prompt through the inference API
def query(prompt, is_negative=False, steps=35, cfg_scale=7, sampler="DPM++ 2M Karras", seed=-1, strength=0.7, width=1024, height=1024, enable_enhancement=True):
    """Translate, optionally enhance, and render ``prompt`` as an image.

    Args:
        prompt: User prompt. Missing, empty, or whitespace-only prompts
            short-circuit to ``(None, None)``.
        is_negative: Sent as the ``is_negative`` payload field. The UI wires
            the negative-prompt textbox to this argument.
        steps: Sent as the ``steps`` payload field.
        cfg_scale: Sent as the ``cfg_scale`` payload field.
        sampler: Accepted for interface compatibility; not sent to the API.
        seed: Seed to send; ``-1`` picks a random seed in [1, 1000000000].
        strength: Sent as the ``strength`` payload field.
        width: Sent as ``parameters.width``.
        height: Sent as ``parameters.height``.
        enable_enhancement: Forwarded to ``enhance_prompt``.

    Returns:
        ``(image, enhanced_prompt)`` on success, or ``(None, None)`` when the
        prompt is blank or the response body cannot be opened as an image.

    Raises:
        gr.Error: If the request cannot be completed or the API answers with
            a non-200 status.
    """
    # Treat whitespace-only input like an empty prompt: nothing to generate.
    if not prompt or not prompt.strip():
        return None, None

    key = random.randint(0, 999)

    # Only send an Authorization header when a token is configured; a literal
    # "Bearer None" is never a valid credential.
    token = os.getenv("HF_READ_TOKEN")
    request_headers = {"Authorization": f"Bearer {token}"} if token else {}

    # Translate the prompt from Russian to English if necessary. Translation
    # is best-effort: on any failure (network, length limit, empty result)
    # continue with the prompt as typed.
    try:
        translated = GoogleTranslator(source='ru', target='en').translate(prompt)
    except Exception as e:
        print(f"Error translating prompt, using it untranslated: {e}")
        translated = None
    if translated:
        prompt = translated
    print(f'\033[1mGeneration {key} translation:\033[0m {prompt}')

    # Enhance the prompt using the Mistral Nemo model if enabled
    enhanced_prompt = enhance_prompt(prompt, enable_enhancement)

    # Add some extra flair to the prompt
    final_prompt = f"{enhanced_prompt} | ultra detail, ultra elaboration, ultra quality, perfect."
    print(f'\033[1mGeneration {key} final prompt:\033[0m {final_prompt}')

    # Prepare the payload for the API call, including width and height.
    # NOTE(review): everything except "inputs" and "parameters" is sent as a
    # top-level key; confirm against the inference API schema that the
    # endpoint honors these fields.
    payload = {
        "inputs": final_prompt,
        "is_negative": is_negative,
        "steps": steps,
        "cfg_scale": cfg_scale,
        "seed": seed if seed != -1 else random.randint(1, 1000000000),
        "strength": strength,
        "parameters": {
            "width": width,
            "height": height,
        },
    }

    # Send the request; surface network failures and timeouts as a UI error
    # instead of an unhandled exception.
    try:
        response = requests.post(API_URL, headers=request_headers, json=payload, timeout=timeout)
    except requests.exceptions.RequestException as e:
        print(f"Error: Request to the inference API failed: {e}")
        raise gr.Error(f"Request to the inference API failed: {e}") from e

    if response.status_code != 200:
        print(f"Error: Failed to get image. Response status: {response.status_code}")
        print(f"Response content: {response.text}")
        if response.status_code == 503:
            raise gr.Error(f"{response.status_code} : The model is being loaded")
        raise gr.Error(f"{response.status_code}")

    try:
        # Convert the response content into an image
        image = Image.open(io.BytesIO(response.content))
    except Exception as e:
        print(f"Error when trying to open the image: {e}")
        return None, None

    print(f'\033[1mGeneration {key} completed!\033[0m ({final_prompt})')
    return image, enhanced_prompt
# CSS to style the app
# Limits the element with id "app-container" to 800px wide and centers it
# with auto left/right margins. Passed to gr.Blocks(css=...) below.
css = """
#app-container {
max-width: 800px;
margin-left: auto;
margin-right: auto;
}
"""
# Assemble the Gradio Blocks interface
with gr.Blocks(theme='Nymbo/Nymbo_Theme', css=css) as app:
    # Page heading
    gr.HTML("<center><h1>Stable Diffusion 3.5 Large with Prompt Enhancement</h1></center>")

    # Everything below lives in the centered "app-container" column
    with gr.Column(elem_id="app-container"):
        with gr.Row():
            with gr.Column(elem_id="prompt-container"):
                # Main prompt entry
                with gr.Row():
                    text_prompt = gr.Textbox(
                        label="Prompt",
                        placeholder="Enter a prompt here - it will be automatically enhanced for better results",
                        lines=2,
                        elem_id="prompt-text-input",
                    )

                # Collapsed-by-default panel with the generation options
                with gr.Row():
                    with gr.Accordion("Advanced Settings", open=False):
                        enable_enhancement = gr.Checkbox(
                            label="Enable Prompt Enhancement",
                            value=True,
                        )
                        negative_prompt = gr.Textbox(
                            label="Negative Prompt",
                            placeholder="What should not be in the image",
                            value="(deformed, distorted, disfigured), poorly drawn, bad anatomy, wrong anatomy, extra limb, missing limb, floating limbs, (mutated hands and fingers), disconnected limbs, mutation, mutated, ugly, disgusting, blurry, amputation, misspellings, typos",
                            lines=3,
                            elem_id="negative-prompt-text-input",
                        )
                        # Output dimensions share one row
                        with gr.Row():
                            width = gr.Slider(label="Width", value=1024, minimum=64, maximum=1216, step=32)
                            height = gr.Slider(label="Height", value=1024, minimum=64, maximum=1216, step=32)
                        steps = gr.Slider(label="Sampling steps", value=35, minimum=1, maximum=100, step=1)
                        cfg = gr.Slider(label="CFG Scale", value=7, minimum=1, maximum=20, step=1)
                        strength = gr.Slider(label="Strength", value=0.7, minimum=0, maximum=1, step=0.001)
                        # query() replaces a seed of -1 with a random one
                        seed = gr.Slider(label="Seed", value=-1, minimum=-1, maximum=1000000000, step=1)
                        method = gr.Radio(
                            label="Sampling method",
                            value="DPM++ 2M Karras",
                            choices=["DPM++ 2M Karras", "DPM++ SDE Karras", "Euler", "Euler a", "Heun", "DDIM"],
                        )

        # Button that starts a generation
        with gr.Row():
            text_button = gr.Button("Generate Enhanced Image", variant='primary', elem_id="gen-button")

        # Where the generated image is shown
        with gr.Row():
            image_output = gr.Image(type="pil", label="Image Output", elem_id="gallery")

        # Where the enhanced prompt returned by query() is shown
        with gr.Row():
            prompt_output = gr.Textbox(label="Enhanced Prompt", elem_id="prompt-output")

        # Component order here must match query()'s positional parameters
        generation_inputs = [
            text_prompt,
            negative_prompt,
            steps,
            cfg,
            method,
            seed,
            strength,
            width,
            height,
            enable_enhancement,
        ]
        text_button.click(
            fn=query,
            inputs=generation_inputs,
            outputs=[image_output, prompt_output],
        )

# Start serving the app
app.launch(show_api=True, share=False)