import streamlit as st
import torch
from PIL import Image
from diffusers import (
    StableDiffusionPipeline,
    StableDiffusionControlNetPipeline,
    ControlNetModel,
    DDIMScheduler,
    LMSDiscreteScheduler,
)
from transformers import AutoModelForCausalLM, AutoTokenizer
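
# Loading a pipeline inside the button handler reloads the weights on every
# click. A minimal sketch of caching the loader with Streamlit's resource
# cache (not wired into the app below, shown only as an illustration):
#
#   @st.cache_resource
#   def load_sd_pipeline(model_id: str) -> StableDiffusionPipeline:
#       return StableDiffusionPipeline.from_pretrained(model_id)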
# Define a dictionary with all available models, schedulers, features, weights, and adapters
model_dict = {
    "Stable Diffusion": {
        "Models": [
            "stabilityai/stable-diffusion-3-medium",
            "CompVis/stable-diffusion-v1-4",
            "stabilityai/stable-diffusion-2-1",
            "runwayml/stable-diffusion-v1-5",
            "runwayml/stable-diffusion-inpainting",
            "runwayml/stable-diffusion-video-v1-5",
            "stabilityai/stable-diffusion-2-base"
        ],
"Schedulers": [ | |
"DDIMScheduler", | |
"LMSDiscreteScheduler" | |
], | |
"Features": [ | |
"Unconditional image generation", | |
"Text-to-image", | |
"Image-to-image", | |
"Inpainting", | |
"Text or image-to-video", | |
"Depth-to-image" | |
], | |
"Adapters": [ | |
"ControlNet", | |
"T2I-Adapter" | |
], | |
"Weights": [ | |
"Stable Diffusion XL", | |
"SDXL Turbo", | |
"Kandinsky", | |
"IP-Adapter", | |
"ControlNet", | |
"Latent Consistency Model", | |
"Textual inversion", | |
"Shap-E", | |
"DiffEdit", | |
"Trajectory Consistency Distillation-LoRA", | |
"Stable Video Diffusion", | |
"Marigold Computer Vision" | |
] | |
}, | |
"Llama": { | |
"Models": [ | |
"decapoda-research/llama-7b-hf", | |
"decapoda-research/llama-13b-hf", | |
"decapoda-research/llama-30b-hf", | |
"decapoda-research/llama-65b-hf" | |
], | |
"Tokenizers": [ | |
"LlamaTokenizerFast" | |
], | |
"Features": [ | |
"AutoPipeline", | |
"Train a diffusion model", | |
"Load LoRAs for inference", | |
"Accelerate inference of text-to-image diffusion models", | |
"LOAD PIPELINES AND ADAPTERS", | |
"Load community pipelines and components", | |
"Load schedulers and models", | |
"Model files and layouts", | |
"Load adapters", | |
"Push files to the Hub", | |
"GENERATIVE TASKS", | |
"Unconditional image generation", | |
"Text-to-image", | |
"Image-to-image", | |
"Inpainting", | |
"Text or image-to-video", | |
"Depth-to-image", | |
"INFERENCE TECHNIQUES", | |
"Overview", | |
"Distributed inference with multiple GPUs", | |
"Merge LoRAs", | |
"Scheduler features", | |
"Pipeline callbacks", | |
"Reproducible pipelines", | |
"Controlling image quality", | |
"Prompt techniques", | |
"ADVANCED INFERENCE", | |
"Outpainting", | |
"SPECIFIC PIPELINE EXAMPLES", | |
"Stable Diffusion XL", | |
"SDXL Turbo", | |
"Kandinsky", | |
"IP-Adapter", | |
"ControlNet", | |
"T2I-Adapter", | |
"Latent Consistency Model", | |
"Textual inversion", | |
"Shap-E", | |
"DiffEdit", | |
"Trajectory Consistency Distillation-LoRA", | |
"Stable Video Diffusion", | |
"Marigold Computer Vision" | |
], | |
"Weights": [ | |
"LoRA weights" | |
] | |
} | |
} | |
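
# Assuming this file is saved as app.py and the imports above are installed,
# the app would typically be launched with:
#   streamlit run app.py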
model_type = st.selectbox("Select a model type:", list(model_dict.keys()))
if model_type == "Stable Diffusion":
    model = st.selectbox("Select a Stable Diffusion model:", model_dict[model_type]["Models"])
    scheduler = st.selectbox("Select a scheduler:", model_dict[model_type]["Schedulers"])
    feature = st.selectbox("Select a feature:", model_dict[model_type]["Features"])
    adapter = st.selectbox("Select an adapter:", model_dict[model_type]["Adapters"])
    weight = st.selectbox("Select a weight:", model_dict[model_type]["Weights"])
    # Collect inputs before the button: widgets created inside an
    # `if st.button(...)` block vanish on the rerun the click triggers
    prompt = st.text_input("Enter a prompt:")
    num_images = st.slider("Number of images to generate", min_value=1, max_value=10, value=1)
    control_image = None
    if adapter == "ControlNet":
        # ControlNet generation is conditioned on an input image
        uploaded = st.file_uploader("Upload a conditioning image:", type=["png", "jpg", "jpeg"])
        if uploaded is not None:
            control_image = Image.open(uploaded).convert("RGB")
    if st.button("Generate Images"):
        st.write("Generating images...")
        if adapter == "ControlNet" and control_image is not None:
            # ControlNet needs its own pipeline class; StableDiffusionPipeline
            # has no to_controlnet() method
            controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_openpose")
            pipe = StableDiffusionControlNetPipeline.from_pretrained(model, controlnet=controlnet)
        else:
            pipe = StableDiffusionPipeline.from_pretrained(model)
        # Swap in the selected scheduler via an explicit lookup instead of eval(),
        # reusing the pipeline's existing scheduler config
        scheduler_classes = {"DDIMScheduler": DDIMScheduler, "LMSDiscreteScheduler": LMSDiscreteScheduler}
        pipe.scheduler = scheduler_classes[scheduler].from_config(pipe.scheduler.config)
        pipe = pipe.to("cuda" if torch.cuda.is_available() else "cpu")
        # Generate the images
        if control_image is not None:
            images = pipe(prompt, image=control_image, num_images_per_prompt=num_images, guidance_scale=7.5).images
        else:
            images = pipe(prompt, num_images_per_prompt=num_images, guidance_scale=7.5).images
        # Display the generated images
        cols = st.columns(num_images)
        for i, image in enumerate(images):
            cols[i].image(image, caption=f"Image {i+1}", use_column_width=True)
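
# NOTE: the "T2I-Adapter" choice above is not wired up. A rough sketch of what
# loading one could look like (the adapter repo id is an example, untested here):
#
#   from diffusers import T2IAdapter, StableDiffusionAdapterPipeline
#   t2i = T2IAdapter.from_pretrained("TencentARC/t2iadapter_canny_sd15v2")
#   pipe = StableDiffusionAdapterPipeline.from_pretrained(model, adapter=t2i)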
elif model_type == "Llama":
    model = st.selectbox("Select a Llama model:", model_dict[model_type]["Models"])
    tokenizer_name = st.selectbox("Select a tokenizer:", model_dict[model_type]["Tokenizers"])
    feature = st.selectbox("Select a feature:", model_dict[model_type]["Features"])
    weight = st.selectbox("Select a weight:", model_dict[model_type]["Weights"])
    # Collect the prompt before the button for the same rerun reason as above
    input_text = st.text_area("Enter a prompt:")
    if st.button("Generate Text"):
        st.write("Generating text...")
        # Load the tokenizer from the model repo: the selectbox names a
        # tokenizer class ("LlamaTokenizerFast"), not a checkpoint to download
        tokenizer = AutoTokenizer.from_pretrained(model)
        lm = AutoModelForCausalLM.from_pretrained(model)
        # Tokenize the input text
        inputs = tokenizer(input_text, return_tensors="pt")
        # Generate the text
        output = lm.generate(**inputs, max_new_tokens=128)
        # Decode the generated text
        generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
        st.write("Generated Text:")
        st.write(generated_text)
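
# NOTE: the "LoRA weights" selection is never applied above. With peft
# installed, attaching a LoRA adapter to the loaded model would look roughly
# like this (the adapter repo id is a placeholder, not a recommendation):
#
#   from peft import PeftModel
#   lm = PeftModel.from_pretrained(lm, "username/llama-lora-adapter")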