# Text to Video
#
# NOTE: disabled earlier prototype, kept for reference only.
#
# import torch
# from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
# from diffusers.utils import export_to_video
# import streamlit as st
# import numpy as np
#
# # Title and User Input
# st.title("Text-to-Video with Streamlit")
# prompt = st.text_input("Enter your text prompt:", "Spiderman is surfing")
#
# # Button to trigger generation
# if st.button("Generate Video"):
#     # Ensure you have 'accelerate' version 0.17.0 or higher
#     import accelerate
#     if accelerate.__version__ < "0.17.0":
#         st.warning("Please upgrade 'accelerate' to version 0.17.0 or higher for CPU offloading.")
#     else:
#         with st.spinner("Generating video..."):
#             # Define the pipeline for image generation
#             pipe = DiffusionPipeline.from_pretrained("damo-vilab/text-to-video-ms-1.7b",
#                 torch_dtype=torch.float16, variant="fp16", device="cpu")
#             pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
#             pipe.enable_model_cpu_offload()
#             # Generate video frames
#             video_frames = pipe(prompt, num_inference_steps=25).frames
#             # Create dummy frames for testing (replace with actual manipulation later)
#             dummy_frames = [np.ones((256, 256, 3), dtype=np.uint8) for _ in range(20)]
#             # Export to video
#             video_path = export_to_video(dummy_frames)
#             # Display the video in the Streamlit app
#             st.video(video_path)

# Text to 3D
import numpy as np
import streamlit as st
import torch
from diffusers import ShapEPipeline
from diffusers.utils import export_to_gif
from PIL import Image

# Checkpoint identifier passed to ShapEPipeline.from_pretrained().
ckpt_id = "openai/shap-e"


def process_image_for_pil(image):
    """Return *image* as a ``PIL.Image.Image``.

    Args:
        image: Either an existing PIL image (returned unchanged) or a NumPy
            array, which is converted with ``Image.fromarray``.

    Returns:
        A PIL image.

    Raises:
        TypeError: If *image* is neither a PIL image nor a NumPy array.
    """
    if isinstance(image, Image.Image):
        return image
    if isinstance(image, np.ndarray):
        # NOTE(review): Image.fromarray expects a dtype/shape it can map to a
        # PIL mode (e.g. uint8 HxWx3) -- confirm if array output is ever used.
        return Image.fromarray(image)
    raise TypeError("Unsupported image format. Please provide conversion logic.")


def should_resize(image=None, max_side=512):
    """Tell whether *image* is large enough to warrant resizing.

    Args:
        image: Object exposing ``width`` and ``height`` (e.g. a PIL image).
            ``None`` means "nothing to inspect" and yields ``False``.
        max_side: Largest width/height, in pixels, that is left untouched.

    Returns:
        ``True`` if either dimension exceeds *max_side*, else ``False``.
    """
    if image is None:
        return False
    return image.width > max_side or image.height > max_side


@st.cache_resource  # Caches the model for faster subsequent runs
def load_model():
    """Load the Shap-E pipeline and move it to the best available device.

    The result is cached by Streamlit, so the checkpoint is loaded once
    instead of on every script rerun.
    """
    # Fall back to CPU so the app still starts on hosts without a GPU.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return ShapEPipeline.from_pretrained(ckpt_id).to(device)


pipe = load_model()

# App Title
st.title("Shark 3D Image Generator")

# User Inputs
prompt = st.text_input("Enter your prompt:", "a shark")
guidance_scale = st.slider("Guidance Scale", 0.0, 20.0, 15.0, step=0.5)

# Generate and Display Images
if st.button("Generate"):
    with st.spinner("Generating images..."):
        images = pipe(
            prompt,
            guidance_scale=guidance_scale,
            num_inference_steps=64,
        ).images

        # The pipeline returns one sequence of rendered frames per prompt;
        # a single prompt is submitted, so take the first sequence. A flat
        # sequence of frames is accepted as-is.
        if len(images) > 0 and isinstance(images[0], (list, tuple)):
            frames = images[0]
        else:
            frames = images

        # Normalise every frame to a PIL image.
        pil_images = [process_image_for_pil(frame) for frame in frames]

        # Resize Images (Optional): shrink all frames if any is oversized.
        if any(should_resize(frame) for frame in pil_images):
            pil_images = [frame.resize((256, 256)) for frame in pil_images]

        # Only write the GIF when there is at least one frame to export.
        gif_path = export_to_gif(pil_images, "shark_3d.gif") if pil_images else None

    if gif_path is None:
        st.error("The model returned no frames.")
    else:
        st.image(pil_images[0])
        st.success(f"GIF saved as {gif_path}")