# Text to Video
# import torch
# from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
# from diffusers.utils import export_to_video
# import streamlit as st
# import numpy as np
#
# # Title and User Input
# st.title("Text-to-Video with Streamlit")
# prompt = st.text_input("Enter your text prompt:", "Spiderman is surfing")
#
# # Button to trigger generation
# if st.button("Generate Video"):
#     # Ensure you have 'accelerate' version 0.17.0 or higher (needed for CPU offloading)
#     import accelerate
#     from packaging import version
#     if version.parse(accelerate.__version__) < version.parse("0.17.0"):
#         st.warning("Please upgrade 'accelerate' to version 0.17.0 or higher for CPU offloading.")
#     else:
#         with st.spinner("Generating video..."):
#             # Define the text-to-video pipeline
#             pipe = DiffusionPipeline.from_pretrained(
#                 "damo-vilab/text-to-video-ms-1.7b", torch_dtype=torch.float16, variant="fp16"
#             )
#             pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
#             pipe.enable_model_cpu_offload()
#
#             # Generate video frames
#             video_frames = pipe(prompt, num_inference_steps=25).frames
#
#             # Create dummy frames for testing (replace with actual manipulation later)
#             dummy_frames = [np.ones((256, 256, 3), dtype=np.uint8) for _ in range(20)]
#
#             # Export to video
#             video_path = export_to_video(dummy_frames)
#
#             # Display the video in the Streamlit app
#             st.video(video_path)


# Text to 3D
# import streamlit as st
# import torch
# from diffusers import ShapEPipeline
# from diffusers.utils import export_to_gif
# from PIL import Image
# import numpy as np
#
# # Model loading (ideally done once at the start for efficiency)
# ckpt_id = "openai/shap-e"
#
# def process_image_for_pil(image):
#     """Converts pipeline outputs (tensor, array, or PIL image) to a PIL image."""
#     if isinstance(image, torch.Tensor):
#         # Assumes float tensor values in [0, 1]
#         image_array = (image.detach().cpu().numpy() * 255).astype("uint8")
#         return Image.fromarray(image_array)
#     elif isinstance(image, np.ndarray):
#         image_array = image.astype("uint8")  # Assuming 8-bit conversion is needed
#         return Image.fromarray(image_array)
#     elif isinstance(image, Image.Image):
#         return image
#     else:
#         raise TypeError("Unsupported image format. Please provide conversion logic.")
#
# test_image = np.random.randint(0, 256, size=(256, 256, 3), dtype=np.uint8)  # Placeholder image
# result = process_image_for_pil(test_image)
#
# def should_resize(image):
#     """Determines whether to resize an image (replace with your own logic)."""
#     return image.width > 512 or image.height > 512
#
# @st.cache_resource  # Caches the model for faster subsequent runs
# def load_model():
#     return ShapEPipeline.from_pretrained(ckpt_id).to("cuda")
#
# pipe = load_model()
#
# # App Title
# st.title("Shark 3D Image Generator")
#
# # User Inputs
# prompt = st.text_input("Enter your prompt:", "a shark")
# guidance_scale = st.slider("Guidance Scale", 0.0, 20.0, 15.0, step=0.5)
#
# # Generate and Display Images
# if st.button("Generate"):
#     with st.spinner("Generating images..."):
#         images = pipe(prompt, guidance_scale=guidance_scale, num_inference_steps=64).images
#
#         # Process images for PIL conversion
#         # Resize images (optional)
#         pil_images = []  # Stores resized images if needed
#         for image in images:
#             processed_image = process_image_for_pil(image)
#             if should_resize(processed_image):
#                 resized_image = processed_image.resize((256, 256))
#                 pil_images.append(resized_image)
#             else:
#                 pil_images.append(processed_image)  # Append without resizing
#
#         gif_path = export_to_gif(pil_images, "shark_3d.gif")
#         st.image(pil_images[0])
#         st.success("GIF saved as shark_3d.gif")


# visual QA
import requests
from PIL import Image
from transformers import Pix2StructForConditionalGeneration, Pix2StructProcessor
import streamlit as st

# Sanity-check example: answer a fixed question about a demo diagram
image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/ai2d-demo.jpg"
image = Image.open(requests.get(image_url, stream=True).raw)

model = Pix2StructForConditionalGeneration.from_pretrained("google/pix2struct-ai2d-base")
processor = Pix2StructProcessor.from_pretrained("google/pix2struct-ai2d-base")

question = "What does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud"

inputs = processor(images=image, text=question, return_tensors="pt")
predictions = model.generate(**inputs, max_new_tokens=1000)
# print(processor.decode(predictions[0], skip_special_tokens=True))


def load_image():
    """Lets the user enter an image URL (or pick a sample) in the sidebar."""
    with st.sidebar:
        img = st.text_input("Enter Image URL") or st.selectbox(
            "Select Image",
            (
                "https://images.unsplash.com/photo-1593466144596-8abd50ad2c52?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=3434&q=80",
                "https://images.unsplash.com/photo-1566438480900-0609be27a4be?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=3394&q=80",
            ),
        )
        if st.button("Load Image"):
            st.write("Image Uploaded!")
            st.image(img)
    return img


def visual_qna():
    st.title("Visual Q&A")
    img = load_image()
    if img:
        if query := st.chat_input("Enter your message"):
            # Fetch the selected image and run Pix2Struct on the user's question
            pil_image = Image.open(requests.get(img, stream=True).raw)
            qna_inputs = processor(images=pil_image, text=query, return_tensors="pt")
            prediction = model.generate(**qna_inputs, max_new_tokens=1000)
            response = processor.decode(prediction[0], skip_special_tokens=True)
            with st.chat_message("assistant"):
                st.write(response)
    else:
        st.warning("Please enter an image URL and click 'Load Image' before asking a question.")
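
# Entry point: a minimal sketch assuming the Visual Q&A app is the demo this script
# should launch (the text-to-video and text-to-3D sections above remain commented out).
# Run with `streamlit run <this_file>.py`.
visual_qna()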