# Text to Video
# import torch
# from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
# from diffusers.utils import export_to_video
# import streamlit as st
# import numpy as np
#
# # Title and User Input
# st.title("Text-to-Video with Streamlit")
# prompt = st.text_input("Enter your text prompt:", "Spiderman is surfing")
#
# # Button to trigger generation
# if st.button("Generate Video"):
#     # Ensure you have 'accelerate' version 0.17.0 or higher
#     import accelerate
#     if accelerate.__version__ < "0.17.0":
#         st.warning("Please upgrade 'accelerate' to version 0.17.0 or higher for CPU offloading.")
#     else:
#         with st.spinner("Generating video..."):
#             # Define the pipeline for video generation
#             pipe = DiffusionPipeline.from_pretrained("damo-vilab/text-to-video-ms-1.7b",
#                                                      torch_dtype=torch.float16, variant="fp16")
#             pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
#             pipe.enable_model_cpu_offload()
#
#             # Generate video frames
#             video_frames = pipe(prompt, num_inference_steps=25).frames
#
#             # Create dummy frames for testing (replace with the generated video_frames later)
#             dummy_frames = [np.ones((256, 256, 3), dtype=np.uint8) for _ in range(20)]
#
#             # Export to video
#             video_path = export_to_video(dummy_frames)
#
#             # Display the video in the Streamlit app
#             st.video(video_path)
# Text to 3D
# import streamlit as st
# import torch
# from diffusers import ShapEPipeline
# from diffusers.utils import export_to_gif
# from PIL import Image
# import numpy as np
#
# # Model loading (Ideally done once at the start for efficiency)
# ckpt_id = "openai/shap-e"
#
# @st.cache_resource  # Caches the model for faster subsequent runs
# def load_model():
#     return ShapEPipeline.from_pretrained(ckpt_id).to("cuda")
#
# def process_image_for_pil(image):
#     """Converts a model output frame (torch.Tensor or np.ndarray) to a PIL Image."""
#     if isinstance(image, torch.Tensor):
#         # Your PyTorch conversion logic here
#         raise NotImplementedError("Add PyTorch tensor conversion logic here.")
#     elif isinstance(image, np.ndarray):
#         image_array = image.astype("uint8")  # Assuming 8-bit conversion is needed
#         return Image.fromarray(image_array)
#     else:
#         raise TypeError("Unsupported image format. Please provide conversion logic.")
#
# def should_resize(image):
#     """Determines whether to resize images (replace with your own logic)."""
#     return image.width > 512 or image.height > 512
#
# test_image = np.random.randint(0, 256, size=(256, 256, 3), dtype=np.uint8)  # Placeholder image
# result = process_image_for_pil(test_image)
#
# pipe = load_model()
#
# # App Title
# st.title("Shark 3D Image Generator")
#
# # User Inputs
# prompt = st.text_input("Enter your prompt:", "a shark")
# guidance_scale = st.slider("Guidance Scale", 0.0, 20.0, 15.0, step=0.5)
#
# # Generate and Display Images
# if st.button("Generate"):
#     with st.spinner("Generating images..."):
#         images = pipe(prompt, guidance_scale=guidance_scale, num_inference_steps=64).images
#
#         # Process images for PIL conversion and resize if needed
#         pil_images = []
#         for image in images:
#             processed_image = process_image_for_pil(image)
#             if should_resize(processed_image):
#                 pil_images.append(processed_image.resize((256, 256)))
#             else:
#                 pil_images.append(processed_image)  # Append without resizing
#
#         gif_path = export_to_gif(pil_images, "shark_3d.gif")
#         st.image(pil_images[0])
#         st.success("GIF saved as shark_3d.gif")
# Visual QA
import requests
from PIL import Image
from transformers import Pix2StructForConditionalGeneration, Pix2StructProcessor
import streamlit as st

# Load the Pix2Struct AI2D model and its processor once at module import
model = Pix2StructForConditionalGeneration.from_pretrained("google/pix2struct-ai2d-base")
processor = Pix2StructProcessor.from_pretrained("google/pix2struct-ai2d-base")

# Quick smoke test on a sample diagram from the transformers documentation images
image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/ai2d-demo.jpg"
image = Image.open(requests.get(image_url, stream=True).raw)
question = "What does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud"
inputs = processor(images=image, text=question, return_tensors="pt")
predictions = model.generate(**inputs, max_new_tokens=1000)
# print(processor.decode(predictions[0], skip_special_tokens=True))
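
# Helper sketch: wraps the Pix2Struct processor/model calls above so the chat
# handler below can reuse them. The name `answer_question` is a local
# convenience introduced here, not a transformers API.
def answer_question(img_url, question_text):
    """Fetch the image behind `img_url` and answer `question_text` with Pix2Struct."""
    qa_image = Image.open(requests.get(img_url, stream=True).raw)
    qa_inputs = processor(images=qa_image, text=question_text, return_tensors="pt")
    qa_predictions = model.generate(**qa_inputs, max_new_tokens=1000)
    return processor.decode(qa_predictions[0], skip_special_tokens=True)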
def load_image():
    with st.sidebar:
        url = st.text_input("Enter Image URL")
        preset = st.selectbox(
            "Select Image",
            (
                "https://images.unsplash.com/photo-1593466144596-8abd50ad2c52?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=3434&q=80",
                "https://images.unsplash.com/photo-1566438480900-0609be27a4be?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=3394&q=80",
            ),
        )
        img = url or preset  # A typed URL takes priority over the preset choice
        if st.button("Load Image"):
            st.write("Image Uploaded!")
            st.image(img)
        else:
            st.warning("Please enter an image URL and click 'Load Image' before asking a question.")
        return img
def visual_qna():
    st.title("Visual Q&A")
    img = load_image()
    if img:
        if query := st.chat_input("Enter your message"):
            # Run the Pix2Struct model on the selected image URL and the user's question
            response = answer_question(img, query)
            with st.chat_message("assistant"):
                st.write(response)
    else:
        st.warning("Please enter an image URL and click 'Load Image' before asking a question.")
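
# Entry point sketch: Streamlit runs this file top to bottom, so the Q&A page is
# only rendered if visual_qna() is called here (assumed to be the intended entry point).
visual_qna()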