# Text to Video
# import torch
# from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
# from diffusers.utils import export_to_video
# import streamlit as st
# import numpy as np
#
# # Title and user input
# st.title("Text-to-Video with Streamlit")
# prompt = st.text_input("Enter your text prompt:", "Spiderman is surfing")
#
# # Button to trigger generation
# if st.button("Generate Video"):
#     # Ensure 'accelerate' version 0.17.0 or higher is installed for CPU offloading
#     import accelerate
#     if accelerate.__version__ < "0.17.0":
#         st.warning("Please upgrade 'accelerate' to version 0.17.0 or higher for CPU offloading.")
#     else:
#         with st.spinner("Generating video..."):
#             # Define the pipeline for video generation
#             pipe = DiffusionPipeline.from_pretrained(
#                 "damo-vilab/text-to-video-ms-1.7b",
#                 torch_dtype=torch.float16, variant="fp16",
#             )
#             pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
#             pipe.enable_model_cpu_offload()
#
#             # Generate video frames
#             video_frames = pipe(prompt, num_inference_steps=25).frames
#
#             # Create dummy frames for testing (replace with the generated frames later)
#             dummy_frames = [np.ones((256, 256, 3), dtype=np.uint8) for _ in range(20)]
#
#             # Export to video
#             video_path = export_to_video(dummy_frames)
#
#             # Display the video in the Streamlit app
#             st.video(video_path)
# Text to 3D
# import streamlit as st
# import torch
# from diffusers import ShapEPipeline
# from diffusers.utils import export_to_gif
# from PIL import Image
# import numpy as np
#
# # Model checkpoint (loading is done once at the start for efficiency)
# ckpt_id = "openai/shap-e"
#
# def process_image_for_pil(image):
#     if isinstance(image, torch.Tensor):
#         # Convert a PyTorch tensor to a NumPy array before the PIL conversion
#         image_array = image.cpu().numpy().astype('uint8')
#         return Image.fromarray(image_array)
#     elif isinstance(image, np.ndarray):
#         # Assuming 8-bit conversion is needed
#         image_array = image.astype('uint8')
#         return Image.fromarray(image_array)
#     else:
#         raise TypeError("Unsupported image format. Please provide conversion logic.")
#
# test_image = np.random.randint(0, 256, size=(256, 256, 3), dtype=np.uint8)  # Placeholder image
# result = process_image_for_pil(test_image)
# def should_resize(image):
#     """Determines whether to resize images (replace with your own logic)."""
#     if image.width > 512 or image.height > 512:
#         return True
#     else:
#         return False
#
# @st.cache_resource  # Caches the model for faster subsequent runs
# def load_model():
#     return ShapEPipeline.from_pretrained(ckpt_id).to("cuda")
#
# pipe = load_model()
#
# # App title
# st.title("Shark 3D Image Generator")
#
# # User inputs
# prompt = st.text_input("Enter your prompt:", "a shark")
# guidance_scale = st.slider("Guidance Scale", 0.0, 20.0, 15.0, step=0.5)
#
# # Generate and display images
# if st.button("Generate"):
#     with st.spinner("Generating images..."):
#         images = pipe(prompt, guidance_scale=guidance_scale, num_inference_steps=64).images
#
#         # Process images for PIL conversion and resize them if needed
#         pil_images = []
#         for image in images:
#             processed_image = process_image_for_pil(image)
#             if should_resize(processed_image):
#                 resized_image = processed_image.resize((256, 256))
#                 pil_images.append(resized_image)
#             else:
#                 pil_images.append(processed_image)  # Append without resizing
#
#         gif_path = export_to_gif(pil_images, "shark_3d.gif")
#         st.image(pil_images[0])
#         st.success("GIF saved as shark_3d.gif")
# visual QA
import requests
from PIL import Image
from transformers import Pix2StructForConditionalGeneration, Pix2StructProcessor
import streamlit as st
image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/ai2d-demo.jpg"
image = Image.open(requests.get(image_url, stream=True).raw)
model = Pix2StructForConditionalGeneration.from_pretrained("google/pix2struct-ai2d-base")
processor = Pix2StructProcessor.from_pretrained("google/pix2struct-ai2d-base")
question = "What does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud"
inputs = processor(images=image, text=question, return_tensors="pt")
predictions = model.generate(**inputs, max_new_tokens=1000)
# print(processor.decode(predictions[0], skip_special_tokens=True))
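
# Note (assumption, not in the original file): the model and processor above are
# loaded at module level, so they are re-created on every Streamlit rerun. A
# minimal sketch of caching them with st.cache_resource, mirroring the
# commented-out Shap-E block above; 'load_pix2struct' is a hypothetical helper.
# @st.cache_resource
# def load_pix2struct():
#     m = Pix2StructForConditionalGeneration.from_pretrained("google/pix2struct-ai2d-base")
#     p = Pix2StructProcessor.from_pretrained("google/pix2struct-ai2d-base")
#     return m, p
# model, processor = load_pix2struct()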
def load_image():
    with st.sidebar:
        if img := st.text_input("Enter Image URL") or st.selectbox("Select Image", ("https://images.unsplash.com/photo-1593466144596-8abd50ad2c52?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=3434&q=80", "https://images.unsplash.com/photo-1566438480900-0609be27a4be?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=3394&q=80")):
            if st.button("Load Image"):
                st.write("Image Uploaded!")
                st.image(img)
        else:
            st.warning("Please enter an image URL and click 'Load Image' before asking a question.")
    return img
def visual_qna():
    st.title("Visual Q&A")
    img = load_image()
    if img:
        if query := st.chat_input("Enter your message"):
            # Fetch the selected image and run Pix2Struct on the question,
            # mirroring the processor + generate + decode flow used above
            pil_img = Image.open(requests.get(img, stream=True).raw)
            qna_inputs = processor(images=pil_img, text=query, return_tensors="pt")
            qna_predictions = model.generate(**qna_inputs, max_new_tokens=1000)
            response = processor.decode(qna_predictions[0], skip_special_tokens=True)
            with st.chat_message("assistant"):
                st.write(response)
    else:
        st.warning("Please enter an image URL and click 'Load Image' before asking a question.")
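
# Entry point (assumption): the code shown defines load_image() and visual_qna()
# but never calls them, so nothing would render when Streamlit runs the script.
# A minimal sketch of wiring the app up; Streamlit executes this file as the
# main module, so the guard below fires on each run.
if __name__ == "__main__":
    visual_qna()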