aakashch0179 committed
Commit eadb20d
1 Parent(s): 24be6de

Update app.py

Files changed (1)
  1. app.py +75 -31
app.py CHANGED
@@ -1,39 +1,39 @@
  # Text to Vedio
- import torch
- from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
- from diffusers.utils import export_to_video
- import streamlit as st
- import numpy as np
-
- # Title and User Input
- st.title("Text-to-Video with Streamlit")
- prompt = st.text_input("Enter your text prompt:", "Spiderman is surfing")
-
- # Button to trigger generation
- if st.button("Generate Video"):
-     # Ensure you have 'accelerate' version 0.17.0 or higher
-     import accelerate
-     if accelerate.__version__ < "0.17.0":
-         st.warning("Please upgrade 'accelerate' to version 0.17.0 or higher for CPU offloading.")
-     else:
-         with st.spinner("Generating video..."):
-             # Define the pipeline for image generation
-             pipe = DiffusionPipeline.from_pretrained("damo-vilab/text-to-video-ms-1.7b",
-                                                      torch_dtype=torch.float16, variant="fp16", device="cpu")
-             pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
-             pipe.enable_model_cpu_offload()
+ # import torch
+ # from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
+ # from diffusers.utils import export_to_video
+ # import streamlit as st
+ # import numpy as np
+
+ # # Title and User Input
+ # st.title("Text-to-Video with Streamlit")
+ # prompt = st.text_input("Enter your text prompt:", "Spiderman is surfing")
+
+ # # Button to trigger generation
+ # if st.button("Generate Video"):
+ # # Ensure you have 'accelerate' version 0.17.0 or higher
+ # import accelerate
+ # if accelerate.__version__ < "0.17.0":
+ # st.warning("Please upgrade 'accelerate' to version 0.17.0 or higher for CPU offloading.")
+ # else:
+ # with st.spinner("Generating video..."):
+ # # Define the pipeline for image generation
+ # pipe = DiffusionPipeline.from_pretrained("damo-vilab/text-to-video-ms-1.7b",
+ # torch_dtype=torch.float16, variant="fp16", device="cpu")
+ # pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
+ # pipe.enable_model_cpu_offload()
 
-             # Generate video frames
-             video_frames = pipe(prompt, num_inference_steps=25).frames
+ # # Generate video frames
+ # video_frames = pipe(prompt, num_inference_steps=25).frames
 
-             # Create dummy frames for testing (replace with actual manipulation later)
-             dummy_frames = [np.ones((256, 256, 3), dtype=np.uint8) for _ in range(20)]
+ # # Create dummy frames for testing (replace with actual manipulation later)
+ # dummy_frames = [np.ones((256, 256, 3), dtype=np.uint8) for _ in range(20)]
 
-             # Export to video
-             video_path = export_to_video(dummy_frames)
+ # # Export to video
+ # video_path = export_to_video(dummy_frames)
 
-             # Display the video in the Streamlit app
-             st.video(video_path)
+ # # Display the video in the Streamlit app
+ # st.video(video_path)
 



 
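Note: the removed block passes device="cpu" alongside fp16 weights, compares accelerate versions as raw strings, and exports placeholder dummy_frames instead of the frames the pipeline produced. Below is a minimal sketch of the same flow with those issues addressed; it assumes a GPU-backed environment and the packaging library for version parsing, and is illustrative rather than the committed code.

```python
# Illustrative sketch only, not the committed app.py.
# Assumes a GPU-capable environment with diffusers, accelerate,
# streamlit and packaging installed.
import accelerate
import streamlit as st
import torch
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
from diffusers.utils import export_to_video
from packaging import version

st.title("Text-to-Video with Streamlit")
prompt = st.text_input("Enter your text prompt:", "Spiderman is surfing")

if st.button("Generate Video"):
    # Compare versions numerically; plain string comparison misorders e.g. "0.9.0" vs "0.17.0".
    if version.parse(accelerate.__version__) < version.parse("0.17.0"):
        st.warning("Please upgrade 'accelerate' to 0.17.0 or higher for CPU offloading.")
    else:
        with st.spinner("Generating video..."):
            # fp16 weights; no device="cpu" kwarg -- enable_model_cpu_offload() manages placement.
            pipe = DiffusionPipeline.from_pretrained(
                "damo-vilab/text-to-video-ms-1.7b",
                torch_dtype=torch.float16,
                variant="fp16",
            )
            pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
            pipe.enable_model_cpu_offload()

            # Export the frames the pipeline actually produced, not dummy ones.
            # (On recent diffusers releases the output is batched, so .frames[0] may be needed.)
            video_frames = pipe(prompt, num_inference_steps=25).frames
            video_path = export_to_video(video_frames)
            st.video(video_path)
```

Reloading the pipeline on every button press is also slow; wrapping the from_pretrained call in st.cache_resource is a common Streamlit pattern.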
@@ -111,6 +111,50 @@ if st.button("Generate Video"):
  # st.success("GIF saved as shark_3d.gif")


+ # visual QA
+ import requests
+ from PIL import Image
+ from transformers import Pix2StructForConditionalGeneration, Pix2StructProcessor
+ import streamlit as st
+
+
+ image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/ai2d-demo.jpg"
+ image = Image.open(requests.get(image_url, stream=True).raw)
+
+ model = Pix2StructForConditionalGeneration.from_pretrained("google/pix2struct-ai2d-base")
+ processor = Pix2StructProcessor.from_pretrained("google/pix2struct-ai2d-base")
+
+ question = "What does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud"
+
+ inputs = processor(images=image, text=question, return_tensors="pt")
+
+ predictions = model.generate(**inputs, max_new_tokens=1000)
+ # print(processor.decode(predictions[0], skip_special_tokens=True))
+
+
+
+ def load_image():
+     with st.sidebar:
+         if img := st.text_input("Enter Image URL") or st.selectbox("Select Image", ("https://images.unsplash.com/photo-1593466144596-8abd50ad2c52?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=3434&q=80", "https://images.unsplash.com/photo-1566438480900-0609be27a4be?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=3394&q=80")):
+             if st.button("Load Image"):
+                 st.write("Image Uploaded!")
+                 st.image(img)
+             else:
+                 st.warning("Please enter an image URL and click 'Load Image' before asking a question.")
+     return img
+
+
+
+ def visual_qna():
+     st.title("Visual Q&A")
+     img = load_image()
+     if img:
+         if query := st.chat_input("Enter your message"):
+             response = model(question=query, image=img)
+             with st.chat_message("assistant"):
+                 st.write(response)
+     else:
+         st.warning("Please enter an image URL and click 'Load Image' before asking a question.")



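Note: in the added visual_qna(), model(question=query, image=img) calls the Pix2Struct model as if it were a pipeline and passes the raw URL string as the image, so the chat path would fail at runtime. Below is a minimal sketch of how the question-answering step could be wired with the same model/processor pair; the answer_question helper and the simplified URL handling are assumptions for illustration, not part of this commit.

```python
# Illustrative sketch only, not the committed code. The answer_question
# helper and the URL handling below are assumptions for clarity.
import requests
import streamlit as st
from PIL import Image
from transformers import Pix2StructForConditionalGeneration, Pix2StructProcessor

model = Pix2StructForConditionalGeneration.from_pretrained("google/pix2struct-ai2d-base")
processor = Pix2StructProcessor.from_pretrained("google/pix2struct-ai2d-base")


def answer_question(image_url: str, question: str) -> str:
    # Download the image, then run the processor -> generate -> decode chain.
    image = Image.open(requests.get(image_url, stream=True).raw)
    inputs = processor(images=image, text=question, return_tensors="pt")
    predictions = model.generate(**inputs, max_new_tokens=100)
    return processor.decode(predictions[0], skip_special_tokens=True)


def visual_qna():
    st.title("Visual Q&A")
    img_url = st.sidebar.text_input("Enter Image URL")
    if img_url:
        st.sidebar.image(img_url)
        if query := st.chat_input("Enter your message"):
            with st.chat_message("assistant"):
                st.write(answer_question(img_url, query))
    else:
        st.warning("Please enter an image URL before asking a question.")


visual_qna()
```

In a real app the two from_pretrained calls would typically be wrapped in st.cache_resource so the model is not reloaded on every Streamlit rerun.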