aakashch0179 committed
Commit eadb20d
1 Parent(s): 24be6de

Update app.py

Files changed (1)
  1. app.py +75 -31
app.py CHANGED
@@ -1,39 +1,39 @@
  # Text to Vedio
- import torch
- from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
- from diffusers.utils import export_to_video
- import streamlit as st
- import numpy as np
-
- # Title and User Input
- st.title("Text-to-Video with Streamlit")
- prompt = st.text_input("Enter your text prompt:", "Spiderman is surfing")
-
- # Button to trigger generation
- if st.button("Generate Video"):
-     # Ensure you have 'accelerate' version 0.17.0 or higher
-     import accelerate
-     if accelerate.__version__ < "0.17.0":
-         st.warning("Please upgrade 'accelerate' to version 0.17.0 or higher for CPU offloading.")
-     else:
-         with st.spinner("Generating video..."):
-             # Define the pipeline for image generation
-             pipe = DiffusionPipeline.from_pretrained("damo-vilab/text-to-video-ms-1.7b",
-                                                      torch_dtype=torch.float16, variant="fp16", device="cpu")
-             pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
-             pipe.enable_model_cpu_offload()
+ # import torch
+ # from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
+ # from diffusers.utils import export_to_video
+ # import streamlit as st
+ # import numpy as np
+
+ # # Title and User Input
+ # st.title("Text-to-Video with Streamlit")
+ # prompt = st.text_input("Enter your text prompt:", "Spiderman is surfing")
+
+ # # Button to trigger generation
+ # if st.button("Generate Video"):
+ # # Ensure you have 'accelerate' version 0.17.0 or higher
+ # import accelerate
+ # if accelerate.__version__ < "0.17.0":
+ # st.warning("Please upgrade 'accelerate' to version 0.17.0 or higher for CPU offloading.")
+ # else:
+ # with st.spinner("Generating video..."):
+ # # Define the pipeline for image generation
+ # pipe = DiffusionPipeline.from_pretrained("damo-vilab/text-to-video-ms-1.7b",
+ # torch_dtype=torch.float16, variant="fp16", device="cpu")
+ # pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
+ # pipe.enable_model_cpu_offload()
 
-             # Generate video frames
-             video_frames = pipe(prompt, num_inference_steps=25).frames
+ # # Generate video frames
+ # video_frames = pipe(prompt, num_inference_steps=25).frames
 
-             # Create dummy frames for testing (replace with actual manipulation later)
-             dummy_frames = [np.ones((256, 256, 3), dtype=np.uint8) for _ in range(20)]
+ # # Create dummy frames for testing (replace with actual manipulation later)
+ # dummy_frames = [np.ones((256, 256, 3), dtype=np.uint8) for _ in range(20)]
 
-             # Export to video
-             video_path = export_to_video(dummy_frames)
+ # # Export to video
+ # video_path = export_to_video(dummy_frames)
 
-             # Display the video in the Streamlit app
-             st.video(video_path)
+ # # Display the video in the Streamlit app
+ # st.video(video_path)
 



 
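Note: the removed block passes device="cpu" alongside fp16 weights, compares accelerate versions as raw strings, and exports placeholder dummy_frames instead of the frames the pipeline produced. Below is a minimal sketch of the same flow with those issues addressed; it assumes a GPU-backed environment and the packaging library for version parsing, and is illustrative rather than the committed code.

```python
# Illustrative sketch only, not the committed app.py.
# Assumes a GPU-capable environment with diffusers, accelerate,
# streamlit and packaging installed.
import accelerate
import streamlit as st
import torch
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
from diffusers.utils import export_to_video
from packaging import version

st.title("Text-to-Video with Streamlit")
prompt = st.text_input("Enter your text prompt:", "Spiderman is surfing")

if st.button("Generate Video"):
    # Compare versions numerically; plain string comparison misorders e.g. "0.9.0" vs "0.17.0".
    if version.parse(accelerate.__version__) < version.parse("0.17.0"):
        st.warning("Please upgrade 'accelerate' to 0.17.0 or higher for CPU offloading.")
    else:
        with st.spinner("Generating video..."):
            # fp16 weights; no device="cpu" kwarg -- enable_model_cpu_offload() manages placement.
            pipe = DiffusionPipeline.from_pretrained(
                "damo-vilab/text-to-video-ms-1.7b",
                torch_dtype=torch.float16,
                variant="fp16",
            )
            pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
            pipe.enable_model_cpu_offload()

            # Export the frames the pipeline actually produced, not dummy ones.
            # (On recent diffusers releases the output is batched, so .frames[0] may be needed.)
            video_frames = pipe(prompt, num_inference_steps=25).frames
            video_path = export_to_video(video_frames)
            st.video(video_path)
```

Reloading the pipeline on every button press is also slow; wrapping the from_pretrained call in st.cache_resource is a common Streamlit pattern.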
@@ -111,6 +111,50 @@ if st.button("Generate Video"):
  # st.success("GIF saved as shark_3d.gif")


+ # visual QA
+ import requests
+ from PIL import Image
+ from transformers import Pix2StructForConditionalGeneration, Pix2StructProcessor
+ import streamlit as st
+
+
+ image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/ai2d-demo.jpg"
+ image = Image.open(requests.get(image_url, stream=True).raw)
+
+ model = Pix2StructForConditionalGeneration.from_pretrained("google/pix2struct-ai2d-base")
+ processor = Pix2StructProcessor.from_pretrained("google/pix2struct-ai2d-base")
+
+ question = "What does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud"
+
+ inputs = processor(images=image, text=question, return_tensors="pt")
+
+ predictions = model.generate(**inputs, max_new_tokens=1000)
+ # print(processor.decode(predictions[0], skip_special_tokens=True))
+
+
+
+ def load_image():
+     with st.sidebar:
+         if img := st.text_input("Enter Image URL") or st.selectbox("Select Image", ("https://images.unsplash.com/photo-1593466144596-8abd50ad2c52?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=3434&q=80", "https://images.unsplash.com/photo-1566438480900-0609be27a4be?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=3394&q=80")):
+             if st.button("Load Image"):
+                 st.write("Image Uploaded!")
+                 st.image(img)
+             else:
+                 st.warning("Please enter an image URL and click 'Load Image' before asking a question.")
+     return img
+
+
+
+ def visual_qna():
+     st.title("Visual Q&A")
+     img = load_image()
+     if img:
+         if query := st.chat_input("Enter your message"):
+             response = model(question=query, image=img)
+             with st.chat_message("assistant"):
+                 st.write(response)
+     else:
+         st.warning("Please enter an image URL and click 'Load Image' before asking a question.")



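Note: in the added visual_qna(), model(question=query, image=img) calls the Pix2Struct model as if it were a pipeline and passes the raw URL string as the image, so the chat path would fail at runtime. Below is a minimal sketch of how the question-answering step could be wired with the same model/processor pair; the answer_question helper and the simplified URL handling are assumptions for illustration, not part of this commit.

```python
# Illustrative sketch only, not the committed code. The answer_question
# helper and the URL handling below are assumptions for clarity.
import requests
import streamlit as st
from PIL import Image
from transformers import Pix2StructForConditionalGeneration, Pix2StructProcessor

model = Pix2StructForConditionalGeneration.from_pretrained("google/pix2struct-ai2d-base")
processor = Pix2StructProcessor.from_pretrained("google/pix2struct-ai2d-base")


def answer_question(image_url: str, question: str) -> str:
    # Download the image, then run the processor -> generate -> decode chain.
    image = Image.open(requests.get(image_url, stream=True).raw)
    inputs = processor(images=image, text=question, return_tensors="pt")
    predictions = model.generate(**inputs, max_new_tokens=100)
    return processor.decode(predictions[0], skip_special_tokens=True)


def visual_qna():
    st.title("Visual Q&A")
    img_url = st.sidebar.text_input("Enter Image URL")
    if img_url:
        st.sidebar.image(img_url)
        if query := st.chat_input("Enter your message"):
            with st.chat_message("assistant"):
                st.write(answer_question(img_url, query))
    else:
        st.warning("Please enter an image URL before asking a question.")


visual_qna()
```

In a real app the two from_pretrained calls would typically be wrapped in st.cache_resource so the model is not reloaded on every Streamlit rerun.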