Spaces:
Running
on
Zero
Running
on
Zero
Vision-CAIR
committed on
Commit
•
5934d4b
1
Parent(s):
99e0f78
Update app.py
Browse files
app.py
CHANGED
@@ -25,9 +25,20 @@ from longvu.mm_datautils import (
|
|
25 |
from decord import cpu, VideoReader
|
26 |
|
27 |
|
28 |
-
title_markdown =
|
29 |
-
|
30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
|
32 |
block_css = """
|
33 |
#buttons button {
|
@@ -123,6 +134,15 @@ class Chat:
|
|
123 |
|
124 |
@spaces.GPU(duration=120)
|
125 |
def generate(image, video, message, chatbot, textbox_in, temperature, top_p, max_output_tokens, dtype=torch.float16):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
data = []
|
127 |
|
128 |
processor = handler.processor
|
@@ -157,7 +177,7 @@ def generate(image, video, message, chatbot, textbox_in, temperature, top_p, max
|
|
157 |
raise NotImplementedError("Not support image and video at the same time")
|
158 |
except Exception as e:
|
159 |
traceback.print_exc()
|
160 |
-
return gr.update(value=None, interactive=True), gr.update(value=None, interactive=True), message, chatbot
|
161 |
|
162 |
assert len(message) % 2 == 0, "The message should be a pair of user and system message."
|
163 |
|
@@ -202,7 +222,7 @@ def generate(image, video, message, chatbot, textbox_in, temperature, top_p, max
|
|
202 |
one_turn_chat[1] = text_en_out
|
203 |
chatbot.append(one_turn_chat)
|
204 |
|
205 |
-
return gr.update(value=image, interactive=True), gr.update(value=video, interactive=True), message, chatbot
|
206 |
|
207 |
|
208 |
def regenerate(message, chatbot):
|
@@ -284,14 +304,18 @@ with gr.Blocks(title='LongVU', theme=theme, css=block_css) as demo:
|
|
284 |
with gr.Column():
|
285 |
gr.Examples(
|
286 |
examples=[
|
|
|
|
|
|
|
|
|
287 |
[
|
288 |
f"./examples/video1.mp4",
|
289 |
"Describe this video in detail.",
|
290 |
],
|
291 |
[
|
292 |
f"./examples/video2.mp4",
|
293 |
-
"
|
294 |
-
]
|
295 |
],
|
296 |
inputs=[video, textbox],
|
297 |
)
|
@@ -301,6 +325,21 @@ with gr.Blocks(title='LongVU', theme=theme, css=block_css) as demo:
|
|
301 |
[image, video, message, chatbot, textbox, temperature, top_p, max_output_tokens],
|
302 |
[image, video, message, chatbot])
|
303 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
304 |
regenerate_btn.click(
|
305 |
regenerate,
|
306 |
[message, chatbot],
|
|
|
25 |
from decord import cpu, VideoReader
|
26 |
|
27 |
|
28 |
+
title_markdown = """
|
29 |
+
<div style="display: flex; justify-content: center; align-items: center; text-align: center;">
|
30 |
+
<div>
|
31 |
+
<h1 >LongVU: Spatiotemporal Adaptive Compression for Long Video-Language Understanding</h1>
|
32 |
+
</div>
|
33 |
+
</div>
|
34 |
+
<div align="center">
|
35 |
+
<div style="display:flex; gap: 0.25rem; margin-top: 10px;" align="center">
|
36 |
+
<a href=''><img src='https://img.shields.io/badge/arXiv-paper-red'></a>
|
37 |
+
<a href='https://vision-cair.github.io/LongVU/'><img src='https://img.shields.io/badge/Project-LongVU-blue'></a>
|
38 |
+
<a href=''><img src='https://img.shields.io/badge/model-checkpoints-green'></a>
|
39 |
+
</div>
|
40 |
+
</div>
|
41 |
+
"""
|
42 |
|
43 |
block_css = """
|
44 |
#buttons button {
|
|
|
134 |
|
135 |
@spaces.GPU(duration=120)
|
136 |
def generate(image, video, message, chatbot, textbox_in, temperature, top_p, max_output_tokens, dtype=torch.float16):
|
137 |
+
if textbox_in is None:
|
138 |
+
raise gr.Error("Chat messages cannot be empty")
|
139 |
+
return (
|
140 |
+
gr.update(value=image, interactive=True),
|
141 |
+
gr.update(value=video, interactive=True),
|
142 |
+
message,
|
143 |
+
chatbot,
|
144 |
+
None,
|
145 |
+
)
|
146 |
data = []
|
147 |
|
148 |
processor = handler.processor
|
|
|
177 |
raise NotImplementedError("Not support image and video at the same time")
|
178 |
except Exception as e:
|
179 |
traceback.print_exc()
|
180 |
+
return gr.update(value=None, interactive=True), gr.update(value=None, interactive=True), message, chatbot, None
|
181 |
|
182 |
assert len(message) % 2 == 0, "The message should be a pair of user and system message."
|
183 |
|
|
|
222 |
one_turn_chat[1] = text_en_out
|
223 |
chatbot.append(one_turn_chat)
|
224 |
|
225 |
+
return gr.update(value=image, interactive=True), gr.update(value=video, interactive=True), message, chatbot, None
|
226 |
|
227 |
|
228 |
def regenerate(message, chatbot):
|
|
|
304 |
with gr.Column():
|
305 |
gr.Examples(
|
306 |
examples=[
|
307 |
+
[
|
308 |
+
f"./examples/video3.mp4",
|
309 |
+
"What is the moving direction of the yellow ball?",
|
310 |
+
],
|
311 |
[
|
312 |
f"./examples/video1.mp4",
|
313 |
"Describe this video in detail.",
|
314 |
],
|
315 |
[
|
316 |
f"./examples/video2.mp4",
|
317 |
+
"What is the name of the store?",
|
318 |
+
],
|
319 |
],
|
320 |
inputs=[video, textbox],
|
321 |
)
|
|
|
325 |
[image, video, message, chatbot, textbox, temperature, top_p, max_output_tokens],
|
326 |
[image, video, message, chatbot])
|
327 |
|
328 |
+
textbox.submit(
|
329 |
+
generate,
|
330 |
+
[
|
331 |
+
image,
|
332 |
+
video,
|
333 |
+
message,
|
334 |
+
chatbot,
|
335 |
+
textbox,
|
336 |
+
temperature,
|
337 |
+
top_p,
|
338 |
+
max_output_tokens,
|
339 |
+
],
|
340 |
+
[image, video, message, chatbot, textbox],
|
341 |
+
)
|
342 |
+
|
343 |
regenerate_btn.click(
|
344 |
regenerate,
|
345 |
[message, chatbot],
|