Vision-CAIR committed on
Commit
5934d4b
1 Parent(s): 99e0f78

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -7
app.py CHANGED
@@ -25,9 +25,20 @@ from longvu.mm_datautils import (
25
  from decord import cpu, VideoReader
26
 
27
 
28
- title_markdown = ("""
29
- LongVU
30
- """)
 
 
 
 
 
 
 
 
 
 
 
31
 
32
  block_css = """
33
  #buttons button {
@@ -123,6 +134,15 @@ class Chat:
123
 
124
  @spaces.GPU(duration=120)
125
  def generate(image, video, message, chatbot, textbox_in, temperature, top_p, max_output_tokens, dtype=torch.float16):
 
 
 
 
 
 
 
 
 
126
  data = []
127
 
128
  processor = handler.processor
@@ -157,7 +177,7 @@ def generate(image, video, message, chatbot, textbox_in, temperature, top_p, max
157
  raise NotImplementedError("Not support image and video at the same time")
158
  except Exception as e:
159
  traceback.print_exc()
160
- return gr.update(value=None, interactive=True), gr.update(value=None, interactive=True), message, chatbot
161
 
162
  assert len(message) % 2 == 0, "The message should be a pair of user and system message."
163
 
@@ -202,7 +222,7 @@ def generate(image, video, message, chatbot, textbox_in, temperature, top_p, max
202
  one_turn_chat[1] = text_en_out
203
  chatbot.append(one_turn_chat)
204
 
205
- return gr.update(value=image, interactive=True), gr.update(value=video, interactive=True), message, chatbot
206
 
207
 
208
  def regenerate(message, chatbot):
@@ -284,14 +304,18 @@ with gr.Blocks(title='LongVU', theme=theme, css=block_css) as demo:
284
  with gr.Column():
285
  gr.Examples(
286
  examples=[
 
 
 
 
287
  [
288
  f"./examples/video1.mp4",
289
  "Describe this video in detail.",
290
  ],
291
  [
292
  f"./examples/video2.mp4",
293
- "Which country does the boy in the video probably come from?",
294
- ]
295
  ],
296
  inputs=[video, textbox],
297
  )
@@ -301,6 +325,21 @@ with gr.Blocks(title='LongVU', theme=theme, css=block_css) as demo:
301
  [image, video, message, chatbot, textbox, temperature, top_p, max_output_tokens],
302
  [image, video, message, chatbot])
303
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
304
  regenerate_btn.click(
305
  regenerate,
306
  [message, chatbot],
 
25
  from decord import cpu, VideoReader
26
 
27
 
28
+ title_markdown = """
29
+ <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
30
+ <div>
31
+ <h1 >LongVU: Spatiotemporal Adaptive Compression for Long Video-Language Understanding</h1>
32
+ </div>
33
+ </div>
34
+ <div align="center">
35
+ <div style="display:flex; gap: 0.25rem; margin-top: 10px;" align="center">
36
+ <a href=''><img src='https://img.shields.io/badge/arXiv-paper-red'></a>
37
+ <a href='https://vision-cair.github.io/LongVU/'><img src='https://img.shields.io/badge/Project-LongVU-blue'></a>
38
+ <a href=''><img src='https://img.shields.io/badge/model-checkpoints-green'></a>
39
+ </div>
40
+ </div>
41
+ """
42
 
43
  block_css = """
44
  #buttons button {
 
134
 
135
  @spaces.GPU(duration=120)
136
  def generate(image, video, message, chatbot, textbox_in, temperature, top_p, max_output_tokens, dtype=torch.float16):
137
+ if textbox_in is None:
138
+ raise gr.Error("Chat messages cannot be empty")
139
+ return (
140
+ gr.update(value=image, interactive=True),
141
+ gr.update(value=video, interactive=True),
142
+ message,
143
+ chatbot,
144
+ None,
145
+ )
146
  data = []
147
 
148
  processor = handler.processor
 
177
  raise NotImplementedError("Not support image and video at the same time")
178
  except Exception as e:
179
  traceback.print_exc()
180
+ return gr.update(value=None, interactive=True), gr.update(value=None, interactive=True), message, chatbot, None
181
 
182
  assert len(message) % 2 == 0, "The message should be a pair of user and system message."
183
 
 
222
  one_turn_chat[1] = text_en_out
223
  chatbot.append(one_turn_chat)
224
 
225
+ return gr.update(value=image, interactive=True), gr.update(value=video, interactive=True), message, chatbot, None
226
 
227
 
228
  def regenerate(message, chatbot):
 
304
  with gr.Column():
305
  gr.Examples(
306
  examples=[
307
+ [
308
+ f"./examples/video3.mp4",
309
+ "What is the moving direction of the yellow ball?",
310
+ ],
311
  [
312
  f"./examples/video1.mp4",
313
  "Describe this video in detail.",
314
  ],
315
  [
316
  f"./examples/video2.mp4",
317
+ "What is the name of the store?",
318
+ ],
319
  ],
320
  inputs=[video, textbox],
321
  )
 
325
  [image, video, message, chatbot, textbox, temperature, top_p, max_output_tokens],
326
  [image, video, message, chatbot])
327
 
328
+ textbox.submit(
329
+ generate,
330
+ [
331
+ image,
332
+ video,
333
+ message,
334
+ chatbot,
335
+ textbox,
336
+ temperature,
337
+ top_p,
338
+ max_output_tokens,
339
+ ],
340
+ [image, video, message, chatbot, textbox],
341
+ )
342
+
343
  regenerate_btn.click(
344
  regenerate,
345
  [message, chatbot],