app.py CHANGED
@@ -97,8 +97,9 @@ class Chat:
97
 
98
 
99
  @spaces.GPU(duration=120)
100
- def generate(image, video, audio, message, chatbot, va_tag, textbox_in, temperature, top_p, max_output_tokens, dtype=torch.float16):
101
  data = []
 
102
 
103
  processor = handler.processor
104
  try:
@@ -182,7 +183,7 @@ def generate(image, video, audio, message, chatbot, va_tag, textbox_in, temperat
182
  one_turn_chat[1] = text_en_out
183
  chatbot.append(one_turn_chat)
184
 
185
- return gr.update(value=image, interactive=True), gr.update(value=video, interactive=True), gr.update(value=audio, interactive=True), message, chatbot
186
 
187
 
188
  def regenerate(message, chatbot):
@@ -226,8 +227,6 @@ with gr.Blocks(title='VideoLLaMA 2 🔥🚀🔥', theme=theme, css=block_css) as
226
 
227
  with gr.Row():
228
  with gr.Column(scale=3):
229
- #image = gr.Image(label="Input Image", type="filepath")
230
- image = None
231
  video = gr.Video(label="Input Video")
232
  audio = gr.Audio(label="Input Audio", type="filepath")
233
 
@@ -287,22 +286,6 @@ with gr.Blocks(title='VideoLLaMA 2 🔥🚀🔥', theme=theme, css=block_css) as
287
 
288
  with gr.Row():
289
  cur_dir = os.path.dirname(os.path.abspath(__file__))
290
- '''
291
- with gr.Column():
292
- gr.Examples(
293
- examples=[
294
- [
295
- f"{cur_dir}/examples/extreme_ironing.jpg",
296
- "What happens in this image?",
297
- ],
298
- [
299
- f"{cur_dir}/examples/waterview.jpg",
300
- "What are the things I should be cautious about when I visit here?",
301
- ],
302
- ],
303
- inputs=[image, textbox],
304
- )
305
- '''
306
  with gr.Column():
307
  gr.Examples(
308
  examples=[
@@ -336,11 +319,11 @@ with gr.Blocks(title='VideoLLaMA 2 🔥🚀🔥', theme=theme, css=block_css) as
336
  gr.Examples(
337
  examples=[
338
  [
339
- f"{cur_dir}/examples/Y--ZHUMfueO0.flac",
340
  "Please describe the audio.",
341
  ],
342
  [
343
- f"{cur_dir}/examples/Traffic and pedestrians.wav",
344
  "Please describe the audio.",
345
  ],
346
  ],
@@ -352,20 +335,20 @@ with gr.Blocks(title='VideoLLaMA 2 🔥🚀🔥', theme=theme, css=block_css) as
352
 
353
  submit_btn.click(
354
  generate,
355
- [image, video, audio, message, chatbot, va_tag, textbox, temperature, top_p, max_output_tokens],
356
- [image, video, audio, message, chatbot])
357
 
358
  regenerate_btn.click(
359
  regenerate,
360
  [message, chatbot],
361
  [message, chatbot]).then(
362
  generate,
363
- [image, video, audio, message, chatbot, va_tag, textbox, temperature, top_p, max_output_tokens],
364
- [image, video, audio, message, chatbot])
365
 
366
  clear_btn.click(
367
  clear_history,
368
  [message, chatbot],
369
- [image, video, audio, message, chatbot, textbox])
370
 
371
  demo.launch(share=False)
 
97
 
98
 
99
  @spaces.GPU(duration=120)
100
+ def generate(video, audio, message, chatbot, va_tag, textbox_in, temperature, top_p, max_output_tokens, dtype=torch.float16):
101
  data = []
102
+ image = None
103
 
104
  processor = handler.processor
105
  try:
 
183
  one_turn_chat[1] = text_en_out
184
  chatbot.append(one_turn_chat)
185
 
186
+ return gr.update(value=video, interactive=True), gr.update(value=audio, interactive=True), message, chatbot
187
 
188
 
189
  def regenerate(message, chatbot):
 
227
 
228
  with gr.Row():
229
  with gr.Column(scale=3):
 
 
230
  video = gr.Video(label="Input Video")
231
  audio = gr.Audio(label="Input Audio", type="filepath")
232
 
 
286
 
287
  with gr.Row():
288
  cur_dir = os.path.dirname(os.path.abspath(__file__))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289
  with gr.Column():
290
  gr.Examples(
291
  examples=[
 
319
  gr.Examples(
320
  examples=[
321
  [
322
+ f"{cur_dir}/examples/bird-twitter-car.wav",
323
  "Please describe the audio.",
324
  ],
325
  [
326
+ f"{cur_dir}/examples/door.of.bar.raining2.wav",
327
  "Please describe the audio.",
328
  ],
329
  ],
 
335
 
336
  submit_btn.click(
337
  generate,
338
+ [video, audio, message, chatbot, va_tag, textbox, temperature, top_p, max_output_tokens],
339
+ [video, audio, message, chatbot])
340
 
341
  regenerate_btn.click(
342
  regenerate,
343
  [message, chatbot],
344
  [message, chatbot]).then(
345
  generate,
346
+ [video, audio, message, chatbot, va_tag, textbox, temperature, top_p, max_output_tokens],
347
+ [video, audio, message, chatbot])
348
 
349
  clear_btn.click(
350
  clear_history,
351
  [message, chatbot],
352
+ [video, audio, message, chatbot, textbox])
353
 
354
  demo.launch(share=False)
examples/Y--ZHUMfueO0.flac DELETED
Binary file (324 kB)
 
examples/{1034346401.mp4 → bird-twitter-car.wav} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:08b62a634fe49edc0a19fc53f6ea5cfb345d9b2a6a7047811344c16832dc42b2
3
- size 1678095
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9d2287cf4fe2eec00c9c7c623df34cacdc3f2a0e91655db805b4871193fb680
3
+ size 2412098
examples/desert.jpg DELETED
Binary file (881 kB)
 
examples/{Traffic and pedestrians.wav → door.of.bar.raining2.wav} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:39d805c8e0e487427d60c47ded7d7cca9b8fa288c1a53c93118b15f68ecf6792
3
- size 1656254
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:535cab1a35770077b8ca31e6773ec5121b9ac6559430600821b2c747a944f7d2
3
+ size 1339018
examples/extreme_ironing.jpg DELETED
Binary file (62.6 kB)
 
examples/sample_demo_3.mp4 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:da6126bce64c64a3d6f7ce889fbe15b5f1c2e3f978846351d8c7a79a950b429e
3
- size 463547
 
 
 
 
examples/sample_demo_9.mp4 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9702694f185e27ae016b85024b367e140cf93a4e3124d072816fd32f2ca0d96
3
- size 631864
 
 
 
 
examples/waterview.jpg DELETED
Binary file (95.5 kB)