TH9817 committed
Commit d50ab60
1 Parent(s): 893fc64

Update app.py

Files changed (1)
  1. app.py +48 -48
app.py CHANGED
@@ -41,55 +41,55 @@ def read_video_pyav(container, indices):
             frames.append(frame)
     return np.stack([x.to_ndarray(format="rgb24") for x in frames])
 
-
+def chat(path,token):
 # Download video from the hub
 #video_path_1 = hf_hub_download(repo_id="raushan-testing-hf/videos-test", filename="sample_demo_1.mp4", repo_type="dataset")
-video_path="/Users/aa469627/Desktop/videollama/scene/sample1-Scene-049.mp4"
-#video_path_2 = hf_hub_download(repo_id="raushan-testing-hf/videos-test", filename="karate.mp4", repo_type="dataset")
-
-container = av.open(video_path_1)
-
-# sample uniformly 8 frames from the video (we can sample more for longer videos)
-total_frames = container.streams.video[0].frames
-indices = np.arange(0, total_frames, total_frames / 8).astype(int)
-clip_baby = read_video_pyav(container, indices)
-
-
-#container = av.open(video_path_2)
-
-# sample uniformly 8 frames from the video (we can sample more for longer videos)
-#total_frames = container.streams.video[0].frames
-#indices = np.arange(0, total_frames, total_frames / 8).astype(int)
-#clip_karate = read_video_pyav(container, indices)
-
-# Each "content" is a list of dicts and you can add image/video/text modalities
-conversation = [
-    {
-        "role": "user",
-        "content": [
-            {"type": "text", "text": "What happens in the video?"},
-            {"type": "video"},
-        ],
-    },
-]
-
-conversation_2 = [
-    {
-        "role": "user",
-        "content": [
-            {"type": "text", "text": "What do you see in this video?"},
-            {"type": "video"},
-        ],
-    },
-]
-
-prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
-#prompt_2 = processor.apply_chat_template(conversation_2, add_generation_prompt=True)
-
-inputs = processor(prompt, videos=clip_baby, padding=True, return_tensors="pt").to(model.device)
-
-def chat(i):
-    generate_kwargs = {"max_new_tokens": i, "do_sample": True, "top_p": 0.9}
+    video_path='"'+path+'"'
+    #video_path_2 = hf_hub_download(repo_id="raushan-testing-hf/videos-test", filename="karate.mp4", repo_type="dataset")
+
+    container = av.open(video_path)
+
+    # sample uniformly 8 frames from the video (we can sample more for longer videos)
+    total_frames = container.streams.video[0].frames
+    indices = np.arange(0, total_frames, total_frames / 8).astype(int)
+    clip_baby = read_video_pyav(container, indices)
+
+
+    #container = av.open(video_path_2)
+
+    # sample uniformly 8 frames from the video (we can sample more for longer videos)
+    #total_frames = container.streams.video[0].frames
+    #indices = np.arange(0, total_frames, total_frames / 8).astype(int)
+    #clip_karate = read_video_pyav(container, indices)
+
+    # Each "content" is a list of dicts and you can add image/video/text modalities
+    conversation = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "What happens in the video?"},
+                {"type": "video"},
+            ],
+        },
+    ]
+
+    conversation_2 = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "What do you see in this video?"},
+                {"type": "video"},
+            ],
+        },
+    ]
+
+    prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
+    #prompt_2 = processor.apply_chat_template(conversation_2, add_generation_prompt=True)
+
+    inputs = processor(prompt, videos=clip_baby, padding=True, return_tensors="pt").to(model.device)
+
+
+    generate_kwargs = {"max_new_tokens": token, "do_sample": True, "top_p": 0.9}
 
     output = model.generate(**inputs, **generate_kwargs)
     generated_text = processor.batch_decode(output, skip_special_tokens=True)
@@ -98,7 +98,7 @@ def chat(i):
 
 demo = gr.Interface(
     fn=chat,
-    inputs=[gr.Slider(100,300)],
+    inputs=["text",gr.Slider(100,300)],
     outputs=["text"],
 )
 
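
For orientation, the sketch below is a hedged reconstruction of what app.py roughly amounts to after this commit, not the committed file itself. The checkpoint id, the model/processor loading, the full body of read_video_pyav, the return value of chat, and the launch() call are all assumptions (none of them is visible in this diff), and the sketch passes the Gradio textbox value straight to av.open() rather than wrapping it in literal quote characters the way the committed line video_path='"'+path+'"' does, since av.open() expects a plain path string.

import av
import gradio as gr
import numpy as np
import torch
from transformers import LlavaNextVideoForConditionalGeneration, LlavaNextVideoProcessor

# Assumed checkpoint, purely for illustration; the diff never shows how
# model/processor are created in app.py.
model_id = "llava-hf/LLaVA-NeXT-Video-7B-hf"
processor = LlavaNextVideoProcessor.from_pretrained(model_id)
model = LlavaNextVideoForConditionalGeneration.from_pretrained(
    model_id, torch_dtype=torch.float16, device_map="auto"
)

def read_video_pyav(container, indices):
    # Decode only the requested frame indices. Only the last two lines of this
    # helper appear in the diff context; the rest follows the standard
    # transformers video example.
    frames = []
    container.seek(0)
    start_index = indices[0]
    end_index = indices[-1]
    for i, frame in enumerate(container.decode(video=0)):
        if i > end_index:
            break
        if i >= start_index and i in indices:
            frames.append(frame)
    return np.stack([x.to_ndarray(format="rgb24") for x in frames])

def chat(path, token):
    # Open the video whose path comes from the Gradio textbox.
    container = av.open(path)

    # Sample 8 frames uniformly across the clip.
    total_frames = container.streams.video[0].frames
    indices = np.arange(0, total_frames, total_frames / 8).astype(int)
    clip = read_video_pyav(container, indices)

    # One user turn with a text question and a video placeholder, as in the diff.
    conversation = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What happens in the video?"},
                {"type": "video"},
            ],
        },
    ]
    prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
    inputs = processor(prompt, videos=clip, padding=True, return_tensors="pt").to(model.device)

    # The slider value is the generation budget (max_new_tokens).
    generate_kwargs = {"max_new_tokens": int(token), "do_sample": True, "top_p": 0.9}
    output = model.generate(**inputs, **generate_kwargs)
    return processor.batch_decode(output, skip_special_tokens=True)[0]

demo = gr.Interface(
    fn=chat,
    inputs=["text", gr.Slider(100, 300)],  # video path + token budget
    outputs=["text"],
)
demo.launch()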