Staticaliza committed on
Commit
45099c6
1 Parent(s): 32f0fe9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -3
app.py CHANGED
@@ -4,6 +4,7 @@ import spaces
4
  import torch
5
 
6
  from PIL import Image
 
7
  from transformers import AutoModel, AutoTokenizer
8
 
9
  # Pre-Initialize
@@ -14,6 +15,7 @@ print(f"[SYSTEM] | Using {DEVICE} type compute device.")
14
 
15
  # Variables
16
  DEFAULT_INPUT = "Describe in one paragraph."
 
17
 
18
  repo = AutoModel.from_pretrained("openbmb/MiniCPM-V-2_6", torch_dtype=torch.bfloat16, trust_remote_code=True)
19
  tokenizer = AutoTokenizer.from_pretrained("openbmb/MiniCPM-V-2_6", trust_remote_code=True)
@@ -27,6 +29,21 @@ footer {
27
  '''
28
 
29
  # Functions
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  @spaces.GPU(duration=60)
31
  def generate(image, video, instruction=DEFAULT_INPUT, sampling=False, temperature=0.7, top_p=0.8, top_k=100, repetition_penalty=1.05, max_tokens=512):
32
  repo.to(DEVICE)
@@ -36,11 +53,10 @@ def generate(image, video, instruction=DEFAULT_INPUT, sampling=False, temperatur
36
 
37
  if not video:
38
  image_data = Image.fromarray(image.astype('uint8'), 'RGB')
39
- print(image_data, instruction)
40
-
41
  inputs = [{"role": "user", "content": [image_data, instruction]}]
42
  else:
43
- priny("video")
 
44
 
45
  parameters = {
46
  "sampling": sampling,
@@ -49,6 +65,8 @@ def generate(image, video, instruction=DEFAULT_INPUT, sampling=False, temperatur
49
  "top_k": top_k,
50
  "repetition_penalty": repetition_penalty,
51
  "max_new_tokens": max_tokens
 
 
52
  }
53
 
54
  output = repo.chat(image=None, msgs=inputs, tokenizer=tokenizer, **parameters)
 
4
  import torch
5
 
6
  from PIL import Image
7
+ from decord import VideoReader, cpu
8
  from transformers import AutoModel, AutoTokenizer
9
 
10
  # Pre-Initialize
 
15
 
16
  # Variables
17
  DEFAULT_INPUT = "Describe in one paragraph."
18
+ MAX_FRAMES = 64
19
 
20
  repo = AutoModel.from_pretrained("openbmb/MiniCPM-V-2_6", torch_dtype=torch.bfloat16, trust_remote_code=True)
21
  tokenizer = AutoTokenizer.from_pretrained("openbmb/MiniCPM-V-2_6", trust_remote_code=True)
 
29
  '''
30
 
31
  # Functions
32
def encode_video(video_path):
    """Decode a video file into a list of PIL RGB frames sampled at ~1 fps.

    Args:
        video_path: Path to the video file readable by decord.VideoReader.

    Returns:
        list[PIL.Image.Image]: At most MAX_FRAMES frames, uniformly spread
        across the video when the 1-fps sampling yields more than MAX_FRAMES.
    """
    def uniform_sample(l, n):
        # Pick n indices evenly spaced across l, offset to the middle of
        # each gap so the samples cover the whole sequence.
        gap = len(l) / n
        idxs = [int(i * gap + gap / 2) for i in range(n)]
        return [l[i] for i in idxs]

    vr = VideoReader(video_path, ctx=cpu(0))
    # Sample roughly one frame per second; clamp to >= 1 so range() never
    # gets a zero step for sources with an average fps below 0.5.
    sample_fps = max(1, round(vr.get_avg_fps() / 1))
    frame_idx = [i for i in range(0, len(vr), sample_fps)]
    # BUG FIX: original compared against undefined MAX_NUM_FRAMES, raising
    # NameError on long videos; the constant defined in this file is MAX_FRAMES.
    if len(frame_idx) > MAX_FRAMES:
        frame_idx = uniform_sample(frame_idx, MAX_FRAMES)
    frames = vr.get_batch(frame_idx).asnumpy()
    frames = [Image.fromarray(v.astype('uint8')) for v in frames]
    return frames
46
+
47
  @spaces.GPU(duration=60)
48
  def generate(image, video, instruction=DEFAULT_INPUT, sampling=False, temperature=0.7, top_p=0.8, top_k=100, repetition_penalty=1.05, max_tokens=512):
49
  repo.to(DEVICE)
 
53
 
54
  if not video:
55
  image_data = Image.fromarray(image.astype('uint8'), 'RGB')
 
 
56
  inputs = [{"role": "user", "content": [image_data, instruction]}]
57
  else:
58
+ video_data = encode_video(video)
59
+ inputs = [{"role": "user", "content": video_data + [instruction]}]
60
 
61
  parameters = {
62
  "sampling": sampling,
 
65
  "top_k": top_k,
66
  "repetition_penalty": repetition_penalty,
67
  "max_new_tokens": max_tokens
68
+ "use_image_id": False,
69
+ "max_slice_nums": 2,
70
  }
71
 
72
  output = repo.chat(image=None, msgs=inputs, tokenizer=tokenizer, **parameters)