Update handler.py
Browse files · handler.py +5 -3
handler.py
CHANGED
@@ -9,7 +9,7 @@ import io
|
|
9 |
from PIL import Image
|
10 |
import logging
|
11 |
import requests
|
12 |
-
from moviepy.editor import VideoFileClip
|
13 |
|
14 |
class EndpointHandler():
|
15 |
def __init__(self, path=""):
|
@@ -33,6 +33,7 @@ class EndpointHandler():
|
|
33 |
# Construct the messages list from the input string
|
34 |
messages = [{"role": "user", "content": self._parse_input(inputs)}]
|
35 |
|
|
|
36 |
text = self.processor.apply_chat_template(
|
37 |
messages, tokenize=False, add_generation_prompt=True
|
38 |
)
|
@@ -47,6 +48,7 @@ class EndpointHandler():
|
|
47 |
)
|
48 |
inputs = inputs.to("cuda" if torch.cuda.is_available() else "cpu")
|
49 |
|
|
|
50 |
generated_ids = self.model.generate(**inputs, max_new_tokens=max_new_tokens)
|
51 |
generated_ids_trimmed = [
|
52 |
out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
|
@@ -65,11 +67,11 @@ class EndpointHandler():
|
|
65 |
if i % 2 == 0: # Text part
|
66 |
content.append({"type": "text", "text": part.strip()})
|
67 |
else: # Image/video part
|
68 |
-
if part.startswith("video:"):
|
69 |
video_path = part.split("video:")[1].strip()
|
70 |
video_frames = self._extract_video_frames(video_path)
|
71 |
if video_frames:
|
72 |
-
content.append({"type": "video", "video": video_frames, "fps": 1})
|
73 |
else:
|
74 |
image = self._load_image(part.strip())
|
75 |
if image:
|
|
|
9 |
from PIL import Image
|
10 |
import logging
|
11 |
import requests
|
12 |
+
from moviepy.editor import VideoFileClip
|
13 |
|
14 |
class EndpointHandler():
|
15 |
def __init__(self, path=""):
|
|
|
33 |
# Construct the messages list from the input string
|
34 |
messages = [{"role": "user", "content": self._parse_input(inputs)}]
|
35 |
|
36 |
+
# Prepare for inference (using qwen_vl_utils)
|
37 |
text = self.processor.apply_chat_template(
|
38 |
messages, tokenize=False, add_generation_prompt=True
|
39 |
)
|
|
|
48 |
)
|
49 |
inputs = inputs.to("cuda" if torch.cuda.is_available() else "cpu")
|
50 |
|
51 |
+
# Inference
|
52 |
generated_ids = self.model.generate(**inputs, max_new_tokens=max_new_tokens)
|
53 |
generated_ids_trimmed = [
|
54 |
out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
|
|
|
67 |
if i % 2 == 0: # Text part
|
68 |
content.append({"type": "text", "text": part.strip()})
|
69 |
else: # Image/video part
|
70 |
+
if part.lower().startswith("video:"):
|
71 |
video_path = part.split("video:")[1].strip()
|
72 |
video_frames = self._extract_video_frames(video_path)
|
73 |
if video_frames:
|
74 |
+
content.append({"type": "video", "video": video_frames, "fps": 1})
|
75 |
else:
|
76 |
image = self._load_image(part.strip())
|
77 |
if image:
|