EnariGmbH
/

surftown-1.0

llava_next_video

Inference Endpoints

Model card Files Files and versions Community

EnariGmbH committed on Sep 5

Commit

623c6f2

•

1 Parent(s): 20c305b

Update handler.py

Files changed (1) hide show

handler.py +15 -1

handler.py CHANGED Viewed

@@ -2,6 +2,13 @@ from typing import Dict, List, Any
 import torch
 from transformers import LlavaNextVideoForConditionalGeneration, LlavaNextVideoProcessor
 from peft import PeftModel
 class EndpointHandler:
     def __init__(self):
@@ -39,7 +46,14 @@ class EndpointHandler:
             List[Dict[str, Any]]: The generated text from the model.
         """
         # Extract inputs from the data dictionary
-        clip = data.get("clip")
         prompt = """
         You are a surfing coach specialized on perfecting surfer's pop-up move. Please analyze the surfer's pop-up move in detail from the video.

 import torch
 from transformers import LlavaNextVideoForConditionalGeneration, LlavaNextVideoProcessor
 from peft import PeftModel
+import base64
+import numpy as np
+def base64_to_numpy(base64_str, shape):
+    arr_bytes = base64.b64decode(base64_str)
+    arr = np.frombuffer(arr_bytes, dtype=np.uint8)
+    return arr.reshape(shape)
 class EndpointHandler:
     def __init__(self):
             List[Dict[str, Any]]: The generated text from the model.
         """
         # Extract inputs from the data dictionary
+        clip_base64 = data.get("clip")
+        clip_shape = data.get("clip_shape")  # Expect the shape to be passed
+        if clip_base64 is None or clip_shape is None:
+            return [{"error": "Missing 'clip' or 'clip_shape' in input data"}]
+        # Decode the base64 back to numpy array and reshape
+        clip = base64_to_numpy(clip_base64, tuple(clip_shape))
         prompt = """
         You are a surfing coach specialized on perfecting surfer's pop-up move. Please analyze the surfer's pop-up move in detail from the video.