kiddobellamy committed
Commit 03ffc4b
1 Parent(s): 2bace83
Update handler.py
handler.py +29 -55
handler.py CHANGED
@@ -1,66 +1,40 @@
 import requests
 import torch
-from PIL import Image
+from PIL import Image
 from transformers import MllamaForConditionalGeneration, AutoProcessor
-import logging
-
-# Configure logging
-logging.basicConfig(level=logging.INFO)
 
 class EndpointHandler:
     def __init__(self, model_dir):
-        try:
-            # Initialize the model and processor from the directory
-            model_id = "meta-llama/Llama-3.2-90B-Vision-Instruct"
-            self.model = MllamaForConditionalGeneration.from_pretrained(
-                model_id,
-                torch_dtype=torch.bfloat16,
-                device_map="auto"
-            )
-            self.processor = AutoProcessor.from_pretrained(model_id)
-            logging.info("Model and processor loaded successfully.")
-        except Exception as e:
-            logging.error(f"Error loading model or processor: {e}")
-            raise
-
+        # Initialize the model and processor from the directory
+        model_id = "meta-llama/Llama-3.2-90B-Vision-Instruct"
+        self.model = MllamaForConditionalGeneration.from_pretrained(
+            model_id,
+            torch_dtype=torch.bfloat16,
+            device_map="auto"
+        )
+        self.processor = AutoProcessor.from_pretrained(model_id)
+
     def process(self, inputs):
         """
         Process the input data and return the output.
         Expecting inputs in the form of a dictionary containing 'image_url' and 'prompt'.
         """
-        try:
-            image_url = inputs.get("image_url")
-            prompt = inputs.get("prompt", "If I had to write a haiku for this one, it would be:")
-
-            # Process the image
-            image = Image.open(requests.get(image_url, stream=True).raw)
-
-            # Generate response
-            messages = [
-                {"role": "user", "content": [
-                    {"type": "image"},
-                    {"type": "text", "text": prompt}
-                ]}
-            ]
-            input_text = self.processor.apply_chat_template(messages, add_generation_prompt=True)
-            model_inputs = self.processor(image, input_text, return_tensors="pt").to(self.model.device)
-            output = self.model.generate(**model_inputs, max_new_tokens=30)
-
-            # Return the output as a string
-            return self.processor.decode(output[0])
-
-        except Exception as e:
-            logging.error(f"Error during processing: {e}")
-            raise
+        image_url = inputs.get("image_url")
+        prompt = inputs.get("prompt", "If I had to write a haiku for this one, it would be:")
+
+        # Process the image
+        image = Image.open(requests.get(image_url, stream=True).raw)
+
+        # Generate response
+        messages = [
+            {"role": "user", "content": [
+                {"type": "image"},
+                {"type": "text", "text": prompt}
+            ]}
+        ]
+        input_text = self.processor.apply_chat_template(messages, add_generation_prompt=True)
+        model_inputs = self.processor(image, input_text, return_tensors="pt").to(self.model.device)
+        output = self.model.generate(**model_inputs, max_new_tokens=30)
+
+        # Return the output as a string
+        return self.processor.decode(output[0])
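For a quick sanity check of the updated handler, something like the following can be run where the endpoint code is deployed. This is a minimal sketch, not part of the commit: it assumes the file is importable as handler, access to the gated meta-llama/Llama-3.2-90B-Vision-Instruct weights, and enough GPU memory for bfloat16 inference; the image URL is a placeholder. Note that model_dir is accepted but unused here, since the weights are loaded from the hard-coded hub id.

from handler import EndpointHandler

# Smoke-test sketch (assumptions above): load the handler, then request a
# short completion for a placeholder image.
handler = EndpointHandler(model_dir=".")  # model_dir is ignored by __init__
result = handler.process({
    "image_url": "https://example.com/sample.jpg",  # placeholder URL
    "prompt": "If I had to write a haiku for this one, it would be:",
})
print(result)

Because process decodes output[0] without skip_special_tokens=True, the returned string includes the prompt and special tokens along with the newly generated text.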