kiddobellamy committed on
Commit 2bace83
1 Parent(s): 8e761cc

Update handler.py

Files changed (1)
  handler.py  +57 -44
handler.py CHANGED
@@ -1,53 +1,66 @@
 import requests
 import torch
-from PIL import Image
+from PIL import Image, UnidentifiedImageError
 from transformers import MllamaForConditionalGeneration, AutoProcessor
+import logging
 
-# Define the model ID and load the model and processor
-model_id = "meta-llama/Llama-3.2-90B-Vision-Instruct"
-
-def load_model():
-    """Loads the Llama 3.2-90B Vision-Instruct model and processor."""
-    model = MllamaForConditionalGeneration.from_pretrained(
-        model_id,
-        torch_dtype=torch.bfloat16,
-        device_map="auto",
-    )
-    processor = AutoProcessor.from_pretrained(model_id)
-    return model, processor
-
-def process_image(url):
-    """Processes the image from the given URL."""
-    image = Image.open(requests.get(url, stream=True).raw)
-    return image
-
-def generate_response(model, processor, image, prompt):
-    """Generates a text response based on the image and the prompt."""
-    messages = [
-        {"role": "user", "content": [
-            {"type": "image"},
-            {"type": "text", "text": prompt}
-        ]}
-    ]
-    input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
-    inputs = processor(image, input_text, return_tensors="pt").to(model.device)
-    output = model.generate(**inputs, max_new_tokens=30)
-    return processor.decode(output[0])
-
-def main():
-    # Load model and processor
-    model, processor = load_model()
-
-    # Sample image URL
-    url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg"
-    image = process_image(url)
-
-    # Define a sample prompt
-    prompt = "If I had to write a haiku for this one, it would be:"
-
-    # Generate response
-    response = generate_response(model, processor, image, prompt)
-    print(response)
-
-if __name__ == "__main__":
-    main()
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+
+class EndpointHandler:
+    def __init__(self, model_dir):
+        try:
+            # Initialize the model and processor from the directory
+            model_id = "meta-llama/Llama-3.2-90B-Vision-Instruct"
+            self.model = MllamaForConditionalGeneration.from_pretrained(
+                model_id,
+                torch_dtype=torch.bfloat16,
+                device_map="auto"
+            )
+            self.processor = AutoProcessor.from_pretrained(model_id)
+            logging.info("Model and processor loaded successfully.")
+        except Exception as e:
+            logging.error(f"Error loading model or processor: {e}")
+            raise
+
+    def process(self, inputs):
+        """
+        Process the input data and return the output.
+        Expecting inputs in the form of a dictionary containing 'image_url' and 'prompt'.
+        """
+        try:
+            # Input validation
+            image_url = inputs.get("image_url")
+            if not image_url:
+                raise ValueError("No image URL provided in the input.")
+
+            prompt = inputs.get("prompt", "If I had to write a haiku for this one, it would be:")
+
+            # Process the image
+            try:
+                image = Image.open(requests.get(image_url, stream=True).raw)
+            except UnidentifiedImageError:
+                logging.error(f"Failed to identify the image from the URL: {image_url}")
+                raise
+            except Exception as e:
+                logging.error(f"Error downloading or processing the image: {e}")
+                raise
+
+            # Generate response
+            messages = [
+                {"role": "user", "content": [
+                    {"type": "image"},
+                    {"type": "text", "text": prompt}
+                ]}
+            ]
+            input_text = self.processor.apply_chat_template(messages, add_generation_prompt=True)
+            model_inputs = self.processor(image, input_text, return_tensors="pt").to(self.model.device)
+            output = self.model.generate(**model_inputs, max_new_tokens=30)
+
+            # Return the output as a string
+            return self.processor.decode(output[0])
+
+        except Exception as e:
+            logging.error(f"Error during processing: {e}")
+            raise
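For reference, a minimal sketch of how the new handler could be exercised locally. This snippet is not part of the commit: the payload keys match the validation in `process`, the sample image URL and prompt are the ones the removed `main()` used, and it assumes `handler.py` is importable and that enough GPU memory is available for the 90B checkpoint. Note that `model_dir` is accepted but unused, since `__init__` hard-codes `model_id`.

# Illustrative smoke test (not part of the commit); assumes handler.py is
# on the import path and the hardware can hold the 90B model.
from handler import EndpointHandler

handler = EndpointHandler(model_dir=".")  # model_dir is ignored; __init__ hard-codes model_id

payload = {
    # Same sample inputs the removed main() used
    "image_url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg",
    "prompt": "If I had to write a haiku for this one, it would be:",
}

print(handler.process(payload))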