zesquirrelnator committed on
Commit 5d906c8
1 Parent(s): d7456e8

Update handler.py

Files changed (1):
  handler.py +51 -42
handler.py CHANGED
@@ -1,47 +1,56 @@
-import requests
-from typing import Dict, Any
 from PIL import Image
 import torch
-import base64
 from io import BytesIO
-from transformers import AutoProcessor, AutoModelForVision2Seq
-from transformers.image_utils import load_image
-
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-
-class EndpointHandler():
-    def __init__(self, path=""):
-        self.processor = AutoProcessor.from_pretrained("zesquirrelnator/idefics2-8b-docvqa-finetuned-tutorial")
-        self.model = AutoModelForVision2Seq.from_pretrained(
-            "zesquirrelnator/idefics2-8b-docvqa-finetuned-tutorial"
-        ).to(device)
-        self.model.eval()
-
-    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
-        input_data = data.get("inputs", {})
-        encoded_images = input_data.get("images")
-
-        if not encoded_images:
-            return {"captions": [], "error": "No images provided"}
-
-        texts = input_data.get("texts", ["move to red ball"] * len(encoded_images))
-
-        try:
-            raw_images = [Image.open(BytesIO(base64.b64decode(img))).convert("RGB") for img in encoded_images]
-            processed_inputs = [
-                self.processor(image, text, return_tensors="pt") for image, text in zip(raw_images, texts)
-            ]
-            processed_inputs = {
-                "pixel_values": torch.cat([inp["pixel_values"] for inp in processed_inputs], dim=0).to(device),
-                "input_ids": torch.cat([inp["input_ids"] for inp in processed_inputs], dim=0).to(device),
-                "attention_mask": torch.cat([inp["attention_mask"] for inp in processed_inputs], dim=0).to(device)
-            }

-            with torch.no_grad():
-                out = self.model.generate(**processed_inputs)

-            captions = self.processor.batch_decode(out, skip_special_tokens=True)
-            return {"captions": captions}
-        except Exception as e:
-            print(f"Error during processing: {str(e)}")
-            return {"captions": [], "error": str(e)}
+from transformers import AutoModelForCausalLM, AutoTokenizer
 from PIL import Image
 import torch
 from io import BytesIO
+import base64
+
+# Initialize the model and tokenizer
+model_id = "HuggingFaceM4/idefics2-8b"
+model = AutoModelForCausalLM.from_pretrained(model_id)
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+# Check if CUDA (GPU support) is available and then set the device to GPU or CPU
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
+
+def preprocess_image(encoded_image):
+    """Decode and preprocess the input image."""
+    decoded_image = base64.b64decode(encoded_image)
+    img = Image.open(BytesIO(decoded_image)).convert("RGB")
+    return img

+def handler(event, context):
+    """Handle the incoming request."""
+    try:
+        # Extract the base64-encoded image and question from the event
+        input_image = event['body']['image']
+        question = event['body'].get('question', "What is this image about?")

+        # Preprocess the image
+        img = preprocess_image(input_image)
+
+        # Perform inference
+        enc_image = model.encode_image(img).to(device)
+        answer = model.answer_question(enc_image, question, tokenizer)
+
+        # If the output is a tensor, move it back to CPU and convert to list
+        if isinstance(answer, torch.Tensor):
+            answer = answer.cpu().numpy().tolist()
+
+        # Create the response
+        response = {
+            "statusCode": 200,
+            "body": {
+                "answer": answer
+            }
+        }
+        return response
+    except Exception as e:
+        # Handle any errors
+        response = {
+            "statusCode": 500,
+            "body": {
+                "error": str(e)
+            }
+        }
+        return response
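
For reference, a minimal local smoke test for the new handler might look like the sketch below. It is not part of the commit: `sample.jpg` is a placeholder path, and it assumes the file above imports cleanly as handler.py. Note also that `encode_image` and `answer_question` are not part of the standard `AutoModelForCausalLM` interface, so the call only succeeds if the loaded checkpoint actually exposes those methods.

import base64
import json

from handler import handler  # assumes the new file above is saved as handler.py

# Build a Lambda-style event in the shape handler() expects:
# a base64-encoded image plus an optional question.
with open("sample.jpg", "rb") as f:  # placeholder image path
    encoded_image = base64.b64encode(f.read()).decode("utf-8")

event = {
    "body": {
        "image": encoded_image,
        "question": "What is this image about?",
    }
}

# handler() never touches context, so None is fine for a local test.
response = handler(event, context=None)
print(json.dumps(response, indent=2))

A successful call returns {"statusCode": 200, "body": {"answer": ...}}; any exception is caught and comes back as a 500 response with the error string in the body.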