import base64
import mimetypes

from openai import OpenAI


class GPT4Vision:
    """
    Small wrapper that sends a local image plus a text prompt to the OpenAI
    chat-completions API and returns the first completion choice.

    The image is read from disk, base64-encoded and embedded inline in the
    request as a ``data:`` URL.
    """

    # MIME type used when the file extension does not identify an image type.
    # This is the value the data URL historically always carried.
    _FALLBACK_MIME_TYPE = "image/png"

    def __init__(self, client: "OpenAI | None" = None):
        """
        Set up the API client.

        :param client: Optional pre-configured client object exposing
            ``chat.completions.create``. When omitted, a default ``OpenAI()``
            client is constructed.
        """
        self.client = OpenAI() if client is None else client

    def encode_image(self, image_path):
        """
        Encode the image to base64 format.

        :param image_path: Path to the image file.
        :return: Base64 encoded string of the image.
        :raises OSError: If the file cannot be opened or read.
        """
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")

    def _image_data_url(self, image_path):
        """
        Build the ``data:<mime>;base64,<payload>`` URL for an image file.

        The MIME type is guessed from the file extension so that non-PNG
        images (JPEG, WebP, GIF, ...) are labelled correctly.
        """
        guessed_type, _ = mimetypes.guess_type(str(image_path))
        if guessed_type is None or not guessed_type.startswith("image/"):
            # Unknown or non-image extension: keep the previous behaviour.
            guessed_type = self._FALLBACK_MIME_TYPE
        return f"data:{guessed_type};base64,{self.encode_image(image_path)}"

    def describe(
        self,
        image_path,
        user_message,
        *,
        model: str = "gpt-4-vision-preview",
        max_tokens: int = 1000,
    ):
        """
        Get a description of the image using OpenAI's GPT-4 Vision API.

        :param image_path: Path to the image file.
        :param user_message: Custom text message to send as user input.
        :param model: Name of the model to query. The default matches the
            value that used to be hard-coded here.
        :param max_tokens: Value passed as ``max_tokens`` to the API call.
        :return: The first element of the response's ``choices``.
        :raises OSError: If the image file cannot be opened or read.
        """
        user_content = [
            {"type": "text", "text": user_message},
            {
                "type": "image_url",
                "image_url": {"url": self._image_data_url(image_path)},
            },
        ]

        response = self.client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": user_content}],
            max_tokens=max_tokens,
        )
        return response.choices[0]