demo_generative_img / gpt_vision_prompt.py
joelorellana's picture
first commit for the project
1c681f7
raw
history blame
1.88 kB
""" Generate a prompt for Generative AI APIs with the given image and prompt. """
import requests
from encode_image import encode_image
from config import OPENAI_API_KEY
# Default instruction text sent alongside the image to the vision model.
# It asks the model to reply with a short (at most 75 tokens) text-to-image
# prompt, following a fixed bracketed template, that can then be passed to
# DALLE or Stable Diffusion.
# NOTE: this literal is transmitted verbatim at runtime; editing its wording
# changes what the model is asked to produce.
PROMPT = """ Return a prompt to describe the image and pass it
to DALLE or Stable Diffusion to generate an image.
The prompt must not exceed 75 tokens.
The prompt must improve the quality of the original image.
The prompt must be in the form of:
[STYLE OF PHOTO] photo of a [SUBJECT], [IMPORTANT
FEATURE], [MORE DETAILS], [POSE OR ACTION],
[FRAMING], [SETTING/BACKGROUND], [LIGHTING],
[CAMERA ANGLE], [CAMERA PROPERTIES],in style of
[PHOTOGRAPHER],
"""
def generate_prompt_with_vision(image_path, prompt=PROMPT, api_key=OPENAI_API_KEY,
                                *, model="gpt-4-vision-preview", max_tokens=300,
                                timeout=30):
    """Ask the OpenAI chat-completions endpoint to describe an image as a prompt.

    The image is encoded with ``encode_image`` and sent, together with the
    instruction text, as a single user message. The text of the first
    returned choice is handed back to the caller.

    Args:
        image_path: Value forwarded unchanged to ``encode_image``
            (presumably a path to an image file -- confirm against
            ``encode_image``).
        prompt: Instruction text placed in the text part of the message.
        api_key: OpenAI API key used in the ``Authorization`` bearer header.
        model: Model name placed in the request payload.
            NOTE(review): the default is a preview model that OpenAI has
            listed as deprecated -- confirm, and pass a current
            vision-capable model name if requests are rejected.
        max_tokens: Upper bound on the length of the generated reply.
        timeout: Timeout in seconds passed to ``requests.post``.

    Returns:
        The ``content`` field of the first choice's message in the response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
        KeyError, IndexError: If a successful response does not have the
            expected ``choices[0].message.content`` shape.
    """
    # Getting the base64 string
    print('Encoding image...')
    base64_image = encode_image(image_path)
    print("Encoded image. ")

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": prompt,
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            # The data URL always declares JPEG, whatever
                            # the actual format of the encoded file is.
                            "url": f"data:image/jpeg;base64,{base64_image}",
                        },
                    },
                ],
            }
        ],
        "max_tokens": max_tokens,
    }

    print('Creating an special prompt using Vision from OpenAI...')
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    print(response.status_code)
    print(response.text)

    # Surface API failures (bad key, rate limit, retired model, ...) as an
    # HTTP error right here, instead of a confusing KeyError/JSON decoding
    # error further down. The raw body has already been printed above.
    response.raise_for_status()

    # Decode the body once and reuse it for both the log line and the result.
    data = response.json()
    print(data)
    return data['choices'][0]['message']['content']