Spaces:

joelorellana
/

demo_generative_img

Running

App Files Files Community

demo_generative_img / gpt_vision_prompt.py

joelorellana

first commit for the project

1c681f7 8 months ago

raw

history blame

1.88 kB

	""" Generate a prompt for Generative AI APIs with the given image and prompt. """

	import requests
	from encode_image import encode_image
	from config import OPENAI_API_KEY

	# Default instruction text for the GPT Vision API request.
	# It is the default value of the `prompt` argument of
	# generate_prompt_with_vision() and is sent verbatim as the text part of the
	# user message, alongside the image.
	# It asks the model to describe the image as a text-to-image prompt for DALLE
	# or Stable Diffusion, limited to 75 tokens and following the bracketed
	# template below.
	# NOTE(review): the template has no space after "[CAMERA PROPERTIES]," and
	# ends with a trailing comma -- possibly unintentional; confirm before
	# editing, because this string is part of the request payload.
	PROMPT = """ Return a prompt to describe the image and pass it
	to DALLE or Stable Diffusion to generate an image.
	The prompt must not exceed 75 tokens.
	The prompt must improve the quality of the original image.
	The prompt must be in the form of:
	[STYLE OF PHOTO] photo of a [SUBJECT], [IMPORTANT
	FEATURE], [MORE DETAILS], [POSE OR ACTION],
	[FRAMING], [SETTING/BACKGROUND], [LIGHTING],
	[CAMERA ANGLE], [CAMERA PROPERTIES],in style of
	[PHOTOGRAPHER],
	"""


	def generate_prompt_with_vision(
	    image_path,
	    prompt=PROMPT,
	    api_key=OPENAI_API_KEY,
	    *,
	    model="gpt-4-vision-preview",
	    max_tokens=300,
	    timeout=30,
	):
	    """Ask the OpenAI vision chat endpoint to describe an image as a prompt.

	    The image is base64-encoded with ``encode_image`` and sent, together
	    with the instruction text, as a single user message to the
	    ``/v1/chat/completions`` endpoint. Progress messages, the HTTP status
	    code, the raw response text and the decoded JSON are printed to stdout.

	    Args:
	        image_path: Value passed straight to ``encode_image``
	            (presumably a filesystem path to the image -- confirm against
	            ``encode_image``).
	        prompt: Instruction text sent as the text part of the message.
	        api_key: OpenAI API key used for the ``Authorization`` bearer header.
	        model: Name of the chat model placed in the request payload.
	        max_tokens: Upper bound on generated tokens sent in the payload.
	        timeout: Timeout in seconds handed to ``requests.post``.

	    Returns:
	        The ``content`` field of the first choice's message in the response.

	    Raises:
	        requests.HTTPError: If the API answers with a 4xx/5xx status.
	        requests.RequestException: On connection failures or timeouts.
	        KeyError, IndexError: If a successful response lacks the expected
	            ``choices[0].message.content`` structure.
	    """
	    # Getting the base64 string
	    print('Encoding image...')
	    base64_image = encode_image(image_path)
	    print("Encoded image. ")

	    headers = {
	        "Content-Type": "application/json",
	        "Authorization": f"Bearer {api_key}",
	    }
	    # NOTE(review): the data URL always declares image/jpeg, whatever the
	    # source file's real format is -- confirm encode_image yields JPEG data.
	    image_part = {
	        "type": "image_url",
	        "image_url": {
	            "url": f"data:image/jpeg;base64,{base64_image}"
	        },
	    }
	    payload = {
	        "model": model,
	        "messages": [
	            {
	                "role": "user",
	                "content": [
	                    {"type": "text", "text": prompt},
	                    image_part,
	                ],
	            }
	        ],
	        "max_tokens": max_tokens,
	    }

	    print('Creating an special prompt using Vision from OpenAI...')
	    response = requests.post(
	        "https://api.openai.com/v1/chat/completions",
	        headers=headers,
	        json=payload,
	        timeout=timeout,
	    )
	    print(response.status_code)
	    print(response.text)

	    # Fail with a meaningful HTTP error instead of an opaque
	    # KeyError('choices') when the API rejects the request.
	    response.raise_for_status()

	    # Decode the body once and reuse it for both logging and the result.
	    data = response.json()
	    print(data)
	    return data['choices'][0]['message']['content']