Spaces:

gabrielchua
/

llama_vision_groq

Runtime error

App Files Files Community

llama_vision_groq / app.py

gabrielchua

Update app.py

2286dbf verified about 2 months ago

raw

history blame

2.2 kB

	"""
	app.py
	"""

	import base64
	from io import BytesIO

	import gradio as gr
	from groq import Groq

	# Create the Groq API client once at import time; it is shared by every
	# request handled by extract_receipt_info. No credentials are passed
	# explicitly here -- presumably the SDK reads the API key from the
	# environment (GROQ_API_KEY); confirm it is configured for the deployment.
	client = Groq()

	# Function to encode the image in base64
	def encode_image_to_base64(image):
	    """Serialize an image as JPEG and return the bytes as a base64 string.

	    Args:
	        image: A PIL image (any object exposing ``mode``, ``convert`` and
	            ``save`` the way ``PIL.Image.Image`` does).

	    Returns:
	        str: The JPEG-encoded image, base64-encoded and decoded as UTF-8.
	    """
	    # JPEG cannot store an alpha channel or a palette, so Pillow raises
	    # OSError when asked to write modes such as RGBA, LA or P (common for
	    # PNG uploads). Normalize anything that is not plain RGB/grayscale.
	    if image.mode not in ("RGB", "L"):
	        image = image.convert("RGB")

	    buffered = BytesIO()
	    image.save(buffered, format="JPEG")
	    return base64.b64encode(buffered.getvalue()).decode("utf-8")

	# Function to process the uploaded image and extract receipt information
	def extract_receipt_info(image):
	    """Ask the Groq vision model to extract line items from a receipt image.

	    Args:
	        image: The uploaded receipt as a PIL image, or ``None`` when the
	            form was submitted without an upload.

	    Returns:
	        The text of the model's reply. The prompt asks for a table with
	        item, price, quantity and total columns.

	    Raises:
	        gr.Error: If no image was provided.
	    """
	    # Without an upload there is nothing to encode; surface a readable
	    # message in the UI instead of an AttributeError traceback.
	    if image is None:
	        raise gr.Error("Please upload a receipt image first.")

	    # Encode the image to base64 so it can be embedded in the request
	    base64_image = encode_image_to_base64(image)

	    # Send request to Groq API
	    chat_completion = client.chat.completions.create(
	        model="llama-3.2-11b-vision-preview",
	        messages=[
	            {
	                "role": "user",
	                "content": [
	                    {
	                        "type": "text",
	                        "text": "Your task is to extract key information from the provided receipt image.\n\nReply in table.\n\nThis is the schema:\n- item (str), description of the item\n- price (float), price of the item\n- quantity (int), quantity of the item\n- total (float), total cost for the item"
	                    },
	                    {
	                        "type": "image_url",
	                        "image_url": {
	                            # The API expects a URL string, not a PIL
	                            # object: inline the JPEG as a data URL.
	                            "url": f"data:image/jpeg;base64,{base64_image}"
	                        }
	                    }
	                ]
	            },
	            {
	                # Prefill the assistant turn with an opening code fence;
	                # together with stop="```" the reply ends at the closing
	                # fence, leaving only the fenced content.
	                "role": "assistant",
	                "content": "```markdown"
	            }
	        ],
	        temperature=0.1,
	        max_tokens=8192,
	        top_p=1,
	        stop="```"
	    )

	    # The SDK returns response objects rather than dicts, so the message
	    # text is read as an attribute, not by subscripting.
	    return chat_completion.choices[0].message.content

	# Create the Gradio app
	def gradio_app():
	    """Assemble the receipt-extraction interface and start serving it."""
	    # Input: the uploaded receipt, handed to the callback as a PIL image.
	    receipt_input = gr.Image(type="pil", label="Upload Receipt Image")
	    # Output: the model's reply shown as plain text.
	    reply_output = gr.Textbox()

	    demo = gr.Interface(
	        fn=extract_receipt_info,
	        inputs=receipt_input,
	        outputs=reply_output,
	        title="Receipt Information Extractor",
	        description="Upload a receipt image and the model will extract the items, quantities, and prices from the receipt.",
	    )
	    demo.launch()

	# Start the Gradio app
	if __name__ == "__main__":
	gradio_app()