# Page header captured together with the file (not part of the program):
# gabrielchua's picture | Create app.py | 36dd136 verified | raw | history blame | 2.21 kB
"""
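app.py

Gradio demo that sends an uploaded receipt image to a Groq-hosted vision
model and returns the extracted line items as a Markdown table.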
"""
import base64
from io import BytesIO

import gradio as gr
from groq import Groq
# Initialize the module-level Groq client used by extract_receipt_info().
# It is constructed with no arguments; presumably the API key is picked up
# from the environment (e.g. GROQ_API_KEY) -- TODO confirm against the SDK docs.
client = Groq()
# Function to encode the image in base64
def encode_image_to_base64(image):
    """Return *image* serialized as a JPEG and encoded as a base64 string.

    Args:
        image: A PIL image (anything exposing ``mode``, ``convert`` and
            ``save(fp, format=...)``).

    Returns:
        The base64 text (UTF-8 ``str``) of the JPEG bytes, suitable for
        embedding in a ``data:image/jpeg;base64,...`` URL.
    """
    # JPEG cannot store an alpha channel or a palette, so saving e.g. an
    # RGBA or P image would fail; normalise those modes to RGB first.
    if image.mode not in ("RGB", "L"):
        image = image.convert("RGB")

    # Serialize into memory rather than a temporary file.
    buffered = BytesIO()
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")
# Function to process the uploaded image and extract receipt information
def extract_receipt_info(image):
    """Extract line items from a receipt image via the Groq chat API.

    Args:
        image: PIL image of the receipt, or ``None`` when no image was
            supplied.

    Returns:
        The text of the model's reply (the prompt asks for a table with
        item, price, quantity and total), or a short message asking for an
        upload when ``image`` is ``None``.
    """
    # Without an image there is nothing to encode or send.
    if image is None:
        return "Please upload a receipt image."

    # Encode the image to base64 so it can be embedded in a data URL
    base64_image = encode_image_to_base64(image)

    prompt = "Your task is to extract key information from the provided receipt image.\n\nReply in table.\n\nThis is the schema:\n- item (str), description of the item\n- price (float), price of the item\n- quantity (int), quantity of the item\n- total (float), total cost for the item"

    # Send request to Groq API
    chat_completion = client.chat.completions.create(
        model="llama-3.2-11b-vision-preview",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": prompt,
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}",
                        },
                    },
                ],
            },
            # Pre-fill the assistant turn with an opening code fence; together
            # with stop="```" below, generation ends at the closing fence so
            # only the fence's contents come back.
            {
                "role": "assistant",
                "content": "```markdown",
            },
        ],
        temperature=0.1,
        max_tokens=8192,
        top_p=1,
        stop="```",
    )

    # Return the response from the model
    return chat_completion.choices[0].message.content
# Create the Gradio app
def gradio_app():
    """Build the receipt-extraction Gradio interface and launch it.

    The interface takes one image input, delivered to
    ``extract_receipt_info`` as a PIL image, and shows the returned string
    in a text output. Returns ``None``.
    """
    # gr.Image replaces the legacy gr.inputs.Image namespace, which is no
    # longer available in current Gradio releases.
    interface = gr.Interface(
        fn=extract_receipt_info,
        inputs=gr.Image(type="pil", label="Upload Receipt Image"),
        outputs="text",
        title="Receipt Information Extractor",
        description="Upload a receipt image and the model will extract the items, quantities, and prices from the receipt.",
    )
    interface.launch()
# Start the Gradio app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    gradio_app()