# shashankkandimalla's picture
# update app.py
# 8860032 verified
import base64
import http.client
import io
import json
import os
import tempfile

import gradio as gr
import requests
from dotenv import load_dotenv
from openai import OpenAI
from PIL import Image
# Load environment variables (the API keys read via os.getenv() further down)
# from a .env file before any of them are looked up.
load_dotenv()
# Function to upload image to imgbb
def upload_image_to_imgbb(image_bytes, timeout=30):
    """Upload raw image bytes to imgbb and return the hosted image URL.

    Args:
        image_bytes: Raw bytes of the image file; sent base64-encoded.
        timeout: Seconds to wait for imgbb before giving up, so a stalled
            connection cannot hang the request forever.

    Returns:
        The ``data.url`` field of imgbb's JSON response on success. On any
        failure, a string starting with ``"Error uploading image: "`` is
        returned instead of raising; callers detect failure by that prefix.
    """
    try:
        api_key = os.getenv("IMGBB_API_KEY")
        if not api_key:
            # Fail fast with an actionable message instead of sending a
            # key-less request and surfacing an opaque HTTP error.
            raise ValueError("IMGBB_API_KEY is not set")
        payload = {
            "key": api_key,
            "image": base64.b64encode(image_bytes).decode(),
        }
        res = requests.post(
            "https://api.imgbb.com/1/upload",
            data=payload,
            timeout=timeout,
        )
        res.raise_for_status()  # Raises an HTTPError for bad responses
        return res.json()['data']['url']
    except Exception as e:
        # Deliberately broad: every failure is reported through the return
        # value because the caller checks for the "Error" prefix.
        return f"Error uploading image: {str(e)}"
# OCR API request
def get_ocr_results(image_url, timeout=60):
    """Send an image URL to the ocr43 RapidAPI endpoint and return its reply.

    Args:
        image_url: Publicly reachable URL of the image to run OCR on.
        timeout: Socket timeout in seconds for the HTTPS connection.

    Returns:
        The response body decoded as UTF-8 text when the API answers with a
        2xx status. On any failure (missing key, network error, non-2xx
        status), a string starting with ``"Error in OCR processing: "`` is
        returned instead of raising; callers detect failure by that prefix.
    """
    host = "ocr43.p.rapidapi.com"
    boundary = "---011000010111000001101001"
    conn = None
    try:
        api_key = os.getenv("RAPIDAPI_KEY")
        if not api_key:
            # A None header value would otherwise fail deep inside
            # http.client with an unhelpful TypeError message.
            raise ValueError("RAPIDAPI_KEY is not set")

        # Single-field multipart/form-data body carrying the "url" field.
        payload = (
            f"--{boundary}\r\n"
            "Content-Disposition: form-data; name=\"url\"\r\n\r\n"
            f"{image_url}\r\n"
            f"--{boundary}--\r\n\r\n"
        ).encode("utf-8")
        headers = {
            'x-rapidapi-key': api_key,
            'x-rapidapi-host': host,
            'Content-Type': f"multipart/form-data; boundary={boundary}",
        }

        conn = http.client.HTTPSConnection(host, timeout=timeout)
        conn.request("POST", "/v1/results", payload, headers)
        res = conn.getresponse()
        body = res.read().decode("utf-8")
        if not 200 <= res.status < 300:
            # Do not let an API error body be mistaken for OCR output.
            raise RuntimeError(f"HTTP {res.status} {res.reason}: {body}")
        return body
    except Exception as e:
        return f"Error in OCR processing: {str(e)}"
    finally:
        # Always release the socket, including on error paths.
        if conn is not None:
            conn.close()
# OpenAI API configuration: module-level client used by process_ocr_with_gpt,
# created with the key read from the OPENAI_API_KEY environment variable.
openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
# Function to process OCR results with OpenAI
def process_ocr_with_gpt(ocr_results, model="gpt-3.5-turbo"):
    """Ask an OpenAI chat model to turn raw OCR output into clean text.

    Args:
        ocr_results: Raw OCR output to embed in the prompt.
        model: Chat model name passed to the completions API.

    Returns:
        The text of the first completion choice. On any failure, including a
        reply with no text content, a string starting with
        ``"Error in GPT processing: "`` is returned instead of raising.
    """
    try:
        prompt = (
            "\n"
            "You are an AI assistant tasked with processing OCR results and providing a clean, formatted output.\n"
            "Here are the OCR results:\n"
            f"{ocr_results}\n"
            "Please process this information and provide a clean, well-formatted output. Arrange all elements in order and omit any elements not present in the file.\n"
        )
        response = openai_client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a helpful assistant that processes OCR results."},
                {"role": "user", "content": prompt},
            ],
        )
        content = response.choices[0].message.content
        if content is None:
            # Report a missing reply as an error so callers never receive
            # None where they expect text.
            raise ValueError("model returned no text content")
        return content
    except Exception as e:
        return f"Error in GPT processing: {str(e)}"
# Gradio interface function
def process_images(files):
    """Run upload -> OCR -> GPT clean-up for each uploaded image.

    Args:
        files: Iterable of raw image bytes, one item per uploaded file.
            ``None`` or an empty iterable is treated as "no files".

    Returns:
        A 3-tuple ``(ocr_results_list, processed_results_list,
        download_links)``. The first two lists get one entry per input file:
        the OCR text and cleaned text on success, or an error or failure
        message. ``download_links`` holds the paths of the text files written
        for successfully processed images only, so it may be shorter.
    """
    ocr_results_list = []
    processed_results_list = []
    download_links = []
    if not files:
        return ocr_results_list, processed_results_list, download_links

    # Created lazily on the first success. A unique directory per call keeps
    # concurrent requests from overwriting each other's result files.
    output_dir = None

    for idx, file in enumerate(files):
        try:
            # Decode and re-encode as PNG in memory purely as a validity
            # check; the upload below sends the original bytes. Nothing is
            # written to disk, so no temp file can be left behind.
            with Image.open(io.BytesIO(file)) as image:
                image.save(io.BytesIO(), format="PNG")

            # Upload image to imgbb and get URL
            image_url = upload_image_to_imgbb(file)
            if image_url.startswith("Error"):
                ocr_results_list.append(image_url)
                processed_results_list.append("Failed to process due to image upload error")
                continue

            # Get OCR results
            ocr_results = get_ocr_results(image_url)
            if ocr_results.startswith("Error"):
                ocr_results_list.append(ocr_results)
                processed_results_list.append("Failed to process due to OCR error")
                continue

            # Process with GPT. On failure, show the error in the UI but do
            # not offer it as a downloadable "result".
            processed_results = process_ocr_with_gpt(ocr_results)
            if processed_results is None or processed_results.startswith("Error in GPT processing"):
                ocr_results_list.append(ocr_results)
                processed_results_list.append(processed_results or "Failed to process due to GPT error")
                continue

            # Save processed results to a file for download
            if output_dir is None:
                output_dir = tempfile.mkdtemp(prefix="processed_results_")
            result_file_path = os.path.join(output_dir, f"processed_result_{idx}.txt")
            # Explicit UTF-8 so non-ASCII text does not depend on the locale.
            with open(result_file_path, 'w', encoding="utf-8") as result_file:
                result_file.write(processed_results)

            download_links.append(result_file_path)
            ocr_results_list.append(ocr_results)
            processed_results_list.append(processed_results)
        except Exception as e:
            # One bad file must not abort the whole batch.
            ocr_results_list.append(f"Error in image processing: {str(e)}")
            processed_results_list.append("Failed to process due to an error")

    return ocr_results_list, processed_results_list, download_links
# Create Gradio interface: a multi-file upload wired to process_images, whose
# three return values feed the three outputs below, in order.
iface = gr.Interface(
    fn=process_images,
    # type="binary": process_images wraps each item in io.BytesIO, i.e. it
    # expects the uploads as raw bytes.
    inputs=gr.Files(label="Upload Images", file_count="multiple", type="binary"),
    outputs=[
        # NOTE(review): the two Textbox outputs are handed Python lists;
        # presumably they are rendered via str() - confirm the display.
        gr.Textbox(label="OCR Results"),
        gr.Textbox(label="Processed Results"),
        gr.File(label="Download Processed Results")
    ],
    title="OCR and Text Processing App",
    description="Upload images to extract text and process it. Download the processed results."
)
# Launch the app
# NOTE(review): there is no `if __name__ == "__main__":` guard, so this call
# also runs whenever the module is imported.
iface.launch()