Spaces:

Prasada
/

OCR_Demo

Sleeping

App Files Files Community

Prasada committed on Sep 8

Commit

ad14750

•

1 Parent(s): 103f0fb

Create app.py

Browse files

Files changed (1) hide show

app.py +111 -0

app.py ADDED Viewed

	@@ -0,0 +1,111 @@

+import cv2
+import numpy as np
+from PIL import Image, ImageDraw, ImageFont
+import json
+from paddleocr import PaddleOCR
+import gradio as gr
+import os
# Shared PaddleOCR engine, created once at import time and reused by every
# request. It is configured with the angle classifier flag on and the
# English ('en') language setting.
ocr = PaddleOCR(
    lang='en',
    use_angle_cls=True,
)
def draw_boxes_on_image(image, data):
    """Return a copy of *image* with OCR boxes and labels drawn on it.

    Args:
        image: Source picture as a PIL image or array-like in RGB channel
            order (the Gradio "image" input used in this file supplies an
            RGB array).
        data: Iterable of ``[bounding_box, (text, confidence)]`` entries,
            where ``bounding_box`` holds four ``(x, y)`` corner points.
            ``None`` is treated as "nothing detected".

    Returns:
        A new RGB ``PIL.Image.Image``; the input is left untouched.
    """
    # The pixels are already RGB, so no BGR<->RGB swap is applied: swapping
    # here would exchange the red and blue channels of the returned image.
    # convert("RGB") yields an independent copy and also normalises
    # grayscale / RGBA inputs so the named colours below can be used.
    pil_image = Image.fromarray(np.asarray(image)).convert("RGB")
    draw = ImageDraw.Draw(pil_image)

    # Prefer a scalable font; fall back to Pillow's built-in font when
    # DejaVuSans cannot be loaded.
    try:
        font = ImageFont.truetype("DejaVuSans.ttf", 20)
    except IOError:
        font = ImageFont.load_default()

    if data is None:
        data = []

    for bounding_box, (text, confidence) in data:
        # Pillow expects plain integer pixel coordinates.
        corners = [(int(x), int(y)) for x, y in bounding_box]
        # One closed outline: corner 0 -> 1 -> 2 -> 3 -> back to 0.
        draw.line(corners + corners[:1], fill="green", width=2)
        # Label sits 20 px above the first corner, clamped so that boxes
        # touching the top edge still get a visible label.
        left, top = corners[0]
        draw.text(
            (left, max(top - 20, 0)),
            f"{text} ({confidence:.2f})",
            fill="red",
            font=font,
        )
    return pil_image
def save_results_to_json(ocr_results):
    """Flatten OCR output into a list of JSON-serialisable dictionaries.

    Despite the name, nothing is written to disk here; the caller is
    responsible for dumping the returned list.

    Args:
        ocr_results: Iterable of pages/lines, each an iterable of
            ``[bounding_box, (text, confidence)]`` entries. ``None`` or empty
            pages (and a ``None`` argument) are skipped.

    Returns:
        A list of dicts with the keys ``"bounding_box"`` (list of
        ``[x, y]`` float pairs), ``"text"`` and ``"confidence"``.
    """
    results = []
    for line in ocr_results or []:
        if not line:
            # A page with no detections contributes nothing.
            continue
        for bounding_box, (text, confidence) in line:
            results.append({
                "bounding_box": [[float(value) for value in point] for point in bounding_box],
                "text": text,
                # Plain float so json.dump never sees a NumPy scalar.
                "confidence": float(confidence),
            })
    return results
def identify_field_value_pairs(ocr_results, fields):
    """Pair each known field label with the OCR entry that follows it.

    An entry matches a field when the field name occurs in the entry's text
    (case-insensitive substring test). Only the first matching field in
    *fields* is considered for a given entry. The value is the text of the
    next entry in the same page/line; a label that is the last entry yields
    no value. When a field matches several times, the last match that has a
    following entry wins.

    Args:
        ocr_results: Iterable of pages/lines, each a sequence of
            ``[bounding_box, (text, confidence)]`` entries. ``None`` or empty
            pages (and a ``None`` argument) are skipped.
        fields: Iterable of field names to look for.

    Returns:
        Dict mapping each found field name to the text that follows it.
    """
    # Lower-case every field name once rather than once per OCR entry.
    lowered_fields = [(field, field.lower()) for field in fields]
    field_value_pairs = {}
    for line in ocr_results or []:
        if not line:
            continue
        # enumerate gives the true position of the entry; list.index() would
        # return the first *equal* entry and pick the wrong neighbour when a
        # detection is repeated.
        for position, word_info in enumerate(line):
            lowered_text = word_info[1][0].lower()
            for field, lowered_field in lowered_fields:
                if lowered_field in lowered_text:
                    # Assume the value is the entry right after the label.
                    value_index = position + 1
                    if value_index < len(line):
                        field_value_pairs[field] = line[value_index][1][0]
                    break
    return field_value_pairs
def process_image(image):
    """Run OCR on *image* and build the three outputs shown in the UI.

    Args:
        image: The uploaded picture as delivered by the Gradio "image" input.

    Returns:
        A tuple ``(annotated_image, ocr_json_path, field_value_json_path)``:
        the picture with boxes drawn on it, the path of the JSON file with
        all detections, and the path of the JSON file with the extracted
        field/value pairs. Both files are written to the current working
        directory and overwritten on every call.
    """
    ocr_results = ocr.ocr(np.array(image), cls=True)

    # The result holds one entry per input image, and that entry may be None
    # when no text was found. Dropping empty entries lets the rest of the
    # function work on "no detections" instead of failing.
    pages = [page for page in (ocr_results or []) if page]
    detections = pages[0] if pages else []

    processed_image = draw_boxes_on_image(image, detections)

    # Both helpers iterate pages -> entries, so they receive the list of
    # pages rather than a single page.
    results_json = save_results_to_json(pages)
    json_path = "ocr_results.json"
    with open(json_path, 'w', encoding="utf-8") as json_file:
        json.dump(results_json, json_file, indent=4)

    # Labels searched for in the recognised text.
    fields = ["Scheme Name", "Folio Number", "Number of Units", "PAN", "Signature", "Tax Status",
              "Mobile Number", "Email", "Address", "Bank Account Details"]
    field_value_pairs = identify_field_value_pairs(pages, fields)
    field_value_json_path = "field_value_pairs.json"
    with open(field_value_json_path, 'w', encoding="utf-8") as json_file:
        json.dump(field_value_pairs, json_file, indent=4)

    return processed_image, json_path, field_value_json_path
# Gradio front end: one uploaded image in; the annotated image and the two
# JSON files produced by process_image out.
interface = gr.Interface(
    title="OCR Web Application",
    description="Upload an image and get OCR results with bounding boxes and two JSON outputs.",
    fn=process_image,
    inputs="image",
    outputs=[
        "image",
        gr.File(label="OCR Results JSON"),
        gr.File(label="Field-Value Pairs JSON"),
    ],
)

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    interface.launch()