sabari committed on
Commit
e934ef6
•
1 Parent(s): 07fc4bd
Files changed (6)
  1. .gitignore +7 -0
  2. app.py +105 -0
  3. config.json +1 -0
  4. get_coordinate.py +53 -0
  5. model-ocr-0.1829.h5 +3 -0
  6. requirements.txt +6 -0
.gitignore ADDED
@@ -0,0 +1,7 @@
+ flagged/
+ *.pt
+ *.png
+ *.jpg
+ *.mp4
+ *.mkv
+ gradio_cached_examples/
app.py ADDED
@@ -0,0 +1,105 @@
+ import os
+ import json
+ import cv2
+ import numpy as np
+ import requests
+ import gradio as gr
+ from tensorflow.keras import backend as K
+ from tensorflow.keras.models import load_model
+
+ from get_coordinate import get_object_coordinates
+
+ # Example input images hosted on Dropbox
+ file_urls = [
+     'https://www.dropbox.com/scl/fi/skt4o9a37ccrxvruojk3o/2.png?rlkey=kxppvdnvbs9852rj6ly123xfk&dl=0',
+     'https://www.dropbox.com/scl/fi/3opkr5aoca1fq0wrudlcx/3.png?rlkey=wm4vog7yyk5naoqu68vr6v48s&dl=0',
+     'https://www.dropbox.com/scl/fi/t74nd09fod52x0gua93ty/1.png?rlkey=er4ktuephlapzyvh5glkym5b4&dl=0',
+ ]
+
+ def download_file(url, save_name):
+     """Download a file once; skip it if it already exists locally."""
+     if not os.path.exists(save_name):
+         response = requests.get(url)
+         with open(save_name, 'wb') as f:
+             f.write(response.content)
+
+ for i, url in enumerate(file_urls):
+     if 'mp4' in url:
+         download_file(url, "video.mp4")
+     else:
+         download_file(url, f"image_{i}.jpg")
+
+
+ class OCR():
+
+     def __init__(self, path="model-ocr-0.1829.h5", config_path="config.json"):
+         # Read the config JSON file
+         with open(config_path, 'r', encoding="utf-8") as file:
+             self.config_data = json.load(file)
+
+         # Confidence threshold and class-index -> character mapping
+         self.threshold = self.config_data['hiragana']['threshold']
+         self.label_dict = self.config_data['hiragana']['label']
+
+         # Load the Keras model from the local checkpoint
+         self.model = load_model(path, custom_objects={"K": K})
+
+     def run(self, image):
+         # Extract per-character bounding boxes using the cv2 contours
+         coordinate, thresholdedImage = get_object_coordinates(image)
+
+         output = []
+         if coordinate is None:
+             # No characters were found in the image
+             return output
+
+         image_batch = np.zeros((1, 64, 64, 1))
+         for xmin, ymin, xmax, ymax in coordinate:
+             # Crop one character and resize it to the 64x64 model input;
+             # the thresholded image is binary (0/1), so scale it to 0-255
+             cropImg = thresholdedImage[ymin:ymax, xmin:xmax]
+             image_batch[0, :, :, 0] = cv2.resize(cropImg, (64, 64)) * 255
+
+             # Predict the class probabilities for this character
+             predict = self.model.predict(image_batch)
+             position = int(np.argmax(predict))
+
+             output.append({
+                 "text": self.label_dict[str(position)],
+                 "prob": float(predict[0][position]),
+                 "coord": [xmin, ymin, xmax, ymax],  # Xmin, Ymin, Xmax, Ymax
+             })
+
+         return output
+
+
+ def getOCRResults(image_path):
+     image = cv2.imread(image_path)
+     results0 = ocrAPP.run(image)
+     # gr.JSON renders a plain dict, so no Flask response wrapper is needed
+     return {"result": results0}
+
+
+ ocrAPP = OCR()
+
+ path = [['image_0.jpg'], ['image_1.jpg']]
+
+ inputs_image = [
+     gr.components.Image(type="filepath", label="Input Image"),
+ ]
+ outputs = [
+     gr.components.JSON(label="Output Json"),
+ ]
+ interface_image = gr.Interface(
+     fn=getOCRResults,
+     inputs=inputs_image,
+     outputs=outputs,
+     title="Hiragana Character Recognition",
+     examples=path,
+     cache_examples=False,
+ )
+
+ gr.TabbedInterface(
+     [interface_image],
+     tab_names=['Image inference']
+ ).queue().launch()
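Once the app is launched, the endpoint the UI uses can also be called programmatically. A minimal sketch with gradio_client, assuming the app is serving on the default local port; the URL and image name are illustrative, and recent gradio versions may require wrapping the path with gradio_client.handle_file:

import gradio_client

client = gradio_client.Client("http://127.0.0.1:7860")  # assumed local Gradio server
# One of the downloaded example images; returns {"result": [{"text": ..., "prob": ..., "coord": ...}, ...]}
result = client.predict("image_0.jpg", api_name="/predict")
print(result)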
config.json ADDED
@@ -0,0 +1 @@
+ {"hiragana": {"threshold": 0.5, "label": {"0": "あ", "1": "い", "2": "う", "3": "え", "4": "お", "5": "か", "6": "き", "7": "く", "8": "け", "9": "こ", "10": "さ", "11": "し", "12": "す", "13": "せ", "14": "そ", "15": "た", "16": "ち", "17": "つ", "18": "て", "19": "と", "20": "な", "21": "に", "22": "ぬ", "23": "ね", "24": "の", "25": "は", "26": "ひ", "27": "ふ", "28": "へ", "29": "ほ", "30": "ま", "31": "み", "32": "む", "33": "め", "34": "も", "35": "や", "36": "ゆ", "37": "よ", "38": "ら", "39": "り", "40": "る", "41": "れ", "42": "ろ", "43": "わ", "44": "ゐ", "45": "ゑ", "46": "を", "47": "ん", "48": "ゝ"}}}
get_coordinate.py ADDED
@@ -0,0 +1,53 @@
+ # -*- coding: utf-8 -*-
+ """
+ Created on Sat Jul 22 14:22:34 2023
+
+ @author: SABARI
+ """
+
+ import cv2
+ import numpy as np
+ from skimage.filters import threshold_sauvola
+
+
+ def sauvola_thresholding(grayImage_, window_size=15):
+     """
+     Sauvola thresholding is a local technique that is useful for images
+     where the background is not uniform, especially for text recognition.
+
+     grayImage_  -- input image in 2-D grayscale format
+     window_size -- size of the local filter window
+     """
+     thresh_sauvolavalue = threshold_sauvola(grayImage_, window_size=window_size)
+
+     # Binarise and invert so that text pixels become 1 and background 0
+     thresholdImage_ = (grayImage_ > thresh_sauvolavalue)
+     return 1 - np.uint8(np.array(thresholdImage_) * 1)
+
+
+ # Function to get the bounding-box coordinates of each character
+ def get_object_coordinates(image):
+
+     # Convert the image from BGR to grayscale
+     grayImage = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+
+     # Binarise the image with Sauvola thresholding
+     thresholdedImage = sauvola_thresholding(grayImage)
+
+     # Dilate with a tall 35x1 kernel so the strokes of a character merge vertically
+     kernel = np.ones((35, 1), np.uint8)
+     dilated_image = cv2.dilate(thresholdedImage, kernel, iterations=1)
+     # Find the external contours in the mask
+     contours, _ = cv2.findContours(dilated_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+     coordinate = []
+     # Check if any contours were found
+     if len(contours) > 0:
+         for contour in contours:
+             # Get the bounding box of each contour
+             x, y, w, h = cv2.boundingRect(contour)
+             coordinate.append([x, y, x + w, y + h])  # Xmin, Ymin, Xmax, Ymax
+         return coordinate, thresholdedImage
+     else:
+         # Return None if no object was found
+         return None, thresholdedImage
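The segmentation step can be exercised on its own to inspect the boxes before running OCR. A minimal sketch, assuming a local sample.png with dark text on a light background (both filenames are illustrative):

import cv2
from get_coordinate import get_object_coordinates

image = cv2.imread("sample.png")
boxes, binary = get_object_coordinates(image)
if boxes is not None:
    for xmin, ymin, xmax, ymax in boxes:
        cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (0, 255, 0), 1)
cv2.imwrite("boxes.png", image)  # visualise the detected character boxes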
model-ocr-0.1829.h5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f52ee58d51590ebd2143a66d23030bdf930692962507410b0af96db3e4c15d24
+ size 10458528
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ opencv-python
+ tensorflow==2.7.0
+ numpy
+ scikit-image
+ gradio
+ requests