sabari committed on
Commit
e934ef6
•
1 Parent(s): 07fc4bd
Files changed (6)
  1. .gitignore +7 -0
  2. app.py +105 -0
  3. config.json +1 -0
  4. get_coordinate.py +53 -0
  5. model-ocr-0.1829.h5 +3 -0
  6. requirements.txt +6 -0
.gitignore ADDED
@@ -0,0 +1,7 @@
+ flagged/
+ *.pt
+ *.png
+ *.jpg
+ *.mp4
+ *.mkv
+ gradio_cached_examples/
app.py ADDED
@@ -0,0 +1,105 @@
+ import os
+ import json
+ import cv2
+ import numpy as np
+ import requests
+ import gradio as gr
+ from tensorflow.keras import backend as K
+ from tensorflow.keras.models import load_model
+
+ from get_coordinate import get_object_coordinates
+
+ # Example input images hosted on Dropbox
+ file_urls = [
+     'https://www.dropbox.com/scl/fi/skt4o9a37ccrxvruojk3o/2.png?rlkey=kxppvdnvbs9852rj6ly123xfk&dl=0',
+     'https://www.dropbox.com/scl/fi/3opkr5aoca1fq0wrudlcx/3.png?rlkey=wm4vog7yyk5naoqu68vr6v48s&dl=0',
+     'https://www.dropbox.com/scl/fi/t74nd09fod52x0gua93ty/1.png?rlkey=er4ktuephlapzyvh5glkym5b4&dl=0',
+ ]
+
+ def download_file(url, save_name):
+     """Download a file once; skip it if it already exists locally."""
+     if not os.path.exists(save_name):
+         response = requests.get(url)
+         with open(save_name, 'wb') as f:
+             f.write(response.content)
+
+ for i, url in enumerate(file_urls):
+     if 'mp4' in url:
+         download_file(url, "video.mp4")
+     else:
+         download_file(url, f"image_{i}.jpg")
+
+
+ class OCR():
+
+     def __init__(self, path="model-ocr-0.1829.h5", config_path="config.json"):
+         # Read the config JSON file
+         with open(config_path, 'r', encoding="utf-8") as file:
+             self.config_data = json.load(file)
+
+         # Confidence threshold and class-index -> character mapping
+         self.threshold = self.config_data['hiragana']['threshold']
+         self.label_dict = self.config_data['hiragana']['label']
+
+         # Load the Keras model from the local checkpoint
+         self.model = load_model(path, custom_objects={"K": K})
+
+     def run(self, image):
+         # Extract per-character bounding boxes using the cv2 contours
+         coordinate, thresholdedImage = get_object_coordinates(image)
+
+         output = []
+         if coordinate is None:
+             # No characters were found in the image
+             return output
+
+         image_batch = np.zeros((1, 64, 64, 1))
+         for xmin, ymin, xmax, ymax in coordinate:
+             # Crop one character and resize it to the 64x64 model input;
+             # the thresholded image is binary (0/1), so scale it to 0-255
+             cropImg = thresholdedImage[ymin:ymax, xmin:xmax]
+             image_batch[0, :, :, 0] = cv2.resize(cropImg, (64, 64)) * 255
+
+             # Predict the class probabilities for this character
+             predict = self.model.predict(image_batch)
+             position = int(np.argmax(predict))
+
+             output.append({
+                 "text": self.label_dict[str(position)],
+                 "prob": float(predict[0][position]),
+                 "coord": [xmin, ymin, xmax, ymax],  # Xmin, Ymin, Xmax, Ymax
+             })
+
+         return output
+
+
+ def getOCRResults(image_path):
+     image = cv2.imread(image_path)
+     results0 = ocrAPP.run(image)
+     # gr.JSON renders a plain dict, so no Flask response wrapper is needed
+     return {"result": results0}
+
+
+ ocrAPP = OCR()
+
+ path = [['image_0.jpg'], ['image_1.jpg']]
+
+ inputs_image = [
+     gr.components.Image(type="filepath", label="Input Image"),
+ ]
+ outputs = [
+     gr.components.JSON(label="Output Json"),
+ ]
+ interface_image = gr.Interface(
+     fn=getOCRResults,
+     inputs=inputs_image,
+     outputs=outputs,
+     title="Hiragana Character Recognition",
+     examples=path,
+     cache_examples=False,
+ )
+
+ gr.TabbedInterface(
+     [interface_image],
+     tab_names=['Image inference']
+ ).queue().launch()
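Once the app is launched, the endpoint the UI uses can also be called programmatically. A minimal sketch with gradio_client, assuming the app is serving on the default local port; the URL and image name are illustrative, and recent gradio versions may require wrapping the path with gradio_client.handle_file:

import gradio_client

client = gradio_client.Client("http://127.0.0.1:7860")  # assumed local Gradio server
# One of the downloaded example images; returns {"result": [{"text": ..., "prob": ..., "coord": ...}, ...]}
result = client.predict("image_0.jpg", api_name="/predict")
print(result)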
config.json ADDED
@@ -0,0 +1 @@
+ {"hiragana": {"threshold": 0.5, "label": {"0": "あ", "1": "い", "2": "う", "3": "え", "4": "お", "5": "か", "6": "き", "7": "く", "8": "け", "9": "こ", "10": "さ", "11": "し", "12": "す", "13": "せ", "14": "そ", "15": "た", "16": "ち", "17": "つ", "18": "て", "19": "と", "20": "な", "21": "に", "22": "ぬ", "23": "ね", "24": "の", "25": "は", "26": "ひ", "27": "ふ", "28": "へ", "29": "ほ", "30": "ま", "31": "み", "32": "む", "33": "め", "34": "も", "35": "や", "36": "ゆ", "37": "よ", "38": "ら", "39": "り", "40": "る", "41": "れ", "42": "ろ", "43": "わ", "44": "ゐ", "45": "ゑ", "46": "を", "47": "ん", "48": "ゝ"}}}
get_coordinate.py ADDED
@@ -0,0 +1,53 @@
+ # -*- coding: utf-8 -*-
+ """
+ Created on Sat Jul 22 14:22:34 2023
+
+ @author: SABARI
+ """
+
+ import cv2
+ import numpy as np
+ from skimage.filters import threshold_sauvola
+
+
+ def sauvola_thresholding(grayImage_, window_size=15):
+     """
+     Sauvola thresholding is a local technique that is useful for images
+     where the background is not uniform, especially for text recognition.
+
+     grayImage_  -- input image in 2-D grayscale format
+     window_size -- size of the local filter window
+     """
+     thresh_sauvolavalue = threshold_sauvola(grayImage_, window_size=window_size)
+
+     # Binarise and invert so that text pixels become 1 and background 0
+     thresholdImage_ = (grayImage_ > thresh_sauvolavalue)
+     return 1 - np.uint8(np.array(thresholdImage_) * 1)
+
+
+ # Function to get the bounding-box coordinates of each character
+ def get_object_coordinates(image):
+
+     # Convert the image from BGR to grayscale
+     grayImage = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+
+     # Binarise the image with Sauvola thresholding
+     thresholdedImage = sauvola_thresholding(grayImage)
+
+     # Dilate with a tall 35x1 kernel so the strokes of a character merge vertically
+     kernel = np.ones((35, 1), np.uint8)
+     dilated_image = cv2.dilate(thresholdedImage, kernel, iterations=1)
+     # Find the external contours in the mask
+     contours, _ = cv2.findContours(dilated_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+     coordinate = []
+     # Check if any contours were found
+     if len(contours) > 0:
+         for contour in contours:
+             # Get the bounding box of each contour
+             x, y, w, h = cv2.boundingRect(contour)
+             coordinate.append([x, y, x + w, y + h])  # Xmin, Ymin, Xmax, Ymax
+         return coordinate, thresholdedImage
+     else:
+         # Return None if no object was found
+         return None, thresholdedImage
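The segmentation step can be exercised on its own to inspect the boxes before running OCR. A minimal sketch, assuming a local sample.png with dark text on a light background (both filenames are illustrative):

import cv2
from get_coordinate import get_object_coordinates

image = cv2.imread("sample.png")
boxes, binary = get_object_coordinates(image)
if boxes is not None:
    for xmin, ymin, xmax, ymax in boxes:
        cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (0, 255, 0), 1)
cv2.imwrite("boxes.png", image)  # visualise the detected character boxes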
model-ocr-0.1829.h5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f52ee58d51590ebd2143a66d23030bdf930692962507410b0af96db3e4c15d24
+ size 10458528
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ opencv-python
+ tensorflow==2.7.0
+ numpy
+ scikit-image
+ gradio
+ requests