# BookRecogntionKZ / kz_ocr_easy.py
# ardakshalkar's picture
# add files
# d7deef5
import os
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from tqdm import tqdm
import os
import easyocr
# Working directories used by the script:
#   models_dir - custom EasyOCR model files and the font file
#   images_dir - input images to run OCR on
#   output_dir - annotated copies of the input images
models_dir = "./models"
images_dir = "./images"
output_dir = "./output"
dirs = [models_dir, images_dir, output_dir]

# Create every directory that is missing. The check must use the loop
# variable `d`; testing `output_dir` on each iteration would leave the models
# and images directories uncreated. `exist_ok=True` makes this a no-op for
# directories that already exist.
for d in dirs:
    os.makedirs(d, exist_ok=True)
"""
Setup instructions.

Place the custom EasyOCR model files (all sharing the same base name) and the
font file Ubuntu-Regular.ttf in the models directory, for example:
    best_norm_ED.pth
    best_norm_ED.py
    best_norm_ED.yaml
    Ubuntu-Regular.ttf

Place the image files you want to test in the images directory, for example:
    kz_book_simple.jpeg
    kz_blur.jpg
    kz_book_complex.jpg
"""
# Font file used to render the recognized text onto the annotated images.
font_path = "/".join((models_dir, "Ubuntu-Regular.ttf"))

# Build the OCR reader a single time so the model is loaded into memory once
# and reused for every image. Both the user network definition and the model
# weights are looked up in the models directory.
reader = easyocr.Reader(
    lang_list=['en'],
    gpu=True,
    model_storage_directory=models_dir,
    user_network_directory=models_dir,
    detect_network="craft",
    recog_network='best_norm_ED',
)
# File extensions (compared case-insensitively) that are treated as images.
image_extensions = (".jpg", ".jpeg", ".png")


def _annotate(image_rgb, results, font):
    """Draw OCR results onto an RGB image and return the result in BGR order.

    Args:
        image_rgb: RGB pixel array; the bounding boxes are drawn on it in place.
        results: iterable of ``(bbox, text, prob)`` tuples, where ``bbox`` is
            ``(top_left, top_right, bottom_right, bottom_left)``.
        font: font object handed to ``ImageDraw.text``.

    Returns:
        A new array with boxes and text drawn, with channels in BGR order so
        it can be passed straight to ``cv2.imwrite``.
    """
    labels = []
    for bbox, text, _prob in results:
        top_left, _top_right, bottom_right, _bottom_left = bbox
        top_left = (int(top_left[0]), int(top_left[1]))
        bottom_right = (int(bottom_right[0]), int(bottom_right[1]))
        # Green is (0, 255, 0) in both RGB and BGR order.
        cv2.rectangle(image_rgb, top_left, bottom_right, (0, 255, 0), 2)
        labels.append((top_left, text))

    # OpenCV drawing is kept for the boxes; the text goes through PIL so the
    # custom TrueType font is used. One round trip per image is enough.
    canvas = Image.fromarray(image_rgb)
    draw = ImageDraw.Draw(canvas)
    for (x, y), text in labels:
        # Place the label 40 px above the box, clamped so that text for boxes
        # near the top edge is not drawn off-canvas.
        draw.text((x, max(y - 40, 0)), text, font=font, fill=(0, 0, 255))

    # cv2.imwrite expects BGR; converting here keeps the saved colors correct.
    return cv2.cvtColor(np.array(canvas), cv2.COLOR_RGB2BGR)


# The font does not depend on the image, so load it once up front.
font = ImageFont.truetype(font_path, 32)

for image_name in tqdm(os.listdir(images_dir)):
    if not image_name.lower().endswith(image_extensions):
        print(f'unsupported file {image_name}')
        continue
    image_path = f'{images_dir}/{image_name}'
    print(image_path)

    # cv2.imread returns None (it does not raise) when the file cannot be
    # read or decoded, so skip such files instead of crashing in cvtColor.
    image_bgr = cv2.imread(image_path)
    if image_bgr is None:
        print(f'could not read image {image_path}')
        continue

    # NOTE: sideways scans can be rotated here before OCR, e.g. with
    # cv2.rotate(image_bgr, cv2.ROTATE_90_CLOCKWISE).

    # OpenCV loads images in BGR order; the reader is given an RGB copy.
    image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
    # NOTE: reader.readtext also accepts paragraph=True to merge results into
    # paragraphs; _annotate would need adjusting if the result shape changes.
    results = reader.readtext(image=image_rgb)

    annotated = _annotate(image_rgb, results, font)

    # Save the annotated image under the same file name in the output directory.
    output_path = f'{output_dir}/{image_name}'
    if not cv2.imwrite(output_path, annotated):
        print(f'could not write image {output_path}')