# Flight_ATA_Class / preprocessing_images.py
# anupam210 - Duplicate from ai-based/azure_ocr (99c2b2d)
#importing packages
from pdf2image import convert_from_path
from fpdf import FPDF
import cv2
import numpy as np
import requests
from PIL import Image
from PIL import ImageEnhance
def boxcutter(img):
    """Crop a page image to the bounding box of its second-largest contour.

    The image is converted to grayscale, smoothed with a 5x5 Gaussian blur and
    inverse-thresholded at 230 so that dark pixels become foreground. The
    bounding rectangle of every contour is collected (identical rectangles
    are counted once) and the rectangle with the second-largest area is used
    as the crop candidate.

    Args:
        img: A colour image in any form ``np.array`` accepts (for example a
            PIL image or an existing NumPy array).

    Returns:
        The cropped region when more than two distinct rectangles were found
        and the candidate is taller than 60% of the image height and wider
        than 60% of the image width; otherwise the whole image. In both
        cases the result is a NumPy array.
    """
    img = np.array(img)
    height, width = img.shape[:2]

    # NOTE(review): COLOR_BGR2GRAY assumes BGR channel order, whereas
    # np.array() of a PIL image is RGB -- confirm which order callers supply.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    _, thresh_inv = cv2.threshold(blurred, 230, 255, cv2.THRESH_BINARY_INV)

    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x/4.x return
    # (contours, hierarchy); the contour list is always second from the end.
    contours = cv2.findContours(
        thresh_inv, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
    )[-2]

    # Keyed by rectangle so identical boxes from different contours collapse
    # into a single entry.
    areas = {}
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        areas[(x, y, w, h)] = w * h

    # Too few distinct boxes to pick a meaningful second-largest one.
    if len(areas) <= 2:
        return img

    # Second-largest box by area (presumably the largest is the outer edge of
    # the page/frame itself -- TODO confirm).
    x, y, w, h = sorted(areas, key=areas.get)[-2]
    roi = img[y:y + h, x:x + w]
    roi_height, roi_width = roi.shape[:2]

    # Only accept the crop when it still covers most of the page.
    if roi_height > int(height * 0.6) and roi_width > int(width * 0.6):
        return roi
    return img
def noise_removal(image):
    """Boost the contrast of an image and run it through the denoise steps.

    Args:
        image: Image array accepted by ``PIL.Image.fromarray``.

    Returns:
        The processed image as a NumPy array.
    """
    # Increase the contrast of the text with PIL, then go back to NumPy.
    boosted = ImageEnhance.Contrast(Image.fromarray(image)).enhance(2.5)
    cleaned = np.array(boosted)

    # NOTE(review): a 1x1 structuring element looks like it makes the
    # dilate/erode/close passes below no-ops -- confirm whether a larger
    # kernel was intended.
    unit_kernel = np.ones((1, 1), np.uint8)

    # Noise removal using dilate, erode, morphological close and median blur.
    cleaned = cv2.dilate(cleaned, unit_kernel, iterations=1)
    cleaned = cv2.erode(cleaned, unit_kernel, iterations=1)
    cleaned = cv2.morphologyEx(
        cleaned, cv2.MORPH_CLOSE, unit_kernel, iterations=1
    )
    cleaned = cv2.medianBlur(cleaned, 3)

    # Erode and then dilate on the inverted image, then invert back.
    inverted = cv2.bitwise_not(cleaned)
    inverted = cv2.erode(inverted, unit_kernel, iterations=3)
    inverted = cv2.dilate(inverted, unit_kernel, iterations=3)
    return cv2.bitwise_not(inverted)
def preprocessing_function(url):
    """Download a PDF, clean up every page and save the result as a new PDF.

    Steps:
        1. Download ``url`` and store it as ``metadata.pdf``.
        2. Render each page to an image at 200 dpi.
        3. Per page: crop with ``boxcutter``, paint pixels inside the red HSV
           range white, convert to grayscale, run ``noise_removal`` and write
           the result to ``no_noise<i>.jpg``.
        4. Assemble all ``no_noise<i>.jpg`` files into ``answer_paper.pdf``.

    All files are written to the current working directory.

    Args:
        url: Address of the PDF document to download.

    Raises:
        requests.HTTPError: If the server responds with an error status.
        requests.Timeout: If the server stalls for longer than the timeout.
    """
    # Fail fast on HTTP errors instead of saving an error page as a "PDF",
    # and never wait forever on an unresponsive server.
    response = requests.get(url, timeout=60)
    response.raise_for_status()
    with open('metadata.pdf', 'wb') as f:
        f.write(response.content)

    # Reading and converting the PDF into one image per page (200 dpi).
    pages = convert_from_path('metadata.pdf', 200)

    # HSV bounds used to pick out red markings; loop-invariant, so built once.
    # NOTE(review): OpenCV's 8-bit hue channel tops out at 179, so the upper
    # hue of 189 just means "to the end of the scale"; reds on the low side
    # of the hue wheel (near 0) are not covered by this range.
    lower_red = np.array([161, 50, 50])
    upper_red = np.array([189, 255, 255])

    for i, page in enumerate(pages):
        cropped = boxcutter(page)

        # Removing red colour from the answer paper: every pixel inside the
        # red range is overwritten with white.
        image = cv2.cvtColor(cropped, cv2.COLOR_RGB2BGR)
        img_hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        red_mask = cv2.inRange(img_hsv, lower_red, upper_red)
        image[red_mask == 255] = 255

        # Using the noise removal function to denoise and enhance the page.
        img_bw = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        no_noise = noise_removal(img_bw)
        cv2.imwrite("no_noise" + str(i) + ".jpg", no_noise)

    # Saving the final preprocessed images as a PDF, one image per page.
    pdf = FPDF()
    for j in range(len(pages)):
        pdf.add_page()
        pdf.image("no_noise" + str(j) + ".jpg", 0, 0, 200, 300)
    pdf.output("answer_paper.pdf", "F")