NoteCrawling / process.py
nyonyong
First commit
30c8b41
raw
history blame
No virus
2.95 kB
import os
import shutil
import subprocess
from PIL import Image
import directories as Dir
def clearDir():
    """Delete the artifacts left behind by a previous run.

    Removes the cropped-image directory ``Dir.cropped_img_path`` (the
    original comment gives '/runs/detect/user_output' as an example) and the
    recognition log ``Dir.txt_file_path``.

    Each artifact is removed independently, and one that does not exist is
    skipped, so a missing directory no longer prevents the log file from
    being deleted. Any other error (e.g. a permission problem) still
    propagates to the caller.
    """
    try:
        shutil.rmtree(Dir.cropped_img_path)
    except FileNotFoundError:
        # Nothing to clear: the directory was never created or is already gone.
        pass

    try:
        os.remove(Dir.txt_file_path)
    except FileNotFoundError:
        # Same for the log file.
        pass
def textDetection(im):
    """Save the input image and run ``detect.py`` on it to crop word images.

    Args:
        im: Image data in a form accepted by ``PIL.Image.fromarray``
            (the original comment describes it as an ndarray).

    The image is written as JPEG to ``<Dir.yolo_dir>/cookie/user_input.jpg``
    and ``detect.py`` is then launched with ``Dir.yolo_dir`` as its working
    directory. The exit code of ``detect.py`` is not inspected.
    """
    # A `cd` executed through subprocess only affects that short-lived child
    # shell, never this process or later subprocess calls. The target
    # directories are therefore passed explicitly instead of relying on `cd`.
    cookie_dir = os.path.join(Dir.yolo_dir, 'cookie')
    os.makedirs(cookie_dir, exist_ok=True)

    # Transform ndarray type to PIL type and save it to the cookie folder.
    Image.fromarray(im).save(os.path.join(cookie_dir, 'user_input.jpg'), 'JPEG')

    # Run detect.py from the YOLO folder to get cropped word images.
    subprocess.call(
        [
            'python', 'detect.py',
            # User input data folder.
            # NOTE(review): '/cookie' is an absolute path, kept unchanged from
            # the original; confirm it resolves to the cookie folder the image
            # is saved into above.
            '--source', '/cookie',
            # Text detection model weights.
            '--weights', Dir.detect_model_dir,
            '--conf', '0.25',
            # Name of the output folder for this run.
            '--name', Dir.cropped_img_folder_name,
            '--save-crop',
            '--save-conf',
        ],
        cwd=Dir.yolo_dir,
    )
def textRearrange():
    """Run ``DBSCAN.py`` with ``Dir.DBSCAN_dir`` as its working directory.

    The script's exit code is not inspected.
    """
    # A separate `cd` subprocess cannot change the directory of the next
    # subprocess call, so the working directory is passed via `cwd`.
    subprocess.call(['python', 'DBSCAN.py'], cwd=Dir.DBSCAN_dir)
def textRecognition():
    """Run ``demo.py`` on the cropped word images.

    The command is executed through the shell with ``Dir.recog_dir`` as the
    working directory and with ``CUDA_VISIBLE_DEVICES`` set to an empty
    string. It reads images from
    ``Dir.home_dir + Dir.cropped_img_path + '/crops/word'`` and loads the
    model given by ``Dir.recog_model_dir``. The exit code is not inspected.
    """
    command = (
        'CUDA_VISIBLE_DEVICES="" python3 demo.py'
        ' --Transformation TPS --FeatureExtraction ResNet'
        ' --SequenceModeling BiLSTM --Prediction Attn'
        ' --image_folder ' + Dir.home_dir + Dir.cropped_img_path + '/crops/word'
        ' --saved_model ' + Dir.recog_model_dir
    )
    # A separate `cd` subprocess does not affect this call, so the working
    # directory is set with `cwd` on the command that actually needs it.
    subprocess.call(command, shell=True, cwd=Dir.recog_dir)
def getHcrResult(file_path):
    """Collect the recognized words from a recognition log file.

    The first three lines of the file are skipped. On every remaining line
    the second tab-separated column is read and its first
    whitespace-delimited token is taken as the recognized word. A line
    without a second column, or with a blank one, contributes an empty word.

    Args:
        file_path: Path of the log file to read.

    Returns:
        The words concatenated in file order, each preceded by one space
        (so a non-empty result starts with a space). An empty string when
        the file has three lines or fewer.
    """
    words = []
    with open(file_path, 'r') as file:
        for line in file.readlines()[3:]:
            columns = line.split("\t")
            # Columns may be space-padded; split() drops the padding and
            # leaves the bare word, or nothing if the column is blank.
            tokens = columns[1].split() if len(columns) > 1 else []
            # Use the token itself: going through str(list) would re-escape
            # backslashes and quotes contained in the word.
            words.append(tokens[0] if tokens else "")
    return "".join(" " + word for word in words)