Spaces:
Sleeping
Sleeping
Update app_pages/ocr_comparator.py
Browse files- app_pages/ocr_comparator.py +39 -39
app_pages/ocr_comparator.py
CHANGED
@@ -620,8 +620,8 @@ def app():
|
|
620 |
# Recognize with Tesseract
|
621 |
with st.spinner('Tesseract Text recognition in progress ...'):
|
622 |
out_df_results_tesseract, status_tesseract = \
|
623 |
-
#tesserocr_recog(in_image_cv, in_list_dict_params[3], len(list_cropped_images))
|
624 |
tesserocr_recog(in_image_cv, in_list_dict_params[2], len(list_cropped_images))
|
|
|
625 |
##
|
626 |
|
627 |
# Create results data frame
|
@@ -757,44 +757,44 @@ def app():
|
|
757 |
#
|
758 |
# return out_list_text_mmocr, out_list_confidence_mmocr, out_status
|
759 |
#
|
760 |
-
|
761 |
-
|
762 |
-
|
763 |
-
|
764 |
-
|
765 |
-
|
766 |
-
|
767 |
-
|
768 |
-
|
769 |
-
|
770 |
-
|
771 |
-
|
772 |
-
|
773 |
-
|
774 |
-
|
775 |
-
|
776 |
-
|
777 |
-
|
778 |
-
|
779 |
-
|
780 |
-
|
781 |
-
|
782 |
-
|
783 |
-
|
784 |
-
|
785 |
-
|
786 |
-
|
787 |
-
|
788 |
-
|
789 |
-
|
790 |
-
|
791 |
-
|
792 |
-
|
793 |
-
|
794 |
-
|
795 |
-
|
796 |
-
|
797 |
-
|
798 |
|
799 |
###
|
800 |
def draw_reco_images(in_image, in_boxes_coordinates, in_list_texts, in_list_confid, \
|
|
|
620 |
# Recognize with Tesseract
|
621 |
with st.spinner('Tesseract Text recognition in progress ...'):
|
622 |
out_df_results_tesseract, status_tesseract = \
|
|
|
623 |
tesserocr_recog(in_image_cv, in_list_dict_params[2], len(list_cropped_images))
|
624 |
+
#tesserocr_recog(in_image_cv, in_list_dict_params[3], len(list_cropped_images))
|
625 |
##
|
626 |
|
627 |
# Create results data frame
|
|
|
757 |
#
|
758 |
# return out_list_text_mmocr, out_list_confidence_mmocr, out_status
|
759 |
#
|
760 |
+
###
|
761 |
+
@st.experimental_memo(suppress_st_warning=True, show_spinner=False)
def tesserocr_recog(in_img, in_params, in_nb_images):
    """Recognition with Tesseract

    Args:
        in_img (matrix)   : original image, passed to pytesseract and to cropped_1box
        in_params (dict)  : keyword arguments forwarded to pytesseract.image_to_data
        in_nb_images      : nb cropped images (only used to position the progress bar)

    Returns:
        Pandas data frame : recognition results, with added 'box' and 'cropped' columns
                            (empty data frame when recognition raised)
        string/Exception  : recognition status ('OK', or the exception that was raised)
    """
    ## ------- Tesseract Text recognition
    step = 3 * in_nb_images  # fourth recognition process
    nb_steps = 4 * in_nb_images
    # With no cropped images the original step/nb_steps was 0/0 and raised
    # ZeroDivisionError before the try block; fall back to the same 3/4 mark
    # the ratio yields for every non-zero count.
    progress_bar = st.progress(step / nb_steps if nb_steps else 0.75)

    try:
        out_df_result = pytesseract.image_to_data(in_img, **in_params,
                                                  output_type=Output.DATAFRAME)

        # Four corners of each detection, built from left/top/width/height:
        # [left, top], [left+width, top], [left+width, top+height], [left, top+height]
        out_df_result['box'] = out_df_result.apply(
            lambda d: [[d['left'], d['top']],
                       [d['left'] + d['width'], d['top']],
                       [d['left'] + d['width'], d['top'] + d['height']],
                       [d['left'], d['top'] + d['height']],
                       ],
            axis=1)
        out_df_result['cropped'] = out_df_result['box'].apply(lambda b: cropped_1box(b, in_img))

        # Keep only rows with a positive word number whose text is not a single space
        is_word = (out_df_result.word_num > 0) & (out_df_result.text != ' ')
        out_df_result = out_df_result[is_word].reset_index(drop=True)
        out_status = 'OK'
    except Exception as e:
        # Best effort: the caller receives the exception as the status instead of a crash
        out_df_result = pd.DataFrame([])
        out_status = e

    progress_bar.progress(1.)

    return out_df_result, out_status
|
798 |
|
799 |
###
|
800 |
def draw_reco_images(in_image, in_boxes_coordinates, in_list_texts, in_list_confid, \
|