# kraken_ocr/app.py
import streamlit as st
from kraken import blla, rpred
from kraken.lib import vgsl
from kraken.lib import models
from PIL import Image, ImageDraw
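# NOTE: this script assumes the kraken 5.x API, where blla.segment() returns a
# Segmentation container with a .regions dict keyed by region type, and rpred()
# yields records exposing .prediction, .boundary, .baseline and .tags. Older
# kraken releases return plain dicts and would need a different traversal below.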
# Define available OCR models for segmentation and recognition
segmentation_models = {
"1col_442_sam_v1.mlmodel": "models/1col_442_sam_v1.mlmodel",
"ubma_sam_v4.mlmodel": "models/ubma_sam_v4.mlmodel"
}
recognition_models = {
"sinai_sam_rec_v4.mlmodel": "models/sinai_sam_rec_v4.mlmodel",
"sinai_sam_rec_v2.mlmodel": "models/sinai_sam_rec_v2.mlmodel"
}
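# The paths above are assumed to point at .mlmodel files shipped in a local
# "models/" directory next to this script. A quick existence check (sketch below)
# shows a readable warning instead of a traceback if a file is missing.
import os

missing_models = [path for path in list(segmentation_models.values()) + list(recognition_models.values())
                  if not os.path.exists(path)]
if missing_models:
    st.warning(f"Model files not found: {', '.join(missing_models)}")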
# Streamlit app title and description
st.title("OCR with Kraken - Segmentation and Recognition")
st.write("Upload an image, select segmentation and recognition models, and view OCR results.")
# Upload image file
uploaded_image = st.file_uploader("Upload an image file", type=["png", "jpg", "jpeg"])
# Select segmentation and recognition models
selected_seg_model = st.selectbox("Select Kraken Segmentation Model", list(segmentation_models.keys()))
selected_rec_model = st.selectbox("Select Kraken Recognition Model", list(recognition_models.keys()))
# Option to draw baselines
draw_baselines = st.radio("Options", ("Do not draw baselines", "Draw baselines")) == "Draw baselines"
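# Kraken models are large, and Streamlit reruns this script on every widget
# interaction. The loaders below are an optional sketch using st.cache_resource
# (available in recent Streamlit releases) so each model is loaded once per path;
# they mirror the direct load calls further down and could be swapped in for them.
@st.cache_resource
def load_segmentation_model(path):
    # Segmentation models are VGSL (TorchVGSLModel) files.
    return vgsl.TorchVGSLModel.load_model(path)

@st.cache_resource
def load_recognition_model(path):
    # load_any() accepts any recognition model format kraken supports.
    return models.load_any(path)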
# Process the image if uploaded and models selected
if uploaded_image and selected_seg_model and selected_rec_model:
# Load the image
    image = Image.open(uploaded_image).convert("RGB")  # convert to RGB so colored overlays render correctly on grayscale/palette uploads
st.image(image, caption="Uploaded Image", use_column_width=True)
# Load selected Kraken segmentation and recognition models
seg_model_path = segmentation_models[selected_seg_model]
rec_model_path = recognition_models[selected_rec_model]
seg_model = vgsl.TorchVGSLModel.load_model(seg_model_path)
rec_model = models.load_any(rec_model_path)
# Segment image using Kraken segmentation model
baseline_seg = blla.segment(image, model=seg_model)
# Pass segmentation result to recognition model
pred_it = rpred.rpred(network=rec_model, im=image, bounds=baseline_seg)
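    # rpred() returns a lazy generator: each item is an ocr_record combining the
    # recognised text with the geometry (boundary, baseline, tags) of the segmented line.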
# Prepare to draw boundaries and display info
boundaries_info = []
draw = ImageDraw.Draw(image)
# Process recognition predictions for lines and draw on image
for idx, pred in enumerate(pred_it):
prediction = pred.prediction
line_boundary = [(int(x), int(y)) for x, y in pred.boundary]
line_baseline = [(int(x), int(y)) for x, y in pred.baseline] if pred.baseline else None
line_type = pred.tags.get("type", "undefined") # Get line type dynamically if available
        # Add boundary, baseline (if selected), and prediction to the display info
boundaries_info.append(f"**Line {idx + 1}** (type: {line_type}):\n - Boundary: {line_boundary}")
# Draw boundary in green
draw.polygon(line_boundary, outline="green")
# Draw baseline if the option is selected and add it to display info
if draw_baselines and line_baseline:
boundaries_info.append(f" - Baseline: {line_baseline}")
draw.line(line_baseline, fill="red", width=2) # Draw baseline in red
# Add prediction last
boundaries_info.append(f" - Prediction: {prediction}")
# Process and draw region boundaries from baseline_seg
for region_type, region_list in baseline_seg.regions.items():
for idx, region_data in enumerate(region_list):
if hasattr(region_data, "boundary"):
region_boundary = [(int(x), int(y)) for x, y in region_data.boundary]
region_type_name = region_data.tags.get("type", region_type) # Get region type dynamically
boundaries_info.append(f"**Region {idx + 1}** (type: {region_type_name}):\n - Boundary: {region_boundary}")
draw.polygon(region_boundary, outline="blue") # Draw region boundary in blue
# Display the image with boundaries drawn
    st.image(image, caption="Image with OCR overlays: line boundaries (green), region boundaries (blue), and baselines (red, if selected)", use_column_width=True)
# Display the list of boundaries, predictions, and baselines
st.write("**List of Boundaries, Predictions, and Baselines (if selected):**")
for info in boundaries_info:
st.write(info)
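
# To run locally (assuming streamlit and kraken are installed in the environment):
#     streamlit run app.py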