# kraken_ocr/app.py
import streamlit as st
from kraken import blla, rpred
from kraken.lib import vgsl
from kraken.lib import models
from PIL import Image, ImageDraw
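# NOTE: this script assumes the kraken 5.x API, where blla.segment() returns a
# Segmentation container with a .regions dict keyed by region type, and rpred()
# yields records exposing .prediction, .boundary, .baseline and .tags. Older
# kraken releases return plain dicts and would need a different traversal below.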
# Define available OCR models for segmentation and recognition
segmentation_models = {
"1col_442_sam_v1.mlmodel": "models/1col_442_sam_v1.mlmodel",
"ubma_sam_v4.mlmodel": "models/ubma_sam_v4.mlmodel"
}
recognition_models = {
"sinai_sam_rec_v4.mlmodel": "models/sinai_sam_rec_v4.mlmodel",
"sinai_sam_rec_v2.mlmodel": "models/sinai_sam_rec_v2.mlmodel"
}
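# The paths above are assumed to point at .mlmodel files shipped in a local
# "models/" directory next to this script. A quick existence check (sketch below)
# shows a readable warning instead of a traceback if a file is missing.
import os

missing_models = [path for path in list(segmentation_models.values()) + list(recognition_models.values())
                  if not os.path.exists(path)]
if missing_models:
    st.warning(f"Model files not found: {', '.join(missing_models)}")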
# Streamlit app title and description
st.title("OCR with Kraken - Segmentation and Recognition")
st.write("Upload an image, select segmentation and recognition models, and view OCR results.")
# Upload image file
uploaded_image = st.file_uploader("Upload an image file", type=["png", "jpg", "jpeg"])
# Select segmentation and recognition models
selected_seg_model = st.selectbox("Select Kraken Segmentation Model", list(segmentation_models.keys()))
selected_rec_model = st.selectbox("Select Kraken Recognition Model", list(recognition_models.keys()))
# Option to draw baselines
draw_baselines = st.radio("Options", ("Do not draw baselines", "Draw baselines")) == "Draw baselines"
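# Kraken models are large, and Streamlit reruns this script on every widget
# interaction. The loaders below are an optional sketch using st.cache_resource
# (available in recent Streamlit releases) so each model is loaded once per path;
# they mirror the direct load calls further down and could be swapped in for them.
@st.cache_resource
def load_segmentation_model(path):
    # Segmentation models are VGSL (TorchVGSLModel) files.
    return vgsl.TorchVGSLModel.load_model(path)

@st.cache_resource
def load_recognition_model(path):
    # load_any() accepts any recognition model format kraken supports.
    return models.load_any(path)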
# Process the image if uploaded and models selected
if uploaded_image and selected_seg_model and selected_rec_model:
# Load the image
    image = Image.open(uploaded_image).convert("RGB")  # convert to RGB so colored overlays render correctly on grayscale/palette uploads
st.image(image, caption="Uploaded Image", use_column_width=True)
# Load selected Kraken segmentation and recognition models
seg_model_path = segmentation_models[selected_seg_model]
rec_model_path = recognition_models[selected_rec_model]
seg_model = vgsl.TorchVGSLModel.load_model(seg_model_path)
rec_model = models.load_any(rec_model_path)
# Segment image using Kraken segmentation model
baseline_seg = blla.segment(image, model=seg_model)
# Pass segmentation result to recognition model
pred_it = rpred.rpred(network=rec_model, im=image, bounds=baseline_seg)
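    # rpred() returns a lazy generator: each item is an ocr_record combining the
    # recognised text with the geometry (boundary, baseline, tags) of the segmented line.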
# Prepare to draw boundaries and display info
boundaries_info = []
draw = ImageDraw.Draw(image)
# Process recognition predictions for lines and draw on image
for idx, pred in enumerate(pred_it):
prediction = pred.prediction
line_boundary = [(int(x), int(y)) for x, y in pred.boundary]
line_baseline = [(int(x), int(y)) for x, y in pred.baseline] if pred.baseline else None
line_type = pred.tags.get("type", "undefined") # Get line type dynamically if available
        # Add boundary, baseline (if selected), and prediction to the display info
boundaries_info.append(f"**Line {idx + 1}** (type: {line_type}):\n - Boundary: {line_boundary}")
# Draw boundary in green
draw.polygon(line_boundary, outline="green")
# Draw baseline if the option is selected and add it to display info
if draw_baselines and line_baseline:
boundaries_info.append(f" - Baseline: {line_baseline}")
draw.line(line_baseline, fill="red", width=2) # Draw baseline in red
# Add prediction last
boundaries_info.append(f" - Prediction: {prediction}")
# Process and draw region boundaries from baseline_seg
for region_type, region_list in baseline_seg.regions.items():
for idx, region_data in enumerate(region_list):
if hasattr(region_data, "boundary"):
region_boundary = [(int(x), int(y)) for x, y in region_data.boundary]
region_type_name = region_data.tags.get("type", region_type) # Get region type dynamically
boundaries_info.append(f"**Region {idx + 1}** (type: {region_type_name}):\n - Boundary: {region_boundary}")
draw.polygon(region_boundary, outline="blue") # Draw region boundary in blue
# Display the image with boundaries drawn
    st.image(image, caption="Image with OCR overlays: line boundaries (green), region boundaries (blue), and baselines (red, if selected)", use_column_width=True)
# Display the list of boundaries, predictions, and baselines
st.write("**List of Boundaries, Predictions, and Baselines (if selected):**")
for info in boundaries_info:
st.write(info)
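
# To run locally (assuming streamlit and kraken are installed in the environment):
#     streamlit run app.py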