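# Streamlit app: Kraken OCR with layout segmentation and text recognition.
# Upload an image, pick a segmentation and a recognition model, and view the
# line/region boundaries (and optionally baselines) drawn on the image along
# with the recognized text.
# To launch (assuming this file is saved as app.py): streamlit run app.py
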
import streamlit as st
from kraken import blla, rpred
from kraken.lib import vgsl
from kraken.lib import models
from PIL import Image, ImageDraw

# Define available OCR models for segmentation and recognition
segmentation_models = {
    "1col_442_sam_v1.mlmodel": "models/1col_442_sam_v1.mlmodel",
    "ubma_sam_v4.mlmodel": "models/ubma_sam_v4.mlmodel"
}

recognition_models = {
    "sinai_sam_rec_v4.mlmodel": "models/sinai_sam_rec_v4.mlmodel",
    "sinai_sam_rec_v2.mlmodel": "models/sinai_sam_rec_v2.mlmodel"
}

# Streamlit app title and description
st.title("OCR with Kraken - Segmentation and Recognition")
st.write("Upload an image, select segmentation and recognition models, and view OCR results.")

# Upload image file
uploaded_image = st.file_uploader("Upload an image file", type=["png", "jpg", "jpeg"])

# Select segmentation and recognition models
selected_seg_model = st.selectbox("Select Kraken Segmentation Model", list(segmentation_models.keys()))
selected_rec_model = st.selectbox("Select Kraken Recognition Model", list(recognition_models.keys()))

# Option to draw baselines
draw_baselines = st.radio("Options", ("Do not draw baselines", "Draw baselines")) == "Draw baselines"

# Process the image if uploaded and models selected
if uploaded_image and selected_seg_model and selected_rec_model:
    # Load the image
    image = Image.open(uploaded_image).convert("RGB")  # convert to RGB so colored overlays render correctly
    st.image(image, caption="Uploaded Image", use_column_width=True)
    
    # Load selected Kraken segmentation and recognition models
    seg_model_path = segmentation_models[selected_seg_model]
    rec_model_path = recognition_models[selected_rec_model]
    seg_model = vgsl.TorchVGSLModel.load_model(seg_model_path)
    rec_model = models.load_any(rec_model_path)

    # Segment image using Kraken segmentation model
    baseline_seg = blla.segment(image, model=seg_model)

    # Pass segmentation result to recognition model
    pred_it = rpred.rpred(network=rec_model, im=image, bounds=baseline_seg)

    # Prepare to draw boundaries and display info
    boundaries_info = []
    draw = ImageDraw.Draw(image)

    # Process recognition predictions for lines and draw on image
    for idx, pred in enumerate(pred_it):
        prediction = pred.prediction
        line_boundary = [(int(x), int(y)) for x, y in pred.boundary]
        line_baseline = [(int(x), int(y)) for x, y in pred.baseline] if pred.baseline else None
        line_type = pred.tags.get("type", "undefined")  # Get line type dynamically if available

        # Record boundary, baseline (if selected), and prediction for display, in that order
        boundaries_info.append(f"**Line {idx + 1}** (type: {line_type}):\n  - Boundary: {line_boundary}")

        # Draw boundary in green
        draw.polygon(line_boundary, outline="green")

        # Draw baseline if the option is selected and add it to display info
        if draw_baselines and line_baseline:
            boundaries_info.append(f"  - Baseline: {line_baseline}")
            draw.line(line_baseline, fill="red", width=2)  # Draw baseline in red

        # Add prediction last
        boundaries_info.append(f"  - Prediction: {prediction}")

    # Process and draw region boundaries from baseline_seg
    for region_type, region_list in baseline_seg.regions.items():
        for idx, region_data in enumerate(region_list):
            if hasattr(region_data, "boundary"):
                region_boundary = [(int(x), int(y)) for x, y in region_data.boundary]
                region_type_name = region_data.tags.get("type", region_type)  # Get region type dynamically
                boundaries_info.append(f"**Region {idx + 1}** (type: {region_type_name}):\n  - Boundary: {region_boundary}")
                draw.polygon(region_boundary, outline="blue")  # Draw region boundary in blue

    # Display the image with boundaries drawn
    st.image(image, caption="Image with OCR boundaries (green for lines, blue for regions), baselines (red if selected)", use_column_width=True)

    # Display the list of boundaries, predictions, and baselines
    st.write("**List of Boundaries, Predictions, and Baselines (if selected):**")
    for info in boundaries_info:
        st.write(info)