wjbmattingly committed on
Commit a0babed
1 Parent(s): 2f2cdb8

expanded api to include all features dl model

Files changed (2)
  1. Dockerfile +5 -1
  2. app/main.py +125 -35
Dockerfile CHANGED
@@ -7,7 +7,8 @@ RUN apt-get update && apt-get install -y \
     libsm6 \
     libxext6 \
     libxrender-dev \
-    libgl1-mesa-glx
+    libgl1-mesa-glx \
+    wget

 # Set the working directory in the container
 WORKDIR /code
@@ -18,6 +19,9 @@ COPY ./requirements.txt /code/requirements.txt
 # Install Python dependencies
 RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

+# Download the Kraken model
+RUN kraken get 10.5281/zenodo.12743230
+
 # Copy the FastAPI app into the container
 COPY ./app /code/app

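The new wget package and the RUN kraken get 10.5281/zenodo.12743230 layer bake the recognition model into the image at build time, so the endpoints below can load it without downloading weights at request time. As a minimal sketch of how the application could confirm the model is present before serving requests (the search paths and the startup check itself are assumptions, not part of this commit; only models.load_any and the catmus-medieval.mlmodel default come from the code):

from pathlib import Path

from kraken.lib import models

MODEL_NAME = "catmus-medieval.mlmodel"  # default used by the /ocr and /process_all endpoints

def find_model(name: str = MODEL_NAME) -> Path:
    """Look for the model next to the app, then in an assumed kraken config directory."""
    candidates = [
        Path(name),
        Path.home() / ".config" / "kraken" / name,  # assumed download location, not verified
    ]
    for candidate in candidates:
        if candidate.is_file():
            return candidate
    raise FileNotFoundError(f"{name} not found; was `kraken get` run during the image build?")

if __name__ == "__main__":
    model_path = find_model()
    model = models.load_any(str(model_path))  # load_any takes a filesystem path to a .mlmodel
    print("recognition model loaded from", model_path)

If kraken get stores the weights under a versioned filename, the model_path form default in app/main.py would need to point at that exact name.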
app/main.py CHANGED
@@ -1,50 +1,140 @@
-from fastapi import FastAPI, UploadFile, File
-from fastapi.responses import JSONResponse
-import subprocess
+from fastapi import FastAPI, UploadFile, File, Form, HTTPException
+from fastapi.responses import JSONResponse, FileResponse
+from pydantic import BaseModel
+import io
+from PIL import Image
 import json
-import os
 import tempfile
-import shutil
-from pydantic import BaseModel
-import torch
+import base64
+from typing import List, Optional
+
+from kraken import binarization
+from kraken import pageseg
+from kraken import rpred
+from kraken.lib import models
+from kraken import serialization

 app = FastAPI()

 class LineDetectionResponse(BaseModel):
-    lines: list
+    lines: List[dict]
+
+class OCRResponse(BaseModel):
+    text: str
+
+class SegmentationResponse(BaseModel):
+    regions: List[dict]
+    lines: List[dict]
+
+class ComprehensiveResponse(BaseModel):
+    binarized_image: str
+    segmentation: SegmentationResponse
+    ocr_result: str

 @app.post("/detect_lines", response_model=LineDetectionResponse)
 async def detect_lines(file: UploadFile = File(...)):
-    # Check if CUDA is available
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-
-    # Create a temporary directory
-    with tempfile.TemporaryDirectory() as temp_dir:
-        # Save the uploaded file
-        temp_file_path = os.path.join(temp_dir, file.filename)
-        with open(temp_file_path, "wb") as buffer:
-            shutil.copyfileobj(file.file, buffer)
-
-        # Set up the output JSON path
-        lines_json_path = os.path.join(temp_dir, "lines.json")
-
-        # Run Kraken for line detection
-        kraken_command = f"kraken -i {temp_file_path} {lines_json_path} segment -bl"
-        subprocess.run(kraken_command, shell=True, check=True)
-
-        # Load the lines from the JSON file
-        with open(lines_json_path, 'r') as f:
-            lines_data = json.load(f)
-
-        # Return the lines data
-        return LineDetectionResponse(lines=lines_data['lines'])
-
-# Optionally, you can add a root endpoint for basic information
+    content = await file.read()
+    image = Image.open(io.BytesIO(content))
+
+    bw_img = binarization.nlbin(image)
+
+    baseline_seg = pageseg.segment(bw_img, text_direction='horizontal-lr')
+
+    lines_data = [line.to_dict() for line in baseline_seg.lines]
+
+    return LineDetectionResponse(lines=lines_data)
+
+@app.post("/ocr", response_model=OCRResponse)
+async def perform_ocr(
+    file: UploadFile = File(...),
+    model_path: str = Form("catmus-medieval.mlmodel"),
+    binarize: bool = Form(False)
+):
+    content = await file.read()
+    image = Image.open(io.BytesIO(content))
+
+    if binarize:
+        image = binarization.nlbin(image)
+
+    model = models.load_any(model_path)
+
+    baseline_seg = pageseg.segment(image)
+
+    result = rpred.rpred(model, image, baseline_seg)
+    text = '\n'.join(record.prediction for record in result)
+
+    return OCRResponse(text=text)
+
+@app.post("/segment", response_model=SegmentationResponse)
+async def segment_image(
+    file: UploadFile = File(...),
+    baseline: bool = Form(True)
+):
+    content = await file.read()
+    image = Image.open(io.BytesIO(content))
+
+    bw_img = binarization.nlbin(image)
+
+    if baseline:
+        segmentation = pageseg.segment(bw_img)
+    else:
+        segmentation = pageseg.segment(bw_img, text_direction='horizontal-lr')
+
+    regions_data = [region.to_dict() for region in segmentation.regions]
+    lines_data = [line.to_dict() for line in segmentation.lines]
+
+    return SegmentationResponse(regions=regions_data, lines=lines_data)
+
+@app.post("/binarize")
+async def binarize_image(file: UploadFile = File(...)):
+    content = await file.read()
+    image = Image.open(io.BytesIO(content))
+
+    bw_img = binarization.nlbin(image)
+
+    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
+        bw_img.save(temp_file.name)
+        return FileResponse(temp_file.name, media_type="image/png", filename="binarized.png")
+
+@app.post("/process_all", response_model=ComprehensiveResponse)
+async def process_all(
+    file: UploadFile = File(...),
+    model_path: str = Form("catmus-medieval.mlmodel")
+):
+    content = await file.read()
+    image = Image.open(io.BytesIO(content))
+
+    # Step 1: Binarization
+    bw_img = binarization.nlbin(image)
+
+    # Convert binarized image to base64 for JSON response
+    buffered = io.BytesIO()
+    bw_img.save(buffered, format="PNG")
+    binarized_base64 = base64.b64encode(buffered.getvalue()).decode()
+
+    # Step 2: Segmentation
+    segmentation = pageseg.segment(bw_img)
+    regions_data = [region.to_dict() for region in segmentation.regions]
+    lines_data = [line.to_dict() for line in segmentation.lines]
+
+    # Step 3: OCR
+    model = models.load_any(model_path)
+    result = rpred.rpred(model, bw_img, segmentation)
+    ocr_text = '\n'.join(record.prediction for record in result)
+
+    return ComprehensiveResponse(
+        binarized_image=binarized_base64,
+        segmentation=SegmentationResponse(regions=regions_data, lines=lines_data),
+        ocr_result=ocr_text
+    )
+
 @app.get("/")
 async def root():
-    return {"message": "Welcome to the Kraken Line Detection API"}
+    return {
+        "message": "Welcome to the Comprehensive Kraken Python API",
+        "available_endpoints": ["/detect_lines", "/ocr", "/segment", "/binarize", "/process_all"]
+    }

-# To run the app with GPU support on Hugging Face Spaces, you need to use uvicorn with the following settings:
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=7860, workers=1)
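
A minimal client sketch against the expanded API, for reference; the endpoint paths, multipart field name, and form parameters are taken from the diff above, while the host, port, and sample file name are assumptions:

import requests

BASE_URL = "http://localhost:7860"  # assumed local deployment; uvicorn binds port 7860 in main.py

with open("page.jpg", "rb") as fh:  # sample image name is a placeholder
    files = {"file": ("page.jpg", fh, "image/jpeg")}
    # /process_all runs binarization, segmentation, and OCR in a single request
    response = requests.post(
        f"{BASE_URL}/process_all",
        files=files,
        data={"model_path": "catmus-medieval.mlmodel"},
    )

response.raise_for_status()
result = response.json()
print(result["ocr_result"])
print(len(result["segmentation"]["lines"]), "lines detected")

The other endpoints (/detect_lines, /ocr, /segment) accept the same multipart file field and return JSON, while /binarize streams back a PNG instead.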