wjbmattingly committed on
Commit a0babed
1 Parent(s): 2f2cdb8

expanded api to include all features dl model

Files changed (2)
  1. Dockerfile +5 -1
  2. app/main.py +125 -35
Dockerfile CHANGED
@@ -7,7 +7,8 @@ RUN apt-get update && apt-get install -y \
     libsm6 \
     libxext6 \
     libxrender-dev \
-    libgl1-mesa-glx
+    libgl1-mesa-glx \
+    wget

 # Set the working directory in the container
 WORKDIR /code
@@ -18,6 +19,9 @@ COPY ./requirements.txt /code/requirements.txt
 # Install Python dependencies
 RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

+# Download the Kraken model
+RUN kraken get 10.5281/zenodo.12743230
+
 # Copy the FastAPI app into the container
 COPY ./app /code/app

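The new wget package and the RUN kraken get 10.5281/zenodo.12743230 layer bake the recognition model into the image at build time, so the endpoints below can load it without downloading weights at request time. As a minimal sketch of how the application could confirm the model is present before serving requests (the search paths and the startup check itself are assumptions, not part of this commit; only models.load_any and the catmus-medieval.mlmodel default come from the code):

from pathlib import Path

from kraken.lib import models

MODEL_NAME = "catmus-medieval.mlmodel"  # default used by the /ocr and /process_all endpoints

def find_model(name: str = MODEL_NAME) -> Path:
    """Look for the model next to the app, then in an assumed kraken config directory."""
    candidates = [
        Path(name),
        Path.home() / ".config" / "kraken" / name,  # assumed download location, not verified
    ]
    for candidate in candidates:
        if candidate.is_file():
            return candidate
    raise FileNotFoundError(f"{name} not found; was `kraken get` run during the image build?")

if __name__ == "__main__":
    model_path = find_model()
    model = models.load_any(str(model_path))  # load_any takes a filesystem path to a .mlmodel
    print("recognition model loaded from", model_path)

If kraken get stores the weights under a versioned filename, the model_path form default in app/main.py would need to point at that exact name.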
app/main.py CHANGED
@@ -1,50 +1,140 @@
-from fastapi import FastAPI, UploadFile, File
-from fastapi.responses import JSONResponse
-import subprocess
+from fastapi import FastAPI, UploadFile, File, Form, HTTPException
+from fastapi.responses import JSONResponse, FileResponse
+from pydantic import BaseModel
+import io
+from PIL import Image
 import json
-import os
 import tempfile
-import shutil
-from pydantic import BaseModel
-import torch
+import base64
+from typing import List, Optional
+
+from kraken import binarization
+from kraken import pageseg
+from kraken import rpred
+from kraken.lib import models
+from kraken import serialization

 app = FastAPI()

 class LineDetectionResponse(BaseModel):
-    lines: list
+    lines: List[dict]
+
+class OCRResponse(BaseModel):
+    text: str
+
+class SegmentationResponse(BaseModel):
+    regions: List[dict]
+    lines: List[dict]
+
+class ComprehensiveResponse(BaseModel):
+    binarized_image: str
+    segmentation: SegmentationResponse
+    ocr_result: str

 @app.post("/detect_lines", response_model=LineDetectionResponse)
 async def detect_lines(file: UploadFile = File(...)):
-    # Check if CUDA is available
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-
-    # Create a temporary directory
-    with tempfile.TemporaryDirectory() as temp_dir:
-        # Save the uploaded file
-        temp_file_path = os.path.join(temp_dir, file.filename)
-        with open(temp_file_path, "wb") as buffer:
-            shutil.copyfileobj(file.file, buffer)
-
-        # Set up the output JSON path
-        lines_json_path = os.path.join(temp_dir, "lines.json")
-
-        # Run Kraken for line detection
-        kraken_command = f"kraken -i {temp_file_path} {lines_json_path} segment -bl"
-        subprocess.run(kraken_command, shell=True, check=True)
-
-        # Load the lines from the JSON file
-        with open(lines_json_path, 'r') as f:
-            lines_data = json.load(f)
-
-        # Return the lines data
-        return LineDetectionResponse(lines=lines_data['lines'])
-
-# Optionally, you can add a root endpoint for basic information
+    content = await file.read()
+    image = Image.open(io.BytesIO(content))
+
+    bw_img = binarization.nlbin(image)
+
+    baseline_seg = pageseg.segment(bw_img, text_direction='horizontal-lr')
+
+    lines_data = [line.to_dict() for line in baseline_seg.lines]
+
+    return LineDetectionResponse(lines=lines_data)
+
+@app.post("/ocr", response_model=OCRResponse)
+async def perform_ocr(
+    file: UploadFile = File(...),
+    model_path: str = Form("catmus-medieval.mlmodel"),
+    binarize: bool = Form(False)
+):
+    content = await file.read()
+    image = Image.open(io.BytesIO(content))
+
+    if binarize:
+        image = binarization.nlbin(image)
+
+    model = models.load_any(model_path)
+
+    baseline_seg = pageseg.segment(image)
+
+    result = rpred.rpred(model, image, baseline_seg)
+    text = '\n'.join(record.prediction for record in result)
+
+    return OCRResponse(text=text)
+
+@app.post("/segment", response_model=SegmentationResponse)
+async def segment_image(
+    file: UploadFile = File(...),
+    baseline: bool = Form(True)
+):
+    content = await file.read()
+    image = Image.open(io.BytesIO(content))
+
+    bw_img = binarization.nlbin(image)
+
+    if baseline:
+        segmentation = pageseg.segment(bw_img)
+    else:
+        segmentation = pageseg.segment(bw_img, text_direction='horizontal-lr')
+
+    regions_data = [region.to_dict() for region in segmentation.regions]
+    lines_data = [line.to_dict() for line in segmentation.lines]
+
+    return SegmentationResponse(regions=regions_data, lines=lines_data)
+
+@app.post("/binarize")
+async def binarize_image(file: UploadFile = File(...)):
+    content = await file.read()
+    image = Image.open(io.BytesIO(content))
+
+    bw_img = binarization.nlbin(image)
+
+    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
+        bw_img.save(temp_file.name)
+        return FileResponse(temp_file.name, media_type="image/png", filename="binarized.png")
+
+@app.post("/process_all", response_model=ComprehensiveResponse)
+async def process_all(
+    file: UploadFile = File(...),
+    model_path: str = Form("catmus-medieval.mlmodel")
+):
+    content = await file.read()
+    image = Image.open(io.BytesIO(content))
+
+    # Step 1: Binarization
+    bw_img = binarization.nlbin(image)
+
+    # Convert binarized image to base64 for JSON response
+    buffered = io.BytesIO()
+    bw_img.save(buffered, format="PNG")
+    binarized_base64 = base64.b64encode(buffered.getvalue()).decode()
+
+    # Step 2: Segmentation
+    segmentation = pageseg.segment(bw_img)
+    regions_data = [region.to_dict() for region in segmentation.regions]
+    lines_data = [line.to_dict() for line in segmentation.lines]
+
+    # Step 3: OCR
+    model = models.load_any(model_path)
+    result = rpred.rpred(model, bw_img, segmentation)
+    ocr_text = '\n'.join(record.prediction for record in result)
+
+    return ComprehensiveResponse(
+        binarized_image=binarized_base64,
+        segmentation=SegmentationResponse(regions=regions_data, lines=lines_data),
+        ocr_result=ocr_text
+    )
+
 @app.get("/")
 async def root():
-    return {"message": "Welcome to the Kraken Line Detection API"}
+    return {
+        "message": "Welcome to the Comprehensive Kraken Python API",
+        "available_endpoints": ["/detect_lines", "/ocr", "/segment", "/binarize", "/process_all"]
+    }

-# To run the app with GPU support on Hugging Face Spaces, you need to use uvicorn with the following settings:
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=7860, workers=1)
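
A minimal client sketch against the expanded API, for reference; the endpoint paths, multipart field name, and form parameters are taken from the diff above, while the host, port, and sample file name are assumptions:

import requests

BASE_URL = "http://localhost:7860"  # assumed local deployment; uvicorn binds port 7860 in main.py

with open("page.jpg", "rb") as fh:  # sample image name is a placeholder
    files = {"file": ("page.jpg", fh, "image/jpeg")}
    # /process_all runs binarization, segmentation, and OCR in a single request
    response = requests.post(
        f"{BASE_URL}/process_all",
        files=files,
        data={"model_path": "catmus-medieval.mlmodel"},
    )

response.raise_for_status()
result = response.json()
print(result["ocr_result"])
print(len(result["segmentation"]["lines"]), "lines detected")

The other endpoints (/detect_lines, /ocr, /segment) accept the same multipart file field and return JSON, while /binarize streams back a PNG instead.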