Spaces:

wjbmattingly
/

kraken-api

Running

wjbmattingly committed on Aug 11

Commit

107dc4b

•

1 Parent(s): f381fe6

working on ocr

Files changed (2) hide show

app/main.py CHANGED Viewed

@@ -76,23 +76,26 @@ async def detect_lines(file: UploadFile = File(...)):
 @app.post("/ocr", response_model=RawResponse)
 async def perform_ocr(
     file: UploadFile = File(...),
-    model_name: str = Form("catmus-medieval"),
     binarize: bool = Form(False)
 ):
     content = await file.read()
     image = Image.open(io.BytesIO(content))
-    if binarize:
-        image = binarization.nlbin(image)
     try:
         model = models.load_any(model_name)
     except KrakenInvalidModelException:
         raise HTTPException(status_code=400, detail=f"Model '{model_name}' not found or invalid")
-    baseline_seg = pageseg.segment(image)
-    result = list(rpred.rpred(model, image, baseline_seg))
     serialized_result = [
         {

 @app.post("/ocr", response_model=RawResponse)
 async def perform_ocr(
     file: UploadFile = File(...),
+    model_name: str = Form("catmus-medieval.mlmodel"),
     binarize: bool = Form(False)
 ):
     content = await file.read()
     image = Image.open(io.BytesIO(content))
+    # Always binarize the image before segmentation
+    bw_img = binarization.nlbin(image)
     try:
         model = models.load_any(model_name)
     except KrakenInvalidModelException:
         raise HTTPException(status_code=400, detail=f"Model '{model_name}' not found or invalid")
+    baseline_seg = pageseg.segment(bw_img)
+    # Use the original image for OCR if binarize is False, otherwise use the binarized image
+    ocr_image = bw_img if binarize else image
+    result = list(rpred.rpred(model, ocr_image, baseline_seg))
     serialized_result = [
         {

send_image.py CHANGED Viewed

@@ -2,7 +2,7 @@ import requests
 import os
 # API endpoint
-url = "https://wjbmattingly-kraken-api.hf.space/process_all"
 # Path to the image file
 image_path = os.path.join("data", "ms.jpg")
@@ -10,17 +10,26 @@ image_path = os.path.join("data", "ms.jpg")
 # Prepare the file for upload
 files = {'file': ('ms.jpg', open(image_path, 'rb'), 'image/jpeg')}
 # Send the POST request
-response = requests.post(url, files=files)
 # Check if the request was successful
 if response.status_code == 200:
     # Parse the JSON response
     result = response.json()
-    for line in result["result"]["lines"]:
-        print(line)
         print()
 else:
     print(f"Error: {response.status_code}")
-    print(response.text)

 import os
 # API endpoint
+url = "https://wjbmattingly-kraken-api.hf.space/ocr"
 # Path to the image file
 image_path = os.path.join("data", "ms.jpg")
 # Prepare the file for upload
 files = {'file': ('ms.jpg', open(image_path, 'rb'), 'image/jpeg')}
+# Specify the model to use
+data = {'model_name': 'catmus-medieval.mlmodel'}
 # Send the POST request
+response = requests.post(url, files=files, data=data)
 # Check if the request was successful
 if response.status_code == 200:
     # Parse the JSON response
     result = response.json()
+    # The OCR results are directly in result['result']
+    ocr_results = result['result']
+    for record in ocr_results:
+        print(f"Text: {record['text']}")
+        print(f"Confidence: {record['confidence']}")
+        print(f"Bounding Box: {record['bbox']}")
         print()
 else:
     print(f"Error: {response.status_code}")
+    print(response.text)