wjbmattingly committed on
Commit
107dc4b
1 Parent(s): f381fe6

working on ocr

Browse files
Files changed (2) hide show
  1. app/main.py +8 -5
  2. send_image.py +14 -5
app/main.py CHANGED
@@ -76,23 +76,26 @@ async def detect_lines(file: UploadFile = File(...)):
76
  @app.post("/ocr", response_model=RawResponse)
77
  async def perform_ocr(
78
  file: UploadFile = File(...),
79
- model_name: str = Form("catmus-medieval"),
80
  binarize: bool = Form(False)
81
  ):
82
  content = await file.read()
83
  image = Image.open(io.BytesIO(content))
84
 
85
- if binarize:
86
- image = binarization.nlbin(image)
87
 
88
  try:
89
  model = models.load_any(model_name)
90
  except KrakenInvalidModelException:
91
  raise HTTPException(status_code=400, detail=f"Model '{model_name}' not found or invalid")
92
 
93
- baseline_seg = pageseg.segment(image)
 
 
 
94
 
95
- result = list(rpred.rpred(model, image, baseline_seg))
96
 
97
  serialized_result = [
98
  {
 
76
  @app.post("/ocr", response_model=RawResponse)
77
  async def perform_ocr(
78
  file: UploadFile = File(...),
79
+ model_name: str = Form("catmus-medieval.mlmodel"),
80
  binarize: bool = Form(False)
81
  ):
82
  content = await file.read()
83
  image = Image.open(io.BytesIO(content))
84
 
85
+ # Always binarize the image before segmentation
86
+ bw_img = binarization.nlbin(image)
87
 
88
  try:
89
  model = models.load_any(model_name)
90
  except KrakenInvalidModelException:
91
  raise HTTPException(status_code=400, detail=f"Model '{model_name}' not found or invalid")
92
 
93
+ baseline_seg = pageseg.segment(bw_img)
94
+
95
+ # Use the original image for OCR if binarize is False, otherwise use the binarized image
96
+ ocr_image = bw_img if binarize else image
97
 
98
+ result = list(rpred.rpred(model, ocr_image, baseline_seg))
99
 
100
  serialized_result = [
101
  {
send_image.py CHANGED
@@ -2,7 +2,7 @@ import requests
2
  import os
3
 
4
  # API endpoint
5
- url = "https://wjbmattingly-kraken-api.hf.space/process_all"
6
 
7
  # Path to the image file
8
  image_path = os.path.join("data", "ms.jpg")
@@ -10,17 +10,26 @@ image_path = os.path.join("data", "ms.jpg")
10
  # Prepare the file for upload
11
  files = {'file': ('ms.jpg', open(image_path, 'rb'), 'image/jpeg')}
12
 
 
 
 
13
  # Send the POST request
14
- response = requests.post(url, files=files)
15
 
16
  # Check if the request was successful
17
  if response.status_code == 200:
18
  # Parse the JSON response
19
  result = response.json()
20
- for line in result["result"]["lines"]:
21
- print(line)
 
 
 
 
 
 
22
  print()
23
 
24
  else:
25
  print(f"Error: {response.status_code}")
26
- print(response.text)
 
2
  import os
3
 
4
  # API endpoint
5
+ url = "https://wjbmattingly-kraken-api.hf.space/ocr"
6
 
7
  # Path to the image file
8
  image_path = os.path.join("data", "ms.jpg")
 
10
  # Prepare the file for upload
11
  files = {'file': ('ms.jpg', open(image_path, 'rb'), 'image/jpeg')}
12
 
13
+ # Specify the model to use
14
+ data = {'model_name': 'catmus-medieval.mlmodel'}
15
+
16
  # Send the POST request
17
+ response = requests.post(url, files=files, data=data)
18
 
19
  # Check if the request was successful
20
  if response.status_code == 200:
21
  # Parse the JSON response
22
  result = response.json()
23
+
24
+ # The OCR results are directly in result['result']
25
+ ocr_results = result['result']
26
+
27
+ for record in ocr_results:
28
+ print(f"Text: {record['text']}")
29
+ print(f"Confidence: {record['confidence']}")
30
+ print(f"Bounding Box: {record['bbox']}")
31
  print()
32
 
33
  else:
34
  print(f"Error: {response.status_code}")
35
+ print(response.text)