Geetansh committed on
Commit
52916f3
1 Parent(s): dd52982

Changed the pytesseract code, since tesseract-ocr is already available in PATH on Hugging Face Spaces

Browse files
Files changed (2) hide show
  1. image_to_text.py +10 -3
  2. pdf_to_image.py +0 -1
image_to_text.py CHANGED
@@ -1,10 +1,17 @@
1
  from PIL import Image
2
  import pytesseract
3
 
4
- # If you don't have tesseract executable in your PATH, include the following:
5
- pytesseract.pytesseract.tesseract_cmd = r'C:\TesseractOCR\tesseract'
 
 
 
 
 
 
 
6
 
7
  # Simple image to string
8
  def img2string(imgPath):
9
  textOfImage = pytesseract.image_to_string(imgPath)
10
- return textOfImage
 
1
  from PIL import Image
2
  import pytesseract
3
 
4
+ # # If you don't have tesseract executable in your PATH, include the following:
5
+ # pytesseract.pytesseract.tesseract_cmd = r'C:\TesseractOCR\tesseract'
6
+
7
+ # # Simple image to string
8
+ # def img2string(imgPath):
9
+ # textOfImage = pytesseract.image_to_string(imgPath)
10
+ # return textOfImage
11
+
12
+ # Changed version of above code for deployment on huggingface spaces
13
 
14
  # Simple image to string
15
  def img2string(imgPath):
16
  textOfImage = pytesseract.image_to_string(imgPath)
17
+ return textOfImage
pdf_to_image.py CHANGED
@@ -23,7 +23,6 @@ from pdf2image.exceptions import (
23
  # return images
24
 
25
  # Changed version of above code for deployment on huggingface spaces
26
-
27
  def pdfToImg(pdfPath, outputPath):
28
  '''
29
  1)Images stored in output folder
 
23
  # return images
24
 
25
  # Changed version of above code for deployment on huggingface spaces
 
26
  def pdfToImg(pdfPath, outputPath):
27
  '''
28
  1)Images stored in output folder