Omkar008 committed on
Commit
49820d4
1 Parent(s): 5531c5f

Update test.py

Browse files
Files changed (1) hide show
  1. test.py +2 -3
test.py CHANGED
@@ -12,7 +12,6 @@ from docx import Document
12
  from PIL import Image
13
  import pytesseract
14
  import io
15
- import base64
16
 
17
  app = FastAPI()
18
  oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
@@ -124,7 +123,7 @@ async def send_chunked_data(websocket: WebSocket, filename: str, data: str):
124
 
125
 
126
  async def extract_text_from_pdf(pdf_data):
127
- with io.BytesIO(base64.b64decode(pdf_data)) as pdf_file:
128
  pdf_reader = PyPDF2.PdfReader(pdf_file)
129
  text = ""
130
  for page_num in range(pdf_reader.numPages):
@@ -133,7 +132,7 @@ async def extract_text_from_pdf(pdf_data):
133
  return text
134
 
135
  async def extract_text_from_docx(docx_data):
136
- doc = Document(io.BytesIO(base64.b64decode(docx_data)))
137
  text = ""
138
  for para in doc.paragraphs:
139
  text += para.text + "\n"
 
12
  from PIL import Image
13
  import pytesseract
14
  import io
 
15
 
16
  app = FastAPI()
17
  oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
 
123
 
124
 
125
  async def extract_text_from_pdf(pdf_data):
126
+ with io.BytesIO(pdf_data) as pdf_file:
127
  pdf_reader = PyPDF2.PdfReader(pdf_file)
128
  text = ""
129
  for page_num in range(pdf_reader.numPages):
 
132
  return text
133
 
134
  async def extract_text_from_docx(docx_data):
135
+ doc = Document(io.BytesIO(docx_data))
136
  text = ""
137
  for para in doc.paragraphs:
138
  text += para.text + "\n"