Spaces:
Running
Running
Update test.py
Browse files
test.py
CHANGED
@@ -12,7 +12,6 @@ from docx import Document
|
|
12 |
from PIL import Image
|
13 |
import pytesseract
|
14 |
import io
|
15 |
-
import base64
|
16 |
|
17 |
app = FastAPI()
|
18 |
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
|
@@ -124,7 +123,7 @@ async def send_chunked_data(websocket: WebSocket, filename: str, data: str):
|
|
124 |
|
125 |
|
126 |
async def extract_text_from_pdf(pdf_data):
|
127 |
-
with io.BytesIO(
|
128 |
pdf_reader = PyPDF2.PdfReader(pdf_file)
|
129 |
text = ""
|
130 |
for page_num in range(pdf_reader.numPages):
|
@@ -133,7 +132,7 @@ async def extract_text_from_pdf(pdf_data):
|
|
133 |
return text
|
134 |
|
135 |
async def extract_text_from_docx(docx_data):
|
136 |
-
doc = Document(io.BytesIO(
|
137 |
text = ""
|
138 |
for para in doc.paragraphs:
|
139 |
text += para.text + "\n"
|
|
|
12 |
from PIL import Image
|
13 |
import pytesseract
|
14 |
import io
|
|
|
15 |
|
16 |
app = FastAPI()
|
17 |
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
|
|
|
123 |
|
124 |
|
125 |
async def extract_text_from_pdf(pdf_data):
|
126 |
+
with io.BytesIO(pdf_data) as pdf_file:
|
127 |
pdf_reader = PyPDF2.PdfReader(pdf_file)
|
128 |
text = ""
|
129 |
for page_num in range(pdf_reader.numPages):
|
|
|
132 |
return text
|
133 |
|
134 |
async def extract_text_from_docx(docx_data):
|
135 |
+
doc = Document(io.BytesIO(docx_data))
|
136 |
text = ""
|
137 |
for para in doc.paragraphs:
|
138 |
text += para.text + "\n"
|