Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -7,7 +7,7 @@ import io
|
|
7 |
import edge_tts
|
8 |
import asyncio
|
9 |
import aiofiles
|
10 |
-
import
|
11 |
import os
|
12 |
from typing import List, Dict, Tuple
|
13 |
|
@@ -277,8 +277,8 @@ class TextExtractor:
|
|
277 |
async def extract_from_pdf(file_path: str) -> str:
|
278 |
async with aiofiles.open(file_path, 'rb') as file:
|
279 |
content = await file.read()
|
280 |
-
pdf_reader =
|
281 |
-
return "
|
282 |
|
283 |
@staticmethod
|
284 |
async def extract_from_txt(file_path: str) -> str:
|
|
|
7 |
import edge_tts
|
8 |
import asyncio
|
9 |
import aiofiles
|
10 |
+
import pypdf
|
11 |
import os
|
12 |
from typing import List, Dict, Tuple
|
13 |
|
|
|
277 |
async def extract_from_pdf(file_path: str) -> str:
|
278 |
async with aiofiles.open(file_path, 'rb') as file:
|
279 |
content = await file.read()
|
280 |
+
pdf_reader = pypdf.PdfReader(io.BytesIO(content))
|
281 |
+
return "\n\n".join(page.extract_text() for page in pdf_reader.pages if page.extract_text())
|
282 |
|
283 |
@staticmethod
|
284 |
async def extract_from_txt(file_path: str) -> str:
|