Spaces:

acpotts
/

AIE4w2a1

Sleeping

acpotts committed on Aug 23

Commit

3e04a9d

•

1 Parent(s): d567c0e

Update app.py and requirements.txt with PdfReader

Files changed (2) hide show

app.py CHANGED Viewed

@@ -11,6 +11,7 @@ from aimakerspace.openai_utils.embedding import EmbeddingModel
 from aimakerspace.vectordatabase import VectorDatabase
 from aimakerspace.openai_utils.chatmodel import ChatOpenAI
 import chainlit as cl
 system_template = """\
 Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer."""
@@ -64,12 +65,8 @@ def process_text_file(file: AskFileResponse):
         documents = text_loader.load_documents()
         texts = text_splitter.split_texts(documents)
     elif file.type == 'application/pdf':
-        import pymupdf
-        doc = pymupdf.open(temp_file_path)
-        texts = ""
-        for page_num in range(len(doc)):
-            page = doc.load_page(page_num)
-            texts += page.get_text()
     else:
         raise ValueError("Provide a .txt or .pdf file")

 from aimakerspace.vectordatabase import VectorDatabase
 from aimakerspace.openai_utils.chatmodel import ChatOpenAI
 import chainlit as cl
+from PyPDF2 import PdfReader
 system_template = """\
 Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer."""
         documents = text_loader.load_documents()
         texts = text_splitter.split_texts(documents)
     elif file.type == 'application/pdf':
+        pdf_reader = PdfReader(temp_file_path)
+        documents = [page.extract_text() for page in pdf_reader.pages]
     else:
         raise ValueError("Provide a .txt or .pdf file")

requirements.txt CHANGED Viewed

@@ -1,4 +1,4 @@
 numpy
 chainlit==0.7.700
 openai
-pymupdf

 numpy
 chainlit==0.7.700
 openai
+PdfReader