acpotts committed on
Commit
3e04a9d
1 Parent(s): d567c0e

Update app.py and requirements.txt with PdfReader

Browse files
Files changed (2) hide show
  1. app.py +3 -6
  2. requirements.txt +1 -1
app.py CHANGED
@@ -11,6 +11,7 @@ from aimakerspace.openai_utils.embedding import EmbeddingModel
11
  from aimakerspace.vectordatabase import VectorDatabase
12
  from aimakerspace.openai_utils.chatmodel import ChatOpenAI
13
  import chainlit as cl
 
14
 
15
  system_template = """\
16
  Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer."""
@@ -64,12 +65,8 @@ def process_text_file(file: AskFileResponse):
64
  documents = text_loader.load_documents()
65
  texts = text_splitter.split_texts(documents)
66
  elif file.type == 'application/pdf':
67
- import pymupdf
68
- doc = pymupdf.open(temp_file_path)
69
- texts = ""
70
- for page_num in range(len(doc)):
71
- page = doc.load_page(page_num)
72
- texts += page.get_text()
73
  else:
74
  raise ValueError("Provide a .txt or .pdf file")
75
 
 
11
  from aimakerspace.vectordatabase import VectorDatabase
12
  from aimakerspace.openai_utils.chatmodel import ChatOpenAI
13
  import chainlit as cl
14
+ from PyPDF2 import PdfReader
15
 
16
  system_template = """\
17
  Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer."""
 
65
  documents = text_loader.load_documents()
66
  texts = text_splitter.split_texts(documents)
67
  elif file.type == 'application/pdf':
68
+ pdf_reader = PdfReader(temp_file_path)
69
+ documents = [page.extract_text() for page in pdf_reader.pages]
 
 
 
 
70
  else:
71
  raise ValueError("Provide a .txt or .pdf file")
72
 
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
  numpy
2
  chainlit==0.7.700
3
  openai
4
- pymupdf
 
1
  numpy
2
  chainlit==0.7.700
3
  openai
4
+ PyPDF2