acpotts committed on
Commit
e6849fe
1 Parent(s): dcb283f

Update pdf loader

Browse files
Files changed (1) hide show
  1. app.py +4 -2
app.py CHANGED
@@ -53,6 +53,7 @@ text_splitter = CharacterTextSplitter()
53
 
54
  def process_text_file(file: AskFileResponse):
55
  import tempfile
 
56
 
57
  with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=file.name) as temp_file:
58
  temp_file_path = temp_file.name
@@ -64,8 +65,8 @@ def process_text_file(file: AskFileResponse):
64
  text_loader = TextFileLoader(temp_file_path)
65
  documents = text_loader.load_documents()
66
  elif file.type == 'application/pdf':
67
- pdf_reader = PdfReader(temp_file_path)
68
- documents = [page.extract_text() for page in pdf_reader.pages]
69
  else:
70
  raise ValueError("Provide a .txt or .pdf file")
71
  texts = text_splitter.split_texts(documents)
@@ -73,6 +74,7 @@ def process_text_file(file: AskFileResponse):
73
  return texts
74
 
75
 
 
76
  @cl.on_chat_start
77
  async def on_chat_start():
78
  files = None
 
53
 
54
  def process_text_file(file: AskFileResponse):
55
  import tempfile
56
+ from langchain_community.document_loaders.pdf import PyPDFLoader
57
 
58
  with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=file.name) as temp_file:
59
  temp_file_path = temp_file.name
 
65
  text_loader = TextFileLoader(temp_file_path)
66
  documents = text_loader.load_documents()
67
  elif file.type == 'application/pdf':
68
+ pdf_loader = PyPDFLoader(temp_file_path)
69
+ documents = pdf_loader.load()
70
  else:
71
  raise ValueError("Provide a .txt or .pdf file")
72
  texts = text_splitter.split_texts(documents)
 
74
  return texts
75
 
76
 
77
+
78
  @cl.on_chat_start
79
  async def on_chat_start():
80
  files = None