Tuana committed on
Commit
6c152f9
1 Parent(s): 836e16d

working with list of PDFs

Browse files
Files changed (1) hide show
  1. app.py +5 -2
app.py CHANGED
@@ -19,7 +19,10 @@ preprocessor = PreProcessor(
19
  def pdf_to_document_store(pdf_files):
20
  document_store.delete_documents()
21
  converter = PDFToTextConverter(remove_numeric_tables=True, valid_languages=["en"])
22
- documents = [Document.from_dict(converter.convert(file_path=pdf.name, meta=None) for pdf in pdf_files)]
 
 
 
23
  preprocessed_docs = preprocessor.process(documents)
24
  document_store.write_documents(preprocessed_docs)
25
  return None
@@ -28,5 +31,5 @@ uploaded_files = st.file_uploader("Choose PDF files", accept_multiple_files=True
28
 
29
  if uploaded_files is not None:
30
  pdf_to_document_store(uploaded_files)
31
- st.write(document_store.get_document_count)
32
 
 
19
  def pdf_to_document_store(pdf_files):
20
  document_store.delete_documents()
21
  converter = PDFToTextConverter(remove_numeric_tables=True, valid_languages=["en"])
22
+ documents = []
23
+ for pdf in pdf_files:
24
+ docs = converter.convert(file_path=pdf.name, meta=None)
25
+ documents.append(docs)
26
  preprocessed_docs = preprocessor.process(documents)
27
  document_store.write_documents(preprocessed_docs)
28
  return None
 
31
 
32
  if uploaded_files is not None:
33
  pdf_to_document_store(uploaded_files)
34
+ st.write(document_store.get_document_count())
35