Spaces:
Build error
Build error
working with list of PDFs
Browse files
app.py
CHANGED
@@ -19,7 +19,10 @@ preprocessor = PreProcessor(
|
|
19 |
def pdf_to_document_store(pdf_files):
|
20 |
document_store.delete_documents()
|
21 |
converter = PDFToTextConverter(remove_numeric_tables=True, valid_languages=["en"])
|
22 |
-
documents = [
|
|
|
|
|
|
|
23 |
preprocessed_docs = preprocessor.process(documents)
|
24 |
document_store.write_documents(preprocessed_docs)
|
25 |
return None
|
@@ -28,5 +31,5 @@ uploaded_files = st.file_uploader("Choose PDF files", accept_multiple_files=True
|
|
28 |
|
29 |
if uploaded_files is not None:
|
30 |
pdf_to_document_store(uploaded_files)
|
31 |
-
st.write(document_store.get_document_count)
|
32 |
|
|
|
19 |
def pdf_to_document_store(pdf_files):
    """Rebuild the document store from the given PDF files.

    Clears ``document_store``, converts every entry of *pdf_files* with a
    ``PDFToTextConverter``, runs the collected documents through
    ``preprocessor`` and writes the result to ``document_store``.

    Args:
        pdf_files: Iterable of objects exposing a ``name`` attribute; that
            value is handed to the converter as ``file_path``.

    Returns:
        None. The function works through its effect on ``document_store``.
    """
    document_store.delete_documents()
    converter = PDFToTextConverter(remove_numeric_tables=True, valid_languages=["en"])
    documents = []
    for pdf in pdf_files:
        # NOTE(review): ``pdf.name`` is passed as a filesystem path. If the
        # entries are in-memory uploads (the caller appears to pass the
        # result of st.file_uploader), the name may not exist on disk --
        # confirm, and spool the bytes to a temporary file if so.
        converted = converter.convert(file_path=pdf.name, meta=None)
        # Keep ``documents`` flat: a converter that returns a list of
        # documents per file must be merged in, not nested, otherwise the
        # preprocessor receives a list of lists. A single (non-list) result
        # is still appended as before.
        if isinstance(converted, list):
            documents.extend(converted)
        else:
            documents.append(converted)
    preprocessed_docs = preprocessor.process(documents)
    document_store.write_documents(preprocessed_docs)
|
|
|
31 |
|
32 |
# The uploader is created with accept_multiple_files=True, so it is expected
# to hand back a list; an ``is not None`` check would let an empty selection
# through and rebuild the store from nothing. Truthiness covers both None
# and the empty list.
# NOTE(review): st.write is assumed to belong under this guard -- confirm
# against the original indentation.
if uploaded_files:
    pdf_to_document_store(uploaded_files)
    st.write(document_store.get_document_count())
|
35 |
|