Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -14,7 +14,6 @@ def start_haystack():
|
|
14 |
split_by="word",
|
15 |
split_length=100,
|
16 |
split_respect_sentence_boundary=True,
|
17 |
-
split_overlap=0
|
18 |
)
|
19 |
summarizer = TransformersSummarizer(model_name_or_path="google/pegasus-newsroom")
|
20 |
return document_store, summarizer, preprocessor
|
@@ -24,9 +23,10 @@ def pdf_to_document_store(pdf_files):
|
|
24 |
converter = PDFToTextConverter(remove_numeric_tables=True, valid_languages=["en"])
|
25 |
documents = []
|
26 |
for pdf in pdf_files:
|
27 |
-
doc = converter.convert(file_path=pdf.name, meta=None)
|
|
|
28 |
documents.append(doc)
|
29 |
-
document_store.write_documents(
|
30 |
st.write('Document count: ', document_store.get_document_count())
|
31 |
|
32 |
|
|
|
14 |
split_by="word",
|
15 |
split_length=100,
|
16 |
split_respect_sentence_boundary=True,
|
|
|
17 |
)
|
18 |
summarizer = TransformersSummarizer(model_name_or_path="google/pegasus-newsroom")
|
19 |
return document_store, summarizer, preprocessor
|
|
|
23 |
converter = PDFToTextConverter(remove_numeric_tables=True, valid_languages=["en"])
|
24 |
documents = []
|
25 |
for pdf in pdf_files:
|
26 |
+
doc = converter.convert(file_path=pdf.name, meta=None)[0]
|
27 |
+
preprocessed_doc=preprocessor.process([doc])
|
28 |
documents.append(doc)
|
29 |
+
document_store.write_documents(preprocessed_doc)
|
30 |
st.write('Document count: ', document_store.get_document_count())
|
31 |
|
32 |
|