Tuana committed on
Commit
a7fa548
1 Parent(s): 4d16c37

attempt summary

Browse files
Files changed (1) hide show
  1. app.py +8 -2
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import streamlit as st
2
  from haystack.document_stores import InMemoryDocumentStore
3
- from haystack.nodes import FARMReader, PreProcessor, PDFToTextConverter, TfidfRetriever
4
  from haystack.schema import Document
5
  import logging
6
 
@@ -14,6 +14,7 @@ preprocessor = PreProcessor(
14
  split_respect_sentence_boundary=True,
15
  split_overlap=3
16
  )
 
17
 
18
 
19
  def pdf_to_document_store(pdf_files):
@@ -27,6 +28,11 @@ def pdf_to_document_store(pdf_files):
27
  preprocessed_docs = preprocessor.process(documents)
28
  document_store.write_documents(preprocessed_docs)
29
  return None
 
 
 
 
 
30
 
31
  uploaded_files = st.file_uploader("Choose PDF files", accept_multiple_files=True)
32
 
@@ -34,7 +40,7 @@ if uploaded_files is not None:
34
  st.write(len(uploaded_files))
35
  if st.button('Summarize Documents'):
36
  document_store.delete_documents()
37
- pdf_to_document_store(uploaded_files)
38
 
39
  if st.button('Calculate num of docs'):
40
  st.write(document_store.get_document_count())
 
1
  import streamlit as st
2
  from haystack.document_stores import InMemoryDocumentStore
3
+ from haystack.nodes import TransformersSummarizer, PreProcessor, PDFToTextConverter, TfidfRetriever
4
  from haystack.schema import Document
5
  import logging
6
 
 
14
  split_respect_sentence_boundary=True,
15
  split_overlap=3
16
  )
17
+ summarizer = TransformersSummarizer(model_name_or_path="google/pegasus-xsum")
18
 
19
 
20
  def pdf_to_document_store(pdf_files):
 
28
  preprocessed_docs = preprocessor.process(documents)
29
  document_store.write_documents(preprocessed_docs)
30
  return None
31
+
32
+ def summarize(files):
33
+ pdf_to_document_store(files)
34
+ summary = summarizer.predict(documents=document_store.get_all_documents(), generate_single_summary=True)
35
+ st.write(summary)
36
 
37
  uploaded_files = st.file_uploader("Choose PDF files", accept_multiple_files=True)
38
 
 
40
  st.write(len(uploaded_files))
41
  if st.button('Summarize Documents'):
42
  document_store.delete_documents()
43
+ summarize(uploaded_files)
44
 
45
  if st.button('Calculate num of docs'):
46
  st.write(document_store.get_document_count())