pdf demo on summarization
- README.md +5 -5
- app.py +3 -3
- description.py +5 -2
- reference_string_parsing.py +4 -4
- requirements.txt +1 -1
- summarization.py +3 -3
README.md
CHANGED
@@ -1,10 +1,10 @@
 ---
-title:
-emoji:
-colorFrom:
-colorTo:
+title: Test Sciassist
+emoji: π
+colorFrom: red
+colorTo: red
 sdk: gradio
-sdk_version: 3.
+sdk_version: 3.4
 app_file: app.py
 pinned: false
 license: afl-3.0
app.py
CHANGED
@@ -45,7 +45,7 @@ with gr.Blocks(css="#htext span {white-space: pre-line}") as demo:
         combine_adjacent=True,
         adjacent_separator=" ",
     )
-    rsp_file_examples = gr.Examples(examples=[["examples/N18-3011_ref.txt", False],], inputs=[rsp_file, rsp_file_dehyphen])
+    rsp_file_examples = gr.Examples(examples=[["examples/N18-3011_ref.txt", False],["examples/BERT_paper.pdf", True]], inputs=[rsp_file, rsp_file_dehyphen])
 
 
     rsp_file_btn.click(
@@ -91,7 +91,7 @@ with gr.Blocks(css="#htext span {white-space: pre-line}") as demo:
         elem_id="htext",
         label="Summary",
     )
-    ssum_file_examples = gr.Examples(examples=[["examples/BERT_body.txt", 10, 2],],
+    ssum_file_examples = gr.Examples(examples=[["examples/BERT_body.txt", 10, 2],["examples/BERT_paper.pdf", 1, 1]],
                                      inputs=[ssum_file, ssum_file_beams, ssum_file_sequences])
 
     ssum_file_btn.click(
@@ -108,4 +108,4 @@ with gr.Blocks(css="#htext span {white-space: pre-line}") as demo:
 
 
 
-demo.launch()
+demo.launch(share=True)
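The change above adds the new PDF file to the existing gr.Examples rows and asks for a public link via demo.launch(share=True). As a rough illustration of the pattern only (not the actual app.py, which defines many more components), a minimal Blocks demo wiring a file input, a checkbox, and an examples table could look like the sketch below, assuming the examples/ files from this Space are present:

import gradio as gr

def show_choice(uploaded_file, dehyphen):
    # Placeholder callback standing in for rsp_for_file in the real app.
    name = uploaded_file.name if uploaded_file is not None else "no file"
    return f"{name} (dehyphen={dehyphen})"

with gr.Blocks() as demo:
    rsp_file = gr.File(label="Input file (.txt or .pdf)")
    rsp_file_dehyphen = gr.Checkbox(label="dehyphen", value=False)
    result = gr.Textbox(label="Result")
    # Each inner list supplies one value per component in `inputs`, so
    # ["examples/BERT_paper.pdf", True] pre-fills the file picker and the checkbox.
    gr.Examples(
        examples=[["examples/N18-3011_ref.txt", False],
                  ["examples/BERT_paper.pdf", True]],
        inputs=[rsp_file, rsp_file_dehyphen],
    )
    rsp_file_btn = gr.Button("Parse")
    rsp_file_btn.click(fn=show_choice, inputs=[rsp_file, rsp_file_dehyphen], outputs=result)

demo.launch(share=True)  # share=True requests a temporary public URL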
description.py
CHANGED
@@ -7,7 +7,8 @@ rsp_file_md = '''
 To **test on a file**, the input can be:
 
 - A txt file which contains a reference string in each line.
-
+
+- A pdf file which contains a whole scientific documention without any preprocessing(including title, author, body text...).
 
 '''
 # - A pdf file which contains a whole scientific document without any processing (including title, author...).
@@ -24,7 +25,9 @@ To **test on a file**, the input can be:
 
 - A txt file which contains the content to be summarized.
 
+- A pdf file which contains a whole scientific documention without any preprocessing(including title, author, body text...).
+
+
 **Note**: The **number of beams** should be **divisible** by the **number of generated summaries** for group beam search.
 
 '''
-# - A pdf file which contains a whole scientific document without any processing (including title, author...).
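The note on beam counts is the one hard constraint the description states: the number of beams must be divisible by the number of generated summaries, presumably because the pipeline splits the beams evenly across groups for group beam search. A quick sanity check of that rule (an illustrative sketch, not part of the Space's code):

def beams_ok(num_beams: int, num_return_sequences: int) -> bool:
    # Group beam search needs the beams to split evenly across groups,
    # here assumed to be one group per returned summary.
    return num_return_sequences > 0 and num_beams % num_return_sequences == 0

assert beams_ok(10, 2)      # the .txt example in app.py: 10 beams, 2 summaries
assert beams_ok(1, 1)       # the new .pdf example: 1 beam, 1 summary
assert not beams_ok(10, 3)  # 10 beams cannot be split into 3 equal groups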
reference_string_parsing.py
CHANGED
@@ -3,7 +3,7 @@ import torch
 from SciAssist import ReferenceStringParsing
 
 device = "gpu" if torch.cuda.is_available() else "cpu"
-rsp_pipeline = ReferenceStringParsing()
+rsp_pipeline = ReferenceStringParsing(os_name="nt")
 
 
 def rsp_for_str(input, dehyphen=False) -> List[Tuple[str, str]]:
@@ -22,9 +22,9 @@ def rsp_for_file(input, dehyphen=False) -> List[Tuple[str, str]]:
     filename = input.name
     # Identify the format of input and parse reference strings
     if filename[-4:] == ".txt":
-        results = rsp_pipeline.predict(filename, type="txt", dehyphen=dehyphen)
-
-
+        results = rsp_pipeline.predict(filename, type="txt", dehyphen=dehyphen, save_results=False)
+    elif filename[-4:] == ".pdf":
+        results = rsp_pipeline.predict(filename, dehyphen=dehyphen, save_results=False)
     else:
         return [("File Format Error !", None)]
     # Prepare for the input gradio.HighlightedText accepts.
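Pulled out of the Gradio handler, the new dispatch on file extension amounts to the standalone sketch below (SciAssist call signatures are taken from the diff; the helper name parse_references and the raised exception are illustrative, since the app instead returns a HighlightedText error tuple):

from SciAssist import ReferenceStringParsing

rsp_pipeline = ReferenceStringParsing(os_name="nt")  # os_name="nt" as set in this commit

def parse_references(path: str, dehyphen: bool = False):
    if path.endswith(".txt"):
        # Plain-text input: one reference string per line.
        return rsp_pipeline.predict(path, type="txt", dehyphen=dehyphen, save_results=False)
    if path.endswith(".pdf"):
        # PDF input: the pipeline handles the unprocessed document itself.
        return rsp_pipeline.predict(path, dehyphen=dehyphen, save_results=False)
    raise ValueError("File Format Error: expected a .txt or .pdf file")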
requirements.txt
CHANGED
@@ -1,2 +1,2 @@
 torch==1.12.0
-SciAssist==0.0.
+SciAssist==0.0.20
summarization.py
CHANGED
@@ -3,7 +3,7 @@ import torch
 from SciAssist import Summarization
 
 device = "gpu" if torch.cuda.is_available() else "cpu"
-ssum_pipeline = Summarization()
+ssum_pipeline = Summarization(os_name="nt")
 
 
 def ssum_for_str(input, num_beams=1, num_return_sequences=1) -> List[Tuple[str, str]]:
@@ -23,8 +23,8 @@ def ssum_for_file(input, num_beams=1, num_return_sequences=1) -> List[Tuple[str,
     if filename[-4:] == ".txt":
         results = ssum_pipeline.predict(filename, type="txt", num_beams=num_beams,
                                         num_return_sequences=num_return_sequences, save_results=False)
-
-
+    elif filename[-4:] == ".pdf":
+        results = ssum_pipeline.predict(filename, num_beams=num_beams, num_return_sequences=num_return_sequences, save_results=False)
     else:
         return [("File Format Error !", None)]
 
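The summarization handler gains the same extension check. As a standalone sketch (call signatures from the diff; the helper name summarize_file is illustrative, and error handling in the app returns a HighlightedText tuple instead of raising):

from SciAssist import Summarization

ssum_pipeline = Summarization(os_name="nt")

def summarize_file(path: str, num_beams: int = 1, num_return_sequences: int = 1):
    if path.endswith(".txt"):
        # Plain-text input: summarize the file contents directly.
        return ssum_pipeline.predict(path, type="txt", num_beams=num_beams,
                                     num_return_sequences=num_return_sequences,
                                     save_results=False)
    if path.endswith(".pdf"):
        # PDF input: the pipeline works on the unprocessed document.
        return ssum_pipeline.predict(path, num_beams=num_beams,
                                     num_return_sequences=num_return_sequences,
                                     save_results=False)
    raise ValueError("File Format Error: expected a .txt or .pdf file")

# Example call, assuming the Space's example file is available locally:
# summary = summarize_file("examples/BERT_paper.pdf", num_beams=1, num_return_sequences=1)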