wing-nus committed on
Commit
696f5b1
•
1 Parent(s): 49e8e4b

pdf demo on summarization

Browse files
README.md CHANGED
@@ -1,10 +1,10 @@
1
  ---
2
- title: SciAssist
3
- emoji: 📊
4
- colorFrom: blue
5
- colorTo: gray
6
  sdk: gradio
7
- sdk_version: 3.1.6
8
  app_file: app.py
9
  pinned: false
10
  license: afl-3.0
 
1
  ---
2
+ title: Test Sciassist
3
+ emoji: 🚀
4
+ colorFrom: red
5
+ colorTo: red
6
  sdk: gradio
7
+ sdk_version: 3.4
8
  app_file: app.py
9
  pinned: false
10
  license: afl-3.0
app.py CHANGED
@@ -45,7 +45,7 @@ with gr.Blocks(css="#htext span {white-space: pre-line}") as demo:
45
  combine_adjacent=True,
46
  adjacent_separator=" ",
47
  )
48
- rsp_file_examples = gr.Examples(examples=[["examples/N18-3011_ref.txt", False],], inputs=[rsp_file, rsp_file_dehyphen])
49
 
50
 
51
  rsp_file_btn.click(
@@ -91,7 +91,7 @@ with gr.Blocks(css="#htext span {white-space: pre-line}") as demo:
91
  elem_id="htext",
92
  label="Summary",
93
  )
94
- ssum_file_examples = gr.Examples(examples=[["examples/BERT_body.txt", 10, 2],],
95
  inputs=[ssum_file, ssum_file_beams, ssum_file_sequences])
96
 
97
  ssum_file_btn.click(
@@ -108,4 +108,4 @@ with gr.Blocks(css="#htext span {white-space: pre-line}") as demo:
108
 
109
 
110
 
111
- demo.launch()
 
45
  combine_adjacent=True,
46
  adjacent_separator=" ",
47
  )
48
+ rsp_file_examples = gr.Examples(examples=[["examples/N18-3011_ref.txt", False],["examples/BERT_paper.pdf", True]], inputs=[rsp_file, rsp_file_dehyphen])
49
 
50
 
51
  rsp_file_btn.click(
 
91
  elem_id="htext",
92
  label="Summary",
93
  )
94
+ ssum_file_examples = gr.Examples(examples=[["examples/BERT_body.txt", 10, 2],["examples/BERT_paper.pdf", 1, 1]],
95
  inputs=[ssum_file, ssum_file_beams, ssum_file_sequences])
96
 
97
  ssum_file_btn.click(
 
108
 
109
 
110
 
111
+ demo.launch(share=True)
description.py CHANGED
@@ -7,7 +7,8 @@ rsp_file_md = '''
7
  To **test on a file**, the input can be:
8
 
9
  - A txt file which contains a reference string in each line.
10
-
 
11
 
12
  '''
13
  # - A pdf file which contains a whole scientific document without any processing (including title, author...).
@@ -24,7 +25,9 @@ To **test on a file**, the input can be:
24
 
25
  - A txt file which contains the content to be summarized.
26
 
 
 
 
27
  **Note**: The **number of beams** should be **divisible** by the **number of generated summaries** for group beam search.
28
 
29
  '''
30
- # - A pdf file which contains a whole scientific document without any processing (including title, author...).
 
7
  To **test on a file**, the input can be:
8
 
9
  - A txt file which contains a reference string in each line.
10
+
11
+ - A pdf file which contains a whole scientific document without any preprocessing (including title, author, body text...).
12
 
13
  '''
14
  # - A pdf file which contains a whole scientific document without any processing (including title, author...).
 
25
 
26
  - A txt file which contains the content to be summarized.
27
 
28
+ - A pdf file which contains a whole scientific document without any preprocessing (including title, author, body text...).
29
+
30
+
31
  **Note**: The **number of beams** should be **divisible** by the **number of generated summaries** for group beam search.
32
 
33
  '''
 
reference_string_parsing.py CHANGED
@@ -3,7 +3,7 @@ import torch
3
  from SciAssist import ReferenceStringParsing
4
 
5
  device = "gpu" if torch.cuda.is_available() else "cpu"
6
- rsp_pipeline = ReferenceStringParsing()
7
 
8
 
9
  def rsp_for_str(input, dehyphen=False) -> List[Tuple[str, str]]:
@@ -22,9 +22,9 @@ def rsp_for_file(input, dehyphen=False) -> List[Tuple[str, str]]:
22
  filename = input.name
23
  # Identify the format of input and parse reference strings
24
  if filename[-4:] == ".txt":
25
- results = rsp_pipeline.predict(filename, type="txt", dehyphen=dehyphen)
26
- # elif filename[-4:] == ".pdf":
27
- # results = rsp_pipeline.predict(filename, dehyphen=dehyphen)
28
  else:
29
  return [("File Format Error !", None)]
30
  # Prepare for the input gradio.HighlightedText accepts.
 
3
  from SciAssist import ReferenceStringParsing
4
 
5
  device = "gpu" if torch.cuda.is_available() else "cpu"
6
+ rsp_pipeline = ReferenceStringParsing(os_name="nt")
7
 
8
 
9
  def rsp_for_str(input, dehyphen=False) -> List[Tuple[str, str]]:
 
22
  filename = input.name
23
  # Identify the format of input and parse reference strings
24
  if filename[-4:] == ".txt":
25
+ results = rsp_pipeline.predict(filename, type="txt", dehyphen=dehyphen, save_results=False)
26
+ elif filename[-4:] == ".pdf":
27
+ results = rsp_pipeline.predict(filename, dehyphen=dehyphen, save_results=False)
28
  else:
29
  return [("File Format Error !", None)]
30
  # Prepare for the input gradio.HighlightedText accepts.
requirements.txt CHANGED
@@ -1,2 +1,2 @@
1
  torch==1.12.0
2
- SciAssist==0.0.18
 
1
  torch==1.12.0
2
+ SciAssist==0.0.20
summarization.py CHANGED
@@ -3,7 +3,7 @@ import torch
3
  from SciAssist import Summarization
4
 
5
  device = "gpu" if torch.cuda.is_available() else "cpu"
6
- ssum_pipeline = Summarization()
7
 
8
 
9
  def ssum_for_str(input, num_beams=1, num_return_sequences=1) -> List[Tuple[str, str]]:
@@ -23,8 +23,8 @@ def ssum_for_file(input, num_beams=1, num_return_sequences=1) -> List[Tuple[str,
23
  if filename[-4:] == ".txt":
24
  results = ssum_pipeline.predict(filename, type="txt", num_beams=num_beams,
25
  num_return_sequences=num_return_sequences, save_results=False)
26
- # elif filename[-4:] == ".pdf":
27
- # results = rsp_pipeline.predict(filename, num_beams=num_beams, num_return_sequences=num_return_sequences)
28
  else:
29
  return [("File Format Error !", None)]
30
 
 
3
  from SciAssist import Summarization
4
 
5
  device = "gpu" if torch.cuda.is_available() else "cpu"
6
+ ssum_pipeline = Summarization(os_name="nt")
7
 
8
 
9
  def ssum_for_str(input, num_beams=1, num_return_sequences=1) -> List[Tuple[str, str]]:
 
23
  if filename[-4:] == ".txt":
24
  results = ssum_pipeline.predict(filename, type="txt", num_beams=num_beams,
25
  num_return_sequences=num_return_sequences, save_results=False)
26
+ elif filename[-4:] == ".pdf":
27
+ results = ssum_pipeline.predict(filename, num_beams=num_beams, num_return_sequences=num_return_sequences, save_results=False)
28
  else:
29
  return [("File Format Error !", None)]
30