pdf demo on summarization
- README.md +5 -5
- app.py +3 -3
- description.py +5 -2
- reference_string_parsing.py +4 -4
- requirements.txt +1 -1
- summarization.py +3 -3
README.md
CHANGED
@@ -1,10 +1,10 @@
 ---
-title:
-emoji:
-colorFrom:
-colorTo:
+title: Test Sciassist
+emoji: π
+colorFrom: red
+colorTo: red
 sdk: gradio
-sdk_version: 3.
+sdk_version: 3.4
 app_file: app.py
 pinned: false
 license: afl-3.0
app.py
CHANGED
@@ -45,7 +45,7 @@ with gr.Blocks(css="#htext span {white-space: pre-line}") as demo:
         combine_adjacent=True,
         adjacent_separator=" ",
     )
-    rsp_file_examples = gr.Examples(examples=[["examples/N18-3011_ref.txt", False],], inputs=[rsp_file, rsp_file_dehyphen])
+    rsp_file_examples = gr.Examples(examples=[["examples/N18-3011_ref.txt", False],["examples/BERT_paper.pdf", True]], inputs=[rsp_file, rsp_file_dehyphen])
 
 
     rsp_file_btn.click(
@@ -91,7 +91,7 @@ with gr.Blocks(css="#htext span {white-space: pre-line}") as demo:
         elem_id="htext",
         label="Summary",
     )
-    ssum_file_examples = gr.Examples(examples=[["examples/BERT_body.txt", 10, 2],],
+    ssum_file_examples = gr.Examples(examples=[["examples/BERT_body.txt", 10, 2],["examples/BERT_paper.pdf", 1, 1]],
                                      inputs=[ssum_file, ssum_file_beams, ssum_file_sequences])
 
     ssum_file_btn.click(
@@ -108,4 +108,4 @@ with gr.Blocks(css="#htext span {white-space: pre-line}") as demo:
 
 
 
-demo.launch()
+demo.launch(share=True)
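The change above adds the new PDF file to the existing gr.Examples rows and asks for a public link via demo.launch(share=True). As a rough illustration of the pattern only (not the actual app.py, which defines many more components), a minimal Blocks demo wiring a file input, a checkbox, and an examples table could look like the sketch below, assuming the examples/ files from this Space are present:

import gradio as gr

def show_choice(uploaded_file, dehyphen):
    # Placeholder callback standing in for rsp_for_file in the real app.
    name = uploaded_file.name if uploaded_file is not None else "no file"
    return f"{name} (dehyphen={dehyphen})"

with gr.Blocks() as demo:
    rsp_file = gr.File(label="Input file (.txt or .pdf)")
    rsp_file_dehyphen = gr.Checkbox(label="dehyphen", value=False)
    result = gr.Textbox(label="Result")
    # Each inner list supplies one value per component in `inputs`, so
    # ["examples/BERT_paper.pdf", True] pre-fills the file picker and the checkbox.
    gr.Examples(
        examples=[["examples/N18-3011_ref.txt", False],
                  ["examples/BERT_paper.pdf", True]],
        inputs=[rsp_file, rsp_file_dehyphen],
    )
    rsp_file_btn = gr.Button("Parse")
    rsp_file_btn.click(fn=show_choice, inputs=[rsp_file, rsp_file_dehyphen], outputs=result)

demo.launch(share=True)  # share=True requests a temporary public URL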
description.py
CHANGED
@@ -7,7 +7,8 @@ rsp_file_md = '''
 To **test on a file**, the input can be:
 
 - A txt file which contains a reference string in each line.
-
+
+- A pdf file which contains a whole scientific documention without any preprocessing(including title, author, body text...).
 
 '''
 # - A pdf file which contains a whole scientific document without any processing (including title, author...).
@@ -24,7 +25,9 @@ To **test on a file**, the input can be:
 
 - A txt file which contains the content to be summarized.
 
+- A pdf file which contains a whole scientific documention without any preprocessing(including title, author, body text...).
+
+
 **Note**: The **number of beams** should be **divisible** by the **number of generated summaries** for group beam search.
 
 '''
-# - A pdf file which contains a whole scientific document without any processing (including title, author...).
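The note on beam counts is the one hard constraint the description states: the number of beams must be divisible by the number of generated summaries, presumably because the pipeline splits the beams evenly across groups for group beam search. A quick sanity check of that rule (an illustrative sketch, not part of the Space's code):

def beams_ok(num_beams: int, num_return_sequences: int) -> bool:
    # Group beam search needs the beams to split evenly across groups,
    # here assumed to be one group per returned summary.
    return num_return_sequences > 0 and num_beams % num_return_sequences == 0

assert beams_ok(10, 2)      # the .txt example in app.py: 10 beams, 2 summaries
assert beams_ok(1, 1)       # the new .pdf example: 1 beam, 1 summary
assert not beams_ok(10, 3)  # 10 beams cannot be split into 3 equal groups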
reference_string_parsing.py
CHANGED
@@ -3,7 +3,7 @@ import torch
 from SciAssist import ReferenceStringParsing
 
 device = "gpu" if torch.cuda.is_available() else "cpu"
-rsp_pipeline = ReferenceStringParsing()
+rsp_pipeline = ReferenceStringParsing(os_name="nt")
 
 
 def rsp_for_str(input, dehyphen=False) -> List[Tuple[str, str]]:
@@ -22,9 +22,9 @@ def rsp_for_file(input, dehyphen=False) -> List[Tuple[str, str]]:
     filename = input.name
     # Identify the format of input and parse reference strings
     if filename[-4:] == ".txt":
-        results = rsp_pipeline.predict(filename, type="txt", dehyphen=dehyphen)
-
-
+        results = rsp_pipeline.predict(filename, type="txt", dehyphen=dehyphen, save_results=False)
+    elif filename[-4:] == ".pdf":
+        results = rsp_pipeline.predict(filename, dehyphen=dehyphen, save_results=False)
     else:
         return [("File Format Error !", None)]
     # Prepare for the input gradio.HighlightedText accepts.
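Pulled out of the Gradio handler, the new dispatch on file extension amounts to the standalone sketch below (SciAssist call signatures are taken from the diff; the helper name parse_references and the raised exception are illustrative, since the app instead returns a HighlightedText error tuple):

from SciAssist import ReferenceStringParsing

rsp_pipeline = ReferenceStringParsing(os_name="nt")  # os_name="nt" as set in this commit

def parse_references(path: str, dehyphen: bool = False):
    if path.endswith(".txt"):
        # Plain-text input: one reference string per line.
        return rsp_pipeline.predict(path, type="txt", dehyphen=dehyphen, save_results=False)
    if path.endswith(".pdf"):
        # PDF input: the pipeline handles the unprocessed document itself.
        return rsp_pipeline.predict(path, dehyphen=dehyphen, save_results=False)
    raise ValueError("File Format Error: expected a .txt or .pdf file")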
requirements.txt
CHANGED
@@ -1,2 +1,2 @@
 torch==1.12.0
-SciAssist==0.0.
+SciAssist==0.0.20
summarization.py
CHANGED
@@ -3,7 +3,7 @@ import torch
 from SciAssist import Summarization
 
 device = "gpu" if torch.cuda.is_available() else "cpu"
-ssum_pipeline = Summarization()
+ssum_pipeline = Summarization(os_name="nt")
 
 
 def ssum_for_str(input, num_beams=1, num_return_sequences=1) -> List[Tuple[str, str]]:
@@ -23,8 +23,8 @@ def ssum_for_file(input, num_beams=1, num_return_sequences=1) -> List[Tuple[str,
     if filename[-4:] == ".txt":
         results = ssum_pipeline.predict(filename, type="txt", num_beams=num_beams,
                                         num_return_sequences=num_return_sequences, save_results=False)
-
-
+    elif filename[-4:] == ".pdf":
+        results = ssum_pipeline.predict(filename, num_beams=num_beams, num_return_sequences=num_return_sequences, save_results=False)
     else:
         return [("File Format Error !", None)]
 
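The summarization handler gains the same extension check. As a standalone sketch (call signatures from the diff; the helper name summarize_file is illustrative, and error handling in the app returns a HighlightedText tuple instead of raising):

from SciAssist import Summarization

ssum_pipeline = Summarization(os_name="nt")

def summarize_file(path: str, num_beams: int = 1, num_return_sequences: int = 1):
    if path.endswith(".txt"):
        # Plain-text input: summarize the file contents directly.
        return ssum_pipeline.predict(path, type="txt", num_beams=num_beams,
                                     num_return_sequences=num_return_sequences,
                                     save_results=False)
    if path.endswith(".pdf"):
        # PDF input: the pipeline works on the unprocessed document.
        return ssum_pipeline.predict(path, num_beams=num_beams,
                                     num_return_sequences=num_return_sequences,
                                     save_results=False)
    raise ValueError("File Format Error: expected a .txt or .pdf file")

# Example call, assuming the Space's example file is available locally:
# summary = summarize_file("examples/BERT_paper.pdf", num_beams=1, num_return_sequences=1)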