Spaces:
Sleeping
Sleeping
add PDF URL crawling (#12)
Browse files- add pdf crawl (6ae3c63c5102967f9d32532f0a8bb83ad449ee09)
app.py
CHANGED
@@ -5,6 +5,35 @@ from reference_string_parsing import *
|
|
5 |
from controlled_summarization import *
|
6 |
from dataset_extraction import *
|
7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
with gr.Blocks(css="#htext span {white-space: pre-line}") as demo:
|
9 |
gr.Markdown("# Gradio Demo for SciAssist")
|
10 |
with gr.Tabs():
|
@@ -16,7 +45,8 @@ with gr.Blocks(css="#htext span {white-space: pre-line}") as demo:
|
|
16 |
gr.Markdown(ctrlsum_file_md)
|
17 |
with gr.Row():
|
18 |
with gr.Column():
|
19 |
-
|
|
|
20 |
ctrlsum_str = gr.TextArea(label="Input String", max_lines=5)
|
21 |
with gr.Column():
|
22 |
gr.Markdown("* Length 0 will exert no control over length.")
|
@@ -33,6 +63,9 @@ with gr.Blocks(css="#htext span {white-space: pre-line}") as demo:
|
|
33 |
ctrlsum_file_examples = gr.Examples(examples=[["examples/H01-1042_body.txt", 50, "automatic evaluation technique"],["examples/H01-1042.pdf", 0, "automatic evaluation technique"]],
|
34 |
inputs=[ctrlsum_file, ctrlsum_file_length, ctrlsum_file_keywords])
|
35 |
|
|
|
|
|
|
|
36 |
ctrlsum_file_btn.click(
|
37 |
fn=ctrlsum_for_file,
|
38 |
inputs=[ctrlsum_file, ctrlsum_file_length, ctrlsum_file_keywords, ctrlsum_str],
|
@@ -143,4 +176,4 @@ with gr.Blocks(css="#htext span {white-space: pre-line}") as demo:
|
|
143 |
)
|
144 |
|
145 |
|
146 |
-
demo.launch(share=False)
|
|
|
5 |
from controlled_summarization import *
|
6 |
from dataset_extraction import *
|
7 |
|
8 |
+
import requests
|
9 |
+
def download_pdf(url, dest_folder, timeout=30):
    """
    Download a PDF from a given URL and save it to a specified destination folder.

    Parameters:
    url (str): URL of the PDF
    dest_folder (str): Destination folder to save the downloaded PDF (created if missing)
    timeout (float): Seconds to wait for the server before giving up

    Returns:
    str: Path of the file written inside dest_folder

    Raises:
    requests.HTTPError: If the server answers with a 4xx/5xx status
    requests.RequestException: On connection errors or timeouts
    """
    # Local import keeps this block self-contained without touching the
    # file's top-level import list.
    from urllib.parse import unquote, urlparse

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(dest_folder, exist_ok=True)

    # Name the file after the last segment of the URL *path* only, so query
    # strings and fragments do not leak into the file name. basename() is
    # applied after unquoting so an encoded "/" cannot escape dest_folder.
    basename = os.path.basename(unquote(urlparse(url).path))
    if basename in ("", ".", ".."):
        # URL ended with "/" (or a dot segment): there is no usable name.
        basename = "download.pdf"
    filename = os.path.join(dest_folder, basename)

    # The context manager releases the streamed connection even on errors.
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Fail before creating the file so an HTML error page is never
        # stored under a .pdf name.
        response.raise_for_status()
        with open(filename, 'wb') as file:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:
                    file.write(chunk)
    #print(f"Downloaded {url} to {filename}")
    return filename
|
30 |
+
|
31 |
+
# Example Usage
|
32 |
+
#url = "https://arxiv.org/pdf/2305.14996.pdf"
|
33 |
+
#dest_folder = "./examples/"
|
34 |
+
#download_pdf(url, dest_folder)
|
35 |
+
|
36 |
+
|
37 |
with gr.Blocks(css="#htext span {white-space: pre-line}") as demo:
|
38 |
gr.Markdown("# Gradio Demo for SciAssist")
|
39 |
with gr.Tabs():
|
|
|
45 |
gr.Markdown(ctrlsum_file_md)
|
46 |
with gr.Row():
|
47 |
with gr.Column():
|
48 |
+
ctrlsum_url = gr.TextArea(label="PDF URL", max_lines=1)
|
49 |
+
ctrlsum_file = gr.File(label="Input File", max_lines=2)
|
50 |
ctrlsum_str = gr.TextArea(label="Input String", max_lines=5)
|
51 |
with gr.Column():
|
52 |
gr.Markdown("* Length 0 will exert no control over length.")
|
|
|
63 |
ctrlsum_file_examples = gr.Examples(examples=[["examples/H01-1042_body.txt", 50, "automatic evaluation technique"],["examples/H01-1042.pdf", 0, "automatic evaluation technique"]],
|
64 |
inputs=[ctrlsum_file, ctrlsum_file_length, ctrlsum_file_keywords])
|
65 |
|
66 |
+
if ctrlsum_url is not None and len(ctrlsum_url) > 4:
|
67 |
+
ctrlsum_file = download_pdf(ctrlsum_url, './examples/')
|
68 |
+
|
69 |
ctrlsum_file_btn.click(
|
70 |
fn=ctrlsum_for_file,
|
71 |
inputs=[ctrlsum_file, ctrlsum_file_length, ctrlsum_file_keywords, ctrlsum_str],
|
|
|
176 |
)
|
177 |
|
178 |
|
179 |
+
demo.launch(share=False)
|