Spaces: Running on L4
Update app.py
Browse files
app.py
CHANGED
@@ -8,8 +8,8 @@ from pathlib import Path
|
|
8 |
import re
|
9 |
|
10 |
os.system('pip install -r requirements.txt')
|
11 |
-
os.system('pip install -U magic_pdf-0.8.
|
12 |
-
|
13 |
|
14 |
from huggingface_hub import snapshot_download
|
15 |
model_dir = snapshot_download('opendatalab/PDF-Extract-Kit')
|
@@ -17,7 +17,7 @@ model_dir = snapshot_download('opendatalab/PDF-Extract-Kit')
|
|
17 |
os.system('wget https://github.com/opendatalab/MinerU/raw/master/magic-pdf.template.json')
|
18 |
os.system('cp magic-pdf.template.json ~/magic-pdf.json')
|
19 |
os.system(f"sed -i 's|/tmp/models|{model_dir}/models|g' /home/user/magic-pdf.json")
|
20 |
-
|
21 |
|
22 |
os.system("pip install gradio-pdf")
|
23 |
from gradio_pdf import PDF
|
@@ -35,7 +35,7 @@ def read_fn(path):
|
|
35 |
disk_rw = DiskReaderWriter(os.path.dirname(path))
|
36 |
return disk_rw.read(os.path.basename(path), AbsReaderWriter.MODE_BIN)
|
37 |
|
38 |
-
|
39 |
def parse_pdf(doc_path, output_dir, end_page_id):
|
40 |
os.makedirs(output_dir, exist_ok=True)
|
41 |
|
@@ -119,7 +119,8 @@ def to_markdown(file_path, end_pages):
|
|
119 |
# 返回转换后的PDF路径
|
120 |
new_pdf_path = os.path.join(local_md_dir, file_name + "_layout.pdf")
|
121 |
|
122 |
-
return md_content, txt_content, archive_zip_path, show_pdf(new_pdf_path)
|
|
|
123 |
|
124 |
|
125 |
# def show_pdf(file_path):
|
@@ -141,25 +142,25 @@ if __name__ == "__main__":
|
|
141 |
with gr.Blocks() as demo:
|
142 |
with gr.Row():
|
143 |
with gr.Column(variant='panel', scale=5):
|
144 |
-
file = gr.File(label="Please upload pdf", file_types=[".pdf"])
|
|
|
145 |
max_pages = gr.Slider(1, 10, 5, step=1, label="Max convert pages")
|
146 |
with gr.Row() as bu_flow:
|
147 |
change_bu = gr.Button("Convert")
|
148 |
-
clear_bu = gr.ClearButton([
|
149 |
-
gr.Markdown(value="### PDF preview")
|
150 |
# pdf_show = gr.HTML(label="PDF preview")
|
151 |
-
pdf_show = PDF(label="
|
152 |
|
153 |
with gr.Column(variant='panel', scale=5):
|
154 |
output_file = gr.File(label="convert result", interactive=False)
|
155 |
with gr.Tabs():
|
156 |
with gr.Tab("Markdown rendering"):
|
157 |
-
md = gr.Markdown(label="Markdown rendering", height=
|
158 |
latex_delimiters=latex_delimiters, line_breaks=True)
|
159 |
with gr.Tab("Markdown text"):
|
160 |
-
md_text = gr.TextArea(lines=
|
161 |
-
file.upload(fn=show_pdf, inputs=file, outputs=pdf_show)
|
162 |
-
change_bu.click(fn=to_markdown, inputs=[
|
163 |
clear_bu.add([md, pdf_show, md_text, output_file])
|
164 |
|
165 |
demo.launch()
|
|
|
8 |
import re
|
9 |
|
10 |
os.system('pip install -r requirements.txt')
|
11 |
+
os.system('pip install -U magic_pdf-0.8.0a2-py3-none-any.whl')
|
12 |
+
os.system('python -m pip install paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/')
|
13 |
|
14 |
from huggingface_hub import snapshot_download
|
15 |
model_dir = snapshot_download('opendatalab/PDF-Extract-Kit')
|
|
|
17 |
os.system('wget https://github.com/opendatalab/MinerU/raw/master/magic-pdf.template.json')
|
18 |
os.system('cp magic-pdf.template.json ~/magic-pdf.json')
|
19 |
os.system(f"sed -i 's|/tmp/models|{model_dir}/models|g' /home/user/magic-pdf.json")
|
20 |
+
os.system("sed -i 's|cpu|cuda|g' /home/user/magic-pdf.json")
|
21 |
|
22 |
os.system("pip install gradio-pdf")
|
23 |
from gradio_pdf import PDF
|
|
|
35 |
disk_rw = DiskReaderWriter(os.path.dirname(path))
|
36 |
return disk_rw.read(os.path.basename(path), AbsReaderWriter.MODE_BIN)
|
37 |
|
38 |
+
@spaces.GPU(duration=120)
|
39 |
def parse_pdf(doc_path, output_dir, end_page_id):
|
40 |
os.makedirs(output_dir, exist_ok=True)
|
41 |
|
|
|
119 |
# 返回转换后的PDF路径
|
120 |
new_pdf_path = os.path.join(local_md_dir, file_name + "_layout.pdf")
|
121 |
|
122 |
+
# return md_content, txt_content, archive_zip_path, show_pdf(new_pdf_path)
|
123 |
+
return md_content, txt_content, archive_zip_path, new_pdf_path
|
124 |
|
125 |
|
126 |
# def show_pdf(file_path):
|
|
|
142 |
with gr.Blocks() as demo:
|
143 |
with gr.Row():
|
144 |
with gr.Column(variant='panel', scale=5):
|
145 |
+
# file = gr.File(label="Please upload pdf", file_types=[".pdf"])
|
146 |
+
pdf_show = gr.Markdown()
|
147 |
max_pages = gr.Slider(1, 10, 5, step=1, label="Max convert pages")
|
148 |
with gr.Row() as bu_flow:
|
149 |
change_bu = gr.Button("Convert")
|
150 |
+
clear_bu = gr.ClearButton([pdf_show], value="Clear")
|
|
|
151 |
# pdf_show = gr.HTML(label="PDF preview")
|
152 |
+
pdf_show = PDF(label="Please upload pdf", interactive=True, height=800)
|
153 |
|
154 |
with gr.Column(variant='panel', scale=5):
|
155 |
output_file = gr.File(label="convert result", interactive=False)
|
156 |
with gr.Tabs():
|
157 |
with gr.Tab("Markdown rendering"):
|
158 |
+
md = gr.Markdown(label="Markdown rendering", height=900, show_copy_button=True,
|
159 |
latex_delimiters=latex_delimiters, line_breaks=True)
|
160 |
with gr.Tab("Markdown text"):
|
161 |
+
md_text = gr.TextArea(lines=45, show_copy_button=True)
|
162 |
+
# file.upload(fn=show_pdf, inputs=file, outputs=pdf_show)
|
163 |
+
change_bu.click(fn=to_markdown, inputs=[pdf_show, max_pages], outputs=[md, md_text, output_file, pdf_show])
|
164 |
clear_bu.add([md, pdf_show, md_text, output_file])
|
165 |
|
166 |
demo.launch()
|