myhloli committed on
Commit
21435e3
1 Parent(s): 63e3958

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -13
app.py CHANGED
@@ -8,8 +8,8 @@ from pathlib import Path
8
  import re
9
 
10
  os.system('pip install -r requirements.txt')
11
- os.system('pip install -U magic_pdf-0.8.0a1-py3-none-any.whl')
12
- # os.system('python -m pip install paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/')
13
 
14
  from huggingface_hub import snapshot_download
15
  model_dir = snapshot_download('opendatalab/PDF-Extract-Kit')
@@ -17,7 +17,7 @@ model_dir = snapshot_download('opendatalab/PDF-Extract-Kit')
17
  os.system('wget https://github.com/opendatalab/MinerU/raw/master/magic-pdf.template.json')
18
  os.system('cp magic-pdf.template.json ~/magic-pdf.json')
19
  os.system(f"sed -i 's|/tmp/models|{model_dir}/models|g' /home/user/magic-pdf.json")
20
- # os.system("sed -i 's|cpu|cuda|g' /home/user/magic-pdf.json")
21
 
22
  os.system("pip install gradio-pdf")
23
  from gradio_pdf import PDF
@@ -35,7 +35,7 @@ def read_fn(path):
35
  disk_rw = DiskReaderWriter(os.path.dirname(path))
36
  return disk_rw.read(os.path.basename(path), AbsReaderWriter.MODE_BIN)
37
 
38
-
39
  def parse_pdf(doc_path, output_dir, end_page_id):
40
  os.makedirs(output_dir, exist_ok=True)
41
 
@@ -119,7 +119,8 @@ def to_markdown(file_path, end_pages):
119
  # 返回转换后的PDF路径
120
  new_pdf_path = os.path.join(local_md_dir, file_name + "_layout.pdf")
121
 
122
- return md_content, txt_content, archive_zip_path, show_pdf(new_pdf_path)
 
123
 
124
 
125
  # def show_pdf(file_path):
@@ -141,25 +142,25 @@ if __name__ == "__main__":
141
  with gr.Blocks() as demo:
142
  with gr.Row():
143
  with gr.Column(variant='panel', scale=5):
144
- file = gr.File(label="Please upload pdf", file_types=[".pdf"])
 
145
  max_pages = gr.Slider(1, 10, 5, step=1, label="Max convert pages")
146
  with gr.Row() as bu_flow:
147
  change_bu = gr.Button("Convert")
148
- clear_bu = gr.ClearButton([file, max_pages], value="Clear")
149
- gr.Markdown(value="### PDF preview")
150
  # pdf_show = gr.HTML(label="PDF preview")
151
- pdf_show = PDF(label="PDF preview", interactive=True, height=1000)
152
 
153
  with gr.Column(variant='panel', scale=5):
154
  output_file = gr.File(label="convert result", interactive=False)
155
  with gr.Tabs():
156
  with gr.Tab("Markdown rendering"):
157
- md = gr.Markdown(label="Markdown rendering", height=1100, show_copy_button=True,
158
  latex_delimiters=latex_delimiters, line_breaks=True)
159
  with gr.Tab("Markdown text"):
160
- md_text = gr.TextArea(lines=55, show_copy_button=True)
161
- file.upload(fn=show_pdf, inputs=file, outputs=pdf_show)
162
- change_bu.click(fn=to_markdown, inputs=[file, max_pages], outputs=[md, md_text, output_file, pdf_show])
163
  clear_bu.add([md, pdf_show, md_text, output_file])
164
 
165
  demo.launch()
 
8
  import re
9
 
10
  os.system('pip install -r requirements.txt')
11
+ os.system('pip install -U magic_pdf-0.8.0a2-py3-none-any.whl')
12
+ os.system('python -m pip install paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/')
13
 
14
  from huggingface_hub import snapshot_download
15
  model_dir = snapshot_download('opendatalab/PDF-Extract-Kit')
 
17
  os.system('wget https://github.com/opendatalab/MinerU/raw/master/magic-pdf.template.json')
18
  os.system('cp magic-pdf.template.json ~/magic-pdf.json')
19
  os.system(f"sed -i 's|/tmp/models|{model_dir}/models|g' /home/user/magic-pdf.json")
20
+ os.system("sed -i 's|cpu|cuda|g' /home/user/magic-pdf.json")
21
 
22
  os.system("pip install gradio-pdf")
23
  from gradio_pdf import PDF
 
35
  disk_rw = DiskReaderWriter(os.path.dirname(path))
36
  return disk_rw.read(os.path.basename(path), AbsReaderWriter.MODE_BIN)
37
 
38
+ @spaces.GPU(duration=120)
39
  def parse_pdf(doc_path, output_dir, end_page_id):
40
  os.makedirs(output_dir, exist_ok=True)
41
 
 
119
  # 返回转换后的PDF路径
120
  new_pdf_path = os.path.join(local_md_dir, file_name + "_layout.pdf")
121
 
122
+ # return md_content, txt_content, archive_zip_path, show_pdf(new_pdf_path)
123
+ return md_content, txt_content, archive_zip_path, new_pdf_path
124
 
125
 
126
  # def show_pdf(file_path):
 
142
  with gr.Blocks() as demo:
143
  with gr.Row():
144
  with gr.Column(variant='panel', scale=5):
145
+ # file = gr.File(label="Please upload pdf", file_types=[".pdf"])
146
+ pdf_show = gr.Markdown()
147
  max_pages = gr.Slider(1, 10, 5, step=1, label="Max convert pages")
148
  with gr.Row() as bu_flow:
149
  change_bu = gr.Button("Convert")
150
+ clear_bu = gr.ClearButton([pdf_show], value="Clear")
 
151
  # pdf_show = gr.HTML(label="PDF preview")
152
+ pdf_show = PDF(label="Please upload pdf", interactive=True, height=800)
153
 
154
  with gr.Column(variant='panel', scale=5):
155
  output_file = gr.File(label="convert result", interactive=False)
156
  with gr.Tabs():
157
  with gr.Tab("Markdown rendering"):
158
+ md = gr.Markdown(label="Markdown rendering", height=900, show_copy_button=True,
159
  latex_delimiters=latex_delimiters, line_breaks=True)
160
  with gr.Tab("Markdown text"):
161
+ md_text = gr.TextArea(lines=45, show_copy_button=True)
162
+ # file.upload(fn=show_pdf, inputs=file, outputs=pdf_show)
163
+ change_bu.click(fn=to_markdown, inputs=[pdf_show, max_pages], outputs=[md, md_text, output_file, pdf_show])
164
  clear_bu.add([md, pdf_show, md_text, output_file])
165
 
166
  demo.launch()