Mageia committed on
Commit
711e2cf
1 Parent(s): 0b4c6fc

fix: process pdf once

Browse files
Files changed (1) hide show
  1. app.py +20 -6
app.py CHANGED
@@ -1,5 +1,7 @@
 
1
  import base64
2
  import os
 
3
  import tempfile
4
  import time
5
 
@@ -17,6 +19,19 @@ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
17
  model = AutoModel.from_pretrained(model_name, trust_remote_code=True, device_map=device)
18
  model = model.eval().to(device)
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  def pdf_to_images(pdf_path):
22
  images = []
@@ -49,11 +64,10 @@ def convert_pdf_to_images(file):
49
  images = pdf_to_images(file.name)
50
  image_paths = []
51
 
52
- with tempfile.TemporaryDirectory() as temp_dir:
53
- for i, image in enumerate(images):
54
- img_path = os.path.join(temp_dir, f"page_{i+1}.png")
55
- image.save(img_path, "PNG")
56
- image_paths.append(img_path)
57
 
58
  return "PDF转换为图片成功", image_paths
59
  except Exception as e:
@@ -129,7 +143,7 @@ with gr.Blocks() as demo:
129
 
130
  with gr.Row():
131
  with gr.Column(scale=1):
132
- selected_image = gr.Image(label="选中的图片")
133
  got_mode = gr.Dropdown(
134
  choices=[
135
  "plain texts OCR",
 
1
+ import atexit
2
  import base64
3
  import os
4
+ import shutil
5
  import tempfile
6
  import time
7
 
 
19
  model = AutoModel.from_pretrained(model_name, trust_remote_code=True, device_map=device)
20
  model = model.eval().to(device)
21
 
22
+ # 创建一个持久的临时目录
23
+ TEMP_DIR = tempfile.mkdtemp()
24
+
25
+
26
+ def cleanup():
27
+ """清理临时目录"""
28
+ shutil.rmtree(TEMP_DIR, ignore_errors=True)
29
+
30
+
31
+ # 确保在程序退出时清理临时目录
32
+
33
+ atexit.register(cleanup)
34
+
35
 
36
  def pdf_to_images(pdf_path):
37
  images = []
 
64
  images = pdf_to_images(file.name)
65
  image_paths = []
66
 
67
+ for i, image in enumerate(images):
68
+ img_path = os.path.join(TEMP_DIR, f"page_{i+1}.png")
69
+ image.save(img_path, "PNG")
70
+ image_paths.append(img_path)
 
71
 
72
  return "PDF转换为图片成功", image_paths
73
  except Exception as e:
 
143
 
144
  with gr.Row():
145
  with gr.Column(scale=1):
146
+ selected_image = gr.Image(label="选中的图片", type="filepath")
147
  got_mode = gr.Dropdown(
148
  choices=[
149
  "plain texts OCR",