Spaces:

Mageia
/

GOT-OCR-Optimize

Running

Mageia committed on Oct 16

Commit

711e2cf

•

1 Parent(s): 0b4c6fc

fix: process pdf once

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,5 +1,7 @@
 import base64
 import os
 import tempfile
 import time
@@ -17,6 +19,19 @@ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 model = AutoModel.from_pretrained(model_name, trust_remote_code=True, device_map=device)
 model = model.eval().to(device)
 def pdf_to_images(pdf_path):
     images = []
@@ -49,11 +64,10 @@ def convert_pdf_to_images(file):
         images = pdf_to_images(file.name)
         image_paths = []
-        with tempfile.TemporaryDirectory() as temp_dir:
-            for i, image in enumerate(images):
-                img_path = os.path.join(temp_dir, f"page_{i+1}.png")
-                image.save(img_path, "PNG")
-                image_paths.append(img_path)
         return "PDF转换为图片成功", image_paths
     except Exception as e:
@@ -129,7 +143,7 @@ with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column(scale=1):
-            selected_image = gr.Image(label="选中的图片")
             got_mode = gr.Dropdown(
                 choices=[
                     "plain texts OCR",

+import atexit
 import base64
 import os
+import shutil
 import tempfile
 import time
 model = AutoModel.from_pretrained(model_name, trust_remote_code=True, device_map=device)
 model = model.eval().to(device)
+# 创建一个持久的临时目录
+TEMP_DIR = tempfile.mkdtemp()
+def cleanup():
+    """清理临时目录"""
+    shutil.rmtree(TEMP_DIR, ignore_errors=True)
+# 确保在程序退出时清理临时目录
+atexit.register(cleanup)
 def pdf_to_images(pdf_path):
     images = []
         images = pdf_to_images(file.name)
         image_paths = []
+        for i, image in enumerate(images):
+            img_path = os.path.join(TEMP_DIR, f"page_{i+1}.png")
+            image.save(img_path, "PNG")
+            image_paths.append(img_path)
         return "PDF转换为图片成功", image_paths
     except Exception as e:
     with gr.Row():
         with gr.Column(scale=1):
+            selected_image = gr.Image(label="选中的图片", type="filepath")
             got_mode = gr.Dropdown(
                 choices=[
                     "plain texts OCR",