Mageia committed on
Commit
5f2ab23
1 Parent(s): b1f5681

fix: process pdf once

Browse files
Files changed (1) hide show
  1. app.py +9 -9
app.py CHANGED
@@ -28,7 +28,7 @@ def ocr_process(image, got_mode, ocr_color="", ocr_box=""):
28
  res = model.chat_crop(tokenizer, image_path, ocr_type="ocr")
29
  else:
30
  res = model.chat(tokenizer, image_path, ocr_type="ocr", ocr_box=ocr_box, ocr_color=ocr_color)
31
- return gr.Markdown(res)
32
  elif "format" in got_mode:
33
  if "multi-crop" in got_mode:
34
  res = model.chat_crop(tokenizer, image_path, ocr_type="format", render=True, save_render_file=result_path)
@@ -36,17 +36,17 @@ def ocr_process(image, got_mode, ocr_color="", ocr_box=""):
36
  res = model.chat(tokenizer, image_path, ocr_type="format", ocr_box=ocr_box, ocr_color=ocr_color, render=True, save_render_file=result_path)
37
 
38
  if os.path.exists(result_path):
39
- with open(result_path, "r") as f:
40
  html_content = f.read()
41
  encoded_html = base64.b64encode(html_content.encode("utf-8")).decode("utf-8")
42
- iframe_src = f"data:text/html;base64,{encoded_html}"
43
- iframe = f'<iframe src="{iframe_src}" width="100%" height="600px"></iframe>'
44
- download_link = f'<a href="data:text/html;base64,{encoded_html}" download="result.html">下载完整结果</a>'
45
- return gr.Markdown(f"{download_link}\n\n{iframe}")
46
 
47
- return gr.Markdown("错误: 未知的OCR模式")
48
  except Exception as e:
49
- return gr.Markdown(f"错误: {str(e)}")
50
 
51
 
52
  with gr.Blocks() as demo:
@@ -67,7 +67,7 @@ with gr.Blocks() as demo:
67
 
68
  submit_button = gr.Button("开始OCR识别")
69
 
70
- output = gr.Markdown(label="识别结果")
71
 
72
  submit_button.click(ocr_process, inputs=[image_input, got_mode, ocr_color, ocr_box], outputs=output)
73
 
 
28
  res = model.chat_crop(tokenizer, image_path, ocr_type="ocr")
29
  else:
30
  res = model.chat(tokenizer, image_path, ocr_type="ocr", ocr_box=ocr_box, ocr_color=ocr_color)
31
+ return res
32
  elif "format" in got_mode:
33
  if "multi-crop" in got_mode:
34
  res = model.chat_crop(tokenizer, image_path, ocr_type="format", render=True, save_render_file=result_path)
 
36
  res = model.chat(tokenizer, image_path, ocr_type="format", ocr_box=ocr_box, ocr_color=ocr_color, render=True, save_render_file=result_path)
37
 
38
  if os.path.exists(result_path):
39
+ with open(result_path, "r", encoding="utf-8") as f:
40
  html_content = f.read()
41
  encoded_html = base64.b64encode(html_content.encode("utf-8")).decode("utf-8")
42
+ data_uri = f"data:text/html;charset=utf-8;base64,{encoded_html}"
43
+ preview = f'<iframe src="{data_uri}" width="100%" height="600px"></iframe>'
44
+ download_link = f'<a href="{data_uri}" download="result.html">下载完整结果</a>'
45
+ return f"{download_link}\n\n{preview}\n\n识别结果:\n{res}"
46
 
47
+ return "错误: 未知的OCR模式"
48
  except Exception as e:
49
+ return f"错误: {str(e)}"
50
 
51
 
52
  with gr.Blocks() as demo:
 
67
 
68
  submit_button = gr.Button("开始OCR识别")
69
 
70
+ output = gr.HTML(label="识别结果")
71
 
72
  submit_button.click(ocr_process, inputs=[image_input, got_mode, ocr_color, ocr_box], outputs=output)
73