Spaces:

Mageia
/

GOT-OCR-Optimize

Running

App Files Files Community

GOT-OCR-Optimize / app-ocr.py

Mageia

add: pdf2images

8ec1357 unverified about 2 months ago

raw

history blame

5.64 kB

	import base64
	import io
	import os
	import shutil
	import time
	import uuid
	from pathlib import Path

	# import numpy as np
	# import tempfile
	# from PIL import Image
	import gradio as gr
	from modelscope import AutoModel, AutoTokenizer

	# Working directories: uploaded images are copied into the first, rendered
	# HTML results are written into the second.
	UPLOAD_FOLDER = "./uploads"
	RESULTS_FOLDER = "./results"

	# Repository id shared by the tokenizer and the model weights.
	_GOT_MODEL_ID = "stepfun-ai/GOT-OCR2_0"

	# Both objects are created once at import time and reused by every request.
	tokenizer = AutoTokenizer.from_pretrained(_GOT_MODEL_ID, trust_remote_code=True)
	model = AutoModel.from_pretrained(
	    _GOT_MODEL_ID,
	    trust_remote_code=True,
	    low_cpu_mem_usage=True,
	    device_map="cuda",
	    use_safetensors=True,
	)
	# Switch to evaluation mode and place the model on the CUDA device.
	model = model.eval().cuda()

	# Make sure both working directories exist before any request is handled.
	# exist_ok=True replaces the separate os.path.exists() check, which could
	# race with another process creating the directory in between.
	for folder in (UPLOAD_FOLDER, RESULTS_FOLDER):
	    os.makedirs(folder, exist_ok=True)


	def image_to_base64(image):
	    """Return *image* serialised as PNG and encoded as a base64 text string.

	    ``image`` must provide a ``save(file_obj, format=...)`` method; the PNG
	    bytes it writes are captured in memory and never touch the disk.
	    """
	    with io.BytesIO() as png_buffer:
	        image.save(png_buffer, format="PNG")
	        png_bytes = png_buffer.getvalue()
	    return base64.b64encode(png_bytes).decode()


	def run_GOT(image, got_mode, fine_grained_mode="", ocr_color="", ocr_box=""):
	    """Run GOT-OCR on an uploaded image and return ``(text, html)``.

	    Args:
	        image: Filesystem path of the uploaded image. A falsy value (nothing
	            uploaded) is reported as an error instead of raising.
	        got_mode: One of the six mode names listed in the mode table below.
	        fine_grained_mode: Accepted so the UI can pass it; not used here.
	        ocr_color: Forwarded as the model's ``ocr_color`` argument in the
	            fine-grained modes.
	        ocr_box: Forwarded as the model's ``ocr_box`` argument in the
	            fine-grained modes.

	    Returns:
	        A 2-tuple. The first item is the model output, or a string starting
	        with ``"Error: "`` when anything failed. The second item is an HTML
	        snippet (download link plus an iframe embedding the rendered result)
	        for the "format" modes when the render file exists, otherwise ``None``.
	    """
	    if not image:
	        return "Error: no image provided", None

	    # mode name -> (use chat_crop, pass box/colour arguments, render to HTML)
	    mode_options = {
	        "plain texts OCR": (False, False, False),
	        "format texts OCR": (False, False, True),
	        "plain multi-crop OCR": (True, False, False),
	        "format multi-crop OCR": (True, False, True),
	        "plain fine-grained OCR": (False, True, False),
	        "format fine-grained OCR": (False, True, True),
	    }
	    if got_mode not in mode_options:
	        # Reject unknown modes up front rather than failing later on an
	        # unassigned result variable.
	        return f"Error: unsupported GOT mode: {got_mode!r}", None
	    use_crop, fine_grained, render = mode_options[got_mode]

	    unique_id = str(uuid.uuid4())
	    image_path = os.path.join(UPLOAD_FOLDER, f"{unique_id}.png")
	    result_path = os.path.join(RESULTS_FOLDER, f"{unique_id}.html")

	    try:
	        # Copy inside the try block so a failed copy is reported like any
	        # other error and a partially written copy is still removed below.
	        shutil.copy(image, image_path)

	        chat_kwargs = {"ocr_type": "format" if render else "ocr"}
	        if fine_grained:
	            chat_kwargs.update(ocr_box=ocr_box, ocr_color=ocr_color)
	        if render:
	            chat_kwargs.update(render=True, save_render_file=result_path)

	        infer = model.chat_crop if use_crop else model.chat
	        res = infer(tokenizer, image_path, **chat_kwargs)

	        if not (render and os.path.exists(result_path)):
	            return res, None

	        with open(result_path, "r") as f:
	            html_content = f.read()
	        # Embed the rendered page as a data URI so it needs no file serving.
	        encoded_html = base64.b64encode(html_content.encode("utf-8")).decode("utf-8")
	        iframe_src = f"data:text/html;base64,{encoded_html}"
	        iframe = f'<iframe src="{iframe_src}" width="100%" height="600px"></iframe>'
	        download_link = f'<a href="data:text/html;base64,{encoded_html}" download="result_{unique_id}.html">Download Full Result</a>'
	        return res, f"{download_link}<br>{iframe}"
	    except Exception as e:
	        # UI boundary: surface the failure as text instead of raising.
	        return f"Error: {e}", None
	    finally:
	        # The uploaded copy is only needed for the duration of the call.
	        if os.path.exists(image_path):
	            os.remove(image_path)


	def task_update(task):
	    """Return visibility updates for the three fine-grained controls.

	    The first update shows its control only when the selected task name
	    contains "fine-grained"; the other two controls are always hidden.
	    """
	    show_fine_grained = "fine-grained" in task
	    return [
	        gr.update(visible=show_fine_grained),
	        gr.update(visible=False),
	        gr.update(visible=False),
	    ]


	def fine_grained_update(task):
	    """Return two updates: one for the colour control, one for the box control.

	    * ``"box"``   -> hide and clear the colour control, show the box control.
	    * ``"color"`` -> show the colour control, hide and clear the box control.
	    * anything else (including ``None``) -> hide and clear both, so the
	      function always returns one update per output instead of ``None``.
	    """
	    if task == "box":
	        return [
	            gr.update(visible=False, value=""),
	            gr.update(visible=True),
	        ]
	    if task == "color":
	        return [
	            gr.update(visible=True),
	            gr.update(visible=False, value=""),
	        ]
	    # No recognised fine-grained type is selected.
	    return [
	        gr.update(visible=False, value=""),
	        gr.update(visible=False, value=""),
	    ]


	def cleanup_old_files(max_age_seconds=3600, folders=None):
	    """Delete regular files older than *max_age_seconds* from *folders*.

	    Args:
	        max_age_seconds: Age threshold based on each file's modification
	            time. Defaults to one hour.
	        folders: Iterable of directory paths to scan (top level only).
	            Defaults to the upload and results folders.

	    Entries that are not regular files are skipped, and a file that
	    disappears while being examined is ignored rather than raising.
	    """
	    if folders is None:
	        folders = (UPLOAD_FOLDER, RESULTS_FOLDER)

	    cutoff = time.time() - max_age_seconds
	    for folder in folders:
	        for entry in Path(folder).glob("*"):
	            try:
	                # unlink() cannot remove directories, so only touch files.
	                if entry.is_file() and entry.stat().st_mtime < cutoff:
	                    entry.unlink()
	            except FileNotFoundError:
	                # Removed by someone else between listing and deletion.
	                continue


	# Gradio UI definition; the Blocks app is bound to `demo`.
	# NOTE(review): this listing carries no indentation, so the nesting of the
	# Row/Column containers below cannot be read off the text - confirm the
	# intended layout before relying on it.
	with gr.Blocks() as demo:
	with gr.Row():
	with gr.Column():
	# Image upload; handed to the click handler as a file path.
	# Label text means "Upload image".
	image_input = gr.Image(type="filepath", label="上传图片")
	# OCR mode selector, preselected to "plain texts OCR".
	# Label text means "Select GOT mode".
	task_dropdown = gr.Dropdown(
	choices=[
	"plain texts OCR",
	"format texts OCR",
	"plain multi-crop OCR",
	"format multi-crop OCR",
	"plain fine-grained OCR",
	"format fine-grained OCR",
	],
	label="选择GOT模式",
	value="plain texts OCR",
	)
	# The next three controls start hidden; the change handlers wired up
	# further down toggle their visibility.
	fine_grained_dropdown = gr.Dropdown(choices=["box", "color"], label="fine-grained type", visible=False)
	color_dropdown = gr.Dropdown(choices=["red", "green", "blue"], label="color list", visible=False)
	box_input = gr.Textbox(label="input box: [x1,y1,x2,y2]", placeholder="e.g., [0,0,100,100]", visible=False)
	submit_button = gr.Button("Submit")

	with gr.Column():
	# Receives the first value returned by run_GOT.
	ocr_result = gr.Textbox(label="GOT output")

	with gr.Column():
	# Markdown text means: "If a formatted mode is selected, the mathpix
	# result is rendered automatically below:".
	gr.Markdown("如果选择带格式的模式，mathpix结果将自动呈现如下:")
	# Receives the second value returned by run_GOT.
	html_result = gr.HTML(label="rendered html", show_label=True)

	# Changing the mode updates all three fine-grained controls; changing the
	# fine-grained type updates only the colour and box controls.
	task_dropdown.change(task_update, inputs=[task_dropdown], outputs=[fine_grained_dropdown, color_dropdown, box_input])
	fine_grained_dropdown.change(fine_grained_update, inputs=[fine_grained_dropdown], outputs=[color_dropdown, box_input])

	# Submit passes the five inputs to run_GOT in its positional parameter order.
	submit_button.click(run_GOT, inputs=[image_input, task_dropdown, fine_grained_dropdown, color_dropdown, box_input], outputs=[ocr_result, html_result])

	if __name__ == "__main__":
	    # Script entry point: clear out stale files once, then serve the UI.
	    cleanup_old_files()
	    demo.launch()