Spaces:

Mageia
/

GOT-OCR-Optimize

Running

App Files Community

Mageia committed on Oct 8

Commit

7ee1423

•

1 Parent(s): ba22048

fix: got_ocr

Browse files

Files changed (1) hide show

app.py +69 -6

app.py CHANGED Viewed

@@ -70,21 +70,84 @@ def on_image_select(evt: gr.SelectData):
     return None
-def perform_ocr(selected_index, image_paths):
-    if selected_index is not None and image_paths and 0 <= selected_index < len(image_paths):
-        selected_image = image_paths[selected_index][0]
-        # OCR processing goes here: the first element of the selected entry is passed to got_ocr
-        return got_ocr(model, tokenizer, selected_image)
-    return "请先选择一个图片"
 with gr.Blocks() as demo:
     # Widgets: PDF upload, page-preview gallery, selected-index state, OCR trigger and result box.
     pdf_input = gr.File(label="上传PDF文件")
     image_gallery = gr.Gallery(label="PDF页面预览", columns=3, height="auto")
     selected_index = gr.State(None)
     ocr_button = gr.Button("开始OCR识别")
     ocr_result = gr.Textbox(label="OCR结果")
     # Event wiring: an upload fills the gallery, a gallery selection updates the state,
     # and a button click runs perform_ocr with the selected index and the gallery value.
     pdf_input.upload(fn=process_pdf, inputs=pdf_input, outputs=image_gallery)
     image_gallery.select(fn=on_image_select, inputs=[], outputs=selected_index)
     ocr_button.click(fn=perform_ocr, inputs=[selected_index, image_gallery], outputs=ocr_result)

     return None
+# 更新perform_ocr函数的输入参数
+def perform_ocr(selected_index, image_gallery, task, fine_grained_type, color, box):
+    if selected_index is None or len(image_gallery) == 0:
+        return "请先选择一张图片"
+    selected_image = image_gallery[selected_index]
+    # 根据选择的任务和参数调用GOT OCR
+    got_mode = task
+    ocr_color = color if fine_grained_type == "color" else ""
+    ocr_box = box if fine_grained_type == "box" else ""
+    result, _ = got_ocr(model, tokenizer, selected_image, got_mode=got_mode, fine_grained_mode=fine_grained_type, ocr_color=ocr_color, ocr_box=ocr_box)
+    return result
 with gr.Blocks() as demo:
     # PDF upload, page-preview gallery, and the state holding the selected gallery index.
     pdf_input = gr.File(label="上传PDF文件")
     image_gallery = gr.Gallery(label="PDF页面预览", columns=3, height="auto")
     selected_index = gr.State(None)
+    # GOT mode selector; its value is passed to perform_ocr as the task argument.
+    task_dropdown = gr.Dropdown(
+        choices=[
+            "plain texts OCR",
+            "format texts OCR",
+            "plain multi-crop OCR",
+            "format multi-crop OCR",
+            "plain fine-grained OCR",
+            "format fine-grained OCR",
+        ],
+        label="选择GOT模式",
+        value="plain texts OCR",
+    )
+    # Fine-grained controls start hidden; task_update and fine_grained_update toggle their visibility.
+    fine_grained_dropdown = gr.Dropdown(choices=["box", "color"], label="fine-grained类型", visible=False)
+    color_dropdown = gr.Dropdown(choices=["red", "green", "blue"], label="颜色列表", visible=False)
+    box_input = gr.Textbox(label="输入框: [x1,y1,x2,y2]", placeholder="例如: [0,0,100,100]", visible=False)
+    def task_update(task):
+        if "fine-grained" in task:
+            return [
+                gr.update(visible=True),
+                gr.update(visible=False),
+                gr.update(visible=False),
+            ]
+        else:
+            return [
+                gr.update(visible=False),
+                gr.update(visible=False),
+                gr.update(visible=False),
+            ]
+    def fine_grained_update(fine_grained_type):
+        if fine_grained_type == "color":
+            return [
+                gr.update(visible=True),
+                gr.update(visible=False),
+            ]
+        elif fine_grained_type == "box":
+            return [
+                gr.update(visible=False),
+                gr.update(visible=True),
+            ]
+        else:
+            return [
+                gr.update(visible=False),
+                gr.update(visible=False),
+            ]
+    task_dropdown.change(task_update, inputs=[task_dropdown], outputs=[fine_grained_dropdown, color_dropdown, box_input])
+    fine_grained_dropdown.change(fine_grained_update, inputs=[fine_grained_dropdown], outputs=[color_dropdown, box_input])
     ocr_button = gr.Button("开始OCR识别")
     ocr_result = gr.Textbox(label="OCR结果")
+    # 更新ocr_button的click事件，传递所有必要的参数
+    ocr_button.click(
+        fn=perform_ocr, inputs=[selected_index, image_gallery, task_dropdown, fine_grained_dropdown, color_dropdown, box_input], outputs=ocr_result
+    )
     pdf_input.upload(fn=process_pdf, inputs=pdf_input, outputs=image_gallery)
     image_gallery.select(fn=on_image_select, inputs=[], outputs=selected_index)
     ocr_button.click(fn=perform_ocr, inputs=[selected_index, image_gallery], outputs=ocr_result)