Mageia committed on
Commit
7ee1423
1 Parent(s): ba22048

fix: got_ocr

Browse files
Files changed (1) hide show
  1. app.py +69 -6
app.py CHANGED
@@ -70,21 +70,84 @@ def on_image_select(evt: gr.SelectData):
70
  return None
71
 
72
 
73
- def perform_ocr(selected_index, image_paths):
74
- if selected_index is not None and image_paths and 0 <= selected_index < len(image_paths):
75
- selected_image = image_paths[selected_index][0]
76
- # 这里添加OCR处理逻辑
77
- return got_ocr(model, tokenizer, selected_image)
78
- return "请先选择一个图片"
 
 
 
 
 
 
 
 
79
 
80
 
81
  with gr.Blocks() as demo:
82
  pdf_input = gr.File(label="上传PDF文件")
83
  image_gallery = gr.Gallery(label="PDF页面预览", columns=3, height="auto")
84
  selected_index = gr.State(None)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  ocr_button = gr.Button("开始OCR识别")
86
  ocr_result = gr.Textbox(label="OCR结果")
87
 
 
 
 
 
 
88
  pdf_input.upload(fn=process_pdf, inputs=pdf_input, outputs=image_gallery)
89
  image_gallery.select(fn=on_image_select, inputs=[], outputs=selected_index)
90
  ocr_button.click(fn=perform_ocr, inputs=[selected_index, image_gallery], outputs=ocr_result)
 
70
  return None
71
 
72
 
def perform_ocr(selected_index, image_gallery, task, fine_grained_type, color, box):
    """Run GOT OCR on the gallery image the user has selected.

    Args:
        selected_index: Position of the chosen item in ``image_gallery``, or
            ``None`` when nothing has been selected yet.
        image_gallery: Current gallery value (a sequence of items); may be
            ``None`` or empty when no pages are loaded.
        task: GOT mode name, forwarded to ``got_ocr`` as ``got_mode``.
        fine_grained_type: ``"color"`` or ``"box"`` to forward the matching
            extra argument; any other value forwards neither. Always passed
            to ``got_ocr`` as ``fine_grained_mode``.
        color: Colour name; only forwarded when ``fine_grained_type`` is
            ``"color"``.
        box: Box text; only forwarded when ``fine_grained_type`` is ``"box"``.

    Returns:
        The first element of the pair returned by ``got_ocr``, or a prompt
        string asking the user to select an image when there is no valid
        selection.
    """
    no_selection_message = "请先选择一张图片"

    # Truthiness rather than len(): the gallery value can be None, and
    # len(None) would raise TypeError instead of showing the prompt.
    if selected_index is None or not image_gallery:
        return no_selection_message

    # A stale index (for example one kept from a longer, earlier gallery)
    # must not raise IndexError, and a negative one must not wrap around.
    if not 0 <= selected_index < len(image_gallery):
        return no_selection_message

    # NOTE(review): the gallery item is handed to got_ocr unchanged. If the
    # gallery yields (image, caption) pairs, got_ocr may need only the image
    # element -- confirm against got_ocr's signature.
    selected_image = image_gallery[selected_index]

    # Only the argument that matches the chosen fine-grained type is sent;
    # the other one is blanked out.
    ocr_color = color if fine_grained_type == "color" else ""
    ocr_box = box if fine_grained_type == "box" else ""

    result, _ = got_ocr(
        model,
        tokenizer,
        selected_image,
        got_mode=task,
        fine_grained_mode=fine_grained_type,
        ocr_color=ocr_color,
        ocr_box=ocr_box,
    )
    return result
87
 
88
 
with gr.Blocks() as demo:
    # Upload widget, page preview gallery, and the index of the clicked page.
    pdf_input = gr.File(label="上传PDF文件")
    image_gallery = gr.Gallery(label="PDF页面预览", columns=3, height="auto")
    selected_index = gr.State(None)

    task_dropdown = gr.Dropdown(
        choices=[
            "plain texts OCR",
            "format texts OCR",
            "plain multi-crop OCR",
            "format multi-crop OCR",
            "plain fine-grained OCR",
            "format fine-grained OCR",
        ],
        label="选择GOT模式",
        value="plain texts OCR",
    )
    # Fine-grained controls start hidden; the change handlers below reveal
    # whichever ones apply to the current selection.
    fine_grained_dropdown = gr.Dropdown(choices=["box", "color"], label="fine-grained类型", visible=False)
    color_dropdown = gr.Dropdown(choices=["red", "green", "blue"], label="颜色列表", visible=False)
    box_input = gr.Textbox(label="输入框: [x1,y1,x2,y2]", placeholder="例如: [0,0,100,100]", visible=False)

    def task_update(task, fine_grained_type=None):
        """Return visibility updates for the three fine-grained controls.

        The updates are ordered as (fine_grained_dropdown, color_dropdown,
        box_input). The type dropdown is shown only for tasks whose name
        contains "fine-grained". The colour/box input is shown again when a
        fine-grained task is re-selected and a type is already chosen, since
        the type dropdown's own change event will not fire in that case.
        """
        is_fine_grained = "fine-grained" in (task or "")
        return [
            gr.update(visible=is_fine_grained),
            gr.update(visible=is_fine_grained and fine_grained_type == "color"),
            gr.update(visible=is_fine_grained and fine_grained_type == "box"),
        ]

    def fine_grained_update(fine_grained_type):
        """Return visibility updates for (color_dropdown, box_input).

        Exactly the input matching the chosen type is shown; both are hidden
        for any other value.
        """
        return [
            gr.update(visible=fine_grained_type == "color"),
            gr.update(visible=fine_grained_type == "box"),
        ]

    task_dropdown.change(
        task_update,
        inputs=[task_dropdown, fine_grained_dropdown],
        outputs=[fine_grained_dropdown, color_dropdown, box_input],
    )
    fine_grained_dropdown.change(
        fine_grained_update,
        inputs=[fine_grained_dropdown],
        outputs=[color_dropdown, box_input],
    )

    ocr_button = gr.Button("开始OCR识别")
    ocr_result = gr.Textbox(label="OCR结果")

    # Single click handler that passes every argument perform_ocr requires.
    # The earlier two-input registration is intentionally gone: it would call
    # perform_ocr with missing arguments on every click.
    ocr_button.click(
        fn=perform_ocr,
        inputs=[selected_index, image_gallery, task_dropdown, fine_grained_dropdown, color_dropdown, box_input],
        outputs=ocr_result,
    )

    pdf_input.upload(fn=process_pdf, inputs=pdf_input, outputs=image_gallery)
    image_gallery.select(fn=on_image_select, inputs=[], outputs=selected_index)