Spaces:

merve
/

UDOP

Running

App Files Files Community

merve HF staff committed on Mar 10

Commit

ab5180d

•

1 Parent(s): b9176b4

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -6

app.py CHANGED Viewed

@@ -15,8 +15,9 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 @spaces.GPU
 def udop_box_inference(image, text_prompt, box_coordinates):
-    box_coordinates = [box_coordinates[0], box_coordinates[1], box_coordinates[3], box_coordinates[4]]
     extracted_image = extract_box(image, box_coordinates)
     extracted_image.save("cropped_image.png")
@@ -50,6 +51,11 @@ def normalize_bbox(bbox, width, height):
 def extract_box(image, coordinates):
     x, y, x2, y2 = coordinates
     cropped_image = image.crop((x, y, x2, y2))
     return cropped_image
@@ -61,9 +67,10 @@ def infer_box(prompts, text_prompts):
     image = prompts["image"]
     if image is None:
       gr.Error("Please upload an image and draw a box before submitting")
-    points = prompts["points"][0]
-    if points is None:
-      gr.Error("Please draw a box before submitting.")
     return udop_box_inference(image, text_prompts, points)
@@ -80,7 +87,16 @@ with gr.Blocks(title="UDOP") as demo:
       with gr.Column():
         output = gr.Textbox(label="UDOP Output")
   btn.click(infer_box, inputs=[im,text_prompt], outputs=[output])
 demo.launch(debug=True)

 @spaces.GPU
 def udop_box_inference(image, text_prompt, box_coordinates):
+    if box_coordinates != []:
+        box_coordinates = [box_coordinates[0], box_coordinates[1], box_coordinates[3], box_coordinates[4]]
     extracted_image = extract_box(image, box_coordinates)
     extracted_image.save("cropped_image.png")
 def extract_box(image, coordinates):
+  if type(image) == str:
+    image = Image.open(image)
+  if coordinates==[]:
+    return image
+  else:
     x, y, x2, y2 = coordinates
     cropped_image = image.crop((x, y, x2, y2))
     return cropped_image
     image = prompts["image"]
     if image is None:
       gr.Error("Please upload an image and draw a box before submitting")
+    try:
+      points = prompts["points"][0]
+    except:
+      points = []
     return udop_box_inference(image, text_prompts, points)
       with gr.Column():
         output = gr.Textbox(label="UDOP Output")
+  with gr.Row():
+    gr.Examples(
+          examples = [[PromptValue(image = "/content/dummy_pdf.png",
+                            points = [[87.0, 908.0, 2.0, 456.0, 972.0, 3.0]]), "Question answering. What is the objective?"],
+                      [PromptValue(image = "/content/docvqa_example (3).png",
+                            points = [[]]), "Question answering. How much is the total?"]],
+          inputs=[im, text_prompt],
+          outputs=output,
+          fn=infer_box,
+          )
   btn.click(infer_box, inputs=[im,text_prompt], outputs=[output])
 demo.launch(debug=True)