arad1367 committed on
Commit
96e1778
1 Parent(s): cbd54ec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -9
app.py CHANGED
@@ -9,6 +9,7 @@ import numpy as np
9
  import spaces
10
  import subprocess
11
 
 
12
  subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
13
 
14
  # Initialize Florence-2-large model and processor
@@ -75,7 +76,15 @@ def plot_image_with_bboxes(image_np, bboxes, labels=None):
75
  if labels and i < len(labels):
76
  ax.text(x, y, labels[i], color=color, fontsize=8, bbox=dict(facecolor='white', alpha=0.7))
77
  plt.axis('off')
78
- return fig
 
 
 
 
 
 
 
 
79
 
80
  # Gradio function to process uploaded images
81
  @spaces.GPU
@@ -117,16 +126,12 @@ def process_image(image_path):
117
  Image Captioning:
118
  - Simple Caption: {caption_result['<CAPTION>']}
119
  - Detailed Caption: {detailed_caption_result['<DETAILED_CAPTION>']}
120
-
121
  Object Detection:
122
  - Detected {len(od_bboxes)} objects
123
-
124
  OCR:
125
  {ocr_result['<OCR>']}
126
-
127
  Phrase Grounding:
128
  - Grounded {len(pg_bboxes)} phrases from the simple caption
129
-
130
  Cascaded Tasks:
131
  - Grounded {len(cascaded_bboxes)} phrases from the detailed caption
132
  """
@@ -142,9 +147,9 @@ with gr.Blocks(theme='NoCrypt/miku') as demo:
142
 
143
  image_input = gr.Image(type="filepath")
144
  text_output = gr.Textbox()
145
- plot_output_1 = gr.Plot()
146
- plot_output_2 = gr.Plot()
147
- plot_output_3 = gr.Plot()
148
 
149
  image_input.upload(process_image, inputs=[image_input], outputs=[text_output, plot_output_1, plot_output_2, plot_output_3])
150
 
@@ -159,4 +164,4 @@ with gr.Blocks(theme='NoCrypt/miku') as demo:
159
  """
160
  gr.HTML(footer)
161
 
162
- demo.launch()
 
9
  import spaces
10
  import subprocess
11
 
12
+ # Ensure flash-attn is installed correctly
13
  subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
14
 
15
  # Initialize Florence-2-large model and processor
 
76
  if labels and i < len(labels):
77
  ax.text(x, y, labels[i], color=color, fontsize=8, bbox=dict(facecolor='white', alpha=0.7))
78
  plt.axis('off')
79
+
80
+ # Save the plot to a BytesIO object
81
+ from io import BytesIO
82
+ buf = BytesIO()
83
+ plt.savefig(buf, format='png')
84
+ plt.close()
85
+ buf.seek(0)
86
+
87
+ return buf
88
 
89
  # Gradio function to process uploaded images
90
  @spaces.GPU
 
126
  Image Captioning:
127
  - Simple Caption: {caption_result['<CAPTION>']}
128
  - Detailed Caption: {detailed_caption_result['<DETAILED_CAPTION>']}
 
129
  Object Detection:
130
  - Detected {len(od_bboxes)} objects
 
131
  OCR:
132
  {ocr_result['<OCR>']}
 
133
  Phrase Grounding:
134
  - Grounded {len(pg_bboxes)} phrases from the simple caption
 
135
  Cascaded Tasks:
136
  - Grounded {len(cascaded_bboxes)} phrases from the detailed caption
137
  """
 
147
 
148
  image_input = gr.Image(type="filepath")
149
  text_output = gr.Textbox()
150
+ plot_output_1 = gr.Image()
151
+ plot_output_2 = gr.Image()
152
+ plot_output_3 = gr.Image()
153
 
154
  image_input.upload(process_image, inputs=[image_input], outputs=[text_output, plot_output_1, plot_output_2, plot_output_3])
155
 
 
164
  """
165
  gr.HTML(footer)
166
 
167
+ demo.launch()