Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -9,6 +9,7 @@ import numpy as np
|
|
9 |
import spaces
|
10 |
import subprocess
|
11 |
|
|
|
12 |
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
|
13 |
|
14 |
# Initialize Florence-2-large model and processor
|
@@ -75,7 +76,15 @@ def plot_image_with_bboxes(image_np, bboxes, labels=None):
|
|
75 |
if labels and i < len(labels):
|
76 |
ax.text(x, y, labels[i], color=color, fontsize=8, bbox=dict(facecolor='white', alpha=0.7))
|
77 |
plt.axis('off')
|
78 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
|
80 |
# Gradio function to process uploaded images
|
81 |
@spaces.GPU
|
@@ -117,16 +126,12 @@ def process_image(image_path):
|
|
117 |
Image Captioning:
|
118 |
- Simple Caption: {caption_result['<CAPTION>']}
|
119 |
- Detailed Caption: {detailed_caption_result['<DETAILED_CAPTION>']}
|
120 |
-
|
121 |
Object Detection:
|
122 |
- Detected {len(od_bboxes)} objects
|
123 |
-
|
124 |
OCR:
|
125 |
{ocr_result['<OCR>']}
|
126 |
-
|
127 |
Phrase Grounding:
|
128 |
- Grounded {len(pg_bboxes)} phrases from the simple caption
|
129 |
-
|
130 |
Cascaded Tasks:
|
131 |
- Grounded {len(cascaded_bboxes)} phrases from the detailed caption
|
132 |
"""
|
@@ -142,9 +147,9 @@ with gr.Blocks(theme='NoCrypt/miku') as demo:
|
|
142 |
|
143 |
image_input = gr.Image(type="filepath")
|
144 |
text_output = gr.Textbox()
|
145 |
-
plot_output_1 = gr.
|
146 |
-
plot_output_2 = gr.
|
147 |
-
plot_output_3 = gr.
|
148 |
|
149 |
image_input.upload(process_image, inputs=[image_input], outputs=[text_output, plot_output_1, plot_output_2, plot_output_3])
|
150 |
|
@@ -159,4 +164,4 @@ with gr.Blocks(theme='NoCrypt/miku') as demo:
|
|
159 |
"""
|
160 |
gr.HTML(footer)
|
161 |
|
162 |
-
demo.launch()
|
|
|
9 |
import spaces
|
10 |
import subprocess
|
11 |
|
12 |
+
# Ensure flash-attn is installed correctly
|
13 |
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
|
14 |
|
15 |
# Initialize Florence-2-large model and processor
|
|
|
76 |
if labels and i < len(labels):
|
77 |
ax.text(x, y, labels[i], color=color, fontsize=8, bbox=dict(facecolor='white', alpha=0.7))
|
78 |
plt.axis('off')
|
79 |
+
|
80 |
+
# Save the plot to a BytesIO object
|
81 |
+
from io import BytesIO
|
82 |
+
buf = BytesIO()
|
83 |
+
plt.savefig(buf, format='png')
|
84 |
+
plt.close()
|
85 |
+
buf.seek(0)
|
86 |
+
|
87 |
+
return buf
|
88 |
|
89 |
# Gradio function to process uploaded images
|
90 |
@spaces.GPU
|
|
|
126 |
Image Captioning:
|
127 |
- Simple Caption: {caption_result['<CAPTION>']}
|
128 |
- Detailed Caption: {detailed_caption_result['<DETAILED_CAPTION>']}
|
|
|
129 |
Object Detection:
|
130 |
- Detected {len(od_bboxes)} objects
|
|
|
131 |
OCR:
|
132 |
{ocr_result['<OCR>']}
|
|
|
133 |
Phrase Grounding:
|
134 |
- Grounded {len(pg_bboxes)} phrases from the simple caption
|
|
|
135 |
Cascaded Tasks:
|
136 |
- Grounded {len(cascaded_bboxes)} phrases from the detailed caption
|
137 |
"""
|
|
|
147 |
|
148 |
image_input = gr.Image(type="filepath")
|
149 |
text_output = gr.Textbox()
|
150 |
+
plot_output_1 = gr.Image()
|
151 |
+
plot_output_2 = gr.Image()
|
152 |
+
plot_output_3 = gr.Image()
|
153 |
|
154 |
image_input.upload(process_image, inputs=[image_input], outputs=[text_output, plot_output_1, plot_output_2, plot_output_3])
|
155 |
|
|
|
164 |
"""
|
165 |
gr.HTML(footer)
|
166 |
|
167 |
+
demo.launch()
|