arad1367 committed on
Commit
96e1778
1 Parent(s): cbd54ec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -9
app.py CHANGED
@@ -9,6 +9,7 @@ import numpy as np
9
  import spaces
10
  import subprocess
11
 
 
12
  subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
13
 
14
  # Initialize Florence-2-large model and processor
@@ -75,7 +76,15 @@ def plot_image_with_bboxes(image_np, bboxes, labels=None):
75
  if labels and i < len(labels):
76
  ax.text(x, y, labels[i], color=color, fontsize=8, bbox=dict(facecolor='white', alpha=0.7))
77
  plt.axis('off')
78
- return fig
 
 
 
 
 
 
 
 
79
 
80
  # Gradio function to process uploaded images
81
  @spaces.GPU
@@ -117,16 +126,12 @@ def process_image(image_path):
117
  Image Captioning:
118
  - Simple Caption: {caption_result['<CAPTION>']}
119
  - Detailed Caption: {detailed_caption_result['<DETAILED_CAPTION>']}
120
-
121
  Object Detection:
122
  - Detected {len(od_bboxes)} objects
123
-
124
  OCR:
125
  {ocr_result['<OCR>']}
126
-
127
  Phrase Grounding:
128
  - Grounded {len(pg_bboxes)} phrases from the simple caption
129
-
130
  Cascaded Tasks:
131
  - Grounded {len(cascaded_bboxes)} phrases from the detailed caption
132
  """
@@ -142,9 +147,9 @@ with gr.Blocks(theme='NoCrypt/miku') as demo:
142
 
143
  image_input = gr.Image(type="filepath")
144
  text_output = gr.Textbox()
145
- plot_output_1 = gr.Plot()
146
- plot_output_2 = gr.Plot()
147
- plot_output_3 = gr.Plot()
148
 
149
  image_input.upload(process_image, inputs=[image_input], outputs=[text_output, plot_output_1, plot_output_2, plot_output_3])
150
 
@@ -159,4 +164,4 @@ with gr.Blocks(theme='NoCrypt/miku') as demo:
159
  """
160
  gr.HTML(footer)
161
 
162
- demo.launch()
 
9
  import spaces
10
  import subprocess
11
 
12
+ # Ensure flash-attn is installed correctly
13
  subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
14
 
15
  # Initialize Florence-2-large model and processor
 
76
  if labels and i < len(labels):
77
  ax.text(x, y, labels[i], color=color, fontsize=8, bbox=dict(facecolor='white', alpha=0.7))
78
  plt.axis('off')
79
+
80
+ # Save the plot to a BytesIO object
81
+ from io import BytesIO
82
+ buf = BytesIO()
83
+ plt.savefig(buf, format='png')
84
+ plt.close()
85
+ buf.seek(0)
86
+
87
+ return buf
88
 
89
  # Gradio function to process uploaded images
90
  @spaces.GPU
 
126
  Image Captioning:
127
  - Simple Caption: {caption_result['<CAPTION>']}
128
  - Detailed Caption: {detailed_caption_result['<DETAILED_CAPTION>']}
 
129
  Object Detection:
130
  - Detected {len(od_bboxes)} objects
 
131
  OCR:
132
  {ocr_result['<OCR>']}
 
133
  Phrase Grounding:
134
  - Grounded {len(pg_bboxes)} phrases from the simple caption
 
135
  Cascaded Tasks:
136
  - Grounded {len(cascaded_bboxes)} phrases from the detailed caption
137
  """
 
147
 
148
  image_input = gr.Image(type="filepath")
149
  text_output = gr.Textbox()
150
+ plot_output_1 = gr.Image()
151
+ plot_output_2 = gr.Image()
152
+ plot_output_3 = gr.Image()
153
 
154
  image_input.upload(process_image, inputs=[image_input], outputs=[text_output, plot_output_1, plot_output_2, plot_output_3])
155
 
 
164
  """
165
  gr.HTML(footer)
166
 
167
+ demo.launch()