import cv2
import gradio as gr

from autodistill.core.custom_detection_model import CustomDetectionModel
from autodistill.detection import CaptionOntology
from autodistill.utils import plot
from autodistill_gpt_4v import GPT4V
from autodistill_grounded_sam import GroundedSAM
MARKDOWN = """ |
|
# Grounded SAM-GPT4V |
|
|
|
Use Grounding DINO, Meta AI's Segment Anything (SAM) and GPT-4V to label specific objects. |
|
|
|
Visit [awesome-openai-vision-api-experiments](https://github.com/roboflow/awesome-openai-vision-api-experiments) |
|
repository to find more OpenAI Vision API experiments or contribute your own.""" |


def respond(api_key, input_image, dino_prompt, gpt_prompt):
    # Save the uploaded frame to disk so the Autodistill models can read it by path.
    cv2.imwrite("input.jpg", input_image)

    # Chain two Autodistill models: Grounded SAM detects and segments regions
    # matching the detection prompt, then GPT-4V classifies each region
    # against the comma-separated labels in the classification prompt.
    DINOGPT = CustomDetectionModel(
        detection_model=GroundedSAM(
            CaptionOntology({dino_prompt: dino_prompt})
        ),
        classification_model=GPT4V(
            CaptionOntology({k: k for k in gpt_prompt.split(", ")}),
            api_key=api_key
        )
    )

    results = DINOGPT.predict("input.jpg")

    # Draw the labeled detections on the image and return the annotated frame.
    result = plot(
        image=cv2.imread("input.jpg"),
        detections=results,
        classes=gpt_prompt.split(", "),
        raw=True
    )

    return result
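
# Illustrative example (hypothetical prompts): with dino_prompt = "car" and
# gpt_prompt = "sedan, suv, truck", Grounded SAM would detect and mask every
# "car", and GPT-4V would then assign each masked region one of the three labels.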


with gr.Blocks() as demo:
    gr.Markdown(MARKDOWN)
    with gr.Row():
        with gr.Column():
            api_key_textbox = gr.Textbox(
                label="OpenAI API Key", type="password")
            dino_prompt = gr.Textbox(label="Grounded SAM Prompt")
            gpt_prompt = gr.Textbox(label="GPT-4V Prompt")
            input_image = gr.Image(type="numpy", label="Input Image")
        with gr.Column():
            output_image = gr.Image(type="numpy", label="Output Image")
            submit_button = gr.Button("Submit")

    submit_button.click(
        fn=respond,
        inputs=[api_key_textbox, input_image, dino_prompt, gpt_prompt],
        outputs=[output_image]
    )

demo.launch()
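
# To try this demo locally, install the dependencies (assuming these PyPI
# package names for the Autodistill plugins):
#   pip install gradio opencv-python autodistill autodistill-grounded-sam autodistill-gpt-4v
# then run this script and open the local URL that Gradio prints.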