SAM-GPT4V / app.py
capjamesg's picture
add space code
025f810
import os
import tempfile

import cv2
import gradio as gr
from autodistill.core.custom_detection_model import CustomDetectionModel
from autodistill.detection import CaptionOntology
from autodistill.utils import plot
from autodistill_gpt_4v import GPT4V
from autodistill_grounded_sam import GroundedSAM
# Markdown text rendered at the top of the Gradio page via gr.Markdown(MARKDOWN).
MARKDOWN = """
# Grounded SAM-GPT4V
Use Grounding DINO, Meta AI's Segment Anything (SAM) and GPT-4V to label specific objects.
Visit [awesome-openai-vision-api-experiments](https://github.com/roboflow/awesome-openai-vision-api-experiments)
repository to find more OpenAI Vision API experiments or contribute your own."""
def _parse_classes(prompt):
    """Split a comma-separated prompt into stripped, non-empty class names."""
    return [name.strip() for name in prompt.split(",") if name.strip()]


def respond(api_key, input_image, dino_prompt, gpt_prompt):
    """Detect objects with Grounded SAM, classify them with GPT-4V, and plot.

    Args:
        api_key: OpenAI API key forwarded to the GPT-4V classifier.
        input_image: Image array from the ``gr.Image(type="numpy")`` input.
            Gradio supplies it in RGB channel order; ``None`` when the user
            has not uploaded anything.
        dino_prompt: Text prompt used as the single Grounded SAM class.
        gpt_prompt: Comma-separated list of class names for GPT-4V.

    Returns:
        The annotated image as an RGB array suitable for a Gradio image
        output.

    Raises:
        gr.Error: If a required input is missing or the image cannot be
            written to disk.
    """
    if input_image is None:
        raise gr.Error("Please upload an input image.")
    if not api_key:
        raise gr.Error("Please provide an OpenAI API key.")

    dino_prompt = (dino_prompt or "").strip()
    if not dino_prompt:
        raise gr.Error("Please provide a Grounded SAM prompt.")

    # Tolerate "a,b", "a, b" and stray whitespace; parse once and reuse so
    # the ontology and the plot labels can never disagree.
    classes = _parse_classes(gpt_prompt or "")
    if not classes:
        raise gr.Error("Please provide at least one GPT-4V class.")

    # OpenCV reads and writes BGR while Gradio hands us RGB. Without this
    # swap the JPEG seen by the models has red and blue exchanged.
    bgr_image = cv2.cvtColor(input_image, cv2.COLOR_RGB2BGR)

    model = CustomDetectionModel(
        detection_model=GroundedSAM(
            CaptionOntology({dino_prompt: dino_prompt}),
        ),
        classification_model=GPT4V(
            CaptionOntology({name: name for name in classes}),
            api_key=api_key,
        ),
    )

    # A per-request temporary directory keeps concurrent requests from
    # overwriting each other's image and cleans up afterwards.
    with tempfile.TemporaryDirectory() as workdir:
        image_path = os.path.join(workdir, "input.jpg")
        if not cv2.imwrite(image_path, bgr_image):
            raise gr.Error("Could not save the input image for processing.")
        detections = model.predict(image_path)

    annotated = plot(
        image=bgr_image,
        detections=detections,
        classes=classes,
        raw=True,
    )
    # Convert back to RGB so the Gradio output shows the original colours.
    return cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB)
# Build the page: inputs in the left column, result and trigger on the right.
with gr.Blocks() as demo:
    gr.Markdown(MARKDOWN)

    with gr.Row():
        with gr.Column():
            api_key_textbox = gr.Textbox(
                label="OpenAI API KEY",
                type="password",
            )
            dino_prompt = gr.Textbox(label="Grounded SAM Prompt")
            gpt_prompt = gr.Textbox(label="GPT-4V Prompt")
            input_image = gr.Image(type="numpy", label="Input Image")

        with gr.Column():
            output_image = gr.Image(type="numpy", label="Output Image")
            submit_button = gr.Button()

    # Input order must match the parameter order of respond().
    submit_button.click(
        fn=respond,
        inputs=[api_key_textbox, input_image, dino_prompt, gpt_prompt],
        outputs=[output_image],
    )

# Start the Gradio server when the script is executed.
demo.launch()