Spaces: mrm8488/idefics-9b-ft-describe-diffusion-mj

Runtime error

App Files Community

add upload image component

by radames - opened Sep 23, 2023

base: refs/heads/main ← from: refs/pr/1

Discussion Files changed

+11 -7

Files changed (1) hide show

app.py +11 -7

app.py CHANGED Viewed

@@ -14,8 +14,11 @@ processor = AutoProcessor.from_pretrained(config.base_model_name_or_path)
 model = model.to(device)
 model.eval()
-def predict(prompt, image_url, max_length):
-    image = processor.image_processor.fetch_images(image_url)
     prompts = [[image, prompt]]
     inputs = processor(prompts[0], return_tensors="pt").to(device)
     generated_ids = model.generate(**inputs, max_length=max_length)
@@ -28,17 +31,18 @@ title = "Midjourney-like Image Captioning with IDEFICS"
 description = "Gradio Demo for generating *Midjourney* like captions (describe functionality) with **IDEFICS**"
 examples = [
-    ["Describe the following image:", "https://miro.medium.com/v2/resize:fit:0/1*sTXgMwDUW0pk-1yK4iHYFw.png", 64],
-    ["Describe the following image:", "https://miro.medium.com/v2/resize:fit:1400/0*6as5rHi0sgG4W2Tq.png", 64],
-    ["Describe the following image:", "https://cdn.arstechnica.net/wp-content/uploads/2023/06/zoomout_2-1440x807.jpg", 64],
-    ["Describe the following image:", "https://framerusercontent.com/images/inZdRVn7eafZNvaVre2iW1a538.png", 64],
-    ["Describe the following image:", "https://hips.hearstapps.com/hmg-prod/images/cute-photos-of-cats-in-grass-1593184777.jpg", 64]
 ]
 io = gr.Interface(fn=predict,
                   inputs=[
                       gr.Textbox(label="Prompt", value="Describe the following image:"),
                       gr.Textbox(label="image URL", placeholder="Insert the URL of the image to be described"),
                       gr.Slider(label="Max tokens", value=64, max=128, min=16, step=8)
                   ],
                   outputs=[

 model = model.to(device)
 model.eval()
+def predict(prompt, image_url, image_pil=None, max_length=64):
+    if image_pil is not None:
+        image = image_pil
+    else:
+        image = processor.image_processor.fetch_images(image_url)
     prompts = [[image, prompt]]
     inputs = processor(prompts[0], return_tensors="pt").to(device)
     generated_ids = model.generate(**inputs, max_length=max_length)
 description = "Gradio Demo for generating *Midjourney* like captions (describe functionality) with **IDEFICS**"
 examples = [
+    ["Describe the following image:", "https://miro.medium.com/v2/resize:fit:0/1*sTXgMwDUW0pk-1yK4iHYFw.png", None, 64],
+    ["Describe the following image:", "https://miro.medium.com/v2/resize:fit:1400/0*6as5rHi0sgG4W2Tq.png", None, 64],
+    ["Describe the following image:", "https://cdn.arstechnica.net/wp-content/uploads/2023/06/zoomout_2-1440x807.jpg", None, 64],
+    ["Describe the following image:", "https://framerusercontent.com/images/inZdRVn7eafZNvaVre2iW1a538.png", None, 64],
+    ["Describe the following image:", "https://hips.hearstapps.com/hmg-prod/images/cute-photos-of-cats-in-grass-1593184777.jpg", None, 64]
 ]
 io = gr.Interface(fn=predict,
                   inputs=[
                       gr.Textbox(label="Prompt", value="Describe the following image:"),
                       gr.Textbox(label="image URL", placeholder="Insert the URL of the image to be described"),
+                      gr.Image(label="or upload an image", type="pil"),
                       gr.Slider(label="Max tokens", value=64, max=128, min=16, step=8)
                   ],
                   outputs=[