Spaces: Running on A10G
Linoy Tsaban committed • Commit c633c03 • Parent(s): 9cd1904

Update app.py
caption image with BLIP
app.py
CHANGED
@@ -4,16 +4,37 @@ import numpy as np
 import requests
 import random
 from io import BytesIO
-from diffusers import StableDiffusionPipeline
-from diffusers import DDIMScheduler
 from utils import *
 from inversion_utils import *
 from modified_pipeline_semantic_stable_diffusion import SemanticStableDiffusionPipeline
 from torch import autocast, inference_mode
-import
+from diffusers import StableDiffusionPipeline
+from diffusers import DDIMScheduler
+from transformers import AutoProcessor, BlipForConditionalGeneration
 
+# load pipelines
+sd_model_id = "stabilityai/stable-diffusion-2-base"
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+sd_pipe = StableDiffusionPipeline.from_pretrained(sd_model_id).to(device)
+sd_pipe.scheduler = DDIMScheduler.from_config(sd_model_id, subfolder = "scheduler")
+sem_pipe = SemanticStableDiffusionPipeline.from_pretrained(sd_model_id).to(device)
+blip_processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)
 
 
+
+## IMAGE CAPTIONING ##
+def caption_image(input_image):
+
+    inputs = blip_processor(images=input_image, return_tensors="pt")
+    pixel_values = inputs.pixel_values.to(device)
+
+    generated_ids = blip_model.generate(pixel_values=pixel_values, max_length=50)
+    generated_caption = blip_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    return generated_caption
+
+
+## DDPM INVERSION AND SAMPLING ##
 def invert(x0, prompt_src="", num_diffusion_steps=100, cfg_scale_src = 3.5, eta = 1):
 
     # inverts a real image according to Algorithm 1 in https://arxiv.org/pdf/2304.06140.pdf,
@@ -35,7 +56,6 @@ def invert(x0, prompt_src="", num_diffusion_steps=100, cfg_scale_src = 3.5, eta
     return zs, wts
 
 
-
 def sample(zs, wts, prompt_tar="", cfg_scale_tar=15, skip=36, eta = 1):
 
     # reverse process (via Zs and wT)
@@ -49,85 +69,13 @@ def sample(zs, wts, prompt_tar="", cfg_scale_tar=15, skip=36, eta = 1):
     img = image_grid(x0_dec)
     return img
 
-# load pipelines
-sd_model_id = "stabilityai/stable-diffusion-2-base"
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-sd_pipe = StableDiffusionPipeline.from_pretrained(sd_model_id).to(device)
-sd_pipe.scheduler = DDIMScheduler.from_config(sd_model_id, subfolder = "scheduler")
-sem_pipe = SemanticStableDiffusionPipeline.from_pretrained(sd_model_id).to(device)
-
-
-def get_example():
-    case = [
-        [
-            'examples/source_a_cat_sitting_next_to_a_mirror.jpeg',
-            'a cat sitting next to a mirror',
-            'watercolor painting of a cat sitting next to a mirror',
-            100,
-            36,
-            15,
-            'Schnauzer dog', 'cat',
-            5.5,
-            1,
-            'examples/ddpm_sega_watercolor_painting_a_cat_sitting_next_to_a_mirror_plus_dog_minus_cat.png'
-        ],
-        [
-            'examples/source_a_man_wearing_a_brown_hoodie_in_a_crowded_street.jpeg',
-            'a man wearing a brown hoodie in a crowded street',
-            'a robot wearing a brown hoodie in a crowded street',
-            100,
-            36,
-            15,
-            'painting','',
-            10,
-            1,
-            'examples/ddpm_sega_painting_of_a_robot_wearing_a_brown_hoodie_in_a_crowded_street.png'
-        ],
-        [
-            'examples/source_wall_with_framed_photos.jpeg',
-            '',
-            '',
-            100,
-            36,
-            15,
-            'pink drawings of muffins','',
-            10,
-            1,
-            'examples/ddpm_sega_plus_pink_drawings_of_muffins.png'
-        ],
-        [
-            'examples/source_an_empty_room_with_concrete_walls.jpg',
-            'an empty room with concrete walls',
-            'glass walls',
-            100,
-            36,
-            17,
-            'giant elephant','',
-            10,
-            1,
-            'examples/ddpm_sega_glass_walls_gian_elephant.png'
-        ]]
-    return case
-
-def randomize_seed_fn(seed, randomize_seed):
-    if randomize_seed:
-        seed = random.randint(0, np.iinfo(np.int32).max)
-    torch.manual_seed(seed)
-    return seed
-
-
-
 
 def reconstruct(tar_prompt,
                 tar_cfg_scale,
                 skip,
                 wts, zs,
-                # do_reconstruction,
-                # reconstruction
                 ):
 
-
-    # if do_reconstruction:
     reconstruction = sample(zs.value, wts.value, prompt_tar=tar_prompt, skip=skip, cfg_scale_tar=tar_cfg_scale)
     return reconstruction
 
@@ -158,6 +106,7 @@ def load_and_invert(
 
     return wts, zs, do_inversion
 
+## SEGA ##
 
 def edit(input_image,
          wts, zs,
@@ -197,6 +146,66 @@ def edit(input_image,
 
 
 
+def randomize_seed_fn(seed, randomize_seed):
+    if randomize_seed:
+        seed = random.randint(0, np.iinfo(np.int32).max)
+    torch.manual_seed(seed)
+    return seed
+
+
+def get_example():
+    case = [
+        [
+            'examples/source_a_cat_sitting_next_to_a_mirror.jpeg',
+            'a cat sitting next to a mirror',
+            'watercolor painting of a cat sitting next to a mirror',
+            100,
+            36,
+            15,
+            'Schnauzer dog', 'cat',
+            5.5,
+            1,
+            'examples/ddpm_sega_watercolor_painting_a_cat_sitting_next_to_a_mirror_plus_dog_minus_cat.png'
+        ],
+        [
+            'examples/source_a_man_wearing_a_brown_hoodie_in_a_crowded_street.jpeg',
+            'a man wearing a brown hoodie in a crowded street',
+            'a robot wearing a brown hoodie in a crowded street',
+            100,
+            36,
+            15,
+            'painting','',
+            10,
+            1,
+            'examples/ddpm_sega_painting_of_a_robot_wearing_a_brown_hoodie_in_a_crowded_street.png'
+        ],
+        [
+            'examples/source_wall_with_framed_photos.jpeg',
+            '',
+            '',
+            100,
+            36,
+            15,
+            'pink drawings of muffins','',
+            10,
+            1,
+            'examples/ddpm_sega_plus_pink_drawings_of_muffins.png'
+        ],
+        [
+            'examples/source_an_empty_room_with_concrete_walls.jpg',
+            'an empty room with concrete walls',
+            'glass walls',
+            100,
+            36,
+            17,
+            'giant elephant','',
+            10,
+            1,
+            'examples/ddpm_sega_glass_walls_gian_elephant.png'
+        ]]
+    return case
+
+
 
 ########
 # demo #
@@ -346,6 +355,7 @@ with gr.Blocks(css='style.css') as demo:
 
 
         with gr.Row():
+            caption_button = gr.Button("Caption Image")
             run_button = gr.Button("Run")
             reconstruct_button = gr.Button("Show Reconstruction", visible=False)
 
@@ -366,11 +376,14 @@ with gr.Blocks(css='style.css') as demo:
     with gr.Accordion("Help", open=False):
         gr.Markdown(help_text)
 
-
+    caption_button.click(
+        fn = caption_image,
+        inputs = [input_image],
+        outputs = [tar_prompt]
+    )
 
     add_concept_button.click(fn = add_concept, inputs=sega_concepts_counter,
                              outputs= [row2, row3, add_concept_button, sega_concepts_counter], queue = False)
-
 
     run_button.click(
         fn = randomize_seed_fn,
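
For reference, the captioning path this commit wires in can be exercised on its own. The snippet below is a minimal sketch, not part of the commit: "cat.jpeg" is a hypothetical local test image, and the checkpoint is the same Salesforce/blip-image-captioning-base loaded in app.py.

import torch
from PIL import Image
from transformers import AutoProcessor, BlipForConditionalGeneration

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
blip_processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)

# preprocess the image and generate a caption, mirroring caption_image() in the diff
image = Image.open("cat.jpeg").convert("RGB")
inputs = blip_processor(images=image, return_tensors="pt")
generated_ids = blip_model.generate(pixel_values=inputs.pixel_values.to(device), max_length=50)
caption = blip_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
print(caption)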
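The new button follows the standard Gradio click pattern: whatever caption_image returns replaces the contents of the tar_prompt textbox. A self-contained sketch of that pattern, with a stand-in captioner rather than the Space's BLIP model:

import gradio as gr

def caption_image(input_image):
    # stand-in for the BLIP-backed captioner in app.py
    return "a cat sitting next to a mirror"

with gr.Blocks() as demo:
    input_image = gr.Image(type="pil", label="Input image")
    tar_prompt = gr.Textbox(label="Target prompt")
    caption_button = gr.Button("Caption Image")
    # the handler's return value is written into tar_prompt
    caption_button.click(fn=caption_image, inputs=[input_image], outputs=[tar_prompt])

demo.launch()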