callum-canavan committed
Commit 609badf
1 Parent(s): 1ad8665

Fix pipeline

.gitignore CHANGED
@@ -1,4 +1,5 @@
 env/
 __pycache__/
 *.png
-*.mp4
+*.mp4
+*.gif
app.py CHANGED
@@ -75,12 +75,13 @@ def generate_content(
 choices = list(VIEW_MAP_NAMES.keys())
 gradio_app = gr.Interface(
     fn=generate_content,
+    title="Multi-View Illusion Diffusion",
     inputs=[
         gr.Textbox(label="Style", placeholder="an oil painting of"),
         gr.Textbox(label="Prompt for original view", placeholder="a dress"),
         gr.Textbox(label="Prompt for transformed view", placeholder="an old man"),
         gr.Dropdown(label="View transformation", choices=choices, value=choices[0]),
-        gr.Number(label="Number of diffusion steps", value=100, step=1, minimum=1, maximum=300),
+        gr.Number(label="Number of diffusion steps", value=50, step=1, minimum=1, maximum=300),
         gr.Number(label="Random seed", value=0, step=1, minimum=0, maximum=100000)
     ],
     outputs=[gr.Video(label="Illusion"), gr.Image(label="Original"), gr.Image(label="Transformed")],
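
A side note on the wiring here: gr.Interface passes the input components' values to fn positionally, in the order they appear in inputs, and expects one return value per output component. A minimal runnable sketch of that shape (the parameter names and body below are illustrative assumptions, not this repo's code):

# Illustrative sketch only; parameter names are assumptions, not the repo's code.
import gradio as gr

def generate_content(style, prompt_original, prompt_transformed, view, steps, seed):
    # One return value per output component: video, original image, transformed image.
    return None, None, None

demo = gr.Interface(
    fn=generate_content,
    title="Multi-View Illusion Diffusion",
    inputs=[gr.Textbox(), gr.Textbox(), gr.Textbox(),
            gr.Dropdown(choices=["identity", "flip"], value="identity"),
            gr.Number(value=50), gr.Number(value=0)],
    outputs=[gr.Video(), gr.Image(), gr.Image()],
)
# demo.launch()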
requirements.txt CHANGED
@@ -7,6 +7,7 @@ imageio
 imageio[ffmpeg]
 imageio[pyav]
 opencv-python
+pygifsicle
 safetensors
 sentencepiece
 transformers
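
One caveat on the new dependency: pygifsicle is a thin Python wrapper around the gifsicle command-line tool, so the binary itself has to be installed separately (on a Hugging Face Space that would typically mean listing gifsicle in packages.txt; treat that as a deployment assumption, it is not part of this commit). Typical usage looks roughly like:

# Sketch of typical pygifsicle usage (assumes the gifsicle binary is on PATH).
import os
from pygifsicle import optimize

if os.path.exists("illusion.gif"):   # "illusion.gif" is a placeholder name
    optimize("illusion.gif")         # losslessly re-compresses the GIF in place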
test_video.py CHANGED
@@ -7,5 +7,5 @@ if __name__ == "__main__":
         get_views(["identity", "flip"])[1],
         "a painting of vases",
         "a painting of a sloth",
-        save_video_path="tmp3.mp4",
+        save_video_path="tmp.mp4",
     )
visual_anagrams/animate.py CHANGED
@@ -1,8 +1,9 @@
 import cv2
 from tqdm import tqdm
 import numpy as np
-from PIL import Image, ImageDraw, ImageFont
+from PIL import Image, ImageDraw, ImageFont, ImageChops
 import imageio
+from pygifsicle import optimize

 import torchvision.transforms.functional as TF

@@ -14,11 +15,12 @@ def draw_text(image, text, fill=(0,0,0), frame_size=384, im_size=256):
     image = image.copy()

     # Font info
+    font_path = get_courier_font_path()
     font_size = 16

     # Make PIL objects
     draw = ImageDraw.Draw(image)
-    font = ImageFont.load_default()
+    font = ImageFont.truetype(font_path, font_size)

     # Center text horizontally, and vertically between
     # illusion bottom and frame bottom
@@ -50,9 +52,9 @@ def animate_two_view(
         prompt_1,
         prompt_2,
         save_video_path='tmp.mp4',
-        hold_duration=120,
+        hold_duration=60,
         text_fade_duration=10,
-        transition_duration=60,
+        transition_duration=80,
         im_size=256,
         frame_size=384,
     ):
@@ -114,22 +116,23 @@ def animate_two_view(

     # Move last bit of clip to front
     frames = frames[-hold_duration//2:] + frames[:-hold_duration//2]
-
-    # Convert PIL images to numpy arrays
-    image_array = [imageio.core.asarray(frame) for frame in frames]
-    f = image_array[0]
-    print(f.dtype)
-    print(f.shape)
-    print(frame_size)
-    print(np.min(f), np.max(f))
-    print(len(image_array))
-
-    # Save as video using opencv
-    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-    video = cv2.VideoWriter(save_video_path, fourcc, 30, (frame_size, frame_size))
-    for frame in image_array:
-        video.write(frame)
-    video.release()
+    images = frames
+
+    processed_frames = [images[0]]
+
+    for i in range(1, len(images)):
+        # Calculate the difference between current and previous frame
+        diff = ImageChops.difference(images[i], images[i - 1])
+        # Create a mask to isolate changes
+        mask = diff.convert("L").point(lambda x: 0 if x < 5 else 255, "1")
+        # Apply the mask to the current frame
+        new_frame = ImageChops.composite(images[i], processed_frames[-1], mask)
+        processed_frames.append(new_frame)
+
+    # Save the frames as a GIF
+    imageio.mimsave(save_video_path,
+                    [np.array(frame) for frame in processed_frames],
+                    fps=30)


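The rewritten save path builds a GIF rather than an MP4 and masks each frame against the previous one, presumably so that unchanged pixels stay bit-identical and the GIF (plus a later gifsicle pass) compresses better; note that optimize is imported from pygifsicle but not called in the hunks shown here. A self-contained sketch of that masking step, using synthetic frames as a stand-in (the helper name and demo frames are assumptions, not repo code):

# Standalone sketch of the frame-masking step above; `frames` is assumed to be
# a list of same-sized RGB PIL images.
import numpy as np
import imageio
from PIL import Image, ImageChops

def save_masked_gif(frames, path, fps=30, threshold=5):
    processed = [frames[0]]
    for i in range(1, len(frames)):
        # Pixels that changed by less than `threshold` between consecutive source
        # frames are copied from the previous processed frame, so static regions
        # stay bit-identical and the palette-based GIF encoding stays compact.
        diff = ImageChops.difference(frames[i], frames[i - 1])
        mask = diff.convert("L").point(lambda x: 0 if x < threshold else 255, "1")
        processed.append(ImageChops.composite(frames[i], processed[-1], mask))
    imageio.mimsave(path, [np.array(f) for f in processed], fps=fps)

# Synthetic demo: a dark bar sweeping across a mostly static background.
demo = []
for x in range(0, 64, 4):
    im = Image.new("RGB", (64, 64), (200, 200, 200))
    im.paste((30, 30, 30), (x, 0, x + 4, 64))
    demo.append(im)
save_masked_gif(demo, "demo.gif")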
visual_anagrams/samplers.py CHANGED
@@ -30,7 +30,7 @@ def sample_stage_1(model,
     prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds])

     # Setup timesteps
-    model.scheduler.set_timesteps(num_inference_steps, device=device)
+    model.scheduler.set_timesteps(int(num_inference_steps), device=device)
     timesteps = model.scheduler.timesteps

     # Make intermediate_images
@@ -45,7 +45,7 @@ def sample_stage_1(model,
     )
     # ic(noisy_images.shape)

-    for i, t in tqdm(enumerate(timesteps)):
+    for i, t in enumerate(tqdm(timesteps)):
         # Apply views to noisy_image
         viewed_noisy_images = []
         for view_fn in views:
@@ -109,6 +109,7 @@ def sample_stage_1(model,
         # ic(noise_pred.shape)

         # ic(t.shape)
+        # ic(t.dtype)
         # compute the previous noisy sample x_t -> x_t-1
         noisy_images = model.scheduler.step(
             noise_pred.to('cuda'), t, noisy_images.to('cuda'), generator=generator, return_dict=False
@@ -148,7 +149,7 @@ def sample_stage_2(model,
     prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds])

     # Get timesteps
-    model.scheduler.set_timesteps(num_inference_steps, device=device)
+    model.scheduler.set_timesteps(int(num_inference_steps), device=device)
     timesteps = model.scheduler.timesteps

     num_channels = model.unet.config.in_channels // 2
@@ -236,7 +237,7 @@ def sample_stage_2(model,

         # compute the previous noisy sample x_t -> x_t-1
         noisy_images = model.scheduler.step(
-            noise_pred, t, noisy_images, generator=generator, return_dict=False
+            noise_pred.to('cuda'), t, noisy_images.to('cuda'), generator=generator, return_dict=False
         )[0]

     # Return denoised images
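
Two small fixes here are worth spelling out. First, num_inference_steps arrives from the Gradio gr.Number input as a float, so casting with int(...) before set_timesteps avoids passing a non-integer step count to the scheduler (presumably the "Fix pipeline" issue). Second, enumerate(tqdm(timesteps)) wraps the sized iterable, so the progress bar keeps a known total, which tqdm(enumerate(...)) cannot infer. A minimal illustration, with a plain list standing in for the scheduler's timesteps:

# Minimal illustration of both fixes; no diffusers required.
from tqdm import tqdm

num_inference_steps = 50.0            # gr.Number hands the callback a float
steps = int(num_inference_steps)      # schedulers expect an integer step count

timesteps = list(range(steps, 0, -1))  # stand-in for model.scheduler.timesteps

# tqdm(enumerate(timesteps)) cannot infer a total (enumerate has no __len__),
# so the bar shows no length or ETA; wrapping the sized list keeps a full bar.
for i, t in enumerate(tqdm(timesteps)):
    pass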