First motion control: fixed camera

#40
Files changed (1)
  1. app.py +26 -18
app.py CHANGED
@@ -42,7 +42,8 @@ def animate(
42
  frame_format: str = "webp",
43
  version: str = "auto",
44
  width: int = 1024,
45
- height: int = 576
 
46
  ):
47
  start = time.time()
48
 
@@ -50,9 +51,12 @@ def animate(
50
  raise gr.Error("Please provide an image to animate.")
51
 
52
  output_folder = "outputs"
53
- image = resize_image(image, output_size=(width, height))
54
- if image.mode == "RGBA":
55
- image = image.convert("RGB")
 
 
 
56
 
57
  if randomize_seed:
58
  seed = random.randint(0, max_64_bit_int)
@@ -64,7 +68,7 @@ def animate(
64
  version = "svd"
65
 
66
  frames = animate_on_gpu(
67
- image,
68
  seed,
69
  motion_bucket_id,
70
  fps_id,
@@ -121,7 +125,7 @@ def animate(
121
  @torch.no_grad()
122
  @spaces.GPU(duration=180)
123
  def animate_on_gpu(
124
- image: Image,
125
  seed: Optional[int] = 42,
126
  motion_bucket_id: int = 127,
127
  fps_id: int = 6,
@@ -134,11 +138,11 @@ def animate_on_gpu(
134
  generator = torch.manual_seed(seed)
135
 
136
  if version == "dragnuwa":
137
- return dragnuwaPipe(image, width=width, height=height, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=noise_aug_strength, num_frames=25).frames[0]
138
  elif version == "svdxt":
139
- return fps25Pipe(image, width=width, height=height, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=noise_aug_strength, num_frames=25).frames[0]
140
  else:
141
- return fps14Pipe(image, width=width, height=height, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=noise_aug_strength, num_frames=25).frames[0]
142
 
143
 
144
  def resize_image(image, output_size=(1024, 576)):
@@ -188,7 +192,8 @@ def reset():
188
  "webp",
189
  "auto",
190
  1024,
191
- 576
 
192
  ]
193
 
194
  with gr.Blocks() as demo:
@@ -210,12 +215,13 @@ with gr.Blocks() as demo:
210
  with gr.Accordion("Advanced options", open=False):
211
  width = gr.Slider(label="Width", info="Width of the video", value=1024, minimum=256, maximum=1024, step=8)
212
  height = gr.Slider(label="Height", info="Height of the video", value=576, minimum=256, maximum=576, step=8)
 
 
 
213
  fps_id = gr.Slider(label="Frames per second", info="The length of your video in seconds will be 25/fps", value=25, minimum=5, maximum=30)
214
  motion_bucket_id = gr.Slider(label="Motion bucket id", info="Controls how much motion to add/remove from the image", value=127, minimum=1, maximum=255)
215
  noise_aug_strength = gr.Slider(label="Noise strength", info="The noise to add", value=0.1, minimum=0, maximum=1, step=0.1)
216
  decoding_t = gr.Slider(label="Decoding", info="Number of frames decoded at a time; this eats more VRAM; reduce if necessary", value=3, minimum=1, maximum=5, step=1)
217
- video_format = gr.Radio([["*.mp4", "mp4"], ["*.avi", "avi"], ["*.wmv", "wmv"], ["*.mkv", "mkv"], ["*.mov", "mov"], ["*.gif", "gif"]], label="Video format for result", info="File extention", value="mp4", interactive=True)
218
- frame_format = gr.Radio([["*.webp", "webp"], ["*.png", "png"], ["*.jpeg", "jpeg"], ["*.gif (unanimated)", "gif"], ["*.bmp", "bmp"]], label="Image format for frames", info="File extention", value="webp", interactive=True)
219
  version = gr.Radio([["Auto", "auto"], ["πŸƒπŸ»β€β™€οΈ SVD (trained on 14 f/s)", "svd"], ["πŸƒπŸ»β€β™€οΈπŸ’¨ SVD-XT (trained on 25 f/s)", "svdxt"], ["DragNUWA (unstable)", "dragnuwa"]], label="Model", info="Trained model", value="auto", interactive=True)
220
  seed = gr.Slider(label="Seed", value=42, randomize=True, minimum=0, maximum=max_64_bit_int, step=1)
221
  randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
@@ -242,7 +248,8 @@ with gr.Blocks() as demo:
242
  frame_format,
243
  version,
244
  width,
245
- height
 
246
  ], outputs=[
247
  video_output,
248
  gif_output,
@@ -265,16 +272,17 @@ with gr.Blocks() as demo:
265
  frame_format,
266
  version,
267
  width,
268
- height
 
269
  ], queue = False, show_progress = False)
270
 
271
  gr.Examples(
272
  examples=[
273
- ["Examples/Fire.webp", 42, True, 127, 25, 0.1, 3, "mp4", "png", "auto", 1024, 576],
274
- ["Examples/Water.png", 42, True, 127, 25, 0.1, 3, "mp4", "png", "auto", 1024, 576],
275
- ["Examples/Town.jpeg", 42, True, 127, 25, 0.1, 3, "mp4", "png", "auto", 1024, 576]
276
  ],
277
- inputs=[image, seed, randomize_seed, motion_bucket_id, fps_id, noise_aug_strength, decoding_t, video_format, frame_format, version, width, height],
278
  outputs=[video_output, gif_output, download_button, gallery, seed, information_msg, reset_btn],
279
  fn=animate,
280
  run_on_click=True,
 
42
  frame_format: str = "webp",
43
  version: str = "auto",
44
  width: int = 1024,
45
+ height: int = 576,
46
+ motion_control: bool = False
47
  ):
48
  start = time.time()
49
 
 
51
  raise gr.Error("Please provide an image to animate.")
52
 
53
  output_folder = "outputs"
54
+ image_data = resize_image(image, output_size=(width, height))
55
+ if image_data.mode == "RGBA":
56
+ image_data = image_data.convert("RGB")
57
+
58
+ if motion_control:
59
+ image_data = [image_data] * 25
60
 
61
  if randomize_seed:
62
  seed = random.randint(0, max_64_bit_int)
 
68
  version = "svd"
69
 
70
  frames = animate_on_gpu(
71
+ image_data,
72
  seed,
73
  motion_bucket_id,
74
  fps_id,
 
125
  @torch.no_grad()
126
  @spaces.GPU(duration=180)
127
  def animate_on_gpu(
128
+ image_data: Union[Image, List[Image]],
129
  seed: Optional[int] = 42,
130
  motion_bucket_id: int = 127,
131
  fps_id: int = 6,
 
138
  generator = torch.manual_seed(seed)
139
 
140
  if version == "dragnuwa":
141
+ return dragnuwaPipe(image_data, width=width, height=height, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=noise_aug_strength, num_frames=25).frames[0]
142
  elif version == "svdxt":
143
+ return fps25Pipe(image_data, width=width, height=height, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=noise_aug_strength, num_frames=25).frames[0]
144
  else:
145
+ return fps14Pipe(image_data, width=width, height=height, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=noise_aug_strength, num_frames=25).frames[0]
146
 
147
 
148
  def resize_image(image, output_size=(1024, 576)):
 
192
  "webp",
193
  "auto",
194
  1024,
195
+ 576,
196
+ False
197
  ]
198
 
199
  with gr.Blocks() as demo:
 
215
  with gr.Accordion("Advanced options", open=False):
216
  width = gr.Slider(label="Width", info="Width of the video", value=1024, minimum=256, maximum=1024, step=8)
217
  height = gr.Slider(label="Height", info="Height of the video", value=576, minimum=256, maximum=576, step=8)
218
+ motion_control = gr.Checkbox(label="Motion control (fixed camera)", info="Fix the camera", value=False)
219
+ video_format = gr.Radio([["*.mp4", "mp4"], ["*.avi", "avi"], ["*.wmv", "wmv"], ["*.mkv", "mkv"], ["*.mov", "mov"], ["*.gif", "gif"]], label="Video format for result", info="File extention", value="mp4", interactive=True)
220
+ frame_format = gr.Radio([["*.webp", "webp"], ["*.png", "png"], ["*.jpeg", "jpeg"], ["*.gif (unanimated)", "gif"], ["*.bmp", "bmp"]], label="Image format for frames", info="File extention", value="webp", interactive=True)
221
  fps_id = gr.Slider(label="Frames per second", info="The length of your video in seconds will be 25/fps", value=25, minimum=5, maximum=30)
222
  motion_bucket_id = gr.Slider(label="Motion bucket id", info="Controls how much motion to add/remove from the image", value=127, minimum=1, maximum=255)
223
  noise_aug_strength = gr.Slider(label="Noise strength", info="The noise to add", value=0.1, minimum=0, maximum=1, step=0.1)
224
  decoding_t = gr.Slider(label="Decoding", info="Number of frames decoded at a time; this eats more VRAM; reduce if necessary", value=3, minimum=1, maximum=5, step=1)
 
 
225
  version = gr.Radio([["Auto", "auto"], ["πŸƒπŸ»β€β™€οΈ SVD (trained on 14 f/s)", "svd"], ["πŸƒπŸ»β€β™€οΈπŸ’¨ SVD-XT (trained on 25 f/s)", "svdxt"], ["DragNUWA (unstable)", "dragnuwa"]], label="Model", info="Trained model", value="auto", interactive=True)
226
  seed = gr.Slider(label="Seed", value=42, randomize=True, minimum=0, maximum=max_64_bit_int, step=1)
227
  randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
 
248
  frame_format,
249
  version,
250
  width,
251
+ height,
252
+ motion_control
253
  ], outputs=[
254
  video_output,
255
  gif_output,
 
272
  frame_format,
273
  version,
274
  width,
275
+ height,
276
+ motion_control
277
  ], queue = False, show_progress = False)
278
 
279
  gr.Examples(
280
  examples=[
281
+ ["Examples/Fire.webp", 42, True, 127, 25, 0.1, 3, "mp4", "png", "auto", 1024, 576, False],
282
+ ["Examples/Water.png", 42, True, 127, 25, 0.1, 3, "mp4", "png", "auto", 1024, 576, False],
283
+ ["Examples/Town.jpeg", 42, True, 127, 25, 0.1, 3, "mp4", "png", "auto", 1024, 576, False]
284
  ],
285
+ inputs=[image, seed, randomize_seed, motion_bucket_id, fps_id, noise_aug_strength, decoding_t, video_format, frame_format, version, width, height, motion_control],
286
  outputs=[video_output, gif_output, download_button, gallery, seed, information_msg, reset_btn],
287
  fn=animate,
288
  run_on_click=True,