Stable-Video-Diffusion-Img2Vid

Running on Zero

App Files Community

xi0v

Fabrice-TIERCELIN committed on Jun 21

Commit

d3e5f59

•

1 Parent(s): 7187257

Allow the user to force the model selection (and fix autorun) (#18)

Browse files

- Allow the user to force the model selection (and fix autorun) (83088413eb5c5d1b7f6cdf1076c41cf6895e8f45)

Co-authored-by: Fabrice TIERCELIN <[email protected]>

Files changed (1) hide show

app.py +9 -8

app.py CHANGED Viewed

@@ -34,7 +34,7 @@ def sample(
     noise_aug_strength: float = 0.1,
     decoding_t: int = 3,
     frame_format: str = "webp",
-    version: str = "svd_xt",
     device: str = "cuda",
     output_folder: str = "outputs",
 ):
@@ -49,7 +49,7 @@ def sample(
     base_count = len(glob(os.path.join(output_folder, "*.mp4")))
     video_path = os.path.join(output_folder, f"{base_count:06d}.mp4")
-    if 14 < fps_id:
         frames = fps25Pipe(image, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=noise_aug_strength, num_frames=25).frames[0]
     else:
         frames = fps14Pipe(image, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=noise_aug_strength, num_frames=25).frames[0]
@@ -105,6 +105,7 @@ with gr.Blocks() as demo:
               noise_aug_strength = gr.Slider(label="Noise strength", info="The noise to add", value=0.1, minimum=0, maximum=1, step=0.1)
               decoding_t = gr.Slider(label="Decoding", info="Number of frames decoded at a time; this eats more VRAM; reduce if necessary", value=3, minimum=1, maximum=5, step=1)
               frame_format = gr.Radio([["*.png", "png"], ["*.webp", "webp"], ["*.jpeg", "jpeg"], ["*.gif", "gif"], ["*.bmp", "bmp"]], label="Image format for result", info="File extention", value="webp", interactive=True)
               seed = gr.Slider(label="Seed", value=42, randomize=True, minimum=0, maximum=max_64_bit_int, step=1)
               randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
@@ -115,18 +116,18 @@ with gr.Blocks() as demo:
           gallery = gr.Gallery(label="Generated frames")
   image.upload(fn=resize_image, inputs=image, outputs=image, queue=False)
-  generate_btn.click(fn=sample, inputs=[image, seed, randomize_seed, motion_bucket_id, fps_id, noise_aug_strength, decoding_t, frame_format], outputs=[video, gallery, seed], api_name="video")
   gr.Examples(
     examples=[
-        ["Examples/Fire.webp", 25, 127, 0.1, 3, "png", 42, True],
-        ["Examples/Town.jpeg", 25, 127, 0.1, 3, "png", 42, True],
-        ["Examples/Water.png", 25, 127, 0.1, 3, "png", 42, True]
     ],
-    inputs=[image, fps_id, motion_bucket_id, noise_aug_strength, decoding_t, frame_format, seed, randomize_seed],
     outputs=[video, gallery, seed],
     fn=sample,
-    run_on_click=False,
     cache_examples=False,
   )

     noise_aug_strength: float = 0.1,
     decoding_t: int = 3,
     frame_format: str = "webp",
+    version: str = "auto",
     device: str = "cuda",
     output_folder: str = "outputs",
 ):
     base_count = len(glob(os.path.join(output_folder, "*.mp4")))
     video_path = os.path.join(output_folder, f"{base_count:06d}.mp4")
+    if version == "svdxt" or (14 < fps_id and version != "svd"):
         frames = fps25Pipe(image, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=noise_aug_strength, num_frames=25).frames[0]
     else:
         frames = fps14Pipe(image, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=noise_aug_strength, num_frames=25).frames[0]
               noise_aug_strength = gr.Slider(label="Noise strength", info="The noise to add", value=0.1, minimum=0, maximum=1, step=0.1)
               decoding_t = gr.Slider(label="Decoding", info="Number of frames decoded at a time; this eats more VRAM; reduce if necessary", value=3, minimum=1, maximum=5, step=1)
               frame_format = gr.Radio([["*.png", "png"], ["*.webp", "webp"], ["*.jpeg", "jpeg"], ["*.gif", "gif"], ["*.bmp", "bmp"]], label="Image format for result", info="File extention", value="webp", interactive=True)
+              version = gr.Radio([["Auto", "auto"], ["SVD (trained on 14 f/s)", "svd"], ["SVD-XT (trained on 25 f/s)", "svdxt"]], label="Model", info="Trained model", value="auto", interactive=True)
               seed = gr.Slider(label="Seed", value=42, randomize=True, minimum=0, maximum=max_64_bit_int, step=1)
               randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
           gallery = gr.Gallery(label="Generated frames")
   image.upload(fn=resize_image, inputs=image, outputs=image, queue=False)
+  generate_btn.click(fn=sample, inputs=[image, seed, randomize_seed, motion_bucket_id, fps_id, noise_aug_strength, decoding_t, frame_format, version], outputs=[video, gallery, seed], api_name="video")
   gr.Examples(
     examples=[
+        ["Examples/Fire.webp", 42, True, 127, 25, 0.1, 3, "png", "auto"],
+        ["Examples/Water.png", 42, True, 127, 25, 0.1, 3, "png", "auto"],
+        ["Examples/Town.jpeg", 42, True, 127, 25, 0.1, 3, "png", "auto"]
     ],
+    inputs=[image, seed, randomize_seed, motion_bucket_id, fps_id, noise_aug_strength, decoding_t, frame_format, version],
     outputs=[video, gallery, seed],
     fn=sample,
+    run_on_click=True,
     cache_examples=False,
   )