import ast
import datetime
import os
from pathlib import Path
from typing import Tuple

import numpy as np
from PIL import Image

from lib.farancia import IImage
from streaming_svd_inference import StreamingSVD

def get_uuid(asset: str, cache: str) -> Path:
    """
    Generate a unique .mp4 file path based on the current timestamp, located in the
    given asset subfolder of the cache directory. The folder is created if it does
    not exist yet; the file itself is not written here.

    Args:
        asset (str): Subfolder of the cache directory (e.g. "first_stage").
        cache (str): Root cache directory, typically the Gradio cache directory.

    Returns:
        Path: The path at which the caller should save its file.
    """
    # timestamp-based file name: '.' and ' ' in str(datetime.now()) are replaced by '_'
    file_name = "_".join(
        "_".join(str(datetime.datetime.now()).split('.')).split(" ")) + ".mp4"
    file = Path(cache) / asset / file_name
    if not file.parent.exists():
        file.parent.mkdir(parents=True)
    print(f"Saving file to {file}")
    return file

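# Illustrative note (not executed): with asset="first_stage" and a hypothetical cache
# directory "/tmp/gradio_cache", get_uuid returns a path such as
#   /tmp/gradio_cache/first_stage/2024-05-01_12:34:56_789012.mp4
# i.e. the timestamp with '.' and ' ' replaced by '_', plus the ".mp4" suffix.
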
def retrieve_intermediate_data(video_file: str) -> Tuple[list[int], list[int], np.ndarray]:
    """
    Retrieve the intermediate data stored next to a cropped video file: the expansion
    size, the original size, and the scaled outpainted anchor image.

    Args:
        video_file (str): The path to the video file, containing "__cropped__" in its name.

    Returns:
        Tuple[list[int], list[int], np.ndarray]: The expansion size, the original size,
        and the scaled outpainted anchor image as a numpy array.

    Raises:
        AssertionError: If the video file path is not a string or does not contain
        "__cropped__" in its name.
    """
    assert isinstance(video_file, str) and "__cropped__" in video_file, \
        f"File {video_file} is missing __cropped__ keyword"
    video_file_expanded = video_file.replace("__cropped__", "__expanded__")
    # size of the outpainted image (expanded to a 16:9 aspect ratio)
    expanded_size = ast.literal_eval(Path(video_file_expanded.replace(
        "__expanded__", "__meta_expanded_size__").replace("mp4", "txt")).read_text())
    # original size of the user image
    orig_size = ast.literal_eval(Path(video_file_expanded.replace(
        "__expanded__", "__meta_orig_size__").replace("mp4", "txt")).read_text())
    # scaled outpainted anchor image
    scaled_outpainted_image = IImage.open(video_file_expanded.replace(
        "__expanded__", "__anchor__").replace("mp4", "png")).numpy()
    return expanded_size, orig_size, scaled_outpainted_image

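# Sidecar naming convention used above (illustrative paths, derived from the code):
# for a first-stage video  <cache>/first_stage/__cropped__<uuid>.mp4  this reads
#   <cache>/first_stage/__meta_expanded_size__<uuid>.txt  (expanded size as a list literal)
#   <cache>/first_stage/__meta_orig_size__<uuid>.txt      (original size as [width, height])
#   <cache>/first_stage/__anchor__<uuid>.png              (scaled outpainted anchor frame)
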
def save_intermediate_data(video: np.ndarray, user_image: np.ndarray, video_path: Path,
                           expanded_size: list[int], fps: int, scaled_outpainted_image: Image.Image):
    """
    Save intermediate data related to the generated video, including resolution
    information and the scaled outpainted image.

    Args:
        video (np.ndarray): The generated video.
        user_image (np.ndarray): The user image used for generating the video.
        video_path (Path): The path to the generated video file.
        expanded_size (list[int]): The expansion size information.
        fps (int): The frames per second of the video.
        scaled_outpainted_image (Image.Image): The scaled outpainted image.
    """
    # save resolution of outpainting (before scaling)
    meta = video_path.parent / ("__meta_expanded_size__" + video_path.name.replace("mp4", "txt"))
    meta.write_text(str(expanded_size))
    # save original resolution of user image
    meta = video_path.parent / ("__meta_orig_size__" + video_path.name.replace("mp4", "txt"))
    meta.write_text(str([user_image.shape[1], user_image.shape[0]]))
    # save scaled outpainted first frame
    anchor = video_path.parent / ("__anchor__" + video_path.name.replace("mp4", "png"))
    IImage(scaled_outpainted_image).save(anchor)
    # save video generated from outpainted image
    video_path_expanded = video_path.parent / ("__expanded__" + video_path.name)
    IImage(video, vmin=0, vmax=255).setFps(fps).save(video_path_expanded)

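# Note on the stored metadata: both sizes are written as plain Python list literals
# (read back with ast.literal_eval in retrieve_intermediate_data). orig_size is
# [user_image.shape[1], user_image.shape[0]], i.e. [width, height]; expanded_size
# comes from StreamingSVD and appears to use the same ordering, since it is reversed
# before being passed to IImage.resize below.
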
def image_to_video_gradio(img: np.ndarray, streaming_svd: StreamingSVD, gradio_cache: str,
                          fps: int = 24, asset: str = "first_stage", **kwargs) -> str:
    """
    Convert an image to a video using the provided streaming_svd object, crop the result
    back to the input image resolution, and store intermediate data for the enhancement stage.

    Args:
        img (np.ndarray): The input image to convert to video.
        streaming_svd (StreamingSVD): The object used for converting the image to video.
        gradio_cache (str): The cache directory where the generated files are stored.
        fps (int, optional): The frames per second of the output video (default is 24).
        asset (str, optional): The subfolder of the cache directory where the video
            will be saved (default is "first_stage").
        **kwargs: Additional keyword arguments to pass to the streaming_svd object.

    Returns:
        str: The path to the saved cropped video file.

    Note: Several additional files are written to disk next to the cropped video,
    using names derived from the same UUID:
        * image-to-video result at the outpainted resolution (key = __expanded__)
        * the size of the outpainted image (key = __meta_expanded_size__)
        * the original size of the input image (key = __meta_orig_size__)
        * the scaled outpainted anchor image (key = __anchor__)
    """
    video, scaled_outpainted_image, expanded_size = streaming_svd.image_to_video(img, **kwargs)
    video_path = get_uuid(asset, cache=gradio_cache)
    video_path_cropped = video_path.parent / ("__cropped__" + video_path.name)
    # resize the generated video back to the outpainted resolution, then crop it to
    # the user image resolution before saving
    IImage(video, vmin=0, vmax=255).resize(expanded_size[::-1]).crop(
        (0, 0, img.shape[1], img.shape[0])).setFps(fps).save(video_path_cropped)
    save_intermediate_data(video=video, video_path=video_path, expanded_size=expanded_size,
                           fps=fps, user_image=img, scaled_outpainted_image=scaled_outpainted_image)
    return video_path_cropped.as_posix()

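# Minimal usage sketch (hypothetical values; assumes an already initialized
# StreamingSVD instance `svd` and a writable cache directory):
#
#   cropped_path = image_to_video_gradio(img, svd, gradio_cache="/tmp/gradio_cache", fps=24)
#   expanded_size, orig_size, anchor = retrieve_intermediate_data(cropped_path)
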
def image_to_video_vfi_gradio(img: np.ndarray, streaming_svd: StreamingSVD, gradio_cache: str,
                              fps: int = 24, asset: str = "first_stage", num_frames: int = None,
                              **kwargs) -> str:
    """
    Convert an image to a video using the provided streaming_svd object, then apply
    video frame interpolation (VFI) to reach the requested number of frames.

    Args:
        img (np.ndarray): The input image to convert to video.
        streaming_svd (StreamingSVD): The object used for converting the image to video.
        gradio_cache (str): The cache directory where the generated files are stored.
        fps (int, optional): The frames per second of the output video (default is 24).
        asset (str, optional): The subfolder of the cache directory where the video
            will be saved (default is "first_stage").
        num_frames (int): The number of frames of the output video after interpolation.
        **kwargs: Additional keyword arguments to pass to the streaming_svd object.

    Returns:
        str: The path to the saved cropped video file.

    Note: Several additional files are written to disk next to the cropped video,
    using names derived from the same UUID:
        * image-to-video result at the outpainted resolution (key = __expanded__)
        * the size of the outpainted image (key = __meta_expanded_size__)
        * the original size of the input image (key = __meta_orig_size__)
        * the scaled outpainted anchor image (key = __anchor__)
    """
    # generate half of the frames, then interpolate up to num_frames
    video, scaled_outpainted_image, expanded_size = streaming_svd.image_to_video(
        img, num_frames=(num_frames + 1) // 2, **kwargs)
    video = streaming_svd.interpolate_video(video, dest_num_frames=num_frames)
    video_path = get_uuid(asset, cache=gradio_cache)
    video_path_cropped = video_path.parent / ("__cropped__" + video_path.name)
    IImage(video, vmin=0, vmax=255).resize(expanded_size[::-1]).crop(
        (0, 0, img.shape[1], img.shape[0])).setFps(fps).save(video_path_cropped)
    save_intermediate_data(video=video, video_path=video_path, expanded_size=expanded_size,
                           fps=fps, user_image=img, scaled_outpainted_image=scaled_outpainted_image)
    return video_path_cropped.as_posix()

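# Frame-count note: the base model is asked for (num_frames + 1) // 2 frames and VFI
# then interpolates up to num_frames, e.g. num_frames=25 generates 13 frames before
# interpolation. num_frames therefore has to be provided despite its None default.
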
def text_to_image_gradio(prompt: str, streaming_svd: StreamingSVD, **kwargs) -> np.ndarray:
    """
    Generate an image from the provided text prompt using the specified streaming_svd object.

    Args:
        prompt (str): The text prompt used to generate the image.
        streaming_svd (StreamingSVD): The object used for converting the text to an image.
        **kwargs: Additional keyword arguments to pass to the streaming_svd object.

    Returns:
        np.ndarray: The generated image based on the text prompt.
    """
    return streaming_svd.text_to_image(prompt, **kwargs)

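# Usage sketch of the text-to-video path (hypothetical values; `svd` is an initialized
# StreamingSVD instance):
#
#   img = text_to_image_gradio("A cat surfing a wave", svd)
#   cropped_path = image_to_video_vfi_gradio(img, svd, gradio_cache="/tmp/gradio_cache",
#                                            num_frames=25)
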
def enhance_video_vfi_gradio(img: np.ndarray, video: str, expanded_size: list[int], num_frames: int,
                             gradio_cache: str, streaming_svd: StreamingSVD, fps: int = 24,
                             asset: str = "second_stage", orig_size: list[int] = None,
                             **kwargs) -> str:
    """
    Enhance a video with the proposed enhancement stage (including randomized blending),
    then apply video frame interpolation (VFI) and crop the result back to the original
    image resolution.

    Args:
        img (np.ndarray): The input image used for enhancing the video.
        video (str): The path to the input video to be enhanced.
        expanded_size (list[int]): The size to which the video will be expanded.
        num_frames (int): The number of frames of the output video after interpolation.
        gradio_cache (str): The cache directory where the enhanced video is stored.
        streaming_svd (StreamingSVD): The object used for enhancing the video.
        fps (int, optional): The frames per second of the output video (default is 24).
        asset (str, optional): The subfolder of the cache directory where the enhanced
            video will be saved (default is "second_stage").
        orig_size (list[int], optional): The original size of the input image as
            [width, height], used to crop the result (default is None).
        **kwargs: Additional keyword arguments to pass to the streaming_svd object
            for enhancement.

    Returns:
        str: The path to the saved enhanced video file.
    """
    # enhance half of the frames, then interpolate up to num_frames
    video_enh = streaming_svd.enhance_video(image=img, video=video, num_frames=(num_frames + 1) // 2, **kwargs)
    video_int = streaming_svd.interpolate_video(video_enh, dest_num_frames=num_frames)
    video_path = get_uuid(asset, cache=gradio_cache)
    # resize back to the outpainted resolution and crop to the original image size
    IImage(video_int, vmin=0, vmax=255).resize(expanded_size[::-1]).crop(
        (0, 0, orig_size[0], orig_size[1])).setFps(fps).save(video_path)
    return video_path.as_posix()