# AnyV2V / prepare_video.py
# Residue from the hosting site's file viewer (not part of the script):
# vinesmsuic's picture | init | 26853cd | raw | history blame | 7.38 kB
import argparse
from moviepy.editor import VideoFileClip
import os
import glob
import random
import numpy as np
from PIL import Image
def extract_frames(video_path, frame_count=16):
    """Sample evenly spaced frames from a video and return them as PIL images.

    Args:
        video_path: Path of the video file to open with moviepy.
        frame_count: Number of timestamps to sample. Timestamps are spaced
            evenly over ``[0, duration)``; the end of the clip is excluded.

    Returns:
        A list of ``PIL.Image.Image`` objects, one per sampled timestamp,
        in chronological order.
    """
    clip = VideoFileClip(video_path)
    try:
        # endpoint=False keeps every timestamp strictly before the clip end.
        times = np.linspace(0, clip.duration, frame_count, endpoint=False)
        # get_frame returns a numpy array; convert each one to a PIL image.
        return [Image.fromarray(clip.get_frame(t)) for t in times]
    finally:
        # Release the underlying reader even if decoding a frame fails.
        clip.close()
def crop_and_resize_video(
    input_video_path,
    output_folder,
    clip_duration,
    width=None,
    height=None,
    start_time=None,
    end_time=None,
    n_frames=16,
    center_crop=False,
    x_offset=0,
    y_offset=0,
    longest_to_width=False,
):
    """Cut a fixed-length segment out of a video, resize/crop it and save it.

    The time window is chosen as follows:
      * ``start_time`` given: ``[start_time, start_time + clip_duration]``.
      * otherwise ``end_time`` given: ``[end_time - clip_duration, end_time]``
        (the start is clamped to 0).
      * neither given: a random window of ``clip_duration`` seconds; videos
        that are not longer than ``clip_duration`` are skipped.

    Args:
        input_video_path: Path of the source video.
        output_folder: Folder for the result; created if missing. The output
            keeps the source file's base name.
        clip_duration: Length of the segment in seconds (must be > 0).
        width: Target width in pixels. Required when ``center_crop`` is set.
        height: Target height in pixels. Required when ``center_crop`` is set.
        start_time: Optional segment start in seconds.
        end_time: Optional segment end in seconds (used if ``start_time`` is
            not given).
        n_frames: Number of frames the segment should contain (must be > 0);
            the output frame rate is ``n_frames / clip_duration``.
        center_crop: If true, rescale keeping the aspect ratio and then crop
            a ``width`` x ``height`` region. If false and both ``width`` and
            ``height`` are set, the frame is resized directly to that size.
        x_offset: Horizontal crop position in [-1, 1]; -1 is the left edge,
            0 is centered, 1 is the right edge.
        y_offset: Vertical crop position in [-1, 1]; -1 is the top edge,
            0 is centered, 1 is the bottom edge.
        longest_to_width: If true, rescale by the larger of the two
            width/height ratios instead of the smaller one.

    Returns:
        The path of the written video, or ``None`` if the video was skipped.

    Raises:
        ValueError: If ``clip_duration`` or ``n_frames`` is not positive, or
            if ``center_crop`` is requested without a positive ``width`` and
            ``height``.
    """
    # Validate before touching the file so bad arguments fail fast and clearly
    # instead of surfacing later as ZeroDivisionError/TypeError.
    if clip_duration <= 0:
        raise ValueError("clip_duration must be greater than 0.")
    if n_frames <= 0:
        raise ValueError("n_frames must be greater than 0.")
    if center_crop and not (width and height):
        raise ValueError("center_crop requires a positive width and height.")

    # Load the video file
    video = VideoFileClip(input_video_path)
    try:
        # Calculate start and end times for cropping
        if start_time is not None:
            start_time = float(start_time)
            end_time = start_time + clip_duration
        elif end_time is not None:
            end_time = float(end_time)
            # A negative start would be read by moviepy as "relative to the
            # end of the clip", so clamp it to the beginning instead.
            start_time = max(0.0, end_time - clip_duration)
        else:
            # Default to random cropping if neither start nor end time is specified
            video_duration = video.duration
            if video_duration <= clip_duration:
                print(f"Skipping {input_video_path}: duration is less than or equal to the clip duration.")
                return None
            max_start_time = video_duration - clip_duration
            start_time = random.uniform(0, max_start_time)
            end_time = start_time + clip_duration

        # Cut the segment out of the source video
        cropped_video = video.subclip(start_time, end_time)

        if center_crop:
            video_width, video_height = cropped_video.size
            scale_width = video_width / width
            scale_height = video_height / height
            if longest_to_width:
                # Larger ratio: the resized frame fits inside width x height.
                # NOTE(review): one dimension may then be smaller than the
                # requested crop; the offsets below are clamped to 0 in that
                # case - confirm the resulting output size is intended.
                scale = max(scale_width, scale_height)
            else:
                # Smaller ratio: the resized frame covers width x height.
                scale = min(scale_width, scale_height)

            # round() rather than int(): float error in the division must not
            # truncate the matching dimension to one pixel below the target.
            new_width = round(video_width / scale)
            new_height = round(video_height / scale)
            resized_video = cropped_video.resize(newsize=(new_width, new_height))
            print(f"Resized video to ({new_width}, {new_height})")

            # Map the [-1, 1] offsets onto [0, available slack] in pixels.
            offset_x = int(((x_offset + 1) / 2) * (new_width - width))
            offset_y = int(((y_offset + 1) / 2) * (new_height - height))

            # Ensure offsets do not push the crop area out of the video frame
            offset_x = max(0, min(new_width - width, offset_x))
            offset_y = max(0, min(new_height - height, offset_y))

            # Apply the crop at the computed position
            cropped_video = resized_video.crop(x1=offset_x, y1=offset_y, width=width, height=height)
        elif width and height:
            # Directly resize the video to specified width and height if no center crop is specified
            cropped_video = cropped_video.resize(newsize=(width, height))

        # True division so the segment holds n_frames frames; floor division
        # would drop frames, or give 0 fps when clip_duration > n_frames.
        fps = n_frames / clip_duration
        final_video = cropped_video.set_fps(fps)

        # Prepare the output video path
        os.makedirs(output_folder, exist_ok=True)
        filename = os.path.basename(input_video_path)
        output_video_path = os.path.join(output_folder, filename)

        # Write the result to the output file
        final_video.write_videofile(output_video_path, codec='libx264', audio_codec='aac', fps=fps)
    finally:
        # Release the source reader on success, skip and failure alike.
        video.close()

    print(f"Processed {input_video_path}, saved to {output_video_path}")
    return output_video_path
def process_videos(input_folder, output_base_folder, clip_duration, width=None, height=None, start_time=None, end_time=None, n_frames=16, center_crop=False, x_offset=0, y_offset=0, longest_to_width=False):
    """Run ``crop_and_resize_video`` on every ``*.mp4`` file in a folder.

    Only files directly inside ``input_folder`` that match ``*.mp4`` are
    processed. Every option other than the folder paths is forwarded
    unchanged to ``crop_and_resize_video`` for each file, and all results
    are written to ``output_base_folder``.

    Returns:
        None. A message is printed when the folder contains no matches.
    """
    mp4_pattern = os.path.join(input_folder, '*.mp4')  # Adjust the pattern if needed
    matches = glob.glob(mp4_pattern)

    # Guard clause: nothing to process.
    if len(matches) == 0:
        print(f"No video files found in {input_folder}")
        return

    for source_path in matches:
        crop_and_resize_video(
            source_path,
            output_base_folder,
            clip_duration,
            width=width,
            height=height,
            start_time=start_time,
            end_time=end_time,
            n_frames=n_frames,
            center_crop=center_crop,
            x_offset=x_offset,
            y_offset=y_offset,
            longest_to_width=longest_to_width,
        )
def main():
    """Command-line entry point: parse options and process one video or a folder.

    If ``--video_path`` is given, only that file is processed; otherwise every
    ``*.mp4`` file in ``--input_folder`` is processed. Giving both
    ``--start_time`` and ``--end_time`` prints a message and returns without
    processing. Giving neither ``--video_path`` nor ``--input_folder`` is
    reported as a usage error through argparse (exit status 2).
    """
    parser = argparse.ArgumentParser(description='Crop and resize video segments.')
    parser.add_argument('--input_folder', type=str, help='Path to the input folder containing video files')
    parser.add_argument('--video_path', type=str, default=None, required=False, help='Path to the input video file')
    parser.add_argument('--output_folder', type=str, default="processed_video_data", help='Path to the folder for the output videos')
    parser.add_argument('--clip_duration', type=int, default=2, required=False, help='Duration of the video clips in seconds')
    parser.add_argument('--width', type=int, default=512, help='Width of the output video (optional)')
    parser.add_argument('--height', type=int, default=512, help='Height of the output video (optional)')
    parser.add_argument('--start_time', type=float, help='Start time for cropping (optional)')
    parser.add_argument('--end_time', type=float, help='End time for cropping (optional)')
    parser.add_argument('--n_frames', type=int, default=16, help='Number of frames to extract from each video')
    parser.add_argument('--center_crop', action='store_true', help='Center crop the video')
    parser.add_argument('--x_offset', type=float, default=0, required=False, help='Horizontal offset for center cropping, range -1 to 1 (optional)')
    parser.add_argument('--y_offset', type=float, default=0, required=False, help='Vertical offset for center cropping, range -1 to 1 (optional)')
    parser.add_argument('--longest_to_width', action='store_true', help='Resize the longest dimension to the specified width')
    args = parser.parse_args()

    # Compare against None: a start or end time of 0 is a valid value and
    # must still count as "specified".
    if args.start_time is not None and args.end_time is not None:
        print("Please specify only one of start_time or end_time, not both.")
        return

    # Without this check a missing input would reach os.path.join(None, ...)
    # and fail with an unhelpful TypeError.
    if not args.video_path and not args.input_folder:
        parser.error("one of --video_path or --input_folder is required")

    # Options shared by the single-file and the folder code paths.
    options = {
        "clip_duration": args.clip_duration,
        "width": args.width,
        "height": args.height,
        "start_time": args.start_time,
        "end_time": args.end_time,
        "n_frames": args.n_frames,
        "center_crop": args.center_crop,
        "x_offset": args.x_offset,
        "y_offset": args.y_offset,
        "longest_to_width": args.longest_to_width,
    }

    if args.video_path:
        crop_and_resize_video(args.video_path, args.output_folder, **options)
    else:
        process_videos(args.input_folder, args.output_folder, **options)


if __name__ == "__main__":
    main()