# StreamingSVD / config.yaml
# pytorch_lightning==2.2.2
seed_everything: 33
trainer:
  accelerator: auto
  strategy: auto
  devices: '1'
  num_nodes: 1
  precision: 16-mixed
  logger: false
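# The trainer block above configures a single-device, 16-bit mixed-precision
# run with logging disabled, i.e. an inference-oriented Lightning setup.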
model:
  class_path: diffusion_trainer.streaming_svd.StreamingSVD
  init_args:
    vfi:
      class_path: modules.params.vfi.VFIParams
      init_args:
        ckpt_path_local: checkpoint/VFI/ours.pkl
        ckpt_path_global: https://drive.google.com/file/d/1XCNoyhA1RX3m8W-XJK8H8inH47l36kxP/view?usp=sharing
    i2v_enhance:
      class_path: modules.params.i2v_enhance.I2VEnhanceParams
      init_args:
        ckpt_path_local: checkpoint/i2v_enhance/
        ckpt_path_global: ali-vilab/i2vgen-xl
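    # vfi and i2v_enhance are checkpoint descriptors: ckpt_path_local is tried
    # first, with ckpt_path_global presumably serving as the download fallback.
    # VFI most likely stands for video frame interpolation; i2v_enhance points
    # at I2VGen-XL (ali-vilab/i2vgen-xl) for image-to-video enhancement.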
    module_loader:
      class_path: modules.loader.module_loader.GenericModuleLoader
      init_args:
        pipeline_repo: stabilityai/stable-video-diffusion-img2vid-xt
        pipeline_obj: streamingt2v_pipeline
        set_prediction_type: ''
        module_names:
        - network_config
        - model
        - controlnet
        - denoiser
        - conditioner
        - first_stage_model
        - sampler
        - svd_pipeline
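        # Each module named above is resolved against the matching entry in
        # module_config below when the pipeline is assembled.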
        module_config:
          controlnet:
            class_path: modules.loader.module_loader_config.ModuleLoaderConfig
            init_args:
              loader_cls_path: models.control.controlnet.ControlNet
              cls_func: from_unet
              cls_func_fast_dev_run: ''
              kwargs_diffusers: null
              model_params:
                merging_mode: addition
                zero_conv_mode: Identity
                frame_expansion: none
                downsample_controlnet_cond: true
                use_image_encoder_normalization: true
                use_controlnet_mask: false
                condition_encoder: ''
                conditioning_embedding_out_channels:
                - 32
                - 96
                - 256
                - 512
              kwargs_diff_trainer_params: null
              args: []
              dependent_modules:
                model: model
              dependent_modules_cloned: null
              state_dict_path: ''
              strict_loading: true
              state_dict_filters: []
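          # The ControlNet is derived from the UNet via ControlNet.from_unet;
          # dependent_modules wires in the module named 'model' for that call.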
          network_config:
            class_path: models.diffusion.video_model.VideoUNet
            init_args:
              in_channels: 8
              model_channels: 320
              out_channels: 4
              num_res_blocks: 2
              num_conditional_frames: null
              attention_resolutions:
              - 4
              - 2
              - 1
              dropout: 0.0
              channel_mult:
              - 1
              - 2
              - 4
              - 4
              conv_resample: true
              dims: 2
              num_classes: sequential
              use_checkpoint: false
              num_heads: -1
              num_head_channels: 64
              num_heads_upsample: -1
              use_scale_shift_norm: false
              resblock_updown: false
              transformer_depth: 1
              transformer_depth_middle: null
              context_dim: 1024
              time_downup: false
              time_context_dim: null
              extra_ff_mix_layer: true
              use_spatial_context: true
              merge_strategy: learned_with_images
              merge_factor: 0.5
              spatial_transformer_attn_type: softmax-xformers
              video_kernel_size:
              - 3
              - 1
              - 1
              use_linear_in_transformer: true
              adm_in_channels: 768
              disable_temporal_crossattention: false
              max_ddpm_temb_period: 10000
              merging_mode: attention_cross_attention
              controlnet_mode: true
              use_apm: false
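          # VideoUNet takes 8 input channels; in SVD this is typically the
          # 4-channel noisy latent concatenated with the 4-channel VAE latent
          # of the conditioning frame.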
          model:
            class_path: modules.loader.module_loader_config.ModuleLoaderConfig
            init_args:
              loader_cls_path: models.svd.sgm.modules.diffusionmodules.wrappers.OpenAIWrapper
              cls_func: ''
              cls_func_fast_dev_run: ''
              kwargs_diffusers:
                compile_model: false
              model_params: null
              model_params_fast_dev_run: null
              kwargs_diff_trainer_params: null
              args: []
              dependent_modules:
                diffusion_model: network_config
              dependent_modules_cloned: null
              state_dict_path: ''
              strict_loading: true
              state_dict_filters: []
          denoiser:
            class_path: models.svd.sgm.modules.diffusionmodules.denoiser.Denoiser
            init_args:
              scaling_config:
                target: models.svd.sgm.modules.diffusionmodules.denoiser_scaling.VScalingWithEDMcNoise
          sampler:
            class_path: models.svd.sgm.modules.diffusionmodules.sampling.EulerEDMSampler
            init_args:
              s_churn: 0.0
              s_tmin: 0.0
              s_tmax: .inf
              s_noise: 1.0
              discretization_config:
                target: models.diffusion.discretizer.AlignYourSteps
                params:
                  sigma_max: 700.0
              num_steps: 30
              guider_config:
                target: models.svd.sgm.modules.diffusionmodules.guiders.LinearPredictionGuider
                params:
                  max_scale: 3.0
                  min_scale: 1.5
                  num_frames: 25
              verbose: false
              device: cuda
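          # 30 Euler-EDM sampling steps on an Align-Your-Steps noise schedule;
          # the LinearPredictionGuider ramps classifier-free guidance linearly
          # from min_scale 1.5 to max_scale 3.0 across the 25 frames.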
          conditioner:
            class_path: models.svd.sgm.modules.GeneralConditioner
            init_args:
              emb_models:
              - is_trainable: false
                input_key: cond_frames_without_noise
                target: models.svd.sgm.modules.encoders.modules.FrozenOpenCLIPImagePredictionEmbedder
                params:
                  n_cond_frames: 1
                  n_copies: 1
                  open_clip_embedding_config:
                    target: models.svd.sgm.modules.encoders.modules.FrozenOpenCLIPImageEmbedder
                    params:
                      freeze: true
              - input_key: fps_id
                is_trainable: false
                target: models.svd.sgm.modules.encoders.modules.ConcatTimestepEmbedderND
                params:
                  outdim: 256
              - input_key: motion_bucket_id
                is_trainable: false
                target: models.svd.sgm.modules.encoders.modules.ConcatTimestepEmbedderND
                params:
                  outdim: 256
              - input_key: cond_frames
                is_trainable: false
                target: models.svd.sgm.modules.encoders.modules.VideoPredictionEmbedderWithEncoder
                params:
                  disable_encoder_autocast: true
                  n_cond_frames: 1
                  n_copies: 1
                  is_ae: true
                  encoder_config:
                    target: models.svd.sgm.models.autoencoder.AutoencoderKLModeOnly
                    params:
                      embed_dim: 4
                      monitor: val/rec_loss
                      ddconfig:
                        attn_type: vanilla-xformers
                        double_z: true
                        z_channels: 4
                        resolution: 256
                        in_channels: 3
                        out_ch: 3
                        ch: 128
                        ch_mult:
                        - 1
                        - 2
                        - 4
                        - 4
                        num_res_blocks: 2
                        attn_resolutions: []
                        dropout: 0.0
                      lossconfig:
                        target: torch.nn.Identity
              - input_key: cond_aug
                is_trainable: false
                target: models.svd.sgm.modules.encoders.modules.ConcatTimestepEmbedderND
                params:
                  outdim: 256
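          # Standard SVD conditioning: a CLIP image embedding of the anchor
          # frame, fps_id and motion_bucket_id timestep embeddings, the
          # VAE-encoded conditioning frame, and the noise-augmentation level
          # cond_aug.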
          first_stage_model:
            class_path: models.svd.sgm.AutoencodingEngine
            init_args:
              encoder_config:
                target: models.svd.sgm.modules.diffusionmodules.model.Encoder
                params:
                  attn_type: vanilla
                  double_z: true
                  z_channels: 4
                  resolution: 256
                  in_channels: 3
                  out_ch: 3
                  ch: 128
                  ch_mult:
                  - 1
                  - 2
                  - 4
                  - 4
                  num_res_blocks: 2
                  attn_resolutions: []
                  dropout: 0.0
              decoder_config:
                target: models.svd.sgm.modules.autoencoding.temporal_ae.VideoDecoder
                params:
                  attn_type: vanilla
                  double_z: true
                  z_channels: 4
                  resolution: 256
                  in_channels: 3
                  out_ch: 3
                  ch: 128
                  ch_mult:
                  - 1
                  - 2
                  - 4
                  - 4
                  num_res_blocks: 2
                  attn_resolutions: []
                  dropout: 0.0
                  video_kernel_size:
                  - 3
                  - 1
                  - 1
              loss_config:
                target: torch.nn.Identity
              regularizer_config:
                target: models.svd.sgm.modules.autoencoding.regularizers.DiagonalGaussianRegularizer
              optimizer_config: null
              lr_g_factor: 1.0
              trainable_ae_params: null
              ae_optimizer_args: null
              trainable_disc_params: null
              disc_optimizer_args: null
              disc_start_iter: 0
              diff_boost_factor: 3.0
              ckpt_engine: null
              ckpt_path: null
              additional_decode_keys: null
              ema_decay: null
              monitor: null
              input_key: jpg
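          # Standard SD-style VAE encoder paired with SVD's temporal
          # VideoDecoder for temporally consistent frame decoding.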
          svd_pipeline:
            class_path: modules.loader.module_loader_config.ModuleLoaderConfig
            init_args:
              loader_cls_path: diffusers.StableVideoDiffusionPipeline
              cls_func: from_pretrained
              cls_func_fast_dev_run: ''
              kwargs_diffusers:
                torch_dtype: torch.float16
                variant: fp16
                use_safetensors: true
              model_params: null
              model_params_fast_dev_run: null
              kwargs_diff_trainer_params: null
              args:
              - stabilityai/stable-video-diffusion-img2vid-xt
              dependent_modules: null
              dependent_modules_cloned: null
              state_dict_path: ''
              strict_loading: true
              state_dict_filters: []
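          # svd_pipeline loads the complete diffusers SVD-XT pipeline
          # (fp16, safetensors) alongside the individually assembled modules
          # above.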
        root_cls: null
    diff_trainer_params:
      class_path: modules.params.diffusion_trainer.params_streaming_diff_trainer.DiffusionTrainerParams
      init_args:
        scale_factor: 0.18215
        streamingsvd_ckpt:
          class_path: modules.params.diffusion_trainer.params_streaming_diff_trainer.CheckpointDescriptor
          init_args:
            ckpt_path_local: checkpoint/StreamingSVD/model.safetensors
            ckpt_path_global: PAIR/StreamingSVD/resolve/main/model.safetensors
        disable_first_stage_autocast: true
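    # scale_factor 0.18215 is the usual Stable Diffusion latent scaling
    # constant; ckpt_path_global presumably serves as the Hugging Face Hub
    # download fallback when the local StreamingSVD checkpoint is missing.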
    inference_params:
      class_path: modules.params.diffusion.inference_params.T2VInferenceParams
      init_args:
        n_autoregressive_generations: 2 # Number of autoregressive generation steps for StreamingSVD
        num_conditional_frames: 7 # Is this used?
        anchor_frames: '6' # Use frame (value + 1) as the CLIP-conditioning anchor for StreamingSVD
        reset_seed_per_generation: true # If true, the seed is reset before every generation
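
# A LightningCLI config in this format is typically consumed by the repo's
# entry-point script, along the lines of (hypothetical invocation; the actual
# script name may differ):
#
#   python main.py --config config.yaml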