EzAudio-ControlNet

Paused

EzAudio-ControlNet/ckpts/ezaudio-l.yml

Upload 7 files

cb4641a verified 3 months ago

1.14 kB

	model_name: EzAudio-L

	model:
	mae: True
	mae_prob: 0.25
	mask_ratio: [0.25, 1.0]
	mask_span: 10
	img_size: 500
	patch_size: 1
	in_chans: 257
	out_chans: 128
	input_type: '1d'
	embed_dim: 1024
	depth: 24
	num_heads: 16
	mlp_ratio: 4.0
	qkv_bias: false
	qk_scale: null
	qk_norm: layernorm
	norm_layer: layernorm
	act_layer: geglu
	context_norm: true
	use_checkpoint: true
	time_fusion: 'ada_lora_bias'
	ada_lora_rank: 32
	ada_lora_alpha: 32
	cls_dim: null
	context_dim: 1024
	context_fusion: 'cross'
	context_max_length: null
	context_pe_method: 'none'
	pe_method: 'none'
	rope_mode: 'shared'
	use_conv: true
	skip: true
	skip_norm: true

	autoencoder:
	name: stable_vae
	dim: 128
	sr: 24000
	latent_sr: 50
	q_first: true
	scale: 1.0
	shift: 0.0

	text_encoder:
	model: google/flan-t5-large
	max_length: 100
	cfg: 0.1

	diff:
	num_train_timesteps: 1000
	beta_schedule: 'scaled_linear'
	beta_start: 0.00085
	beta_end: 0.012
	prediction_type: 'v_prediction'
	rescale_betas_zero_snr: true
	timestep_spacing: 'trailing'
	clip_sample: false