{
  "architectures": [
    "EncodecModel"
  ],
  "audio_channels": 2,
  "chunk_length_s": 1.0,
  "codebook_dim": 128,
  "codebook_size": 1024,
  "compress": 2,
  "dilation_growth_rate": 2,
  "hidden_size": 128,
  "kernel_size": 7,
  "last_kernel_size": 7,
  "model_type": "encodec",
  "norm_type": "time_group_norm",
  "normalize": true,
  "num_filters": 32,
  "num_lstm_layers": 2,
  "num_residual_layers": 1,
  "overlap": 0.01,
  "pad_mode": "reflect",
  "residual_kernel_size": 3,
  "sampling_rate": 48000,
  "target_bandwidths": [
    3.0,
    6.0,
    12.0,
    24.0
  ],
  "torch_dtype": "float32",
  "transformers_version": "4.31.0.dev0",
  "trim_right_ratio": 1.0,
  "upsampling_ratios": [
    8,
    5,
    4,
    2
  ],
  "use_causal_conv": false
}