pother committed on
Commit
ca6956f
1 Parent(s): 944a84e

Upload model

Browse files
Files changed (2) hide show
  1. config.json +4 -3
  2. model.safetensors +2 -2
config.json CHANGED
@@ -1,7 +1,8 @@
1
  {
 
2
  "activation_dropout": 0.1,
3
  "architectures": [
4
- "VitsModelForPreTraining"
5
  ],
6
  "attention_dropout": 0.1,
7
  "depth_separable_channels": 2,
@@ -51,7 +52,7 @@
51
  "noise_scale_duration": 0.8,
52
  "num_attention_heads": 2,
53
  "num_hidden_layers": 6,
54
- "num_speakers": 1,
55
  "posterior_encoder_num_wavenet_layers": 16,
56
  "prior_encoder_num_flows": 4,
57
  "prior_encoder_num_wavenet_layers": 4,
@@ -79,7 +80,7 @@
79
  ],
80
  "sampling_rate": 16000,
81
  "segment_size": 8192,
82
- "speaker_embedding_size": 0,
83
  "speaking_rate": 1.0,
84
  "spectrogram_bins": 513,
85
  "torch_dtype": "float32",
 
1
  {
2
+ "_name_or_path": "./tmp/vits_finetuned_tha",
3
  "activation_dropout": 0.1,
4
  "architectures": [
5
+ "VitsModel"
6
  ],
7
  "attention_dropout": 0.1,
8
  "depth_separable_channels": 2,
 
52
  "noise_scale_duration": 0.8,
53
  "num_attention_heads": 2,
54
  "num_hidden_layers": 6,
55
+ "num_speakers": 1408,
56
  "posterior_encoder_num_wavenet_layers": 16,
57
  "prior_encoder_num_flows": 4,
58
  "prior_encoder_num_wavenet_layers": 4,
 
80
  ],
81
  "sampling_rate": 16000,
82
  "segment_size": 8192,
83
+ "speaker_embedding_size": 256,
84
  "speaking_rate": 1.0,
85
  "spectrogram_bins": 513,
86
  "torch_dtype": "float32",
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cabfa3c771f470cc2207be3ce2432b587fdf219515b4fd0ecab6c1648fab6e3c
3
- size 332186056
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:472f1254f0e227386fd4717e052f5de2484e717269334fa8bcedc6fdbc5b2787
3
+ size 160105216