pother
/

mms_finetuning

Inference Endpoints

Model card Files and versions Community

pother committed on Jul 27

Commit

ca6956f

•

1 Parent(s): 944a84e

Upload model

Files changed (2) hide show

config.json +4 -3
model.safetensors +2 -2

config.json CHANGED Viewed

@@ -1,7 +1,8 @@
 {
   "activation_dropout": 0.1,
   "architectures": [
-    "VitsModelForPreTraining"
   ],
   "attention_dropout": 0.1,
   "depth_separable_channels": 2,
@@ -51,7 +52,7 @@
   "noise_scale_duration": 0.8,
   "num_attention_heads": 2,
   "num_hidden_layers": 6,
-  "num_speakers": 1,
   "posterior_encoder_num_wavenet_layers": 16,
   "prior_encoder_num_flows": 4,
   "prior_encoder_num_wavenet_layers": 4,
@@ -79,7 +80,7 @@
   ],
   "sampling_rate": 16000,
   "segment_size": 8192,
-  "speaker_embedding_size": 0,
   "speaking_rate": 1.0,
   "spectrogram_bins": 513,
   "torch_dtype": "float32",

 {
+  "_name_or_path": "./tmp/vits_finetuned_tha",
   "activation_dropout": 0.1,
   "architectures": [
+    "VitsModel"
   ],
   "attention_dropout": 0.1,
   "depth_separable_channels": 2,
   "noise_scale_duration": 0.8,
   "num_attention_heads": 2,
   "num_hidden_layers": 6,
+  "num_speakers": 1408,
   "posterior_encoder_num_wavenet_layers": 16,
   "prior_encoder_num_flows": 4,
   "prior_encoder_num_wavenet_layers": 4,
   ],
   "sampling_rate": 16000,
   "segment_size": 8192,
+  "speaker_embedding_size": 256,
   "speaking_rate": 1.0,
   "spectrogram_bins": 513,
   "torch_dtype": "float32",

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cabfa3c771f470cc2207be3ce2432b587fdf219515b4fd0ecab6c1648fab6e3c
-size 332186056

 version https://git-lfs.github.com/spec/v1
+oid sha256:472f1254f0e227386fd4717e052f5de2484e717269334fa8bcedc6fdbc5b2787
+size 160105216