pyp1 committed on
Commit
60b4c8e
1 Parent(s): ed79ec9

Push model using huggingface_hub.

Browse files
Files changed (3) hide show
  1. README.md +12 -0
  2. config.json +153 -0
  3. model.safetensors +3 -0
README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: voicecraft
3
+ tags:
4
+ - text-to-speech
5
+ - pytorch_model_hub_mixin
6
+ - model_hub_mixin
7
+ repo_url: https://github.com/jasonppy/VoiceCraft
8
+ ---
9
+
10
+ This model has been pushed to the Hub using **voicecraft**:
11
+ - Repo: https://github.com/jasonppy/VoiceCraft
12
+ - Docs: [More Information Needed]
config.json ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "args": null,
3
+ "audio_embedding_dim": 1024,
4
+ "audio_embedding_dropout": 0,
5
+ "audio_max_length": 20.0,
6
+ "audio_min_length": 2.0,
7
+ "audio_pad_token": 1026,
8
+ "audio_positional_embedding_dropout": 0.1,
9
+ "audio_vocab_size": "1024",
10
+ "batch_size": 100,
11
+ "clipping_update_period": 1000,
12
+ "codebook_weight": "[5,2,1,0.5,0.2,0.1,0.1,0.1]",
13
+ "d_model": 1024,
14
+ "dataset": "gigaspeech",
15
+ "dataset_dir": "/data/scratch/pyp/datasets/gigaspeech_phn_enc_manifest/xl",
16
+ "drop_long": 1,
17
+ "dynamic_batching": 1,
18
+ "early_stop_step": 3200,
19
+ "early_stop_threshold": -1.0,
20
+ "empty_token": 1024,
21
+ "encodec_folder_name": "encodec_d3cd8c2f_8cb1024_noise_epoch94",
22
+ "encodec_sr": 50,
23
+ "eog": 1025,
24
+ "eos": 1027,
25
+ "exp_dir": "/data/scratch/pyp/exp_pyp/VoiceCraft/gigaspeech/615M_8cb1024_se",
26
+ "gradient_accumulation_steps": 24,
27
+ "gradient_clip_val": 1.0,
28
+ "load_model_from": null,
29
+ "lr": 0.05,
30
+ "manifest_name": "manifest_large16khz_lessambi",
31
+ "mask_len_max": 600,
32
+ "mask_len_min": 1,
33
+ "mask_sample_dist": "poisson1",
34
+ "max_mask_portion": 0.9,
35
+ "max_n_spans": 3,
36
+ "max_num_tokens": 100000,
37
+ "min_gap": 5,
38
+ "n_codebooks": 8,
39
+ "n_special": 4,
40
+ "nhead": 16,
41
+ "num_buckets": 6,
42
+ "num_decoder_layers": 48,
43
+ "num_epochs": 10,
44
+ "num_steps": 50000,
45
+ "num_workers": 8,
46
+ "optimizer_name": "ScaledAdam",
47
+ "pad_x": 0,
48
+ "phn2num": {
49
+ "!": 17,
50
+ ",": 64,
51
+ ".": 77,
52
+ "<MUSIC>": 39,
53
+ "<NOISE>": 52,
54
+ "<OTHER>": 60,
55
+ "<SIL>": 53,
56
+ "?": 78,
57
+ "_": 15,
58
+ "a\u026a": 48,
59
+ "a\u026a\u0259": 56,
60
+ "a\u026a\u025a": 2,
61
+ "a\u028a": 36,
62
+ "b": 20,
63
+ "d": 72,
64
+ "d\u0292": 57,
65
+ "e\u026a": 6,
66
+ "f": 69,
67
+ "h": 14,
68
+ "i": 27,
69
+ "i\u0259": 42,
70
+ "i\u02d0": 68,
71
+ "i\u02d0\u02d0": 51,
72
+ "j": 67,
73
+ "k": 41,
74
+ "l": 63,
75
+ "m": 9,
76
+ "n": 23,
77
+ "n\u02b2": 8,
78
+ "o\u028a": 25,
79
+ "o\u02d0": 74,
80
+ "o\u02d0\u0279": 40,
81
+ "p": 34,
82
+ "r": 79,
83
+ "s": 66,
84
+ "t": 73,
85
+ "t\u0283": 75,
86
+ "u": 1,
87
+ "u\u02d0": 47,
88
+ "v": 31,
89
+ "w": 19,
90
+ "x": 4,
91
+ "z": 22,
92
+ "\u00e6": 32,
93
+ "\u00e6\u00e6": 50,
94
+ "\u00e7": 10,
95
+ "\u00f0": 7,
96
+ "\u014b": 58,
97
+ "\u0250": 70,
98
+ "\u0250\u0250": 71,
99
+ "\u0251": 61,
100
+ "\u0251\u02d0": 0,
101
+ "\u0251\u02d0\u0279": 44,
102
+ "\u0254": 3,
103
+ "\u0254\u026a": 13,
104
+ "\u0254\u02d0": 29,
105
+ "\u0254\u02d0\u0279": 33,
106
+ "\u0259": 54,
107
+ "\u0259l": 16,
108
+ "\u025a": 35,
109
+ "\u025b": 18,
110
+ "\u025b\u0279": 11,
111
+ "\u025c\u02d0": 21,
112
+ "\u0261": 49,
113
+ "\u0261\u02b2": 37,
114
+ "\u026a": 65,
115
+ "\u026a\u0279": 76,
116
+ "\u026c": 46,
117
+ "\u0279": 5,
118
+ "\u027e": 24,
119
+ "\u0283": 26,
120
+ "\u028a": 43,
121
+ "\u028a\u0279": 28,
122
+ "\u028c": 38,
123
+ "\u0292": 55,
124
+ "\u0294": 59,
125
+ "\u0303": 45,
126
+ "\u0329": 12,
127
+ "\u03b8": 30,
128
+ "\u1d7b": 62
129
+ },
130
+ "phn_folder_name": "phonemes",
131
+ "precision": "float16",
132
+ "print_every_n_steps": 400,
133
+ "pseudo_epoch_size": 3000,
134
+ "reduce_lr_start_epoch": 4,
135
+ "reduce_lr_start_step": 3000,
136
+ "reduced_eog": 1,
137
+ "resume": false,
138
+ "seed": 1,
139
+ "shuffle_mask_embedding": 0,
140
+ "special_first": 0,
141
+ "tb_write_every_n_steps": 100,
142
+ "text_embedding_dropout": 0.1,
143
+ "text_max_length": 400,
144
+ "text_min_length": 10.0,
145
+ "text_pad_token": 100,
146
+ "text_positional_embedding_dropout": 0.1,
147
+ "text_vocab_size": 100,
148
+ "trm_dropout": 0.1,
149
+ "val_every_n_steps": 1600,
150
+ "val_max_num_tokens": 6000,
151
+ "warmup_fraction": 0.01,
152
+ "weight_decay": 0.01
153
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4730a85b239b5150bca01fc2e93aa64835583dfc23a1f94ff4a6b4cd8681635
3
+ size 2486330704