patrickvonplaten committed on
Commit
4a24635
1 Parent(s): d1d31db
Files changed (2) hide show
  1. config.json +2 -16
  2. preprocessor_config.json +9 -0
config.json CHANGED
@@ -1,6 +1,5 @@
1
  {
2
  "activation_dropout": 0.0,
3
- "apply_spec_augment": true,
4
  "architectures": [
5
  "UniSpeechSatForPreTraining"
6
  ],
@@ -8,7 +7,6 @@
8
  "bos_token_id": 1,
9
  "classifier_proj_size": 256,
10
  "codevector_dim": 768,
11
- "contrastive_logits_temperature": 0.1,
12
  "conv_bias": true,
13
  "conv_dim": [
14
  512,
@@ -37,9 +35,6 @@
37
  2,
38
  2
39
  ],
40
- "ctc_loss_reduction": "sum",
41
- "ctc_zero_infinity": false,
42
- "diversity_loss_weight": 0.1,
43
  "do_stable_layer_norm": true,
44
  "eos_token_id": 2,
45
  "feat_extract_activation": "gelu",
@@ -48,7 +43,6 @@
48
  "feat_proj_dropout": 0.1,
49
  "feat_quantizer_dropout": 0.0,
50
  "final_dropout": 0.0,
51
- "gradient_checkpointing": false,
52
  "hidden_act": "gelu",
53
  "hidden_dropout": 0.1,
54
  "hidden_size": 1024,
@@ -56,18 +50,10 @@
56
  "intermediate_size": 4096,
57
  "layer_norm_eps": 1e-05,
58
  "layerdrop": 0.1,
59
- "mask_channel_length": 10,
60
- "mask_channel_min_space": 1,
61
- "mask_channel_other": 0.0,
62
- "mask_channel_prob": 0.0,
63
- "mask_channel_selection": "static",
64
  "mask_feature_length": 10,
65
  "mask_feature_prob": 0.0,
66
  "mask_time_length": 10,
67
- "mask_time_min_space": 1,
68
- "mask_time_other": 0.0,
69
  "mask_time_prob": 0.075,
70
- "mask_time_selection": "static",
71
  "model_type": "unispeech-sat",
72
  "num_attention_heads": 16,
73
  "num_clusters": 504,
@@ -80,9 +66,9 @@
80
  "num_negatives": 100,
81
  "pad_token_id": 0,
82
  "proj_codevector_dim": 768,
83
- "replace_prob": 0.5,
84
  "torch_dtype": "float32",
85
  "transformers_version": "4.12.0.dev0",
86
  "use_weighted_layer_sum": false,
87
- "vocab_size": 256
 
88
  }
 
1
  {
2
  "activation_dropout": 0.0,
 
3
  "architectures": [
4
  "UniSpeechSatForPreTraining"
5
  ],
 
7
  "bos_token_id": 1,
8
  "classifier_proj_size": 256,
9
  "codevector_dim": 768,
 
10
  "conv_bias": true,
11
  "conv_dim": [
12
  512,
 
35
  2,
36
  2
37
  ],
 
 
 
38
  "do_stable_layer_norm": true,
39
  "eos_token_id": 2,
40
  "feat_extract_activation": "gelu",
 
43
  "feat_proj_dropout": 0.1,
44
  "feat_quantizer_dropout": 0.0,
45
  "final_dropout": 0.0,
 
46
  "hidden_act": "gelu",
47
  "hidden_dropout": 0.1,
48
  "hidden_size": 1024,
 
50
  "intermediate_size": 4096,
51
  "layer_norm_eps": 1e-05,
52
  "layerdrop": 0.1,
 
 
 
 
 
53
  "mask_feature_length": 10,
54
  "mask_feature_prob": 0.0,
55
  "mask_time_length": 10,
 
 
56
  "mask_time_prob": 0.075,
 
57
  "model_type": "unispeech-sat",
58
  "num_attention_heads": 16,
59
  "num_clusters": 504,
 
66
  "num_negatives": 100,
67
  "pad_token_id": 0,
68
  "proj_codevector_dim": 768,
 
69
  "torch_dtype": "float32",
70
  "transformers_version": "4.12.0.dev0",
71
  "use_weighted_layer_sum": false,
72
+ "vocab_size": 256,
73
+ "tokenizer_class": "Wav2Vec2CTCTokenizer"
74
  }
preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0,
7
+ "return_attention_mask": true,
8
+ "sampling_rate": 16000
9
+ }