Cristhian2430 committed on
Commit
601bbd9
1 Parent(s): 4eb0881

Upload WhisperForConditionalGeneration

Browse files
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "openai/whisper-small",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
  "apply_spec_augment": false,
@@ -13,17 +13,17 @@
13
  ],
14
  "bos_token_id": 50257,
15
  "classifier_proj_size": 256,
16
- "d_model": 768,
17
- "decoder_attention_heads": 12,
18
- "decoder_ffn_dim": 3072,
19
  "decoder_layerdrop": 0.0,
20
- "decoder_layers": 12,
21
  "decoder_start_token_id": 50258,
22
  "dropout": 0.0,
23
- "encoder_attention_heads": 12,
24
- "encoder_ffn_dim": 3072,
25
  "encoder_layerdrop": 0.0,
26
- "encoder_layers": 12,
27
  "eos_token_id": 50257,
28
  "forced_decoder_ids": null,
29
  "init_std": 0.02,
@@ -39,14 +39,14 @@
39
  "max_target_positions": 448,
40
  "median_filter_width": 7,
41
  "model_type": "whisper",
42
- "num_hidden_layers": 12,
43
- "num_mel_bins": 80,
44
- "pad_token_id": 50257,
45
  "scale_embedding": false,
46
  "suppress_tokens": [],
47
  "torch_dtype": "float32",
48
- "transformers_version": "4.34.0.dev0",
49
  "use_cache": true,
50
  "use_weighted_layer_sum": false,
51
- "vocab_size": 51865
52
  }
 
1
  {
2
+ "_name_or_path": "openai/whisper-large-v3",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
  "apply_spec_augment": false,
 
13
  ],
14
  "bos_token_id": 50257,
15
  "classifier_proj_size": 256,
16
+ "d_model": 1280,
17
+ "decoder_attention_heads": 20,
18
+ "decoder_ffn_dim": 5120,
19
  "decoder_layerdrop": 0.0,
20
+ "decoder_layers": 32,
21
  "decoder_start_token_id": 50258,
22
  "dropout": 0.0,
23
+ "encoder_attention_heads": 20,
24
+ "encoder_ffn_dim": 5120,
25
  "encoder_layerdrop": 0.0,
26
+ "encoder_layers": 32,
27
  "eos_token_id": 50257,
28
  "forced_decoder_ids": null,
29
  "init_std": 0.02,
 
39
  "max_target_positions": 448,
40
  "median_filter_width": 7,
41
  "model_type": "whisper",
42
+ "num_hidden_layers": 32,
43
+ "num_mel_bins": 128,
44
+ "pad_token_id": 50256,
45
  "scale_embedding": false,
46
  "suppress_tokens": [],
47
  "torch_dtype": "float32",
48
+ "transformers_version": "4.36.0.dev0",
49
  "use_cache": true,
50
  "use_weighted_layer_sum": false,
51
+ "vocab_size": 51866
52
  }
generation_config.json CHANGED
@@ -1,44 +1,44 @@
1
  {
2
  "alignment_heads": [
3
  [
4
- 5,
5
- 3
6
  ],
7
  [
8
- 5,
9
- 9
10
  ],
11
  [
12
- 8,
13
- 0
14
  ],
15
  [
16
- 8,
17
- 4
18
  ],
19
  [
20
- 8,
21
- 7
22
  ],
23
  [
24
- 8,
25
- 8
26
  ],
27
  [
28
- 9,
29
- 0
30
  ],
31
  [
32
- 9,
33
- 7
34
  ],
35
  [
36
- 9,
37
- 9
38
  ],
39
  [
40
- 10,
41
- 5
42
  ]
43
  ],
44
  "begin_suppress_tokens": [
@@ -55,7 +55,7 @@
55
  ],
56
  [
57
  2,
58
- 50359
59
  ]
60
  ],
61
  "is_multilingual": true,
@@ -158,11 +158,12 @@
158
  "<|vi|>": 50278,
159
  "<|yi|>": 50335,
160
  "<|yo|>": 50325,
 
161
  "<|zh|>": 50260
162
  },
163
  "max_initial_timestamp_index": 1,
164
  "max_length": 448,
165
- "no_timestamps_token_id": 50363,
166
  "pad_token_id": 50257,
167
  "return_timestamps": false,
168
  "suppress_tokens": [
@@ -249,15 +250,15 @@
249
  49870,
250
  50254,
251
  50258,
252
- 50358,
253
  50359,
254
  50360,
255
  50361,
256
- 50362
 
257
  ],
258
  "task_to_id": {
259
- "transcribe": 50359,
260
- "translate": 50358
261
  },
262
- "transformers_version": "4.34.0.dev0"
263
  }
 
1
  {
2
  "alignment_heads": [
3
  [
4
+ 7,
5
+ 0
6
  ],
7
  [
8
+ 10,
9
+ 17
10
  ],
11
  [
12
+ 12,
13
+ 18
14
  ],
15
  [
16
+ 13,
17
+ 12
18
  ],
19
  [
20
+ 16,
21
+ 1
22
  ],
23
  [
24
+ 17,
25
+ 14
26
  ],
27
  [
28
+ 19,
29
+ 11
30
  ],
31
  [
32
+ 21,
33
+ 4
34
  ],
35
  [
36
+ 24,
37
+ 1
38
  ],
39
  [
40
+ 25,
41
+ 6
42
  ]
43
  ],
44
  "begin_suppress_tokens": [
 
55
  ],
56
  [
57
  2,
58
+ 50360
59
  ]
60
  ],
61
  "is_multilingual": true,
 
158
  "<|vi|>": 50278,
159
  "<|yi|>": 50335,
160
  "<|yo|>": 50325,
161
+ "<|yue|>": 50358,
162
  "<|zh|>": 50260
163
  },
164
  "max_initial_timestamp_index": 1,
165
  "max_length": 448,
166
+ "no_timestamps_token_id": 50364,
167
  "pad_token_id": 50257,
168
  "return_timestamps": false,
169
  "suppress_tokens": [
 
250
  49870,
251
  50254,
252
  50258,
 
253
  50359,
254
  50360,
255
  50361,
256
+ 50362,
257
+ 50363
258
  ],
259
  "task_to_id": {
260
+ "transcribe": 50360,
261
+ "translate": 50359
262
  },
263
+ "transformers_version": "4.36.0.dev0"
264
  }
model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba2a4c5046d1813176f2457181fda8cc6202f1cad32e5b48a0718b0aae572fc2
3
+ size 4993448880
model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28937bc5e70b444638b38295854dacb2c85dc00be8520c7bf111db8cc1c1f6da
3
+ size 1180663192
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff