rosyvs commited on
Commit
8e560f0
1 Parent(s): 5b7f46b

first commit - version 202_base-en_v1 model run. trained without augmentation

Browse files
CKPT.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ # yamllint disable
2
+ WER: 77.8046811945117
3
+ deletions: 1108
4
+ end-of-epoch: true
5
+ insertions: 1593
6
+ substitutions: 2119
7
+ unixtime: 1682145122.214084
brain.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:222d610754b85e9dcd4fa864baf9e838d3066ea1e79b95355624f47e714729eb
3
+ size 50
config.json ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "openai/whisper-base.en",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "gelu",
5
+ "architectures": [
6
+ "WhisperForConditionalGeneration"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "begin_suppress_tokens": [
10
+ 220,
11
+ 50256
12
+ ],
13
+ "bos_token_id": 50257,
14
+ "d_model": 512,
15
+ "decoder_attention_heads": 8,
16
+ "decoder_ffn_dim": 2048,
17
+ "decoder_layerdrop": 0.0,
18
+ "decoder_layers": 6,
19
+ "decoder_start_token_id": 50257,
20
+ "dropout": 0.0,
21
+ "encoder_attention_heads": 8,
22
+ "encoder_ffn_dim": 2048,
23
+ "encoder_layerdrop": 0.0,
24
+ "encoder_layers": 6,
25
+ "eos_token_id": 50256,
26
+ "forced_decoder_ids": [
27
+ [
28
+ 1,
29
+ 50362
30
+ ]
31
+ ],
32
+ "init_std": 0.02,
33
+ "is_encoder_decoder": true,
34
+ "max_length": 448,
35
+ "max_source_positions": 1500,
36
+ "max_target_positions": 448,
37
+ "model_type": "whisper",
38
+ "num_hidden_layers": 6,
39
+ "num_mel_bins": 80,
40
+ "pad_token_id": 50256,
41
+ "scale_embedding": false,
42
+ "suppress_tokens": [
43
+ 1,
44
+ 2,
45
+ 7,
46
+ 8,
47
+ 9,
48
+ 10,
49
+ 14,
50
+ 25,
51
+ 26,
52
+ 27,
53
+ 28,
54
+ 29,
55
+ 31,
56
+ 58,
57
+ 59,
58
+ 60,
59
+ 61,
60
+ 62,
61
+ 63,
62
+ 90,
63
+ 91,
64
+ 92,
65
+ 93,
66
+ 357,
67
+ 366,
68
+ 438,
69
+ 532,
70
+ 685,
71
+ 705,
72
+ 796,
73
+ 930,
74
+ 1058,
75
+ 1220,
76
+ 1267,
77
+ 1279,
78
+ 1303,
79
+ 1343,
80
+ 1377,
81
+ 1391,
82
+ 1635,
83
+ 1782,
84
+ 1875,
85
+ 2162,
86
+ 2361,
87
+ 2488,
88
+ 3467,
89
+ 4008,
90
+ 4211,
91
+ 4600,
92
+ 4808,
93
+ 5299,
94
+ 5855,
95
+ 6329,
96
+ 7203,
97
+ 9609,
98
+ 9959,
99
+ 10563,
100
+ 10786,
101
+ 11420,
102
+ 11709,
103
+ 11907,
104
+ 13163,
105
+ 13697,
106
+ 13700,
107
+ 14808,
108
+ 15306,
109
+ 16410,
110
+ 16791,
111
+ 17992,
112
+ 19203,
113
+ 19510,
114
+ 20724,
115
+ 22305,
116
+ 22935,
117
+ 27007,
118
+ 30109,
119
+ 30420,
120
+ 33409,
121
+ 34949,
122
+ 40283,
123
+ 40493,
124
+ 40549,
125
+ 47282,
126
+ 49146,
127
+ 50257,
128
+ 50357,
129
+ 50358,
130
+ 50359,
131
+ 50360,
132
+ 50361
133
+ ],
134
+ "torch_dtype": "float32",
135
+ "transformers_version": "4.27.0.dev0",
136
+ "use_cache": true,
137
+ "vocab_size": 51864
138
+ }
counter.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e07408562bedb8b60ce05c1decfe3ad16b72230967de01f640b7e4729b49fce
3
+ size 1
dataloader-TRAIN.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:721d978b344436223d253b925b1b5fb9965247f76cfddb6172e9f3d7a7c69a62
3
+ size 5
decoder.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09cd10d25028e9b2ae84581a7579a04a4987b41cfb51a731aedeead7feb06e69
3
+ size 290532565
infer_hf_whisper.yaml ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ################################
2
+ # Model: Whisper (Encoder-Decoder) + NLL
3
+ # Augmentation: none (this model run was trained without augmentation)
4
+ # Authors: Adel Moumen 2022, Titouan Parcollet 2022, Rosy Southwell 2023
5
+ # ################################
6
+
7
+ model_src: rosyvs/whisat-base
8
+ model_type: tiny.en  # NOTE(review): config.json names openai/whisper-base.en as the base model; confirm this value
9
+ language: english
10
+ auto_mix_prec: False # TODO: set to True for CUDA
11
+ only_encoder: False
12
+
13
+ # These values are only used for the searchers.
14
+ # They need to be hardcoded and should not be changed with Whisper.
15
+ # They are used as part of the searching process.
16
+ # The bos token of the searcher will be timestamp_index
17
+ # and will be concatenated with the bos, language and task tokens.
18
+ timestamp_index: 50363
19
+ eos_index: 50257
20
+ bos_index: 50258
21
+
22
+ # Decoding parameters
23
+ min_decode_ratio: 0.0
24
+ max_decode_ratio: 1.0 # the commonvoice inference yaml uses 0.1
25
+ test_beam_size: 5 # NOTE: previously 8; changed to 5 to match the default used by OpenAI
26
+
27
+ whisper: !new:speechbrain.lobes.models.huggingface_whisper.HuggingFaceWhisper
28
+ encoder_only: !ref <only_encoder>
29
+ freeze: True
30
+ freeze_encoder: True
31
+ source: !ref <model_src>
32
+ save_path: !ref <cache_dir>
33
+ # language: language
34
+
35
+ # tokenizer: !new:speechbrain.lobes.models.huggingface_whisper.HuggingFaceWhisper
36
+ # encoder_only: False
37
+ # freeze: True
38
+ # freeze_encoder: True
39
+ # source: !ref openai/whisper-<model_type>
40
+ # save_path: !ref <cache_dir>
41
+
42
+ # decoder: !new:speechbrain.decoders.seq2seq.S2SWhisperGreedySearch
43
+ # model: !ref <whisper>
44
+ # bos_index: !ref <timestamp_index>
45
+ # eos_index: !ref <eos_index>
46
+ # min_decode_ratio: !ref <min_decode_ratio>
47
+ # max_decode_ratio: !ref <max_decode_ratio>
48
+
49
+ decoder: !new:speechbrain.decoders.seq2seq.S2SWhisperBeamSearch
50
+ module: [!ref <whisper>]
51
+ bos_index: !ref <timestamp_index>
52
+ eos_index: !ref <eos_index>
53
+ min_decode_ratio: !ref <min_decode_ratio>
54
+ max_decode_ratio: !ref <max_decode_ratio>
55
+ beam_size: !ref <test_beam_size>
56
+
57
+ modules:
58
+ whisper: !ref <whisper>
59
+ # tokenizer: !ref <tokenizer>
60
+ decoder: !ref <decoder> # can change to greedy
61
+
62
+ pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
63
+ loadables:
64
+ whisper: !ref <whisper>
65
+ # tokenizer: !ref <tokenizer>
66
+ # decoder: !ref <decoder>
67
+ # paths:
68
+ # whisper: !ref <model_src>/whisper.ckpt
69
+ # tokenizer: !ref openai/whisper-<model_type>
70
+ # decoder: !ref <model_src>/whisper.ckpt
71
+
72
+ # checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
73
+ # checkpoints_dir: !ref <model_src>
74
+ # recoverables:
75
+ # whisper: !ref <whisper>
optimizer.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c63a201ce77e2b680fe1cf6957c1dbc5143dc6c23602934a4328a5bdbdcd71ed
3
+ size 580951685
preprocessor_config.json ADDED
The diff for this file is too large to render. See raw diff
 
scaler.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:882f6f4d929d3900d5e93202d3d027a1caad91a342f2c084a5e26dad638e087e
3
+ size 557
scheduler_whisper.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efb8b0ea85f579ac77f43c3d0e01d6bdde8b963e5d0e8d41d9dfbfa452ddb0a0
3
+ size 515
tokenizer.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee0076ed1a26ff3b91988c6bc709947094aaba3f7571c1c2fca25aa5bc855056
3
+ size 290531013
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "bos_token": {
5
+ "__type": "AddedToken",
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "eos_token": {
13
+ "__type": "AddedToken",
14
+ "content": "<|endoftext|>",
15
+ "lstrip": false,
16
+ "normalized": true,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "errors": "replace",
21
+ "model_max_length": 1024,
22
+ "name_or_path": "openai/whisper-base.en",
23
+ "pad_token": null,
24
+ "processor_class": "WhisperProcessor",
25
+ "return_attention_mask": false,
26
+ "special_tokens_map_file": null,
27
+ "tokenizer_class": "WhisperTokenizer",
28
+ "unk_token": {
29
+ "__type": "AddedToken",
30
+ "content": "<|endoftext|>",
31
+ "lstrip": false,
32
+ "normalized": true,
33
+ "rstrip": false,
34
+ "single_word": false
35
+ }
36
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
whisper.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9ff69b0535385083a7ff21cb8b813b5cb2adeb86a295fd751bdbbe9e474b00e
3
+ size 290529941