Edresson commited on
Commit
e226caa
1 Parent(s): aedef12

Add checkpints

Browse files
all_results.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 102.0,
3
+ "eval_loss": 0.5073719620704651,
4
+ "eval_runtime": 236.6641,
5
+ "eval_samples": 7066,
6
+ "eval_samples_per_second": 29.857,
7
+ "eval_wer": 0.3200079912096694,
8
+ "train_runtime": 110050.311,
9
+ "train_samples": 15408,
10
+ "train_samples_per_second": 0.102
11
+ }
config.json ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "facebook/wav2vec2-large-100k-voxpopuli",
3
+ "activation_dropout": 0.0,
4
+ "apply_spec_augment": true,
5
+ "architectures": [
6
+ "Wav2Vec2ForCTC"
7
+ ],
8
+ "attention_dropout": 0.1,
9
+ "bos_token_id": 1,
10
+ "codevector_dim": 768,
11
+ "contrastive_logits_temperature": 0.1,
12
+ "conv_bias": true,
13
+ "conv_dim": [
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512,
19
+ 512,
20
+ 512
21
+ ],
22
+ "conv_kernel": [
23
+ 10,
24
+ 3,
25
+ 3,
26
+ 3,
27
+ 3,
28
+ 2,
29
+ 2
30
+ ],
31
+ "conv_stride": [
32
+ 5,
33
+ 2,
34
+ 2,
35
+ 2,
36
+ 2,
37
+ 2,
38
+ 2
39
+ ],
40
+ "ctc_loss_reduction": "mean",
41
+ "ctc_zero_infinity": true,
42
+ "diversity_loss_weight": 0.1,
43
+ "do_stable_layer_norm": true,
44
+ "eos_token_id": 2,
45
+ "feat_extract_activation": "gelu",
46
+ "feat_extract_dropout": 0.0,
47
+ "feat_extract_norm": "layer",
48
+ "feat_proj_dropout": 0.1,
49
+ "feat_quantizer_dropout": 0.0,
50
+ "final_dropout": 0.0,
51
+ "gradient_checkpointing": true,
52
+ "hidden_act": "gelu",
53
+ "hidden_dropout": 0.1,
54
+ "hidden_size": 1024,
55
+ "initializer_range": 0.02,
56
+ "intermediate_size": 4096,
57
+ "layer_norm_eps": 1e-05,
58
+ "layerdrop": 0.0,
59
+ "mask_channel_length": 10,
60
+ "mask_channel_min_space": 1,
61
+ "mask_channel_other": 0.0,
62
+ "mask_channel_prob": 0.0,
63
+ "mask_channel_selection": "static",
64
+ "mask_feature_length": 10,
65
+ "mask_feature_prob": 0.0,
66
+ "mask_time_length": 10,
67
+ "mask_time_min_space": 1,
68
+ "mask_time_other": 0.0,
69
+ "mask_time_prob": 0.05,
70
+ "mask_time_selection": "static",
71
+ "model_type": "wav2vec2",
72
+ "num_attention_heads": 16,
73
+ "num_codevector_groups": 2,
74
+ "num_codevectors_per_group": 320,
75
+ "num_conv_pos_embedding_groups": 16,
76
+ "num_conv_pos_embeddings": 128,
77
+ "num_feat_extract_layers": 7,
78
+ "num_hidden_layers": 24,
79
+ "num_negatives": 100,
80
+ "pad_token_id": 0,
81
+ "proj_codevector_dim": 768,
82
+ "transformers_version": "4.6.1",
83
+ "vocab_size": 45
84
+ }
config_train.json ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "run_name": "Wav2Vec-fine-tuning-TEDx",
3
+ "run_description": "Fine tuning TEDx",
4
+ "seed": 42,
5
+ // AUDIO PARAMS
6
+ "sampling_rate": 16000,
7
+
8
+ // VOCABULARY PARAMETERS
9
+ "vocab":{
10
+ "vocab_path": "example/vocab_example.json", // generic vocab for Portuguese
11
+ "blank": "<pad>", // blank token for padding
12
+ "silence": "|", // token between words
13
+ "unk": "<unk>" // unk token
14
+ },
15
+
16
+ // TRAINING
17
+ "batch_size": 8, // Batch size for training.
18
+ "mixed_precision": true, // level of optimization with NVIDIA's apex feature for automatic mixed FP16/FP32 precision (AMP), NOTE: currently only O1 is supported, and use "O1" to activate.
19
+ "early_stop_epochs": 10, // 0 disables early stopping; otherwise, the number of epochs without a decrease in validation loss before training stops
20
+ "preprocess_dataset": false, // if true, the dataset will be pre-processed and saved in disk, otherwise the audio files will be loaded in each step. Preprocessing makes training faster, but requires much more disk space.
21
+
22
+ // OPTIMIZER
23
+ "epochs": 140, // total number of epochs to train.
24
+ "lr": 0.00003, // Initial learning rate.
25
+ "gradient_accumulation_steps": 24,
26
+
27
+ // LOGGING
28
+ "logging_steps": 100, // Number of steps to plot.
29
+ "load_best_model_at_end": true,
30
+ "save_total_limit": 3,
31
+ "warmup_ratio": 0.06666666667, // 0 disables it. Ratio of total training steps used for a linear warmup from 0 to learning_rate
32
+ "warmup_steps": 0, // 0 disables it. Number of steps used for a linear warmup from 0 to learning_rate
33
+
34
+ // DATA LOADING
35
+ "num_loader_workers": 8, // number of training data loader processes. Don't set it too big. 4-8 are good.
36
+
37
+ // MODEL
38
+ "freeze_feature_extractor": true, // Whether to freeze the feature extractor layers of the model.
39
+ "attention_dropout": 0.1, // The dropout ratio for the attention probabilities.
40
+ "activation_dropout": 0.1, // The dropout ratio for activations inside the fully connected layer.
41
+ "hidden_dropout": 0.1, // The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
42
+ "feat_proj_dropout": 0.1, // The dropout probability for all 1D convolutional layers in the feature extractor.
43
+ "mask_time_prob": 0.05, // Probability of each feature vector along the time axis to be chosen as the start of the vector span to be masked.
44
+ "layerdrop": 0.0, // The LayerDrop probability.
45
+ "gradient_checkpointing": true, // If True, use gradient checkpointing to save memory at the expense of slower backward pass.
46
+
47
+ // ToDo: Implement Time mask and Frequency Mask
48
+ "audio_augmentation":[
49
+ // additive noise and room impulse response (RIR) simulation similar to: https://arxiv.org/pdf/2009.14153.pdf
50
+ {
51
+ "name": "additive",
52
+ "sounds_path":"../../datasets/musan/speech/", // download: https://www.openslr.org/17/
53
+ "lru_cache_size": 32, // Maximum size of the LRU cache for storing noise files in memory
54
+ "min_snr_in_db": 13.0,
55
+ "max_snr_in_db": 20.0,
56
+ // "sample_rate": 16000,
57
+ "p": 0.25
58
+ },
59
+ {
60
+ "name": "additive",
61
+ "sounds_path":"../../datasets/musan/music/", // download: https://www.openslr.org/17/
62
+ "lru_cache_size": 32, // Maximum size of the LRU cache for storing noise files in memory
63
+ "min_snr_in_db": 5.0,
64
+ "max_snr_in_db": 15.0,
65
+ // "sample_rate": 16000,
66
+ "p": 0.25
67
+ },
68
+ {
69
+ "name": "additive",
70
+ "sounds_path":"../../datasets/musan/noise/", // download: https://www.openslr.org/17/
71
+ "lru_cache_size": 32, // Maximum size of the LRU cache for storing noise files in memory
72
+ "min_snr_in_db": 0.0,
73
+ "max_snr_in_db": 15.0,
74
+ // "sample_rate": 16000,
75
+ "p": 0.25
76
+ },
77
+ // rir filter proposed by: https://ieeexplore.ieee.org/document/7953152
78
+ {
79
+ "name": "rir",
80
+ "ir_path": "../../datasets/RIRS_NOISES/simulated_rirs/", // download: https://www.openslr.org/28/
81
+ "lru_cache_size": 128, // Maximum size of the LRU cache for storing noise files in memory
82
+ // "sample_rate": 16000,
83
+ "p": 0.25
84
+ }
85
+ ,
86
+ // {
87
+ // "name": "gain",
88
+ // "min_gain_in_db": -18.0,
89
+ // "max_gain_in_db": 6,
90
+ // "p": 0.25 // probability of applying this method; 0 disables it
91
+ // },
92
+ {
93
+ "name": "pitch_shift",
94
+ "min_semitones": -4,
95
+ "max_semitones": 4,
96
+ "p": 0.25 // probability of applying this method; 0 disables it
97
+ },
98
+ {
99
+ "name": "gaussian",
100
+ "min_amplitude": 0.0001,
101
+ "max_amplitude": 0.001,
102
+ "p": 0.25 // probability of applying this method; 0 disables it
103
+ }
104
+ ],
105
+ // PATHS
106
+ "output_path": "../checkpoints/Wav2Vec-voxpopuli/one-speaker/Final-paper/GT/PT/140-epoch/",
107
+ // CACHE
108
+ "dataset_cache": "../datasets/",
109
+
110
+ // DATASETS
111
+ "datasets":{
112
+
113
+ "files_path": "/workspace/edresson/datasets/Common_Voice/cv-corpus-7.0-2021-07-21/pt/", // root path for the audio files; it will be joined with the file paths in the CSV
114
+ "train":
115
+ [
116
+ // these dicts are passed directly to load_dataset; see the documentation: https://huggingface.co/docs/datasets/package_reference/loading_methods.html#datasets.load_dataset
117
+ {
118
+ "name": "csv",
119
+ "path": "csv",
120
+ "data_files": ["/workspace/edresson/datasets/Common_Voice/cv-corpus-7.0-2021-07-21/pt/train_converted.csv"], // csv files
121
+ "text_column": "text",
122
+ "path_column": "file_path"
123
+ }
124
+ ,
125
+ {
126
+ "name": "csv",
127
+ "path": "csv",
128
+ "data_files": ["/workspace/edresson/datasets/TTS-Portuguese-Corpus_16khz/train_TTS-Portuguese_Corpus_metadata_converted_to_ASR.csv"], // csv files
129
+ "text_column": "text",
130
+ "path_column": "file_path"
131
+ }
132
+ ]
133
+ ,
134
+ "devel":
135
+ [
136
+ {
137
+ "name": "csv",
138
+ "path": "csv",
139
+ "data_files": ["/workspace/edresson/datasets/Common_Voice/cv-corpus-7.0-2021-07-21/pt/dev_converted.csv"], // csv files
140
+ "text_column": "text",
141
+ "path_column": "file_path"
142
+ }
143
+ ]
144
+ ,
145
+ "test":
146
+ {
147
+ "name": "csv",
148
+ "path": "csv",
149
+ "data_files": ["/workspace/edresson/datasets/Common_Voice/cv-corpus-7.0-2021-07-21/pt/test_converted.csv"], // csv files
150
+ "text_column": "text",
151
+ "path_column": "file_path"
152
+ }
153
+
154
+ }//,
155
+ // used only for test
156
+ // "KenLM":{
157
+ // "kenlm_model_path": "../../kenLM/binaries/subtitle/4-gram/lm.binary", // Path for KenLM model
158
+ // "lexicon_path": "example/lexicon.lst", // file with all words, used to limit the decoder search
159
+ // "beam": 2048,
160
+ // "nbest": 1,
161
+ // "beam_threshold": 25,
162
+ // "lm_weight": 1,
163
+ // "word_score": -1,
164
+ // "sil_weight": 0
165
+ // }
166
+
167
+
168
+
169
+ }
170
+
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 102.0,
3
+ "eval_loss": 0.5073719620704651,
4
+ "eval_runtime": 236.6641,
5
+ "eval_samples": 7066,
6
+ "eval_samples_per_second": 29.857,
7
+ "eval_wer": 0.3200079912096694
8
+ }
preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "return_attention_mask": true,
8
+ "sampling_rate": 16000
9
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f35ffb3f0c9e50d85d8001b0c638ec58939199a443de8500862dac6f4c7535b5
3
+ size 1262114051
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "<pad>", "do_lower_case": false, "word_delimiter_token": "|"}
train_results.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 102.0,
3
+ "train_runtime": 110050.311,
4
+ "train_samples": 15408,
5
+ "train_samples_per_second": 0.102
6
+ }
trainer_state.json ADDED
@@ -0,0 +1,1331 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.5058531761169434,
3
+ "best_model_checkpoint": "../checkpoints/Wav2Vec-voxpopuli/one-speaker/Final-paper/GT/PT/140-epoch/checkpoint-7360",
4
+ "epoch": 101.99688473520249,
5
+ "global_step": 8160,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.01,
12
+ "learning_rate": 4.016064257028112e-08,
13
+ "loss": 13.7434,
14
+ "step": 1
15
+ },
16
+ {
17
+ "epoch": 1.0,
18
+ "eval_loss": 21.166763305664062,
19
+ "eval_runtime": 236.3766,
20
+ "eval_samples_per_second": 29.893,
21
+ "eval_wer": 1.0027969233842773,
22
+ "step": 80
23
+ },
24
+ {
25
+ "epoch": 1.25,
26
+ "learning_rate": 4.016064257028113e-06,
27
+ "loss": 18.0487,
28
+ "step": 100
29
+ },
30
+ {
31
+ "epoch": 2.0,
32
+ "eval_loss": 13.212292671203613,
33
+ "eval_runtime": 235.4679,
34
+ "eval_samples_per_second": 30.008,
35
+ "eval_wer": 1.0,
36
+ "step": 160
37
+ },
38
+ {
39
+ "epoch": 2.5,
40
+ "learning_rate": 7.991967871485944e-06,
41
+ "loss": 13.4798,
42
+ "step": 200
43
+ },
44
+ {
45
+ "epoch": 3.0,
46
+ "eval_loss": 8.697088241577148,
47
+ "eval_runtime": 235.1241,
48
+ "eval_samples_per_second": 30.052,
49
+ "eval_wer": 1.0,
50
+ "step": 240
51
+ },
52
+ {
53
+ "epoch": 3.75,
54
+ "learning_rate": 1.2008032128514056e-05,
55
+ "loss": 8.6518,
56
+ "step": 300
57
+ },
58
+ {
59
+ "epoch": 4.0,
60
+ "eval_loss": 6.402597904205322,
61
+ "eval_runtime": 233.852,
62
+ "eval_samples_per_second": 30.216,
63
+ "eval_wer": 1.0,
64
+ "step": 320
65
+ },
66
+ {
67
+ "epoch": 5.0,
68
+ "learning_rate": 1.6024096385542168e-05,
69
+ "loss": 6.2486,
70
+ "step": 400
71
+ },
72
+ {
73
+ "epoch": 5.0,
74
+ "eval_loss": 5.096883296966553,
75
+ "eval_runtime": 235.426,
76
+ "eval_samples_per_second": 30.014,
77
+ "eval_wer": 1.0,
78
+ "step": 400
79
+ },
80
+ {
81
+ "epoch": 6.0,
82
+ "eval_loss": 4.204596519470215,
83
+ "eval_runtime": 235.3347,
84
+ "eval_samples_per_second": 30.025,
85
+ "eval_wer": 1.0,
86
+ "step": 480
87
+ },
88
+ {
89
+ "epoch": 6.25,
90
+ "learning_rate": 2.0040160642570282e-05,
91
+ "loss": 4.7563,
92
+ "step": 500
93
+ },
94
+ {
95
+ "epoch": 7.0,
96
+ "eval_loss": 3.5930964946746826,
97
+ "eval_runtime": 233.5038,
98
+ "eval_samples_per_second": 30.261,
99
+ "eval_wer": 1.0,
100
+ "step": 560
101
+ },
102
+ {
103
+ "epoch": 7.5,
104
+ "learning_rate": 2.4056224899598393e-05,
105
+ "loss": 3.756,
106
+ "step": 600
107
+ },
108
+ {
109
+ "epoch": 8.0,
110
+ "eval_loss": 3.2295875549316406,
111
+ "eval_runtime": 235.3066,
112
+ "eval_samples_per_second": 30.029,
113
+ "eval_wer": 1.0,
114
+ "step": 640
115
+ },
116
+ {
117
+ "epoch": 8.75,
118
+ "learning_rate": 2.8072289156626508e-05,
119
+ "loss": 3.23,
120
+ "step": 700
121
+ },
122
+ {
123
+ "epoch": 9.0,
124
+ "eval_loss": 3.0816433429718018,
125
+ "eval_runtime": 235.38,
126
+ "eval_samples_per_second": 30.02,
127
+ "eval_wer": 1.0,
128
+ "step": 720
129
+ },
130
+ {
131
+ "epoch": 10.0,
132
+ "learning_rate": 2.9850760547211327e-05,
133
+ "loss": 3.0413,
134
+ "step": 800
135
+ },
136
+ {
137
+ "epoch": 10.0,
138
+ "eval_loss": 3.010103940963745,
139
+ "eval_runtime": 235.1911,
140
+ "eval_samples_per_second": 30.044,
141
+ "eval_wer": 1.0,
142
+ "step": 800
143
+ },
144
+ {
145
+ "epoch": 11.0,
146
+ "eval_loss": 2.9560742378234863,
147
+ "eval_runtime": 234.3165,
148
+ "eval_samples_per_second": 30.156,
149
+ "eval_wer": 1.0,
150
+ "step": 880
151
+ },
152
+ {
153
+ "epoch": 11.25,
154
+ "learning_rate": 2.9563761599540802e-05,
155
+ "loss": 2.9748,
156
+ "step": 900
157
+ },
158
+ {
159
+ "epoch": 12.0,
160
+ "eval_loss": 2.9139442443847656,
161
+ "eval_runtime": 235.2653,
162
+ "eval_samples_per_second": 30.034,
163
+ "eval_wer": 1.0,
164
+ "step": 960
165
+ },
166
+ {
167
+ "epoch": 12.5,
168
+ "learning_rate": 2.9276762651870278e-05,
169
+ "loss": 2.9122,
170
+ "step": 1000
171
+ },
172
+ {
173
+ "epoch": 13.0,
174
+ "eval_loss": 2.897088050842285,
175
+ "eval_runtime": 235.1263,
176
+ "eval_samples_per_second": 30.052,
177
+ "eval_wer": 1.0,
178
+ "step": 1040
179
+ },
180
+ {
181
+ "epoch": 13.75,
182
+ "learning_rate": 2.8989763704199754e-05,
183
+ "loss": 2.8791,
184
+ "step": 1100
185
+ },
186
+ {
187
+ "epoch": 14.0,
188
+ "eval_loss": 2.8461437225341797,
189
+ "eval_runtime": 237.2882,
190
+ "eval_samples_per_second": 29.778,
191
+ "eval_wer": 1.0,
192
+ "step": 1120
193
+ },
194
+ {
195
+ "epoch": 15.0,
196
+ "learning_rate": 2.8702764756529226e-05,
197
+ "loss": 2.8098,
198
+ "step": 1200
199
+ },
200
+ {
201
+ "epoch": 15.0,
202
+ "eval_loss": 2.71140718460083,
203
+ "eval_runtime": 236.1183,
204
+ "eval_samples_per_second": 29.926,
205
+ "eval_wer": 0.9989611427429828,
206
+ "step": 1200
207
+ },
208
+ {
209
+ "epoch": 16.0,
210
+ "eval_loss": 2.430983066558838,
211
+ "eval_runtime": 234.0852,
212
+ "eval_samples_per_second": 30.186,
213
+ "eval_wer": 0.9871341524323245,
214
+ "step": 1280
215
+ },
216
+ {
217
+ "epoch": 16.25,
218
+ "learning_rate": 2.84157658088587e-05,
219
+ "loss": 2.6134,
220
+ "step": 1300
221
+ },
222
+ {
223
+ "epoch": 17.0,
224
+ "eval_loss": 2.0400171279907227,
225
+ "eval_runtime": 236.1033,
226
+ "eval_samples_per_second": 29.928,
227
+ "eval_wer": 0.9695534911597243,
228
+ "step": 1360
229
+ },
230
+ {
231
+ "epoch": 17.5,
232
+ "learning_rate": 2.8128766861188177e-05,
233
+ "loss": 2.2164,
234
+ "step": 1400
235
+ },
236
+ {
237
+ "epoch": 18.0,
238
+ "eval_loss": 1.610411286354065,
239
+ "eval_runtime": 234.9978,
240
+ "eval_samples_per_second": 30.068,
241
+ "eval_wer": 0.8650484467086205,
242
+ "step": 1440
243
+ },
244
+ {
245
+ "epoch": 18.75,
246
+ "learning_rate": 2.784176791351765e-05,
247
+ "loss": 1.7608,
248
+ "step": 1500
249
+ },
250
+ {
251
+ "epoch": 19.0,
252
+ "eval_loss": 1.3267238140106201,
253
+ "eval_runtime": 236.1618,
254
+ "eval_samples_per_second": 29.92,
255
+ "eval_wer": 0.7361502347417841,
256
+ "step": 1520
257
+ },
258
+ {
259
+ "epoch": 20.0,
260
+ "learning_rate": 2.7554768965847124e-05,
261
+ "loss": 1.4269,
262
+ "step": 1600
263
+ },
264
+ {
265
+ "epoch": 20.0,
266
+ "eval_loss": 1.1435879468917847,
267
+ "eval_runtime": 235.9998,
268
+ "eval_samples_per_second": 29.941,
269
+ "eval_wer": 0.6568774348216961,
270
+ "step": 1600
271
+ },
272
+ {
273
+ "epoch": 21.0,
274
+ "eval_loss": 1.0661028623580933,
275
+ "eval_runtime": 235.7709,
276
+ "eval_samples_per_second": 29.97,
277
+ "eval_wer": 0.6220956947357906,
278
+ "step": 1680
279
+ },
280
+ {
281
+ "epoch": 21.25,
282
+ "learning_rate": 2.72677700181766e-05,
283
+ "loss": 1.2421,
284
+ "step": 1700
285
+ },
286
+ {
287
+ "epoch": 22.0,
288
+ "eval_loss": 0.9825426936149597,
289
+ "eval_runtime": 235.4357,
290
+ "eval_samples_per_second": 30.012,
291
+ "eval_wer": 0.5804015582858856,
292
+ "step": 1760
293
+ },
294
+ {
295
+ "epoch": 22.5,
296
+ "learning_rate": 2.6980771070506075e-05,
297
+ "loss": 1.1212,
298
+ "step": 1800
299
+ },
300
+ {
301
+ "epoch": 23.0,
302
+ "eval_loss": 0.9016405344009399,
303
+ "eval_runtime": 236.5199,
304
+ "eval_samples_per_second": 29.875,
305
+ "eval_wer": 0.5381680151832984,
306
+ "step": 1840
307
+ },
308
+ {
309
+ "epoch": 23.75,
310
+ "learning_rate": 2.669377212283555e-05,
311
+ "loss": 1.0251,
312
+ "step": 1900
313
+ },
314
+ {
315
+ "epoch": 24.0,
316
+ "eval_loss": 0.8733579516410828,
317
+ "eval_runtime": 236.0952,
318
+ "eval_samples_per_second": 29.929,
319
+ "eval_wer": 0.5140145839576465,
320
+ "step": 1920
321
+ },
322
+ {
323
+ "epoch": 25.0,
324
+ "learning_rate": 2.6406773175165027e-05,
325
+ "loss": 0.9619,
326
+ "step": 2000
327
+ },
328
+ {
329
+ "epoch": 25.0,
330
+ "eval_loss": 0.815034031867981,
331
+ "eval_runtime": 236.7344,
332
+ "eval_samples_per_second": 29.848,
333
+ "eval_wer": 0.48626510838078113,
334
+ "step": 2000
335
+ },
336
+ {
337
+ "epoch": 26.0,
338
+ "eval_loss": 0.8121696710586548,
339
+ "eval_runtime": 236.6854,
340
+ "eval_samples_per_second": 29.854,
341
+ "eval_wer": 0.47379882129657375,
342
+ "step": 2080
343
+ },
344
+ {
345
+ "epoch": 26.25,
346
+ "learning_rate": 2.6119774227494502e-05,
347
+ "loss": 0.9102,
348
+ "step": 2100
349
+ },
350
+ {
351
+ "epoch": 27.0,
352
+ "eval_loss": 0.7858432531356812,
353
+ "eval_runtime": 235.5249,
354
+ "eval_samples_per_second": 30.001,
355
+ "eval_wer": 0.4592348416741584,
356
+ "step": 2160
357
+ },
358
+ {
359
+ "epoch": 27.5,
360
+ "learning_rate": 2.5832775279823974e-05,
361
+ "loss": 0.8691,
362
+ "step": 2200
363
+ },
364
+ {
365
+ "epoch": 28.0,
366
+ "eval_loss": 0.7678278088569641,
367
+ "eval_runtime": 236.2494,
368
+ "eval_samples_per_second": 29.909,
369
+ "eval_wer": 0.4526820497452802,
370
+ "step": 2240
371
+ },
372
+ {
373
+ "epoch": 28.75,
374
+ "learning_rate": 2.554577633215345e-05,
375
+ "loss": 0.8358,
376
+ "step": 2300
377
+ },
378
+ {
379
+ "epoch": 29.0,
380
+ "eval_loss": 0.744153618812561,
381
+ "eval_runtime": 236.3798,
382
+ "eval_samples_per_second": 29.893,
383
+ "eval_wer": 0.4421736090300669,
384
+ "step": 2320
385
+ },
386
+ {
387
+ "epoch": 30.0,
388
+ "learning_rate": 2.5258777384482922e-05,
389
+ "loss": 0.8078,
390
+ "step": 2400
391
+ },
392
+ {
393
+ "epoch": 30.0,
394
+ "eval_loss": 0.7339251637458801,
395
+ "eval_runtime": 236.5399,
396
+ "eval_samples_per_second": 29.872,
397
+ "eval_wer": 0.4369793227449805,
398
+ "step": 2400
399
+ },
400
+ {
401
+ "epoch": 31.0,
402
+ "eval_loss": 0.6944392919540405,
403
+ "eval_runtime": 236.1263,
404
+ "eval_samples_per_second": 29.925,
405
+ "eval_wer": 0.4234542003795825,
406
+ "step": 2480
407
+ },
408
+ {
409
+ "epoch": 31.25,
410
+ "learning_rate": 2.4971778436812397e-05,
411
+ "loss": 0.7813,
412
+ "step": 2500
413
+ },
414
+ {
415
+ "epoch": 32.0,
416
+ "eval_loss": 0.7018499374389648,
417
+ "eval_runtime": 236.4001,
418
+ "eval_samples_per_second": 29.89,
419
+ "eval_wer": 0.42349415642792926,
420
+ "step": 2560
421
+ },
422
+ {
423
+ "epoch": 32.5,
424
+ "learning_rate": 2.4684779489141873e-05,
425
+ "loss": 0.7586,
426
+ "step": 2600
427
+ },
428
+ {
429
+ "epoch": 33.0,
430
+ "eval_loss": 0.691132664680481,
431
+ "eval_runtime": 236.4392,
432
+ "eval_samples_per_second": 29.885,
433
+ "eval_wer": 0.4159824193387274,
434
+ "step": 2640
435
+ },
436
+ {
437
+ "epoch": 33.75,
438
+ "learning_rate": 2.439778054147135e-05,
439
+ "loss": 0.7311,
440
+ "step": 2700
441
+ },
442
+ {
443
+ "epoch": 34.0,
444
+ "eval_loss": 0.6664403080940247,
445
+ "eval_runtime": 235.2837,
446
+ "eval_samples_per_second": 30.032,
447
+ "eval_wer": 0.4046748576565778,
448
+ "step": 2720
449
+ },
450
+ {
451
+ "epoch": 35.0,
452
+ "learning_rate": 2.4110781593800824e-05,
453
+ "loss": 0.7201,
454
+ "step": 2800
455
+ },
456
+ {
457
+ "epoch": 35.0,
458
+ "eval_loss": 0.6681694388389587,
459
+ "eval_runtime": 237.7483,
460
+ "eval_samples_per_second": 29.721,
461
+ "eval_wer": 0.4040755169313755,
462
+ "step": 2800
463
+ },
464
+ {
465
+ "epoch": 36.0,
466
+ "eval_loss": 0.6622908115386963,
467
+ "eval_runtime": 235.7553,
468
+ "eval_samples_per_second": 29.972,
469
+ "eval_wer": 0.40017980221756067,
470
+ "step": 2880
471
+ },
472
+ {
473
+ "epoch": 36.25,
474
+ "learning_rate": 2.38237826461303e-05,
475
+ "loss": 0.6993,
476
+ "step": 2900
477
+ },
478
+ {
479
+ "epoch": 37.0,
480
+ "eval_loss": 0.6478127241134644,
481
+ "eval_runtime": 236.8845,
482
+ "eval_samples_per_second": 29.829,
483
+ "eval_wer": 0.394825691739087,
484
+ "step": 2960
485
+ },
486
+ {
487
+ "epoch": 37.5,
488
+ "learning_rate": 2.3536783698459775e-05,
489
+ "loss": 0.6855,
490
+ "step": 3000
491
+ },
492
+ {
493
+ "epoch": 38.0,
494
+ "eval_loss": 0.6474733352661133,
495
+ "eval_runtime": 234.8758,
496
+ "eval_samples_per_second": 30.084,
497
+ "eval_wer": 0.39692338427729495,
498
+ "step": 3040
499
+ },
500
+ {
501
+ "epoch": 38.75,
502
+ "learning_rate": 2.324978475078925e-05,
503
+ "loss": 0.676,
504
+ "step": 3100
505
+ },
506
+ {
507
+ "epoch": 39.0,
508
+ "eval_loss": 0.6449029445648193,
509
+ "eval_runtime": 236.3414,
510
+ "eval_samples_per_second": 29.897,
511
+ "eval_wer": 0.3954250324642893,
512
+ "step": 3120
513
+ },
514
+ {
515
+ "epoch": 40.0,
516
+ "learning_rate": 2.296278580311872e-05,
517
+ "loss": 0.6623,
518
+ "step": 3200
519
+ },
520
+ {
521
+ "epoch": 40.0,
522
+ "eval_loss": 0.631065309047699,
523
+ "eval_runtime": 236.9217,
524
+ "eval_samples_per_second": 29.824,
525
+ "eval_wer": 0.38497652582159625,
526
+ "step": 3200
527
+ },
528
+ {
529
+ "epoch": 41.0,
530
+ "eval_loss": 0.6289655566215515,
531
+ "eval_runtime": 236.5702,
532
+ "eval_samples_per_second": 29.869,
533
+ "eval_wer": 0.3841574268304865,
534
+ "step": 3280
535
+ },
536
+ {
537
+ "epoch": 41.25,
538
+ "learning_rate": 2.2675786855448195e-05,
539
+ "loss": 0.6458,
540
+ "step": 3300
541
+ },
542
+ {
543
+ "epoch": 42.0,
544
+ "eval_loss": 0.6317856907844543,
545
+ "eval_runtime": 235.0808,
546
+ "eval_samples_per_second": 30.058,
547
+ "eval_wer": 0.3838178004195385,
548
+ "step": 3360
549
+ },
550
+ {
551
+ "epoch": 42.5,
552
+ "learning_rate": 2.238878790777767e-05,
553
+ "loss": 0.6327,
554
+ "step": 3400
555
+ },
556
+ {
557
+ "epoch": 43.0,
558
+ "eval_loss": 0.6043937802314758,
559
+ "eval_runtime": 236.0018,
560
+ "eval_samples_per_second": 29.94,
561
+ "eval_wer": 0.3680950953950654,
562
+ "step": 3440
563
+ },
564
+ {
565
+ "epoch": 43.75,
566
+ "learning_rate": 2.2101788960107146e-05,
567
+ "loss": 0.6317,
568
+ "step": 3500
569
+ },
570
+ {
571
+ "epoch": 44.0,
572
+ "eval_loss": 0.6136683821678162,
573
+ "eval_runtime": 236.7044,
574
+ "eval_samples_per_second": 29.852,
575
+ "eval_wer": 0.37600639296773547,
576
+ "step": 3520
577
+ },
578
+ {
579
+ "epoch": 45.0,
580
+ "learning_rate": 2.181479001243662e-05,
581
+ "loss": 0.6206,
582
+ "step": 3600
583
+ },
584
+ {
585
+ "epoch": 45.0,
586
+ "eval_loss": 0.6018360257148743,
587
+ "eval_runtime": 235.576,
588
+ "eval_samples_per_second": 29.995,
589
+ "eval_wer": 0.37245030466486867,
590
+ "step": 3600
591
+ },
592
+ {
593
+ "epoch": 46.0,
594
+ "eval_loss": 0.6160127520561218,
595
+ "eval_runtime": 236.4202,
596
+ "eval_samples_per_second": 29.887,
597
+ "eval_wer": 0.378423733892718,
598
+ "step": 3680
599
+ },
600
+ {
601
+ "epoch": 46.25,
602
+ "learning_rate": 2.1527791064766097e-05,
603
+ "loss": 0.6045,
604
+ "step": 3700
605
+ },
606
+ {
607
+ "epoch": 47.0,
608
+ "eval_loss": 0.6069476008415222,
609
+ "eval_runtime": 237.3738,
610
+ "eval_samples_per_second": 29.767,
611
+ "eval_wer": 0.36865448007192086,
612
+ "step": 3760
613
+ },
614
+ {
615
+ "epoch": 47.5,
616
+ "learning_rate": 2.1240792117095573e-05,
617
+ "loss": 0.6049,
618
+ "step": 3800
619
+ },
620
+ {
621
+ "epoch": 48.0,
622
+ "eval_loss": 0.5968321561813354,
623
+ "eval_runtime": 235.4281,
624
+ "eval_samples_per_second": 30.013,
625
+ "eval_wer": 0.36411946858455696,
626
+ "step": 3840
627
+ },
628
+ {
629
+ "epoch": 48.75,
630
+ "learning_rate": 2.0953793169425048e-05,
631
+ "loss": 0.5877,
632
+ "step": 3900
633
+ },
634
+ {
635
+ "epoch": 49.0,
636
+ "eval_loss": 0.6001846790313721,
637
+ "eval_runtime": 235.467,
638
+ "eval_samples_per_second": 30.008,
639
+ "eval_wer": 0.36515832584157426,
640
+ "step": 3920
641
+ },
642
+ {
643
+ "epoch": 50.0,
644
+ "learning_rate": 2.066679422175452e-05,
645
+ "loss": 0.5865,
646
+ "step": 4000
647
+ },
648
+ {
649
+ "epoch": 50.0,
650
+ "eval_loss": 0.5853711366653442,
651
+ "eval_runtime": 235.5406,
652
+ "eval_samples_per_second": 29.999,
653
+ "eval_wer": 0.3635001498351813,
654
+ "step": 4000
655
+ },
656
+ {
657
+ "epoch": 51.0,
658
+ "eval_loss": 0.5745455622673035,
659
+ "eval_runtime": 236.1025,
660
+ "eval_samples_per_second": 29.928,
661
+ "eval_wer": 0.35540905004495055,
662
+ "step": 4080
663
+ },
664
+ {
665
+ "epoch": 51.25,
666
+ "learning_rate": 2.0379795274083996e-05,
667
+ "loss": 0.5764,
668
+ "step": 4100
669
+ },
670
+ {
671
+ "epoch": 52.0,
672
+ "eval_loss": 0.5656818151473999,
673
+ "eval_runtime": 237.1421,
674
+ "eval_samples_per_second": 29.796,
675
+ "eval_wer": 0.35113375287184095,
676
+ "step": 4160
677
+ },
678
+ {
679
+ "epoch": 52.5,
680
+ "learning_rate": 2.0092796326413468e-05,
681
+ "loss": 0.5706,
682
+ "step": 4200
683
+ },
684
+ {
685
+ "epoch": 53.0,
686
+ "eval_loss": 0.5767672657966614,
687
+ "eval_runtime": 235.7491,
688
+ "eval_samples_per_second": 29.973,
689
+ "eval_wer": 0.355089401658176,
690
+ "step": 4240
691
+ },
692
+ {
693
+ "epoch": 53.75,
694
+ "learning_rate": 1.9805797378742943e-05,
695
+ "loss": 0.565,
696
+ "step": 4300
697
+ },
698
+ {
699
+ "epoch": 54.0,
700
+ "eval_loss": 0.5638183355331421,
701
+ "eval_runtime": 236.1669,
702
+ "eval_samples_per_second": 29.92,
703
+ "eval_wer": 0.3511936869443612,
704
+ "step": 4320
705
+ },
706
+ {
707
+ "epoch": 55.0,
708
+ "learning_rate": 1.951879843107242e-05,
709
+ "loss": 0.5499,
710
+ "step": 4400
711
+ },
712
+ {
713
+ "epoch": 55.0,
714
+ "eval_loss": 0.5843964219093323,
715
+ "eval_runtime": 237.455,
716
+ "eval_samples_per_second": 29.757,
717
+ "eval_wer": 0.3593047647587654,
718
+ "step": 4400
719
+ },
720
+ {
721
+ "epoch": 56.0,
722
+ "eval_loss": 0.5718104839324951,
723
+ "eval_runtime": 235.3494,
724
+ "eval_samples_per_second": 30.023,
725
+ "eval_wer": 0.35534911597243035,
726
+ "step": 4480
727
+ },
728
+ {
729
+ "epoch": 56.25,
730
+ "learning_rate": 1.9231799483401894e-05,
731
+ "loss": 0.5554,
732
+ "step": 4500
733
+ },
734
+ {
735
+ "epoch": 57.0,
736
+ "eval_loss": 0.5693724155426025,
737
+ "eval_runtime": 237.1158,
738
+ "eval_samples_per_second": 29.8,
739
+ "eval_wer": 0.3528718409749276,
740
+ "step": 4560
741
+ },
742
+ {
743
+ "epoch": 57.5,
744
+ "learning_rate": 1.894480053573137e-05,
745
+ "loss": 0.5464,
746
+ "step": 4600
747
+ },
748
+ {
749
+ "epoch": 58.0,
750
+ "eval_loss": 0.5594637989997864,
751
+ "eval_runtime": 237.4676,
752
+ "eval_samples_per_second": 29.756,
753
+ "eval_wer": 0.3494156427929278,
754
+ "step": 4640
755
+ },
756
+ {
757
+ "epoch": 58.75,
758
+ "learning_rate": 1.8657801588060845e-05,
759
+ "loss": 0.539,
760
+ "step": 4700
761
+ },
762
+ {
763
+ "epoch": 59.0,
764
+ "eval_loss": 0.5721908807754517,
765
+ "eval_runtime": 236.7283,
766
+ "eval_samples_per_second": 29.849,
767
+ "eval_wer": 0.35081410448506645,
768
+ "step": 4720
769
+ },
770
+ {
771
+ "epoch": 60.0,
772
+ "learning_rate": 1.837080264039032e-05,
773
+ "loss": 0.5359,
774
+ "step": 4800
775
+ },
776
+ {
777
+ "epoch": 60.0,
778
+ "eval_loss": 0.5462157130241394,
779
+ "eval_runtime": 235.957,
780
+ "eval_samples_per_second": 29.946,
781
+ "eval_wer": 0.3440015982419339,
782
+ "step": 4800
783
+ },
784
+ {
785
+ "epoch": 61.0,
786
+ "eval_loss": 0.5488569736480713,
787
+ "eval_runtime": 236.1407,
788
+ "eval_samples_per_second": 29.923,
789
+ "eval_wer": 0.34198381780041953,
790
+ "step": 4880
791
+ },
792
+ {
793
+ "epoch": 61.25,
794
+ "learning_rate": 1.8083803692719793e-05,
795
+ "loss": 0.5304,
796
+ "step": 4900
797
+ },
798
+ {
799
+ "epoch": 62.0,
800
+ "eval_loss": 0.5528755784034729,
801
+ "eval_runtime": 235.178,
802
+ "eval_samples_per_second": 30.045,
803
+ "eval_wer": 0.3452202577165118,
804
+ "step": 4960
805
+ },
806
+ {
807
+ "epoch": 62.5,
808
+ "learning_rate": 1.779680474504927e-05,
809
+ "loss": 0.52,
810
+ "step": 5000
811
+ },
812
+ {
813
+ "epoch": 63.0,
814
+ "eval_loss": 0.5493497252464294,
815
+ "eval_runtime": 234.848,
816
+ "eval_samples_per_second": 30.088,
817
+ "eval_wer": 0.34174408151033864,
818
+ "step": 5040
819
+ },
820
+ {
821
+ "epoch": 63.75,
822
+ "learning_rate": 1.7509805797378744e-05,
823
+ "loss": 0.5316,
824
+ "step": 5100
825
+ },
826
+ {
827
+ "epoch": 64.0,
828
+ "eval_loss": 0.5514699816703796,
829
+ "eval_runtime": 234.6944,
830
+ "eval_samples_per_second": 30.107,
831
+ "eval_wer": 0.34138447707521724,
832
+ "step": 5120
833
+ },
834
+ {
835
+ "epoch": 65.0,
836
+ "learning_rate": 1.7222806849708216e-05,
837
+ "loss": 0.5191,
838
+ "step": 5200
839
+ },
840
+ {
841
+ "epoch": 65.0,
842
+ "eval_loss": 0.5479201674461365,
843
+ "eval_runtime": 235.3908,
844
+ "eval_samples_per_second": 30.018,
845
+ "eval_wer": 0.3387273998601538,
846
+ "step": 5200
847
+ },
848
+ {
849
+ "epoch": 66.0,
850
+ "eval_loss": 0.5452239513397217,
851
+ "eval_runtime": 233.7285,
852
+ "eval_samples_per_second": 30.232,
853
+ "eval_wer": 0.34569973029667367,
854
+ "step": 5280
855
+ },
856
+ {
857
+ "epoch": 66.25,
858
+ "learning_rate": 1.6935807902037692e-05,
859
+ "loss": 0.5138,
860
+ "step": 5300
861
+ },
862
+ {
863
+ "epoch": 67.0,
864
+ "eval_loss": 0.5339746475219727,
865
+ "eval_runtime": 238.4195,
866
+ "eval_samples_per_second": 29.637,
867
+ "eval_wer": 0.3345120367595645,
868
+ "step": 5360
869
+ },
870
+ {
871
+ "epoch": 67.5,
872
+ "learning_rate": 1.6648808954367167e-05,
873
+ "loss": 0.5161,
874
+ "step": 5400
875
+ },
876
+ {
877
+ "epoch": 68.0,
878
+ "eval_loss": 0.529504120349884,
879
+ "eval_runtime": 235.3768,
880
+ "eval_samples_per_second": 30.02,
881
+ "eval_wer": 0.3316352012785935,
882
+ "step": 5440
883
+ },
884
+ {
885
+ "epoch": 68.75,
886
+ "learning_rate": 1.6361810006696643e-05,
887
+ "loss": 0.5038,
888
+ "step": 5500
889
+ },
890
+ {
891
+ "epoch": 69.0,
892
+ "eval_loss": 0.540134847164154,
893
+ "eval_runtime": 234.8619,
894
+ "eval_samples_per_second": 30.086,
895
+ "eval_wer": 0.3354310258715413,
896
+ "step": 5520
897
+ },
898
+ {
899
+ "epoch": 70.0,
900
+ "learning_rate": 1.607481105902612e-05,
901
+ "loss": 0.4976,
902
+ "step": 5600
903
+ },
904
+ {
905
+ "epoch": 70.0,
906
+ "eval_loss": 0.5468968749046326,
907
+ "eval_runtime": 234.2977,
908
+ "eval_samples_per_second": 30.158,
909
+ "eval_wer": 0.33575067425831584,
910
+ "step": 5600
911
+ },
912
+ {
913
+ "epoch": 71.0,
914
+ "eval_loss": 0.5490255951881409,
915
+ "eval_runtime": 235.056,
916
+ "eval_samples_per_second": 30.061,
917
+ "eval_wer": 0.33371291579262813,
918
+ "step": 5680
919
+ },
920
+ {
921
+ "epoch": 71.25,
922
+ "learning_rate": 1.578781211135559e-05,
923
+ "loss": 0.4983,
924
+ "step": 5700
925
+ },
926
+ {
927
+ "epoch": 72.0,
928
+ "eval_loss": 0.5308582782745361,
929
+ "eval_runtime": 240.9129,
930
+ "eval_samples_per_second": 29.33,
931
+ "eval_wer": 0.32967735490959943,
932
+ "step": 5760
933
+ },
934
+ {
935
+ "epoch": 72.5,
936
+ "learning_rate": 1.5500813163685066e-05,
937
+ "loss": 0.4893,
938
+ "step": 5800
939
+ },
940
+ {
941
+ "epoch": 73.0,
942
+ "eval_loss": 0.5336333513259888,
943
+ "eval_runtime": 234.8259,
944
+ "eval_samples_per_second": 30.09,
945
+ "eval_wer": 0.33373289381680155,
946
+ "step": 5840
947
+ },
948
+ {
949
+ "epoch": 73.75,
950
+ "learning_rate": 1.5213814216014542e-05,
951
+ "loss": 0.4926,
952
+ "step": 5900
953
+ },
954
+ {
955
+ "epoch": 74.0,
956
+ "eval_loss": 0.5315715074539185,
957
+ "eval_runtime": 233.7173,
958
+ "eval_samples_per_second": 30.233,
959
+ "eval_wer": 0.3309359704325242,
960
+ "step": 5920
961
+ },
962
+ {
963
+ "epoch": 75.0,
964
+ "learning_rate": 1.4926815268344016e-05,
965
+ "loss": 0.4844,
966
+ "step": 6000
967
+ },
968
+ {
969
+ "epoch": 75.0,
970
+ "eval_loss": 0.5397169589996338,
971
+ "eval_runtime": 233.3548,
972
+ "eval_samples_per_second": 30.28,
973
+ "eval_wer": 0.3329937069223854,
974
+ "step": 6000
975
+ },
976
+ {
977
+ "epoch": 76.0,
978
+ "eval_loss": 0.5310106873512268,
979
+ "eval_runtime": 234.4399,
980
+ "eval_samples_per_second": 30.14,
981
+ "eval_wer": 0.334292278493657,
982
+ "step": 6080
983
+ },
984
+ {
985
+ "epoch": 76.25,
986
+ "learning_rate": 1.4639816320673491e-05,
987
+ "loss": 0.4821,
988
+ "step": 6100
989
+ },
990
+ {
991
+ "epoch": 77.0,
992
+ "eval_loss": 0.5305867791175842,
993
+ "eval_runtime": 233.3613,
994
+ "eval_samples_per_second": 30.279,
995
+ "eval_wer": 0.3270402557187094,
996
+ "step": 6160
997
+ },
998
+ {
999
+ "epoch": 77.5,
1000
+ "learning_rate": 1.4352817373002967e-05,
1001
+ "loss": 0.4772,
1002
+ "step": 6200
1003
+ },
1004
+ {
1005
+ "epoch": 78.0,
1006
+ "eval_loss": 0.5232115387916565,
1007
+ "eval_runtime": 234.4045,
1008
+ "eval_samples_per_second": 30.144,
1009
+ "eval_wer": 0.32903805813605036,
1010
+ "step": 6240
1011
+ },
1012
+ {
1013
+ "epoch": 78.75,
1014
+ "learning_rate": 1.406581842533244e-05,
1015
+ "loss": 0.4876,
1016
+ "step": 6300
1017
+ },
1018
+ {
1019
+ "epoch": 79.0,
1020
+ "eval_loss": 0.5481472015380859,
1021
+ "eval_runtime": 235.1064,
1022
+ "eval_samples_per_second": 30.054,
1023
+ "eval_wer": 0.32951753071621215,
1024
+ "step": 6320
1025
+ },
1026
+ {
1027
+ "epoch": 80.0,
1028
+ "learning_rate": 1.3778819477661916e-05,
1029
+ "loss": 0.471,
1030
+ "step": 6400
1031
+ },
1032
+ {
1033
+ "epoch": 80.0,
1034
+ "eval_loss": 0.5388906598091125,
1035
+ "eval_runtime": 234.5017,
1036
+ "eval_samples_per_second": 30.132,
1037
+ "eval_wer": 0.3307761462391369,
1038
+ "step": 6400
1039
+ },
1040
+ {
1041
+ "epoch": 81.0,
1042
+ "eval_loss": 0.5276188254356384,
1043
+ "eval_runtime": 234.907,
1044
+ "eval_samples_per_second": 30.08,
1045
+ "eval_wer": 0.32865847567675555,
1046
+ "step": 6480
1047
+ },
1048
+ {
1049
+ "epoch": 81.25,
1050
+ "learning_rate": 1.349182052999139e-05,
1051
+ "loss": 0.4721,
1052
+ "step": 6500
1053
+ },
1054
+ {
1055
+ "epoch": 82.0,
1056
+ "eval_loss": 0.5138600468635559,
1057
+ "eval_runtime": 236.3966,
1058
+ "eval_samples_per_second": 29.89,
1059
+ "eval_wer": 0.32388372789931075,
1060
+ "step": 6560
1061
+ },
1062
+ {
1063
+ "epoch": 82.5,
1064
+ "learning_rate": 1.3204821582320865e-05,
1065
+ "loss": 0.4706,
1066
+ "step": 6600
1067
+ },
1068
+ {
1069
+ "epoch": 83.0,
1070
+ "eval_loss": 0.5274313688278198,
1071
+ "eval_runtime": 234.3438,
1072
+ "eval_samples_per_second": 30.152,
1073
+ "eval_wer": 0.32626111277594644,
1074
+ "step": 6640
1075
+ },
1076
+ {
1077
+ "epoch": 83.75,
1078
+ "learning_rate": 1.2917822634650339e-05,
1079
+ "loss": 0.4659,
1080
+ "step": 6700
1081
+ },
1082
+ {
1083
+ "epoch": 84.0,
1084
+ "eval_loss": 0.5202988386154175,
1085
+ "eval_runtime": 234.4209,
1086
+ "eval_samples_per_second": 30.142,
1087
+ "eval_wer": 0.3222255518929178,
1088
+ "step": 6720
1089
+ },
1090
+ {
1091
+ "epoch": 85.0,
1092
+ "learning_rate": 1.2630823686979815e-05,
1093
+ "loss": 0.4607,
1094
+ "step": 6800
1095
+ },
1096
+ {
1097
+ "epoch": 85.0,
1098
+ "eval_loss": 0.5231702923774719,
1099
+ "eval_runtime": 235.6072,
1100
+ "eval_samples_per_second": 29.991,
1101
+ "eval_wer": 0.320867046249126,
1102
+ "step": 6800
1103
+ },
1104
+ {
1105
+ "epoch": 86.0,
1106
+ "eval_loss": 0.5206882953643799,
1107
+ "eval_runtime": 235.3295,
1108
+ "eval_samples_per_second": 30.026,
1109
+ "eval_wer": 0.32252522225551894,
1110
+ "step": 6880
1111
+ },
1112
+ {
1113
+ "epoch": 86.25,
1114
+ "learning_rate": 1.234382473930929e-05,
1115
+ "loss": 0.4634,
1116
+ "step": 6900
1117
+ },
1118
+ {
1119
+ "epoch": 87.0,
1120
+ "eval_loss": 0.522659182548523,
1121
+ "eval_runtime": 235.3665,
1122
+ "eval_samples_per_second": 30.021,
1123
+ "eval_wer": 0.32252522225551894,
1124
+ "step": 6960
1125
+ },
1126
+ {
1127
+ "epoch": 87.5,
1128
+ "learning_rate": 1.2056825791638764e-05,
1129
+ "loss": 0.4546,
1130
+ "step": 7000
1131
+ },
1132
+ {
1133
+ "epoch": 88.0,
1134
+ "eval_loss": 0.5252255201339722,
1135
+ "eval_runtime": 234.3682,
1136
+ "eval_samples_per_second": 30.149,
1137
+ "eval_wer": 0.32510238737388875,
1138
+ "step": 7040
1139
+ },
1140
+ {
1141
+ "epoch": 88.75,
1142
+ "learning_rate": 1.1769826843968238e-05,
1143
+ "loss": 0.4606,
1144
+ "step": 7100
1145
+ },
1146
+ {
1147
+ "epoch": 89.0,
1148
+ "eval_loss": 0.5120769739151001,
1149
+ "eval_runtime": 236.0459,
1150
+ "eval_samples_per_second": 29.935,
1151
+ "eval_wer": 0.32012785935470983,
1152
+ "step": 7120
1153
+ },
1154
+ {
1155
+ "epoch": 90.0,
1156
+ "learning_rate": 1.1482827896297713e-05,
1157
+ "loss": 0.4565,
1158
+ "step": 7200
1159
+ },
1160
+ {
1161
+ "epoch": 90.0,
1162
+ "eval_loss": 0.5241296291351318,
1163
+ "eval_runtime": 233.918,
1164
+ "eval_samples_per_second": 30.207,
1165
+ "eval_wer": 0.32190590350614323,
1166
+ "step": 7200
1167
+ },
1168
+ {
1169
+ "epoch": 91.0,
1170
+ "eval_loss": 0.5129281282424927,
1171
+ "eval_runtime": 235.5104,
1172
+ "eval_samples_per_second": 30.003,
1173
+ "eval_wer": 0.3203675956447907,
1174
+ "step": 7280
1175
+ },
1176
+ {
1177
+ "epoch": 91.25,
1178
+ "learning_rate": 1.1195828948627189e-05,
1179
+ "loss": 0.4557,
1180
+ "step": 7300
1181
+ },
1182
+ {
1183
+ "epoch": 92.0,
1184
+ "eval_loss": 0.5058531761169434,
1185
+ "eval_runtime": 235.6964,
1186
+ "eval_samples_per_second": 29.979,
1187
+ "eval_wer": 0.3161522325442014,
1188
+ "step": 7360
1189
+ },
1190
+ {
1191
+ "epoch": 92.5,
1192
+ "learning_rate": 1.0908830000956664e-05,
1193
+ "loss": 0.4528,
1194
+ "step": 7400
1195
+ },
1196
+ {
1197
+ "epoch": 93.0,
1198
+ "eval_loss": 0.5195637345314026,
1199
+ "eval_runtime": 236.5815,
1200
+ "eval_samples_per_second": 29.867,
1201
+ "eval_wer": 0.32304465088402756,
1202
+ "step": 7440
1203
+ },
1204
+ {
1205
+ "epoch": 93.75,
1206
+ "learning_rate": 1.0621831053286138e-05,
1207
+ "loss": 0.4507,
1208
+ "step": 7500
1209
+ },
1210
+ {
1211
+ "epoch": 94.0,
1212
+ "eval_loss": 0.5212134718894958,
1213
+ "eval_runtime": 236.6887,
1214
+ "eval_samples_per_second": 29.854,
1215
+ "eval_wer": 0.3194286285086405,
1216
+ "step": 7520
1217
+ },
1218
+ {
1219
+ "epoch": 95.0,
1220
+ "learning_rate": 1.0334832105615612e-05,
1221
+ "loss": 0.4491,
1222
+ "step": 7600
1223
+ },
1224
+ {
1225
+ "epoch": 95.0,
1226
+ "eval_loss": 0.5105758905410767,
1227
+ "eval_runtime": 236.3704,
1228
+ "eval_samples_per_second": 29.894,
1229
+ "eval_wer": 0.31345519928079113,
1230
+ "step": 7600
1231
+ },
1232
+ {
1233
+ "epoch": 96.0,
1234
+ "eval_loss": 0.5123084187507629,
1235
+ "eval_runtime": 236.3046,
1236
+ "eval_samples_per_second": 29.902,
1237
+ "eval_wer": 0.31884926580761164,
1238
+ "step": 7680
1239
+ },
1240
+ {
1241
+ "epoch": 96.25,
1242
+ "learning_rate": 1.0047833157945088e-05,
1243
+ "loss": 0.446,
1244
+ "step": 7700
1245
+ },
1246
+ {
1247
+ "epoch": 97.0,
1248
+ "eval_loss": 0.5163899660110474,
1249
+ "eval_runtime": 236.0105,
1250
+ "eval_samples_per_second": 29.939,
1251
+ "eval_wer": 0.3189691339526521,
1252
+ "step": 7760
1253
+ },
1254
+ {
1255
+ "epoch": 97.5,
1256
+ "learning_rate": 9.760834210274563e-06,
1257
+ "loss": 0.4474,
1258
+ "step": 7800
1259
+ },
1260
+ {
1261
+ "epoch": 98.0,
1262
+ "eval_loss": 0.5071632266044617,
1263
+ "eval_runtime": 238.0944,
1264
+ "eval_samples_per_second": 29.677,
1265
+ "eval_wer": 0.3145340125861552,
1266
+ "step": 7840
1267
+ },
1268
+ {
1269
+ "epoch": 98.75,
1270
+ "learning_rate": 9.473835262604039e-06,
1271
+ "loss": 0.4389,
1272
+ "step": 7900
1273
+ },
1274
+ {
1275
+ "epoch": 99.0,
1276
+ "eval_loss": 0.5162996649742126,
1277
+ "eval_runtime": 234.5869,
1278
+ "eval_samples_per_second": 30.121,
1279
+ "eval_wer": 0.3179502547198082,
1280
+ "step": 7920
1281
+ },
1282
+ {
1283
+ "epoch": 100.0,
1284
+ "learning_rate": 9.186836314933511e-06,
1285
+ "loss": 0.4408,
1286
+ "step": 8000
1287
+ },
1288
+ {
1289
+ "epoch": 100.0,
1290
+ "eval_loss": 0.511136531829834,
1291
+ "eval_runtime": 235.2605,
1292
+ "eval_samples_per_second": 30.035,
1293
+ "eval_wer": 0.3154929577464789,
1294
+ "step": 8000
1295
+ },
1296
+ {
1297
+ "epoch": 101.0,
1298
+ "eval_loss": 0.5172923803329468,
1299
+ "eval_runtime": 237.12,
1300
+ "eval_samples_per_second": 29.799,
1301
+ "eval_wer": 0.3147937269004096,
1302
+ "step": 8080
1303
+ },
1304
+ {
1305
+ "epoch": 101.25,
1306
+ "learning_rate": 8.899837367262986e-06,
1307
+ "loss": 0.4399,
1308
+ "step": 8100
1309
+ },
1310
+ {
1311
+ "epoch": 102.0,
1312
+ "eval_loss": 0.5145618915557861,
1313
+ "eval_runtime": 236.8186,
1314
+ "eval_samples_per_second": 29.837,
1315
+ "eval_wer": 0.3172909799220857,
1316
+ "step": 8160
1317
+ },
1318
+ {
1319
+ "epoch": 102.0,
1320
+ "step": 8160,
1321
+ "total_flos": 0,
1322
+ "train_runtime": 110050.311,
1323
+ "train_samples_per_second": 0.102
1324
+ }
1325
+ ],
1326
+ "max_steps": 11200,
1327
+ "num_train_epochs": 140,
1328
+ "total_flos": 0,
1329
+ "trial_name": null,
1330
+ "trial_params": null
1331
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb13c0c8ba6bf260fc4843ebf4559e1b46e3bd12a4dbc58046c3910ed733a5ff
3
+ size 2543
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"<pad>": 0, "|": 1, "<unk>": 2, "a": 3, "b": 4, "c": 5, "d": 6, "e": 7, "f": 8, "g": 9, "h": 10, "i": 11, "j": 12, "k": 13, "l": 14, "m": 15, "n": 16, "o": 17, "p": 18, "q": 19, "r": 20, "s": 21, "t": 22, "u": 23, "v": 24, "w": 25, "x": 26, "y": 27, "z": 28, "ç": 29, "ã": 30, "à": 31, "á": 32, "â": 33, "ê": 34, "é": 35, "í": 36, "ó": 37, "ô": 38, "õ": 39, "ú": 40, "û": 41, "-": 42, "<s>": 43, "</s>": 44}