Automatic Speech Recognition
Transformers
Safetensors
Welsh
English
wav2vec2
Inference Endpoints
DewiBrynJones committed on
Commit
820d04f
1 Parent(s): 6b62dcb

Training in progress, step 500

Browse files
README.md ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - cy
4
+ license: apache-2.0
5
+ base_model: facebook/wav2vec2-large-xlsr-53
6
+ tags:
7
+ - automatic-speech-recognition
8
+ - python/custom_common_voice.py
9
+ - generated_from_trainer
10
+ datasets:
11
+ - custom_common_voice
12
+ metrics:
13
+ - wer
14
+ model-index:
15
+ - name: wav2vec2-xlsr-53-ft-ccv-en-cy
16
+ results:
17
+ - task:
18
+ name: Automatic Speech Recognition
19
+ type: automatic-speech-recognition
20
+ dataset:
21
+ name: PYTHON/CUSTOM_COMMON_VOICE.PY - CY
22
+ type: custom_common_voice
23
+ config: cy
24
+ split: validation
25
+ args: 'Config: cy, Training split: train, Eval split: validation'
26
+ metrics:
27
+ - name: Wer
28
+ type: wer
29
+ value: 0.21777283505046477
30
+ ---
31
+
32
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
33
+ should probably proofread and complete it, then remove this comment. -->
34
+
35
+ # wav2vec2-xlsr-53-ft-ccv-en-cy
36
+
37
+ This model is a fine-tuned version of [facebook/wav2vec2-large-xlsr-53](https://huggingface.co/facebook/wav2vec2-large-xlsr-53) on the `custom_common_voice` dataset (`cy` configuration, Welsh).
38
+ It achieves the following results on the evaluation set:
39
+ - Loss: 0.2909
40
+ - Wer: 0.2178
41
+
42
+ ## Model description
43
+
44
+ More information needed
45
+
46
+ ## Intended uses & limitations
47
+
48
+ More information needed
49
+
50
+ ## Training and evaluation data
51
+
52
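+ The model was trained on the `train` split and evaluated on the `validation` split of the `cy` configuration of `custom_common_voice`. The accompanying results files report 126,693 training samples and 26,295 evaluation samples.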
53
+
54
+ ## Training procedure
55
+
56
+ ### Training hyperparameters
57
+
58
+ The following hyperparameters were used during training:
59
+ - learning_rate: 0.0003
60
+ - train_batch_size: 32
61
+ - eval_batch_size: 32
62
+ - seed: 42
63
+ - gradient_accumulation_steps: 2
64
+ - total_train_batch_size: 64
65
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
66
+ - lr_scheduler_type: linear
67
+ - lr_scheduler_warmup_steps: 800
68
+ - training_steps: 9000
69
+
70
+ ### Training results
71
+
72
+ | Training Loss | Epoch | Step | Validation Loss | Wer |
73
+ |:-------------:|:-----:|:----:|:---------------:|:------:|
74
+ | 5.8377 | 0.25 | 500 | 1.2190 | 0.8569 |
75
+ | 0.9829 | 0.51 | 1000 | 0.5585 | 0.4701 |
76
+ | 0.45 | 0.76 | 1500 | 0.4735 | 0.3901 |
77
+ | 0.3151 | 1.01 | 2000 | 0.4125 | 0.3418 |
78
+ | 0.2524 | 1.26 | 2500 | 0.3831 | 0.3117 |
79
+ | 0.243 | 1.52 | 3000 | 0.3661 | 0.3078 |
80
+ | 0.2341 | 1.77 | 3500 | 0.3489 | 0.2883 |
81
+ | 0.211 | 2.02 | 4000 | 0.3500 | 0.2738 |
82
+ | 0.1702 | 2.27 | 4500 | 0.3459 | 0.2704 |
83
+ | 0.1634 | 2.53 | 5000 | 0.3305 | 0.2583 |
84
+ | 0.1608 | 2.78 | 5500 | 0.3137 | 0.2479 |
85
+ | 0.1481 | 3.03 | 6000 | 0.3288 | 0.2562 |
86
+ | 0.1216 | 3.28 | 6500 | 0.3174 | 0.2446 |
87
+ | 0.1181 | 3.54 | 7000 | 0.3000 | 0.2325 |
88
+ | 0.1143 | 3.79 | 7500 | 0.2929 | 0.2326 |
89
+ | 0.1049 | 4.04 | 8000 | 0.2921 | 0.2218 |
90
+ | 0.0913 | 4.29 | 8500 | 0.2968 | 0.2208 |
91
+ | 0.0883 | 4.55 | 9000 | 0.2909 | 0.2178 |
92
+
93
+
94
+ ### Framework versions
95
+
96
+ - Transformers 4.33.3
97
+ - Pytorch 2.0.1+cu117
98
+ - Datasets 2.14.5
99
+ - Tokenizers 0.13.3
added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "</s>": 72,
3
+ "<s>": 71
4
+ }
all_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.55,
3
+ "eval_loss": 0.2908598780632019,
4
+ "eval_runtime": 1109.9533,
5
+ "eval_samples": 26295,
6
+ "eval_samples_per_second": 23.69,
7
+ "eval_steps_per_second": 0.741,
8
+ "eval_wer": 0.21777283505046477,
9
+ "train_loss": 0.47586327913072374,
10
+ "train_runtime": 55725.2112,
11
+ "train_samples": 126693,
12
+ "train_samples_per_second": 10.336,
13
+ "train_steps_per_second": 0.162
14
+ }
config.json ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "facebook/wav2vec2-large-xlsr-53",
3
+ "activation_dropout": 0.0,
4
+ "adapter_attn_dim": null,
5
+ "adapter_kernel_size": 3,
6
+ "adapter_stride": 2,
7
+ "add_adapter": false,
8
+ "apply_spec_augment": true,
9
+ "architectures": [
10
+ "Wav2Vec2ForCTC"
11
+ ],
12
+ "attention_dropout": 0.0,
13
+ "bos_token_id": 1,
14
+ "classifier_proj_size": 256,
15
+ "codevector_dim": 768,
16
+ "contrastive_logits_temperature": 0.1,
17
+ "conv_bias": true,
18
+ "conv_dim": [
19
+ 512,
20
+ 512,
21
+ 512,
22
+ 512,
23
+ 512,
24
+ 512,
25
+ 512
26
+ ],
27
+ "conv_kernel": [
28
+ 10,
29
+ 3,
30
+ 3,
31
+ 3,
32
+ 3,
33
+ 2,
34
+ 2
35
+ ],
36
+ "conv_stride": [
37
+ 5,
38
+ 2,
39
+ 2,
40
+ 2,
41
+ 2,
42
+ 2,
43
+ 2
44
+ ],
45
+ "ctc_loss_reduction": "mean",
46
+ "ctc_zero_infinity": false,
47
+ "diversity_loss_weight": 0.1,
48
+ "do_stable_layer_norm": true,
49
+ "eos_token_id": 2,
50
+ "feat_extract_activation": "gelu",
51
+ "feat_extract_dropout": 0.0,
52
+ "feat_extract_norm": "layer",
53
+ "feat_proj_dropout": 0.0,
54
+ "feat_quantizer_dropout": 0.0,
55
+ "final_dropout": 0.0,
56
+ "hidden_act": "gelu",
57
+ "hidden_dropout": 0.0,
58
+ "hidden_size": 1024,
59
+ "initializer_range": 0.02,
60
+ "intermediate_size": 4096,
61
+ "layer_norm_eps": 1e-05,
62
+ "layerdrop": 0.0,
63
+ "mask_channel_length": 10,
64
+ "mask_channel_min_space": 1,
65
+ "mask_channel_other": 0.0,
66
+ "mask_channel_prob": 0.0,
67
+ "mask_channel_selection": "static",
68
+ "mask_feature_length": 10,
69
+ "mask_feature_min_masks": 0,
70
+ "mask_feature_prob": 0.0,
71
+ "mask_time_length": 10,
72
+ "mask_time_min_masks": 2,
73
+ "mask_time_min_space": 1,
74
+ "mask_time_other": 0.0,
75
+ "mask_time_prob": 0.05,
76
+ "mask_time_selection": "static",
77
+ "model_type": "wav2vec2",
78
+ "num_adapter_layers": 3,
79
+ "num_attention_heads": 16,
80
+ "num_codevector_groups": 2,
81
+ "num_codevectors_per_group": 320,
82
+ "num_conv_pos_embedding_groups": 16,
83
+ "num_conv_pos_embeddings": 128,
84
+ "num_feat_extract_layers": 7,
85
+ "num_hidden_layers": 24,
86
+ "num_negatives": 100,
87
+ "output_hidden_size": 1024,
88
+ "pad_token_id": 70,
89
+ "proj_codevector_dim": 768,
90
+ "tdnn_dilation": [
91
+ 1,
92
+ 2,
93
+ 3,
94
+ 1,
95
+ 1
96
+ ],
97
+ "tdnn_dim": [
98
+ 512,
99
+ 512,
100
+ 512,
101
+ 512,
102
+ 1500
103
+ ],
104
+ "tdnn_kernel": [
105
+ 5,
106
+ 3,
107
+ 3,
108
+ 1,
109
+ 1
110
+ ],
111
+ "torch_dtype": "float32",
112
+ "transformers_version": "4.38.2",
113
+ "use_weighted_layer_sum": false,
114
+ "vocab_size": 73,
115
+ "xvector_output_dim": 512
116
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.55,
3
+ "eval_loss": 0.2908598780632019,
4
+ "eval_runtime": 1109.9533,
5
+ "eval_samples": 26295,
6
+ "eval_samples_per_second": 23.69,
7
+ "eval_steps_per_second": 0.741,
8
+ "eval_wer": 0.21777283505046477
9
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a61b18c9ee9860f31d39490c222d84fe01c48b00e6f7e1c3e09ce18165714fd8
3
+ size 1262106780
preprocessor_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0,
7
+ "processor_class": "Wav2Vec2Processor",
8
+ "return_attention_mask": true,
9
+ "sampling_rate": 16000
10
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:640af29a66bf0fca78e79c6a8f356ea119a2dcedfd185bf572062ab2d460c286
3
+ size 1262201133
runs/Mar05_08-26-36_6f484a5b3164/events.out.tfevents.1709633973.6f484a5b3164.1068.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17b7eae01d3551ee25179d0dcc46de1ed12b48af48c559f800e4497c69d45bda
3
+ size 6860
runs/Oct12_12-35-37_8bec42e5e08c/events.out.tfevents.1697116115.8bec42e5e08c.128398.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f23bc9ed20787593c7faf592201602fe1cf35def9ac1cb498ed11a367e6476c
3
+ size 9821
runs/Oct12_21-06-48_8bec42e5e08c/events.out.tfevents.1697150543.8bec42e5e08c.128877.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d2dff4eb34b2fb556943985a86283b971dd466ac9160e4f37efcb76876831ed
3
+ size 15403
runs/Oct12_21-06-48_8bec42e5e08c/events.out.tfevents.1697206843.8bec42e5e08c.128877.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12386e0080f66dcacc204d5bca1aeafb6dd239e27c3944ad501b3cb823d0012a
3
+ size 406
runs/Oct13_21-34-07_8bec42e5e08c/events.out.tfevents.1697231979.8bec42e5e08c.152123.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4be896e7c005133fb626a14277688a03c401441b0126d76e1ecb589f4292619
3
+ size 15403
runs/Oct13_21-34-07_8bec42e5e08c/events.out.tfevents.1697278531.8bec42e5e08c.152123.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab6851d72d7a8ce34b81e108d994fdd2a363c41a3b8bf4eacfcc205fe1504e24
3
+ size 406
runs/Oct16_10-01-23_8bec42e5e08c/events.out.tfevents.1697451328.8bec42e5e08c.175326.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6d067310699db36ae9c6ae4fbbeef5b609e07a0ec0bc9682839f63f9a7c18ad
3
+ size 15403
runs/Oct16_10-01-23_8bec42e5e08c/events.out.tfevents.1697509159.8bec42e5e08c.175326.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06a734c7f708943c6cb0747de35e10fa6afab27ca9a49e1acc2585d23270dd31
3
+ size 406
runs/Oct17_10-22-05_8bec42e5e08c/events.out.tfevents.1697538029.8bec42e5e08c.356730.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd31597a301ec4ce39ce5a26a5646208e808a74276f0ed1d73331acd15cf0cd3
3
+ size 15403
runs/Oct17_10-22-05_8bec42e5e08c/events.out.tfevents.1697595206.8bec42e5e08c.356730.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20aa0529edd6ffdd713ca594feedd71853c7a668e9b63474ddc587456296a876
3
+ size 406
special_tokens_map.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<s>",
4
+ "</s>"
5
+ ],
6
+ "bos_token": {
7
+ "content": "<s>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "eos_token": {
14
+ "content": "<s>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "pad_token": {
21
+ "content": "[PAD]",
22
+ "lstrip": true,
23
+ "normalized": false,
24
+ "rstrip": true,
25
+ "single_word": false
26
+ },
27
+ "unk_token": {
28
+ "content": "[UNK]",
29
+ "lstrip": true,
30
+ "normalized": false,
31
+ "rstrip": true,
32
+ "single_word": false
33
+ }
34
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "69": {
4
+ "content": "[UNK]",
5
+ "lstrip": true,
6
+ "normalized": false,
7
+ "rstrip": true,
8
+ "single_word": false,
9
+ "special": false
10
+ },
11
+ "70": {
12
+ "content": "[PAD]",
13
+ "lstrip": true,
14
+ "normalized": false,
15
+ "rstrip": true,
16
+ "single_word": false,
17
+ "special": false
18
+ },
19
+ "71": {
20
+ "content": "<s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "72": {
28
+ "content": "</s>",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ }
35
+ },
36
+ "additional_special_tokens": [
37
+ "<s>",
38
+ "</s>"
39
+ ],
40
+ "bos_token": "<s>",
41
+ "clean_up_tokenization_spaces": true,
42
+ "do_lower_case": false,
43
+ "eos_token": "<s>",
44
+ "model_max_length": 1000000000000000019884624838656,
45
+ "pad_token": "[PAD]",
46
+ "processor_class": "Wav2Vec2Processor",
47
+ "replace_word_delimiter_char": " ",
48
+ "target_lang": null,
49
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
50
+ "unk_token": "[UNK]",
51
+ "word_delimiter_token": "|"
52
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.55,
3
+ "train_loss": 0.47586327913072374,
4
+ "train_runtime": 55725.2112,
5
+ "train_samples": 126693,
6
+ "train_samples_per_second": 10.336,
7
+ "train_steps_per_second": 0.162
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.2908598780632019,
3
+ "best_model_checkpoint": "/models/hfhub/DewiBrynJones/wav2vec2-xlsr-53-ft-ccv-en-cy/checkpoint-9000",
4
+ "epoch": 4.545454545454545,
5
+ "eval_steps": 500,
6
+ "global_step": 9000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.2,
13
+ "learning_rate": 0.00014925,
14
+ "loss": 5.8377,
15
+ "step": 400
16
+ },
17
+ {
18
+ "epoch": 0.25,
19
+ "eval_loss": 1.2190359830856323,
20
+ "eval_runtime": 1220.4523,
21
+ "eval_samples_per_second": 21.545,
22
+ "eval_steps_per_second": 0.674,
23
+ "eval_wer": 0.8568814654720188,
24
+ "step": 500
25
+ },
26
+ {
27
+ "epoch": 0.4,
28
+ "learning_rate": 0.00029925,
29
+ "loss": 0.9829,
30
+ "step": 800
31
+ },
32
+ {
33
+ "epoch": 0.51,
34
+ "eval_loss": 0.5584714412689209,
35
+ "eval_runtime": 1188.1001,
36
+ "eval_samples_per_second": 22.132,
37
+ "eval_steps_per_second": 0.692,
38
+ "eval_wer": 0.4700631596617496,
39
+ "step": 1000
40
+ },
41
+ {
42
+ "epoch": 0.61,
43
+ "learning_rate": 0.00028543902439024386,
44
+ "loss": 0.45,
45
+ "step": 1200
46
+ },
47
+ {
48
+ "epoch": 0.76,
49
+ "eval_loss": 0.4735090434551239,
50
+ "eval_runtime": 1202.8188,
51
+ "eval_samples_per_second": 21.861,
52
+ "eval_steps_per_second": 0.683,
53
+ "eval_wer": 0.39008750026229094,
54
+ "step": 1500
55
+ },
56
+ {
57
+ "epoch": 0.81,
58
+ "learning_rate": 0.00027080487804878047,
59
+ "loss": 0.3636,
60
+ "step": 1600
61
+ },
62
+ {
63
+ "epoch": 1.01,
64
+ "learning_rate": 0.00025617073170731703,
65
+ "loss": 0.3151,
66
+ "step": 2000
67
+ },
68
+ {
69
+ "epoch": 1.01,
70
+ "eval_loss": 0.4124945402145386,
71
+ "eval_runtime": 1160.0644,
72
+ "eval_samples_per_second": 22.667,
73
+ "eval_steps_per_second": 0.709,
74
+ "eval_wer": 0.34183435801666073,
75
+ "step": 2000
76
+ },
77
+ {
78
+ "epoch": 1.21,
79
+ "learning_rate": 0.00024153658536585365,
80
+ "loss": 0.2524,
81
+ "step": 2400
82
+ },
83
+ {
84
+ "epoch": 1.26,
85
+ "eval_loss": 0.3830628991127014,
86
+ "eval_runtime": 1409.4338,
87
+ "eval_samples_per_second": 18.656,
88
+ "eval_steps_per_second": 0.583,
89
+ "eval_wer": 0.3117191598296158,
90
+ "step": 2500
91
+ },
92
+ {
93
+ "epoch": 1.41,
94
+ "learning_rate": 0.00022690243902439024,
95
+ "loss": 0.243,
96
+ "step": 2800
97
+ },
98
+ {
99
+ "epoch": 1.52,
100
+ "eval_loss": 0.3661448061466217,
101
+ "eval_runtime": 1425.6726,
102
+ "eval_samples_per_second": 18.444,
103
+ "eval_steps_per_second": 0.577,
104
+ "eval_wer": 0.30780368046666806,
105
+ "step": 3000
106
+ },
107
+ {
108
+ "epoch": 1.62,
109
+ "learning_rate": 0.0002122682926829268,
110
+ "loss": 0.2341,
111
+ "step": 3200
112
+ },
113
+ {
114
+ "epoch": 1.77,
115
+ "eval_loss": 0.3488619327545166,
116
+ "eval_runtime": 1416.3145,
117
+ "eval_samples_per_second": 18.566,
118
+ "eval_steps_per_second": 0.58,
119
+ "eval_wer": 0.2883060201019787,
120
+ "step": 3500
121
+ },
122
+ {
123
+ "epoch": 1.82,
124
+ "learning_rate": 0.00019763414634146341,
125
+ "loss": 0.2235,
126
+ "step": 3600
127
+ },
128
+ {
129
+ "epoch": 2.02,
130
+ "learning_rate": 0.00018299999999999998,
131
+ "loss": 0.211,
132
+ "step": 4000
133
+ },
134
+ {
135
+ "epoch": 2.02,
136
+ "eval_loss": 0.34998372197151184,
137
+ "eval_runtime": 1154.299,
138
+ "eval_samples_per_second": 22.78,
139
+ "eval_steps_per_second": 0.712,
140
+ "eval_wer": 0.27384434605619323,
141
+ "step": 4000
142
+ },
143
+ {
144
+ "epoch": 2.22,
145
+ "learning_rate": 0.0001683658536585366,
146
+ "loss": 0.1702,
147
+ "step": 4400
148
+ },
149
+ {
150
+ "epoch": 2.27,
151
+ "eval_loss": 0.34594303369522095,
152
+ "eval_runtime": 1230.9263,
153
+ "eval_samples_per_second": 21.362,
154
+ "eval_steps_per_second": 0.668,
155
+ "eval_wer": 0.27035272887508655,
156
+ "step": 4500
157
+ },
158
+ {
159
+ "epoch": 2.42,
160
+ "learning_rate": 0.00015373170731707315,
161
+ "loss": 0.1634,
162
+ "step": 4800
163
+ },
164
+ {
165
+ "epoch": 2.53,
166
+ "eval_loss": 0.33047276735305786,
167
+ "eval_runtime": 1138.5459,
168
+ "eval_samples_per_second": 23.095,
169
+ "eval_steps_per_second": 0.722,
170
+ "eval_wer": 0.2583125249176406,
171
+ "step": 5000
172
+ },
173
+ {
174
+ "epoch": 2.63,
175
+ "learning_rate": 0.00013909756097560974,
176
+ "loss": 0.1608,
177
+ "step": 5200
178
+ },
179
+ {
180
+ "epoch": 2.78,
181
+ "eval_loss": 0.3136747479438782,
182
+ "eval_runtime": 1199.9023,
183
+ "eval_samples_per_second": 21.914,
184
+ "eval_steps_per_second": 0.685,
185
+ "eval_wer": 0.24792580313490148,
186
+ "step": 5500
187
+ },
188
+ {
189
+ "epoch": 2.83,
190
+ "learning_rate": 0.00012446341463414633,
191
+ "loss": 0.1542,
192
+ "step": 5600
193
+ },
194
+ {
195
+ "epoch": 3.03,
196
+ "learning_rate": 0.00010982926829268292,
197
+ "loss": 0.1481,
198
+ "step": 6000
199
+ },
200
+ {
201
+ "epoch": 3.03,
202
+ "eval_loss": 0.32883504033088684,
203
+ "eval_runtime": 1167.9676,
204
+ "eval_samples_per_second": 22.513,
205
+ "eval_steps_per_second": 0.704,
206
+ "eval_wer": 0.2562183939400298,
207
+ "step": 6000
208
+ },
209
+ {
210
+ "epoch": 3.23,
211
+ "learning_rate": 9.519512195121951e-05,
212
+ "loss": 0.1216,
213
+ "step": 6400
214
+ },
215
+ {
216
+ "epoch": 3.28,
217
+ "eval_loss": 0.3174259662628174,
218
+ "eval_runtime": 1161.665,
219
+ "eval_samples_per_second": 22.636,
220
+ "eval_steps_per_second": 0.708,
221
+ "eval_wer": 0.24461464213022222,
222
+ "step": 6500
223
+ },
224
+ {
225
+ "epoch": 3.43,
226
+ "learning_rate": 8.05609756097561e-05,
227
+ "loss": 0.1181,
228
+ "step": 6800
229
+ },
230
+ {
231
+ "epoch": 3.54,
232
+ "eval_loss": 0.30002185702323914,
233
+ "eval_runtime": 1207.6988,
234
+ "eval_samples_per_second": 21.773,
235
+ "eval_steps_per_second": 0.681,
236
+ "eval_wer": 0.2324569318253352,
237
+ "step": 7000
238
+ },
239
+ {
240
+ "epoch": 3.64,
241
+ "learning_rate": 6.592682926829267e-05,
242
+ "loss": 0.1143,
243
+ "step": 7200
244
+ },
245
+ {
246
+ "epoch": 3.79,
247
+ "eval_loss": 0.29287537932395935,
248
+ "eval_runtime": 1095.1242,
249
+ "eval_samples_per_second": 24.011,
250
+ "eval_steps_per_second": 0.751,
251
+ "eval_wer": 0.23258702813857357,
252
+ "step": 7500
253
+ },
254
+ {
255
+ "epoch": 3.84,
256
+ "learning_rate": 5.129268292682927e-05,
257
+ "loss": 0.1139,
258
+ "step": 7600
259
+ },
260
+ {
261
+ "epoch": 4.04,
262
+ "learning_rate": 3.665853658536585e-05,
263
+ "loss": 0.1049,
264
+ "step": 8000
265
+ },
266
+ {
267
+ "epoch": 4.04,
268
+ "eval_loss": 0.2921387255191803,
269
+ "eval_runtime": 1091.9168,
270
+ "eval_samples_per_second": 24.082,
271
+ "eval_steps_per_second": 0.753,
272
+ "eval_wer": 0.22178483748452482,
273
+ "step": 8000
274
+ },
275
+ {
276
+ "epoch": 4.24,
277
+ "learning_rate": 2.2024390243902437e-05,
278
+ "loss": 0.0913,
279
+ "step": 8400
280
+ },
281
+ {
282
+ "epoch": 4.29,
283
+ "eval_loss": 0.2968423664569855,
284
+ "eval_runtime": 1104.263,
285
+ "eval_samples_per_second": 23.812,
286
+ "eval_steps_per_second": 0.744,
287
+ "eval_wer": 0.2208196067734016,
288
+ "step": 8500
289
+ },
290
+ {
291
+ "epoch": 4.44,
292
+ "learning_rate": 7.4268292682926826e-06,
293
+ "loss": 0.0883,
294
+ "step": 8800
295
+ },
296
+ {
297
+ "epoch": 4.55,
298
+ "eval_loss": 0.2908598780632019,
299
+ "eval_runtime": 1095.7607,
300
+ "eval_samples_per_second": 23.997,
301
+ "eval_steps_per_second": 0.75,
302
+ "eval_wer": 0.21777283505046477,
303
+ "step": 9000
304
+ },
305
+ {
306
+ "epoch": 4.55,
307
+ "step": 9000,
308
+ "total_flos": 8.920242900072627e+19,
309
+ "train_loss": 0.47586327913072374,
310
+ "train_runtime": 55725.2112,
311
+ "train_samples_per_second": 10.336,
312
+ "train_steps_per_second": 0.162
313
+ }
314
+ ],
315
+ "logging_steps": 400,
316
+ "max_steps": 9000,
317
+ "num_train_epochs": 5,
318
+ "save_steps": 500,
319
+ "total_flos": 8.920242900072627e+19,
320
+ "trial_name": null,
321
+ "trial_params": null
322
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41391707330b917a182f50c624eca53c007657cb0e0a4d70d8f10b192c1076b1
3
+ size 4984
vocab.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "#": 1,
3
+ "'": 2,
4
+ "+": 3,
5
+ "-": 4,
6
+ "[PAD]": 70,
7
+ "[UNK]": 69,
8
+ "a": 5,
9
+ "b": 6,
10
+ "c": 7,
11
+ "d": 8,
12
+ "e": 9,
13
+ "f": 10,
14
+ "g": 11,
15
+ "h": 12,
16
+ "i": 13,
17
+ "j": 14,
18
+ "k": 15,
19
+ "l": 16,
20
+ "m": 17,
21
+ "n": 18,
22
+ "o": 19,
23
+ "p": 20,
24
+ "q": 21,
25
+ "r": 22,
26
+ "s": 23,
27
+ "t": 24,
28
+ "u": 25,
29
+ "v": 26,
30
+ "w": 27,
31
+ "x": 28,
32
+ "y": 29,
33
+ "z": 30,
34
+ "|": 0,
35
+ "ß": 31,
36
+ "à": 32,
37
+ "á": 33,
38
+ "â": 34,
39
+ "ã": 35,
40
+ "ä": 36,
41
+ "ç": 37,
42
+ "è": 38,
43
+ "é": 39,
44
+ "ê": 40,
45
+ "ë": 41,
46
+ "î": 42,
47
+ "ï": 43,
48
+ "ò": 44,
49
+ "ó": 45,
50
+ "ô": 46,
51
+ "ö": 47,
52
+ "ø": 48,
53
+ "ú": 49,
54
+ "û": 50,
55
+ "ü": 51,
56
+ "ÿ": 52,
57
+ "ı": 53,
58
+ "ł": 54,
59
+ "ń": 55,
60
+ "ō": 56,
61
+ "œ": 57,
62
+ "ř": 58,
63
+ "ş": 59,
64
+ "š": 60,
65
+ "ū": 61,
66
+ "ŵ": 62,
67
+ "ŷ": 63,
68
+ "ə": 64,
69
+ "α": 65,
70
+ "„": 66,
71
+ "奔": 67,
72
+ "熊": 68
73
+ }