tbkazakova
/

wav2vec-bert-2.0-even-pakendorf

@@ -1,13 +1,28 @@
 ---
 license: mit
 tags:
 - generated_from_trainer
-base_model: facebook/w2v-bert-2.0
 datasets:
 - audiofolder
 model-index:
 - name: wav2vec-bert-2.0-even-pakendorf-0406-1347
-  results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -17,13 +32,9 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [facebook/w2v-bert-2.0](https://huggingface.co/facebook/w2v-bert-2.0) on the audiofolder dataset.
 It achieves the following results on the evaluation set:
-- eval_loss: inf
-- eval_wer: 0.9991
-- eval_runtime: 59.9347
-- eval_samples_per_second: 10.011
-- eval_steps_per_second: 1.251
-- epoch: 1.3333
-- step: 200
 ## Model description
@@ -54,9 +65,34 @@ The following hyperparameters were used during training:
 - num_epochs: 10
 - mixed_precision_training: Native AMP
 ### Framework versions
-- Transformers 4.41.1
 - Pytorch 2.3.0+cu121
 - Datasets 2.19.2
 - Tokenizers 0.19.1

 ---
 license: mit
+base_model: facebook/w2v-bert-2.0
 tags:
 - generated_from_trainer
 datasets:
 - audiofolder
+metrics:
+- wer
 model-index:
 - name: wav2vec-bert-2.0-even-pakendorf-0406-1347
+  results:
+  - task:
+      name: Automatic Speech Recognition
+      type: automatic-speech-recognition
+    dataset:
+      name: audiofolder
+      type: audiofolder
+      config: default
+      split: train
+      args: default
+    metrics:
+    - name: Wer
+      type: wer
+      value: 0.5968606805108706
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [facebook/w2v-bert-2.0](https://huggingface.co/facebook/w2v-bert-2.0) on the audiofolder dataset.
 It achieves the following results on the evaluation set:
+- Cer: 0.2128
+- Loss: inf
+- Wer: 0.5969
 ## Model description
 - num_epochs: 10
 - mixed_precision_training: Native AMP
+### Training results
+| Training Loss | Epoch  | Step | Cer    | Validation Loss | Wer    |
+|:-------------:|:------:|:----:|:------:|:---------------:|:------:|
+| 4.5767        | 0.5051 | 200  | 0.4932 | inf             | 0.9973 |
+| 1.8775        | 1.0101 | 400  | 0.3211 | inf             | 0.8494 |
+| 1.6006        | 1.5152 | 600  | 0.3017 | inf             | 0.8040 |
+| 1.4476        | 2.0202 | 800  | 0.2896 | inf             | 0.7534 |
+| 1.2213        | 2.5253 | 1000 | 0.2610 | inf             | 0.7080 |
+| 1.1485        | 3.0303 | 1200 | 0.2684 | inf             | 0.6800 |
+| 0.9554        | 3.5354 | 1400 | 0.2459 | inf             | 0.6732 |
+| 0.9379        | 4.0404 | 1600 | 0.2275 | inf             | 0.6251 |
+| 0.7644        | 4.5455 | 1800 | 0.2235 | inf             | 0.6224 |
+| 0.7891        | 5.0505 | 2000 | 0.2180 | inf             | 0.6053 |
+| 0.633         | 5.5556 | 2200 | 0.2130 | inf             | 0.5996 |
+| 0.6197        | 6.0606 | 2400 | 0.2126 | inf             | 0.6032 |
+| 0.5212        | 6.5657 | 2600 | 0.2196 | inf             | 0.6019 |
+| 0.4881        | 7.0707 | 2800 | 0.2125 | inf             | 0.5894 |
+| 0.4           | 7.5758 | 3000 | 0.2066 | inf             | 0.5852 |
+| 0.4008        | 8.0808 | 3200 | 0.2076 | inf             | 0.5790 |
+| 0.3304        | 8.5859 | 3400 | 0.2096 | inf             | 0.5884 |
+| 0.3446        | 9.0909 | 3600 | 0.2124 | inf             | 0.5983 |
+| 0.3237        | 9.5960 | 3800 | 0.2128 | inf             | 0.5969 |
 ### Framework versions
+- Transformers 4.41.2
 - Pytorch 2.3.0+cu121
 - Datasets 2.19.2
 - Tokenizers 0.19.1

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1b6e171d0a182489b9a3f59ba4ccf25019858e91ba2bf8c13b22301fd6499951
 size 2422978560

 version https://git-lfs.github.com/spec/v1
+oid sha256:f0e34fb80de4c4abc6da89c07bc2acca5c4a6333e215d3cea86419c4c616314d
 size 2422978560

preprocessor_config.json CHANGED Viewed

@@ -4,7 +4,6 @@
   "num_mel_bins": 80,
   "padding_side": "right",
   "padding_value": 0.0,
-  "processor_class": "Wav2Vec2BertProcessor",
   "return_attention_mask": true,
   "sampling_rate": 16000,
   "stride": 2

   "num_mel_bins": 80,
   "padding_side": "right",
   "padding_value": 0.0,
   "return_attention_mask": true,
   "sampling_rate": 16000,
   "stride": 2

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e59af1a404864488356784a05d23870d7d8e06df6304dd544a5295723444e17a
 size 5112

 version https://git-lfs.github.com/spec/v1
+oid sha256:eeeec4596f00f5d69e3fa4eea62251f33294831b4cb0dd1808f8662ed75ed1e7
 size 5112