weewtr1121 committed
Commit bf19162
1 Parent(s): 54df004
README.md CHANGED
@@ -1,7 +1,7 @@
  ---
  library_name: transformers
  license: apache-2.0
- base_model: openai/whisper-large-v3
+ base_model: biodatlab/whisper-th-small-combined
  tags:
  - generated_from_trainer
  model-index:
@@ -14,10 +14,10 @@ should probably proofread and complete it, then remove this comment. -->
 
  # outs
 
- This model is a fine-tuned version of [openai/whisper-large-v3](https://huggingface.co/openai/whisper-large-v3) on an unknown dataset.
+ This model is a fine-tuned version of [biodatlab/whisper-th-small-combined](https://huggingface.co/biodatlab/whisper-th-small-combined) on an unknown dataset.
  It achieves the following results on the evaluation set:
- - Loss: 0.6104
- - Cer: 25.0366
+ - Loss: 0.1552
+ - Cer: 13.5275
 
  ## Model description
 
@@ -36,22 +36,27 @@ More information needed
  ### Training hyperparameters
 
  The following hyperparameters were used during training:
- - learning_rate: 8e-05
- - train_batch_size: 2
- - eval_batch_size: 2
+ - learning_rate: 2e-05
+ - train_batch_size: 4
+ - eval_batch_size: 4
  - seed: 42
+ - gradient_accumulation_steps: 2
+ - total_train_batch_size: 8
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
  - lr_scheduler_type: linear
- - num_epochs: 3.0
+ - lr_scheduler_warmup_steps: 1000
+ - num_epochs: 10.0
  - mixed_precision_training: Native AMP
 
  ### Training results
 
- | Training Loss | Epoch | Step | Validation Loss | Cer     |
- |:-------------:|:-----:|:----:|:---------------:|:-------:|
- | 0.3211        | 1.0   | 1073 | 0.7999          | 39.8078 |
- | 0.0795        | 2.0   | 2146 | 0.6431          | 23.1977 |
- | 0.0101        | 3.0   | 3219 | 0.6104          | 25.0366 |
+ | Training Loss | Epoch  | Step | Validation Loss | Cer     |
+ |:-------------:|:------:|:----:|:---------------:|:-------:|
+ | 0.1733        | 1.8622 | 500  | 0.1206          | 7.2293  |
+ | 0.1159        | 3.7244 | 1000 | 0.1404          | 10.6943 |
+ | 0.0596        | 5.5866 | 1500 | 0.1665          | 12.2340 |
+ | 0.0399        | 7.4488 | 2000 | 0.1486          | 11.8316 |
+ | 0.0224        | 9.3110 | 2500 | 0.1552          | 13.5275 |
 
 
  ### Framework versions
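
The hyperparameter list in the new model card only records values. As a hedged illustration, the sketch below shows how those values would map onto transformers' Seq2SeqTrainingArguments; the argument names and output_dir are assumptions for illustration, not taken from the author's training script.

```python
from transformers import Seq2SeqTrainingArguments

# Minimal sketch, assuming the standard Trainer/Seq2SeqTrainer setup.
training_args = Seq2SeqTrainingArguments(
    output_dir="outs",              # hypothetical; echoes the model card title
    learning_rate=2e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=2,  # effective (total) train batch size: 4 * 2 = 8
    warmup_steps=1000,
    num_train_epochs=10,
    lr_scheduler_type="linear",
    seed=42,
    fp16=True,                      # "Native AMP" mixed-precision training
)
```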
config.json CHANGED
@@ -1,8 +1,8 @@
  {
- "_name_or_path": "openai/whisper-large-v3",
+ "_name_or_path": "biodatlab/whisper-th-small-combined",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
- "apply_spec_augment": false,
+ "apply_spec_augment": true,
  "architectures": [
  "WhisperForConditionalGeneration"
  ],
@@ -13,38 +13,39 @@
  ],
  "bos_token_id": 50257,
  "classifier_proj_size": 256,
- "d_model": 1280,
- "decoder_attention_heads": 20,
- "decoder_ffn_dim": 5120,
+ "d_model": 768,
+ "decoder_attention_heads": 12,
+ "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
- "decoder_layers": 32,
+ "decoder_layers": 12,
  "decoder_start_token_id": 50258,
  "dropout": 0.0,
- "encoder_attention_heads": 20,
- "encoder_ffn_dim": 5120,
+ "encoder_attention_heads": 12,
+ "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
- "encoder_layers": 32,
+ "encoder_layers": 12,
  "eos_token_id": 50257,
+ "forced_decoder_ids": null,
  "init_std": 0.02,
  "is_encoder_decoder": true,
- "mask_feature_length": 10,
+ "mask_feature_length": 64,
  "mask_feature_min_masks": 0,
- "mask_feature_prob": 0.0,
+ "mask_feature_prob": 0.1,
  "mask_time_length": 10,
  "mask_time_min_masks": 2,
- "mask_time_prob": 0.05,
+ "mask_time_prob": 0.1,
  "max_length": 448,
  "max_source_positions": 1500,
  "max_target_positions": 448,
  "median_filter_width": 7,
  "model_type": "whisper",
- "num_hidden_layers": 32,
- "num_mel_bins": 128,
- "pad_token_id": 50256,
+ "num_hidden_layers": 12,
+ "num_mel_bins": 80,
+ "pad_token_id": 50257,
  "scale_embedding": false,
  "torch_dtype": "float32",
  "transformers_version": "4.44.2",
  "use_cache": true,
  "use_weighted_layer_sum": false,
- "vocab_size": 51866
+ "vocab_size": 51865
  }
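
The config.json change swaps the whisper-large-v3 geometry for the whisper-small geometry of the new base model and turns SpecAugment on. A minimal sanity-check sketch (not part of the commit; it assumes you run it from a local checkout containing the updated config.json):

```python
from transformers import WhisperConfig

config = WhisperConfig.from_pretrained(".")  # local checkout with the updated config.json

assert config.d_model == 768 and config.encoder_layers == 12  # whisper-small, not large-v3 (1280 / 32)
assert config.num_mel_bins == 80                              # matches feature_size in preprocessor_config.json
print(config.apply_spec_augment, config.mask_time_prob, config.mask_feature_prob)  # True 0.1 0.1
```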
generation_config.json CHANGED
@@ -1,44 +1,44 @@
  {
  "alignment_heads": [
  [
- 7,
- 0
+ 5,
+ 3
  ],
  [
- 10,
- 17
+ 5,
+ 9
  ],
  [
- 12,
- 18
+ 8,
+ 0
  ],
  [
- 13,
- 12
+ 8,
+ 4
  ],
  [
- 16,
- 1
+ 8,
+ 7
  ],
  [
- 17,
- 14
+ 8,
+ 8
  ],
  [
- 19,
- 11
+ 9,
+ 0
  ],
  [
- 21,
- 4
+ 9,
+ 7
  ],
  [
- 24,
- 1
+ 9,
+ 9
  ],
  [
- 25,
- 6
+ 10,
+ 5
  ]
  ],
  "begin_suppress_tokens": [
@@ -148,15 +148,14 @@
  "<|vi|>": 50278,
  "<|yi|>": 50335,
  "<|yo|>": 50325,
- "<|yue|>": 50358,
  "<|zh|>": 50260
  },
  "language": "th",
  "max_initial_timestamp_index": 50,
  "max_length": 448,
- "no_timestamps_token_id": 50364,
+ "no_timestamps_token_id": 50363,
  "pad_token_id": 50257,
- "prev_sot_token_id": 50362,
+ "prev_sot_token_id": 50361,
  "return_timestamps": false,
  "suppress_tokens": [
  1,
@@ -242,16 +241,16 @@
  49870,
  50254,
  50258,
+ 50358,
  50359,
  50360,
  50361,
- 50362,
- 50363
+ 50362
  ],
  "task": "transcribe",
  "task_to_id": {
- "transcribe": 50360,
- "translate": 50359
+ "transcribe": 50359,
+ "translate": 50358
  },
  "transformers_version": "4.44.2"
  }
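
The generation_config.json change re-points the alignment heads and special-token IDs (no_timestamps, prev_sot, task_to_id, suppress list) at the smaller multilingual vocabulary while keeping language="th" and task="transcribe". A hedged usage sketch; the repo id and audio file below are hypothetical placeholders:

```python
from transformers import pipeline

asr = pipeline(
    "automatic-speech-recognition",
    model="weewtr1121/outs",  # hypothetical repo id; replace with the actual checkpoint path
    chunk_length_s=30,        # matches chunk_length in preprocessor_config.json
)

# language/task mirror the committed generation config; passing them explicitly is optional
result = asr("thai_sample.wav", generate_kwargs={"language": "th", "task": "transcribe"})
print(result["text"])
```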
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5da00a2bc05aa8cc05b70eef258cc7194ba0c011fd1b352e94fdedac589feacd
+ oid sha256:fddfc79aa151f2ebdf6c2f7431ca578a9b15b5f750760ef25cb83c7877fc8969
  size 966995080
preprocessor_config.json CHANGED
@@ -1,7 +1,7 @@
  {
  "chunk_length": 30,
  "feature_extractor_type": "WhisperFeatureExtractor",
- "feature_size": 128,
+ "feature_size": 80,
  "hop_length": 160,
  "n_fft": 400,
  "n_samples": 480000,
runs/Oct08_17-42-57_897a695464dd/events.out.tfevents.1728409410.897a695464dd.2598.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:851eadf2d89d002bfb2651e1e29d4841b42c0145734ee7edf29ee58d16f219de
+ size 13114
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:06fad7890ce3cf6185ec9294223ebb0348aed4fde156a6b405a801d12bc04ca9
+ oid sha256:43f7ff2c50d3e04d4b8ff85050eae40b1a5219a4c6ebddf23606a80e1a42df42
  size 5304