clayton07 committed on
Commit
c44188c
1 Parent(s): 63fecd1

Training in progress, step 1000

Browse files
README.md CHANGED
@@ -1,54 +1,64 @@
1
- ---
2
- license: mit
3
- base_model: microsoft/speecht5_tts
4
- tags:
5
- - generated_from_trainer
6
- model-index:
7
- - name: speecht5_finetuned_hindi_mono
8
- results: []
9
- language:
10
- - hi
11
- ---
12
-
13
- <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
- should probably proofread and complete it, then remove this comment. -->
15
-
16
- # speecht5_finetuned_hindi_mono
17
-
18
- This model is a fine-tuned version of [microsoft/speecht5_tts](https://huggingface.co/microsoft/speecht5_tts) on the Hindi dataset from Indic TTS (provided by IITM)
19
- It achieves the following results on the evaluation set:
20
- - Loss: 0.4357
21
-
22
- ## Training procedure
23
-
24
- ### Training hyperparameters
25
-
26
- The following hyperparameters were used during training:
27
- - learning_rate: 1e-05
28
- - train_batch_size: 4
29
- - eval_batch_size: 2
30
- - seed: 42
31
- - gradient_accumulation_steps: 8
32
- - total_train_batch_size: 32
33
- - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
34
- - lr_scheduler_type: linear
35
- - lr_scheduler_warmup_steps: 500
36
- - training_steps: 4000
37
- - mixed_precision_training: Native AMP
38
-
39
- ### Training results
40
-
41
- | Training Loss | Epoch | Step | Validation Loss |
42
- |:-------------:|:-------:|:----:|:---------------:|
43
- | 0.5391 | 4.3549 | 1000 | 0.4788 |
44
- | 0.4991 | 8.7099 | 2000 | 0.4492 |
45
- | 0.4851 | 13.0648 | 3000 | 0.4367 |
46
- | 0.4859 | 17.4197 | 4000 | 0.4357 |
47
-
48
-
49
- ### Framework versions
50
-
51
- - Transformers 4.43.3
52
- - Pytorch 2.4.0+cu118
53
- - Datasets 3.0.1
54
- - Tokenizers 0.19.1
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ base_model: microsoft/speecht5_tts
4
+ tags:
5
+ - generated_from_trainer
6
+ model-index:
7
+ - name: speecht5_finetuned_hindi_mono
8
+ results: []
9
+ ---
10
+
11
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
12
+ should probably proofread and complete it, then remove this comment. -->
13
+
14
+ # speecht5_finetuned_hindi_mono
15
+
16
+ This model is a fine-tuned version of [microsoft/speecht5_tts](https://huggingface.co/microsoft/speecht5_tts) on the Hindi dataset from Indic TTS (provided by IITM).
17
+ It achieves the following results on the evaluation set:
18
+ - Loss: 0.4357
19
+
20
+ ## Model description
21
+
22
+ More information needed
23
+
24
+ ## Intended uses & limitations
25
+
26
+ More information needed
27
+
28
+ ## Training and evaluation data
29
+
30
+ More information needed
31
+
32
+ ## Training procedure
33
+
34
+ ### Training hyperparameters
35
+
36
+ The following hyperparameters were used during training:
37
+ - learning_rate: 1e-05
38
+ - train_batch_size: 4
39
+ - eval_batch_size: 2
40
+ - seed: 42
41
+ - gradient_accumulation_steps: 8
42
+ - total_train_batch_size: 32
43
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
44
+ - lr_scheduler_type: linear
45
+ - lr_scheduler_warmup_steps: 500
46
+ - training_steps: 4000
47
+ - mixed_precision_training: Native AMP
48
+
49
+ ### Training results
50
+
51
+ | Training Loss | Epoch | Step | Validation Loss |
52
+ |:-------------:|:-------:|:----:|:---------------:|
53
+ | 0.5391 | 4.3549 | 1000 | 0.4788 |
54
+ | 0.4991 | 8.7099 | 2000 | 0.4492 |
55
+ | 0.4851 | 13.0648 | 3000 | 0.4367 |
56
+ | 0.4859 | 17.4197 | 4000 | 0.4357 |
57
+
58
+
59
+ ### Framework versions
60
+
61
+ - Transformers 4.43.3
62
+ - Pytorch 2.4.0+cu118
63
+ - Datasets 3.0.1
64
+ - Tokenizers 0.19.1
config.json CHANGED
@@ -64,7 +64,7 @@
64
  "mask_time_length": 10,
65
  "mask_time_min_masks": 2,
66
  "mask_time_prob": 0.05,
67
- "max_length": 1876,
68
  "max_speech_positions": 1876,
69
  "max_text_positions": 600,
70
  "model_type": "speecht5",
@@ -85,7 +85,7 @@
85
  "speech_decoder_prenet_layers": 2,
86
  "speech_decoder_prenet_units": 256,
87
  "torch_dtype": "float32",
88
- "transformers_version": "4.43.3",
89
  "use_cache": false,
90
  "use_guided_attention_loss": true,
91
  "vocab_size": 81
 
64
  "mask_time_length": 10,
65
  "mask_time_min_masks": 2,
66
  "mask_time_prob": 0.05,
67
+ "max_length": null,
68
  "max_speech_positions": 1876,
69
  "max_text_positions": 600,
70
  "model_type": "speecht5",
 
85
  "speech_decoder_prenet_layers": 2,
86
  "speech_decoder_prenet_units": 256,
87
  "torch_dtype": "float32",
88
+ "transformers_version": "4.45.2",
89
  "use_cache": false,
90
  "use_guided_attention_loss": true,
91
  "vocab_size": 81
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a09464ff8074de0340a8db4586fcf93a515aea74d10b9bc1ff87b71d853054cf
3
  size 577789320
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e355f3a9e9b8264d49034a843ba350787de937b0dbd4ea1cc4d41943532c062
3
  size 577789320
runs/Nov12_01-51-30_DESKTOP-BOFGR5O/events.out.tfevents.1731356571.DESKTOP-BOFGR5O.28348.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbd5326634b7df0360ba59d1243e34984f850e9a0fc94d355f4fa2b0597b3955
3
+ size 15298
runs/Oct22_05-50-37_DESKTOP-BOFGR5O/events.out.tfevents.1729556452.DESKTOP-BOFGR5O.21184.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:918828db57b2155a843bc9e1a87476249d4e3f80068c8f879299856be13cd388
3
+ size 7675
tokenizer_config.json CHANGED
@@ -50,7 +50,7 @@
50
  }
51
  },
52
  "bos_token": "<s>",
53
- "clean_up_tokenization_spaces": true,
54
  "eos_token": "</s>",
55
  "mask_token": "<mask>",
56
  "model_max_length": 600,
 
50
  }
51
  },
52
  "bos_token": "<s>",
53
+ "clean_up_tokenization_spaces": false,
54
  "eos_token": "</s>",
55
  "mask_token": "<mask>",
56
  "model_max_length": 600,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ead13ab101d69ffbc80172bfede8e5065f045740a1521f9693227c71ee700af8
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0997b5324a72d1fd59c7ea1525f34edfce58e28d76bb24b4cc0e8e172652d953
3
  size 5432