masterkristall committed on
Commit
539dff3
1 Parent(s): 8ce83a5

End of training

Browse files
README.md ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - generated_from_trainer
4
+ model-index:
5
+ - name: ml_gen_seo_google_23_05_2024
6
+ results: []
7
+ ---
8
+
9
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
10
+ should probably proofread and complete it, then remove this comment. -->
11
+
12
+ # ml_gen_seo_google_23_05_2024
13
+
14
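+ This model was trained on an unknown dataset. The Trainer reports it as trained from scratch, but `_name_or_path` in `config.json` is `seo-model`, so it was presumably initialized from a local checkpoint of that name (to be confirmed by the author).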
15
+ It achieves the following results on the evaluation set:
16
+ - Loss: 0.7733
17
+
18
+ ## Model description
19
+
20
+ More information needed
21
+
22
+ ## Intended uses & limitations
23
+
24
+ More information needed
25
+
26
+ ## Training and evaluation data
27
+
28
+ More information needed
29
+
30
+ ## Training procedure
31
+
32
+ ### Training hyperparameters
33
+
34
+ The following hyperparameters were used during training:
35
+ - learning_rate: 5e-05
36
+ - train_batch_size: 4
37
+ - eval_batch_size: 4
38
+ - seed: 42
39
+ - gradient_accumulation_steps: 2
40
+ - total_train_batch_size: 8
41
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
42
+ - lr_scheduler_type: linear
43
+ - lr_scheduler_warmup_steps: 50
44
+ - num_epochs: 3
45
+
46
+ ### Training results
47
+
48
+ | Training Loss | Epoch | Step | Validation Loss |
49
+ |:-------------:|:------:|:----:|:---------------:|
50
+ | 0.9707 | 0.1534 | 25 | 0.7829 |
51
+ | 0.942 | 0.3067 | 50 | 0.7871 |
52
+ | 0.8262 | 0.4601 | 75 | 0.7827 |
53
+ | 0.9281 | 0.6135 | 100 | 0.7894 |
54
+ | 0.9142 | 0.7669 | 125 | 0.7706 |
55
+ | 0.8757 | 0.9202 | 150 | 0.7701 |
56
+ | 0.8237 | 1.0736 | 175 | 0.7883 |
57
+ | 0.8219 | 1.2270 | 200 | 0.7684 |
58
+ | 0.8051 | 1.3804 | 225 | 0.7779 |
59
+ | 0.7711 | 1.5337 | 250 | 0.7831 |
60
+ | 0.8685 | 1.6871 | 275 | 0.7721 |
61
+ | 0.7802 | 1.8405 | 300 | 0.7804 |
62
+ | 0.778 | 1.9939 | 325 | 0.7812 |
63
+ | 0.7685 | 2.1472 | 350 | 0.7782 |
64
+ | 0.8233 | 2.3006 | 375 | 0.7678 |
65
+ | 0.7752 | 2.4540 | 400 | 0.7717 |
66
+ | 0.7144 | 2.6074 | 425 | 0.7722 |
67
+ | 0.7322 | 2.7607 | 450 | 0.7719 |
68
+ | 0.6849 | 2.9141 | 475 | 0.7733 |
69
+
70
+
71
+ ### Framework versions
72
+
73
+ - Transformers 4.41.0
74
+ - Pytorch 2.3.0+cu121
75
+ - Tokenizers 0.19.1
config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "seo-model",
3
+ "architectures": [
4
+ "T5ForConditionalGeneration"
5
+ ],
6
+ "bos_token_id": 2,
7
+ "classifier_dropout": 0.0,
8
+ "d_ff": 2048,
9
+ "d_kv": 64,
10
+ "d_model": 768,
11
+ "decoder_start_token_id": 2,
12
+ "dense_act_fn": "gelu_new",
13
+ "dropout_rate": 0.1,
14
+ "eos_token_id": 1,
15
+ "feed_forward_proj": "gated-gelu",
16
+ "initializer_factor": 1.0,
17
+ "is_encoder_decoder": true,
18
+ "is_gated_act": true,
19
+ "layer_norm_epsilon": 1e-06,
20
+ "max_length": 100,
21
+ "model_type": "t5",
22
+ "num_beams": 5,
23
+ "num_decoder_layers": 12,
24
+ "num_heads": 12,
25
+ "num_layers": 12,
26
+ "output_past": true,
27
+ "pad_token_id": 0,
28
+ "relative_attention_max_distance": 128,
29
+ "relative_attention_num_buckets": 32,
30
+ "tie_word_embeddings": false,
31
+ "tokenizer_class": "T5Tokenizer",
32
+ "torch_dtype": "float32",
33
+ "transformers_version": "4.41.0",
34
+ "use_cache": true,
35
+ "vocab_size": 30000
36
+ }
generation_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 2,
4
+ "decoder_start_token_id": 2,
5
+ "eos_token_id": 1,
6
+ "max_length": 100,
7
+ "num_beams": 5,
8
+ "pad_token_id": 0,
9
+ "transformers_version": "4.41.0"
10
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f58af0514ff38c4768a3cbf03c6068fe0a277e7827f0e467fcb33fe0dd0df9bf
3
+ size 977270632
runs/May23_18-54-30_e278246c3359/events.out.tfevents.1716490471.e278246c3359.1431.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d3e7bfcc5b2f80175e1a09853c947b85a29d03205fb2eed7aab1ac5f0dff95b
3
+ size 8758
runs/May23_19-04-21_e278246c3359/events.out.tfevents.1716491062.e278246c3359.1431.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdff044642f29f846a6d1f6abcba81c3a829197635070ff067655004c970246c
3
+ size 8759
runs/May23_19-11-16_e278246c3359/events.out.tfevents.1716491477.e278246c3359.1431.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc173599aaf701510de789f9b6d5745026e142d66bae590a3131c417c5127b24
3
+ size 12132
runs/May23_19-21-50_e278246c3359/events.out.tfevents.1716492111.e278246c3359.1431.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3332279f9dc760ccea7202795133c212012363c85ecfe7a695129599df402b6c
3
+ size 11650
runs/May23_19-28-12_e278246c3359/events.out.tfevents.1716492493.e278246c3359.1431.5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de54c4cc607b9196654b488ac1768c49735d637ab5e5389c3e769c30263541b1
3
+ size 11650
runs/May23_19-36-17_e278246c3359/events.out.tfevents.1716492977.e278246c3359.1431.6 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e6a16a4ffac74313bcc17409810afc12b1fcfa9bbcf6c9b566110c20e847f3a
3
+ size 14542
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41727f9ee21abc22f78d3a6cc199e3cfc33d367fd20852e051975aae374e8bf1
3
+ size 5112