End of training

Browse files

Files changed (11) hide show

README.md +75 -0
config.json +36 -0
generation_config.json +10 -0
model.safetensors +3 -0
runs/May23_18-54-30_e278246c3359/events.out.tfevents.1716490471.e278246c3359.1431.1 +3 -0
runs/May23_19-04-21_e278246c3359/events.out.tfevents.1716491062.e278246c3359.1431.2 +3 -0
runs/May23_19-11-16_e278246c3359/events.out.tfevents.1716491477.e278246c3359.1431.3 +3 -0
runs/May23_19-21-50_e278246c3359/events.out.tfevents.1716492111.e278246c3359.1431.4 +3 -0
runs/May23_19-28-12_e278246c3359/events.out.tfevents.1716492493.e278246c3359.1431.5 +3 -0
runs/May23_19-36-17_e278246c3359/events.out.tfevents.1716492977.e278246c3359.1431.6 +3 -0
training_args.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,75 @@

+---
+tags:
+- generated_from_trainer
+model-index:
+- name: ml_gen_seo_google_23_05_2024
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# ml_gen_seo_google_23_05_2024
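+This model appears to have been further trained from the local checkpoint `seo-model` (the `_name_or_path` recorded in `config.json`; architecture `T5ForConditionalGeneration`) on an unknown dataset; the base checkpoint was not recorded by the Trainer.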
+It achieves the following results on the evaluation set:
+- Loss: 0.7733
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 5e-05
+- train_batch_size: 4
+- eval_batch_size: 4
+- seed: 42
+- gradient_accumulation_steps: 2
+- total_train_batch_size: 8
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- lr_scheduler_warmup_steps: 50
+- num_epochs: 3
+### Training results
+| Training Loss | Epoch  | Step | Validation Loss |
+|:-------------:|:------:|:----:|:---------------:|
+| 0.9707        | 0.1534 | 25   | 0.7829          |
+| 0.9420        | 0.3067 | 50   | 0.7871          |
+| 0.8262        | 0.4601 | 75   | 0.7827          |
+| 0.9281        | 0.6135 | 100  | 0.7894          |
+| 0.9142        | 0.7669 | 125  | 0.7706          |
+| 0.8757        | 0.9202 | 150  | 0.7701          |
+| 0.8237        | 1.0736 | 175  | 0.7883          |
+| 0.8219        | 1.2270 | 200  | 0.7684          |
+| 0.8051        | 1.3804 | 225  | 0.7779          |
+| 0.7711        | 1.5337 | 250  | 0.7831          |
+| 0.8685        | 1.6871 | 275  | 0.7721          |
+| 0.7802        | 1.8405 | 300  | 0.7804          |
+| 0.7780        | 1.9939 | 325  | 0.7812          |
+| 0.7685        | 2.1472 | 350  | 0.7782          |
+| 0.8233        | 2.3006 | 375  | 0.7678          |
+| 0.7752        | 2.4540 | 400  | 0.7717          |
+| 0.7144        | 2.6074 | 425  | 0.7722          |
+| 0.7322        | 2.7607 | 450  | 0.7719          |
+| 0.6849        | 2.9141 | 475  | 0.7733          |
+### Framework versions
+- Transformers 4.41.0
+- Pytorch 2.3.0+cu121
+- Tokenizers 0.19.1

config.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "_name_or_path": "seo-model",
+  "architectures": [
+    "T5ForConditionalGeneration"
+  ],
+  "bos_token_id": 2,
+  "classifier_dropout": 0.0,
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 768,
+  "decoder_start_token_id": 2,
+  "dense_act_fn": "gelu_new",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "gated-gelu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": true,
+  "layer_norm_epsilon": 1e-06,
+  "max_length": 100,
+  "model_type": "t5",
+  "num_beams": 5,
+  "num_decoder_layers": 12,
+  "num_heads": 12,
+  "num_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "tie_word_embeddings": false,
+  "tokenizer_class": "T5Tokenizer",
+  "torch_dtype": "float32",
+  "transformers_version": "4.41.0",
+  "use_cache": true,
+  "vocab_size": 30000
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 2,
+  "decoder_start_token_id": 2,
+  "eos_token_id": 1,
+  "max_length": 100,
+  "num_beams": 5,
+  "pad_token_id": 0,
+  "transformers_version": "4.41.0"
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f58af0514ff38c4768a3cbf03c6068fe0a277e7827f0e467fcb33fe0dd0df9bf
+size 977270632

runs/May23_18-54-30_e278246c3359/events.out.tfevents.1716490471.e278246c3359.1431.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4d3e7bfcc5b2f80175e1a09853c947b85a29d03205fb2eed7aab1ac5f0dff95b
+size 8758

runs/May23_19-04-21_e278246c3359/events.out.tfevents.1716491062.e278246c3359.1431.2 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fdff044642f29f846a6d1f6abcba81c3a829197635070ff067655004c970246c
+size 8759

runs/May23_19-11-16_e278246c3359/events.out.tfevents.1716491477.e278246c3359.1431.3 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cc173599aaf701510de789f9b6d5745026e142d66bae590a3131c417c5127b24
+size 12132

runs/May23_19-21-50_e278246c3359/events.out.tfevents.1716492111.e278246c3359.1431.4 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3332279f9dc760ccea7202795133c212012363c85ecfe7a695129599df402b6c
+size 11650

runs/May23_19-28-12_e278246c3359/events.out.tfevents.1716492493.e278246c3359.1431.5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de54c4cc607b9196654b488ac1768c49735d637ab5e5389c3e769c30263541b1
+size 11650

runs/May23_19-36-17_e278246c3359/events.out.tfevents.1716492977.e278246c3359.1431.6 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6e6a16a4ffac74313bcc17409810afc12b1fcfa9bbcf6c9b566110c20e847f3a
+size 14542

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:41727f9ee21abc22f78d3a6cc199e3cfc33d367fd20852e051975aae374e8bf1
+size 5112