End of training

Browse files

Files changed (8) hide show

README.md +30 -30
config.json +6 -5
model.safetensors +2 -2
runs/Apr06_17-05-44_df7953592bde/events.out.tfevents.1712423152.df7953592bde.168.1 +3 -0
special_tokens_map.json +21 -3
tokenizer.json +0 -0
tokenizer_config.json +1 -0
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 license: apache-2.0
-base_model: t5-base
 tags:
 - generated_from_trainer
 model-index:
@@ -13,9 +13,9 @@ should probably proofread and complete it, then remove this comment. -->
 # ingredient_prune
-This model is a fine-tuned version of [t5-base](https://huggingface.co/t5-base) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.2021
 ## Model description
@@ -47,33 +47,33 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 13.8901       | 0.18  | 10   | 13.5101         |
-| 11.2216       | 0.36  | 20   | 9.6935          |
-| 8.9001        | 0.55  | 30   | 6.9317          |
-| 6.9158        | 0.73  | 40   | 4.3383          |
-| 4.5333        | 0.91  | 50   | 2.5113          |
-| 3.3307        | 1.09  | 60   | 1.5023          |
-| 2.3644        | 1.27  | 70   | 1.1537          |
-| 1.8483        | 1.45  | 80   | 0.7876          |
-| 1.5672        | 1.64  | 90   | 0.6205          |
-| 1.1184        | 1.82  | 100  | 0.4843          |
-| 0.9273        | 2.0   | 110  | 0.4324          |
-| 0.7881        | 2.18  | 120  | 0.4005          |
-| 0.7159        | 2.36  | 130  | 0.3644          |
-| 0.6495        | 2.55  | 140  | 0.3373          |
-| 0.5903        | 2.73  | 150  | 0.3155          |
-| 0.5114        | 2.91  | 160  | 0.2980          |
-| 0.5107        | 3.09  | 170  | 0.2800          |
-| 0.4718        | 3.27  | 180  | 0.2669          |
-| 0.4285        | 3.45  | 190  | 0.2542          |
-| 0.429         | 3.64  | 200  | 0.2422          |
-| 0.4065        | 3.82  | 210  | 0.2320          |
-| 0.4087        | 4.0   | 220  | 0.2244          |
-| 0.3597        | 4.18  | 230  | 0.2166          |
-| 0.3515        | 4.36  | 240  | 0.2106          |
-| 0.3453        | 4.55  | 250  | 0.2065          |
-| 0.3563        | 4.73  | 260  | 0.2036          |
-| 0.3492        | 4.91  | 270  | 0.2021          |
 ### Framework versions

 ---
 license: apache-2.0
+base_model: google/flan-t5-base
 tags:
 - generated_from_trainer
 model-index:
 # ingredient_prune
+This model is a fine-tuned version of [google/flan-t5-base](https://huggingface.co/google/flan-t5-base) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.9194
 ## Model description
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
+| 38.4846       | 0.18  | 10   | 32.3200         |
+| 30.1612       | 0.36  | 20   | 25.8135         |
+| 25.0775       | 0.55  | 30   | 20.9787         |
+| 21.133        | 0.73  | 40   | 15.7361         |
+| 16.0181       | 0.91  | 50   | 8.5017          |
+| 10.3654       | 1.09  | 60   | 5.1251          |
+| 6.4435        | 1.27  | 70   | 4.6717          |
+| 5.2627        | 1.45  | 80   | 4.4668          |
+| 4.6898        | 1.64  | 90   | 4.2637          |
+| 4.4092        | 1.82  | 100  | 4.0184          |
+| 4.2133        | 2.0   | 110  | 3.7045          |
+| 3.9949        | 2.18  | 120  | 3.3633          |
+| 3.8241        | 2.36  | 130  | 3.0716          |
+| 3.6217        | 2.55  | 140  | 2.7464          |
+| 3.4166        | 2.73  | 150  | 2.4338          |
+| 3.1844        | 2.91  | 160  | 2.1604          |
+| 3.0109        | 3.09  | 170  | 1.9189          |
+| 2.859         | 3.27  | 180  | 1.7063          |
+| 2.6514        | 3.45  | 190  | 1.5348          |
+| 2.4992        | 3.64  | 200  | 1.3970          |
+| 2.3855        | 3.82  | 210  | 1.2713          |
+| 2.2505        | 4.0   | 220  | 1.1576          |
+| 2.2017        | 4.18  | 230  | 1.0731          |
+| 2.0887        | 4.36  | 240  | 1.0120          |
+| 2.0502        | 4.55  | 250  | 0.9661          |
+| 1.996         | 4.73  | 260  | 0.9348          |
+| 2.006         | 4.91  | 270  | 0.9194          |
 ### Framework versions

config.json CHANGED Viewed

@@ -1,20 +1,20 @@
 {
-  "_name_or_path": "t5-base",
   "architectures": [
     "T5ForConditionalGeneration"
   ],
   "classifier_dropout": 0.0,
-  "d_ff": 3072,
   "d_kv": 64,
   "d_model": 768,
   "decoder_start_token_id": 0,
-  "dense_act_fn": "relu",
   "dropout_rate": 0.1,
   "eos_token_id": 1,
-  "feed_forward_proj": "relu",
   "initializer_factor": 1.0,
   "is_encoder_decoder": true,
-  "is_gated_act": false,
   "layer_norm_epsilon": 1e-06,
   "model_type": "t5",
   "n_positions": 512,
@@ -54,6 +54,7 @@
       "prefix": "translate English to Romanian: "
     }
   },
   "torch_dtype": "float32",
   "transformers_version": "4.38.2",
   "use_cache": true,

 {
+  "_name_or_path": "google/flan-t5-base",
   "architectures": [
     "T5ForConditionalGeneration"
   ],
   "classifier_dropout": 0.0,
+  "d_ff": 2048,
   "d_kv": 64,
   "d_model": 768,
   "decoder_start_token_id": 0,
+  "dense_act_fn": "gelu_new",
   "dropout_rate": 0.1,
   "eos_token_id": 1,
+  "feed_forward_proj": "gated-gelu",
   "initializer_factor": 1.0,
   "is_encoder_decoder": true,
+  "is_gated_act": true,
   "layer_norm_epsilon": 1e-06,
   "model_type": "t5",
   "n_positions": 512,
       "prefix": "translate English to Romanian: "
     }
   },
+  "tie_word_embeddings": false,
   "torch_dtype": "float32",
   "transformers_version": "4.38.2",
   "use_cache": true,

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f4d2d2b99f6ff36ae036cde9a73fcb1bcd4b107db55e86bc9fa2bb5d54c3dc3c
-size 891644712

 version https://git-lfs.github.com/spec/v1
+oid sha256:824b7cf206ea4453c76d7ba74c3195de0f9852168371f921d542dfbb8ea5497d
+size 990345064

runs/Apr06_17-05-44_df7953592bde/events.out.tfevents.1712423152.df7953592bde.168.1 ADDED Viewed

@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ad06d0ca68e9e819e4ab95685e30a3bfc7c06593543d28784c8e56417eeb7dd6
+size 18873

special_tokens_map.json CHANGED Viewed

@@ -101,7 +101,25 @@
     "<extra_id_98>",
     "<extra_id_99>"
   ],
-  "eos_token": "</s>",
-  "pad_token": "<pad>",
-  "unk_token": "<unk>"
 }

     "<extra_id_98>",
     "<extra_id_99>"
   ],
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
 }

tokenizer.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json CHANGED Viewed

@@ -932,6 +932,7 @@
   "extra_ids": 100,
   "model_max_length": 128,
   "pad_token": "<pad>",
   "tokenizer_class": "T5Tokenizer",
   "unk_token": "<unk>"
 }

   "extra_ids": 100,
   "model_max_length": 128,
   "pad_token": "<pad>",
+  "sp_model_kwargs": {},
   "tokenizer_class": "T5Tokenizer",
   "unk_token": "<unk>"
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e24eb1200ae4c986e921750f7baeedb709068cf3e3fd0294ba5080450d869661
 size 5048

 version https://git-lfs.github.com/spec/v1
+oid sha256:fd2de9f0bf497927315b29ce97631fc576aafa2304d0d3bbc6cada0d03d45624
 size 5048