Acc committed on
Commit
7aa8846
1 Parent(s): 513aafc

End of training

Browse files
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  license: apache-2.0
3
- base_model: t5-base
4
  tags:
5
  - generated_from_trainer
6
  model-index:
@@ -13,9 +13,9 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  # ingredient_prune
15
 
16
- This model is a fine-tuned version of [t5-base](https://huggingface.co/t5-base) on the None dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 0.2021
19
 
20
  ## Model description
21
 
@@ -47,33 +47,33 @@ The following hyperparameters were used during training:
47
 
48
  | Training Loss | Epoch | Step | Validation Loss |
49
  |:-------------:|:-----:|:----:|:---------------:|
50
- | 13.8901 | 0.18 | 10 | 13.5101 |
51
- | 11.2216 | 0.36 | 20 | 9.6935 |
52
- | 8.9001 | 0.55 | 30 | 6.9317 |
53
- | 6.9158 | 0.73 | 40 | 4.3383 |
54
- | 4.5333 | 0.91 | 50 | 2.5113 |
55
- | 3.3307 | 1.09 | 60 | 1.5023 |
56
- | 2.3644 | 1.27 | 70 | 1.1537 |
57
- | 1.8483 | 1.45 | 80 | 0.7876 |
58
- | 1.5672 | 1.64 | 90 | 0.6205 |
59
- | 1.1184 | 1.82 | 100 | 0.4843 |
60
- | 0.9273 | 2.0 | 110 | 0.4324 |
61
- | 0.7881 | 2.18 | 120 | 0.4005 |
62
- | 0.7159 | 2.36 | 130 | 0.3644 |
63
- | 0.6495 | 2.55 | 140 | 0.3373 |
64
- | 0.5903 | 2.73 | 150 | 0.3155 |
65
- | 0.5114 | 2.91 | 160 | 0.2980 |
66
- | 0.5107 | 3.09 | 170 | 0.2800 |
67
- | 0.4718 | 3.27 | 180 | 0.2669 |
68
- | 0.4285 | 3.45 | 190 | 0.2542 |
69
- | 0.429 | 3.64 | 200 | 0.2422 |
70
- | 0.4065 | 3.82 | 210 | 0.2320 |
71
- | 0.4087 | 4.0 | 220 | 0.2244 |
72
- | 0.3597 | 4.18 | 230 | 0.2166 |
73
- | 0.3515 | 4.36 | 240 | 0.2106 |
74
- | 0.3453 | 4.55 | 250 | 0.2065 |
75
- | 0.3563 | 4.73 | 260 | 0.2036 |
76
- | 0.3492 | 4.91 | 270 | 0.2021 |
77
 
78
 
79
  ### Framework versions
 
1
  ---
2
  license: apache-2.0
3
+ base_model: google/flan-t5-base
4
  tags:
5
  - generated_from_trainer
6
  model-index:
 
13
 
14
  # ingredient_prune
15
 
16
+ This model is a fine-tuned version of [google/flan-t5-base](https://huggingface.co/google/flan-t5-base) on an unspecified dataset.
17
  It achieves the following results on the evaluation set:
18
+ - Loss: 0.9194
19
 
20
  ## Model description
21
 
 
47
 
48
  | Training Loss | Epoch | Step | Validation Loss |
49
  |:-------------:|:-----:|:----:|:---------------:|
50
+ | 38.4846 | 0.18 | 10 | 32.3200 |
51
+ | 30.1612 | 0.36 | 20 | 25.8135 |
52
+ | 25.0775 | 0.55 | 30 | 20.9787 |
53
+ | 21.133 | 0.73 | 40 | 15.7361 |
54
+ | 16.0181 | 0.91 | 50 | 8.5017 |
55
+ | 10.3654 | 1.09 | 60 | 5.1251 |
56
+ | 6.4435 | 1.27 | 70 | 4.6717 |
57
+ | 5.2627 | 1.45 | 80 | 4.4668 |
58
+ | 4.6898 | 1.64 | 90 | 4.2637 |
59
+ | 4.4092 | 1.82 | 100 | 4.0184 |
60
+ | 4.2133 | 2.0 | 110 | 3.7045 |
61
+ | 3.9949 | 2.18 | 120 | 3.3633 |
62
+ | 3.8241 | 2.36 | 130 | 3.0716 |
63
+ | 3.6217 | 2.55 | 140 | 2.7464 |
64
+ | 3.4166 | 2.73 | 150 | 2.4338 |
65
+ | 3.1844 | 2.91 | 160 | 2.1604 |
66
+ | 3.0109 | 3.09 | 170 | 1.9189 |
67
+ | 2.859 | 3.27 | 180 | 1.7063 |
68
+ | 2.6514 | 3.45 | 190 | 1.5348 |
69
+ | 2.4992 | 3.64 | 200 | 1.3970 |
70
+ | 2.3855 | 3.82 | 210 | 1.2713 |
71
+ | 2.2505 | 4.0 | 220 | 1.1576 |
72
+ | 2.2017 | 4.18 | 230 | 1.0731 |
73
+ | 2.0887 | 4.36 | 240 | 1.0120 |
74
+ | 2.0502 | 4.55 | 250 | 0.9661 |
75
+ | 1.996 | 4.73 | 260 | 0.9348 |
76
+ | 2.006 | 4.91 | 270 | 0.9194 |
77
 
78
 
79
  ### Framework versions
config.json CHANGED
@@ -1,20 +1,20 @@
1
  {
2
- "_name_or_path": "t5-base",
3
  "architectures": [
4
  "T5ForConditionalGeneration"
5
  ],
6
  "classifier_dropout": 0.0,
7
- "d_ff": 3072,
8
  "d_kv": 64,
9
  "d_model": 768,
10
  "decoder_start_token_id": 0,
11
- "dense_act_fn": "relu",
12
  "dropout_rate": 0.1,
13
  "eos_token_id": 1,
14
- "feed_forward_proj": "relu",
15
  "initializer_factor": 1.0,
16
  "is_encoder_decoder": true,
17
- "is_gated_act": false,
18
  "layer_norm_epsilon": 1e-06,
19
  "model_type": "t5",
20
  "n_positions": 512,
@@ -54,6 +54,7 @@
54
  "prefix": "translate English to Romanian: "
55
  }
56
  },
 
57
  "torch_dtype": "float32",
58
  "transformers_version": "4.38.2",
59
  "use_cache": true,
 
1
  {
2
+ "_name_or_path": "google/flan-t5-base",
3
  "architectures": [
4
  "T5ForConditionalGeneration"
5
  ],
6
  "classifier_dropout": 0.0,
7
+ "d_ff": 2048,
8
  "d_kv": 64,
9
  "d_model": 768,
10
  "decoder_start_token_id": 0,
11
+ "dense_act_fn": "gelu_new",
12
  "dropout_rate": 0.1,
13
  "eos_token_id": 1,
14
+ "feed_forward_proj": "gated-gelu",
15
  "initializer_factor": 1.0,
16
  "is_encoder_decoder": true,
17
+ "is_gated_act": true,
18
  "layer_norm_epsilon": 1e-06,
19
  "model_type": "t5",
20
  "n_positions": 512,
 
54
  "prefix": "translate English to Romanian: "
55
  }
56
  },
57
+ "tie_word_embeddings": false,
58
  "torch_dtype": "float32",
59
  "transformers_version": "4.38.2",
60
  "use_cache": true,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4d2d2b99f6ff36ae036cde9a73fcb1bcd4b107db55e86bc9fa2bb5d54c3dc3c
3
- size 891644712
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:824b7cf206ea4453c76d7ba74c3195de0f9852168371f921d542dfbb8ea5497d
3
+ size 990345064
runs/Apr06_17-05-44_df7953592bde/events.out.tfevents.1712423152.df7953592bde.168.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad06d0ca68e9e819e4ab95685e30a3bfc7c06593543d28784c8e56417eeb7dd6
3
+ size 18873
special_tokens_map.json CHANGED
@@ -101,7 +101,25 @@
101
  "<extra_id_98>",
102
  "<extra_id_99>"
103
  ],
104
- "eos_token": "</s>",
105
- "pad_token": "<pad>",
106
- "unk_token": "<unk>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  }
 
101
  "<extra_id_98>",
102
  "<extra_id_99>"
103
  ],
104
+ "eos_token": {
105
+ "content": "</s>",
106
+ "lstrip": false,
107
+ "normalized": false,
108
+ "rstrip": false,
109
+ "single_word": false
110
+ },
111
+ "pad_token": {
112
+ "content": "<pad>",
113
+ "lstrip": false,
114
+ "normalized": false,
115
+ "rstrip": false,
116
+ "single_word": false
117
+ },
118
+ "unk_token": {
119
+ "content": "<unk>",
120
+ "lstrip": false,
121
+ "normalized": false,
122
+ "rstrip": false,
123
+ "single_word": false
124
+ }
125
  }
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -932,6 +932,7 @@
932
  "extra_ids": 100,
933
  "model_max_length": 128,
934
  "pad_token": "<pad>",
 
935
  "tokenizer_class": "T5Tokenizer",
936
  "unk_token": "<unk>"
937
  }
 
932
  "extra_ids": 100,
933
  "model_max_length": 128,
934
  "pad_token": "<pad>",
935
+ "sp_model_kwargs": {},
936
  "tokenizer_class": "T5Tokenizer",
937
  "unk_token": "<unk>"
938
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e24eb1200ae4c986e921750f7baeedb709068cf3e3fd0294ba5080450d869661
3
  size 5048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd2de9f0bf497927315b29ce97631fc576aafa2304d0d3bbc6cada0d03d45624
3
  size 5048