apatidar0 committed on
Commit
fae75db
1 Parent(s): 56d87e0

End of training

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model: google/mt5-small
4
+ tags:
5
+ - generated_from_trainer
6
+ metrics:
7
+ - rouge
8
+ model-index:
9
+ - name: mt5-small-finetuned-mt5
10
+ results: []
11
+ ---
12
+
13
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
+ should probably proofread and complete it, then remove this comment. -->
15
+
16
+ # mt5-small-finetuned-mt5
17
+
18
+ This model is a fine-tuned version of [google/mt5-small](https://huggingface.co/google/mt5-small) on an unknown dataset.
19
+ It achieves the following results on the evaluation set (final checkpoint: epoch 100, step 800):
20
+ - Loss: 0.6691
21
+ - Rouge1: 0.5388
22
+ - Rouge2: 0.3838
23
+ - Rougel: 0.5283
24
+ - Rougelsum: 0.5270
25
+
26
+ ## Model description
27
+
28
+ More information needed
29
+
30
+ ## Intended uses & limitations
31
+
32
+ More information needed
33
+
34
+ ## Training and evaluation data
35
+
36
+ More information needed
37
+
38
+ ## Training procedure
39
+
40
+ ### Training hyperparameters
41
+
42
+ The following hyperparameters were used during training:
43
+ - learning_rate: 5.6e-05
44
+ - train_batch_size: 20
45
+ - eval_batch_size: 20
46
+ - seed: 42
47
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
48
+ - lr_scheduler_type: linear
49
+ - num_epochs: 100
50
+
51
+ ### Training results
52
+
53
+ | Training Loss | Epoch | Step | Validation Loss | Rouge1 | Rouge2 | Rougel | Rougelsum |
54
+ |:-------------:|:-----:|:----:|:---------------:|:------:|:------:|:------:|:---------:|
55
+ | 12.893 | 1.0 | 8 | 7.2101 | 0.0967 | 0.0309 | 0.0928 | 0.0928 |
56
+ | 12.4326 | 2.0 | 16 | 6.0616 | 0.1183 | 0.0458 | 0.1140 | 0.1141 |
57
+ | 12.0044 | 3.0 | 24 | 5.5399 | 0.1239 | 0.0469 | 0.1212 | 0.1200 |
58
+ | 11.4794 | 4.0 | 32 | 5.2619 | 0.1504 | 0.0541 | 0.1450 | 0.1470 |
59
+ | 10.85 | 5.0 | 40 | 4.8356 | 0.1675 | 0.0574 | 0.1605 | 0.1626 |
60
+ | 10.2044 | 6.0 | 48 | 4.2656 | 0.1933 | 0.0746 | 0.1862 | 0.1905 |
61
+ | 9.2904 | 7.0 | 56 | 3.7518 | 0.1983 | 0.0787 | 0.1891 | 0.1921 |
62
+ | 8.7029 | 8.0 | 64 | 3.4376 | 0.1873 | 0.0698 | 0.1797 | 0.1818 |
63
+ | 8.3889 | 9.0 | 72 | 3.2085 | 0.1811 | 0.0672 | 0.1738 | 0.1771 |
64
+ | 7.5091 | 10.0 | 80 | 3.0059 | 0.1581 | 0.0581 | 0.1557 | 0.1564 |
65
+ | 7.2132 | 11.0 | 88 | 2.8329 | 0.1654 | 0.0466 | 0.1623 | 0.1616 |
66
+ | 6.796 | 12.0 | 96 | 2.6879 | 0.1735 | 0.0486 | 0.1620 | 0.1617 |
67
+ | 6.4213 | 13.0 | 104 | 2.5694 | 0.1799 | 0.0482 | 0.1722 | 0.1726 |
68
+ | 5.7867 | 14.0 | 112 | 2.4405 | 0.1776 | 0.0497 | 0.1720 | 0.1715 |
69
+ | 5.2668 | 15.0 | 120 | 2.3098 | 0.1860 | 0.0521 | 0.1759 | 0.1766 |
70
+ | 5.0803 | 16.0 | 128 | 2.1944 | 0.2010 | 0.0677 | 0.1931 | 0.1939 |
71
+ | 4.6867 | 17.0 | 136 | 2.1139 | 0.2179 | 0.0811 | 0.2114 | 0.2117 |
72
+ | 4.5557 | 18.0 | 144 | 2.0466 | 0.2186 | 0.0805 | 0.2099 | 0.2103 |
73
+ | 4.4414 | 19.0 | 152 | 1.9919 | 0.2260 | 0.0916 | 0.2177 | 0.2172 |
74
+ | 4.0867 | 20.0 | 160 | 1.9404 | 0.2317 | 0.0976 | 0.2228 | 0.2221 |
75
+ | 3.6814 | 21.0 | 168 | 1.9014 | 0.2287 | 0.0921 | 0.2170 | 0.2157 |
76
+ | 3.5426 | 22.0 | 176 | 1.8656 | 0.2208 | 0.0862 | 0.2139 | 0.2131 |
77
+ | 3.266 | 23.0 | 184 | 1.8224 | 0.2348 | 0.0935 | 0.2232 | 0.2224 |
78
+ | 3.32 | 24.0 | 192 | 1.7907 | 0.2443 | 0.1072 | 0.2355 | 0.2348 |
79
+ | 3.1872 | 25.0 | 200 | 1.7459 | 0.2563 | 0.1121 | 0.2421 | 0.2414 |
80
+ | 2.9643 | 26.0 | 208 | 1.7043 | 0.2703 | 0.1213 | 0.2598 | 0.2591 |
81
+ | 2.8918 | 27.0 | 216 | 1.6654 | 0.2755 | 0.1190 | 0.2633 | 0.2634 |
82
+ | 2.7626 | 28.0 | 224 | 1.6199 | 0.3008 | 0.1385 | 0.2870 | 0.2861 |
83
+ | 2.8192 | 29.0 | 232 | 1.5712 | 0.3061 | 0.1410 | 0.2948 | 0.2942 |
84
+ | 2.5082 | 30.0 | 240 | 1.5405 | 0.3161 | 0.1533 | 0.3073 | 0.3069 |
85
+ | 2.564 | 31.0 | 248 | 1.5111 | 0.3296 | 0.1662 | 0.3198 | 0.3196 |
86
+ | 2.5577 | 32.0 | 256 | 1.4738 | 0.3344 | 0.1745 | 0.3250 | 0.3247 |
87
+ | 2.5199 | 33.0 | 264 | 1.4378 | 0.3468 | 0.1829 | 0.3336 | 0.3328 |
88
+ | 2.4798 | 34.0 | 272 | 1.4033 | 0.3593 | 0.1969 | 0.3448 | 0.3450 |
89
+ | 2.3208 | 35.0 | 280 | 1.3733 | 0.3728 | 0.2146 | 0.3613 | 0.3609 |
90
+ | 2.3704 | 36.0 | 288 | 1.3403 | 0.3721 | 0.2175 | 0.3644 | 0.3649 |
91
+ | 2.3199 | 37.0 | 296 | 1.3092 | 0.3718 | 0.2147 | 0.3638 | 0.3631 |
92
+ | 2.3046 | 38.0 | 304 | 1.2838 | 0.3674 | 0.2141 | 0.3608 | 0.3610 |
93
+ | 2.3183 | 39.0 | 312 | 1.2599 | 0.3728 | 0.2202 | 0.3664 | 0.3669 |
94
+ | 2.178 | 40.0 | 320 | 1.2272 | 0.3826 | 0.2274 | 0.3758 | 0.3749 |
95
+ | 2.1264 | 41.0 | 328 | 1.1940 | 0.3923 | 0.2348 | 0.3841 | 0.3835 |
96
+ | 2.0563 | 42.0 | 336 | 1.1629 | 0.3972 | 0.2391 | 0.3864 | 0.3865 |
97
+ | 2.0213 | 43.0 | 344 | 1.1324 | 0.4082 | 0.2509 | 0.3981 | 0.3980 |
98
+ | 1.9956 | 44.0 | 352 | 1.1085 | 0.4158 | 0.2569 | 0.4051 | 0.4054 |
99
+ | 2.0723 | 45.0 | 360 | 1.0895 | 0.4186 | 0.2594 | 0.4060 | 0.4061 |
100
+ | 1.9021 | 46.0 | 368 | 1.0713 | 0.4316 | 0.2775 | 0.4193 | 0.4194 |
101
+ | 1.9776 | 47.0 | 376 | 1.0510 | 0.4362 | 0.2785 | 0.4232 | 0.4237 |
102
+ | 1.8752 | 48.0 | 384 | 1.0289 | 0.4371 | 0.2778 | 0.4225 | 0.4230 |
103
+ | 1.8729 | 49.0 | 392 | 1.0070 | 0.4386 | 0.2766 | 0.4243 | 0.4245 |
104
+ | 1.9136 | 50.0 | 400 | 0.9900 | 0.4368 | 0.2773 | 0.4240 | 0.4232 |
105
+ | 1.86 | 51.0 | 408 | 0.9765 | 0.4413 | 0.2818 | 0.4291 | 0.4283 |
106
+ | 1.8629 | 52.0 | 416 | 0.9670 | 0.4494 | 0.2909 | 0.4386 | 0.4376 |
107
+ | 1.8345 | 53.0 | 424 | 0.9554 | 0.4515 | 0.2942 | 0.4402 | 0.4393 |
108
+ | 1.7786 | 54.0 | 432 | 0.9430 | 0.4559 | 0.2980 | 0.4439 | 0.4430 |
109
+ | 1.7535 | 55.0 | 440 | 0.9284 | 0.4585 | 0.3016 | 0.4480 | 0.4461 |
110
+ | 1.788 | 56.0 | 448 | 0.9126 | 0.4680 | 0.3096 | 0.4578 | 0.4568 |
111
+ | 1.6512 | 57.0 | 456 | 0.9015 | 0.4803 | 0.3201 | 0.4699 | 0.4691 |
112
+ | 1.7463 | 58.0 | 464 | 0.8937 | 0.4813 | 0.3194 | 0.4697 | 0.4693 |
113
+ | 1.7705 | 59.0 | 472 | 0.8835 | 0.4805 | 0.3192 | 0.4680 | 0.4673 |
114
+ | 1.6796 | 60.0 | 480 | 0.8709 | 0.4797 | 0.3168 | 0.4673 | 0.4667 |
115
+ | 1.652 | 61.0 | 488 | 0.8588 | 0.4811 | 0.3182 | 0.4686 | 0.4684 |
116
+ | 1.6272 | 62.0 | 496 | 0.8470 | 0.4812 | 0.3196 | 0.4696 | 0.4690 |
117
+ | 1.6013 | 63.0 | 504 | 0.8357 | 0.4910 | 0.3298 | 0.4779 | 0.4781 |
118
+ | 1.5951 | 64.0 | 512 | 0.8268 | 0.4948 | 0.3344 | 0.4818 | 0.4822 |
119
+ | 1.5817 | 65.0 | 520 | 0.8164 | 0.4896 | 0.3313 | 0.4787 | 0.4777 |
120
+ | 1.6403 | 66.0 | 528 | 0.8064 | 0.4983 | 0.3419 | 0.4867 | 0.4862 |
121
+ | 1.6281 | 67.0 | 536 | 0.7955 | 0.4992 | 0.3426 | 0.4866 | 0.4866 |
122
+ | 1.6482 | 68.0 | 544 | 0.7881 | 0.4990 | 0.3404 | 0.4860 | 0.4860 |
123
+ | 1.6103 | 69.0 | 552 | 0.7822 | 0.4997 | 0.3401 | 0.4882 | 0.4872 |
124
+ | 1.5396 | 70.0 | 560 | 0.7769 | 0.5023 | 0.3411 | 0.4896 | 0.4890 |
125
+ | 1.5271 | 71.0 | 568 | 0.7696 | 0.5040 | 0.3396 | 0.4908 | 0.4899 |
126
+ | 1.4252 | 72.0 | 576 | 0.7614 | 0.5128 | 0.3521 | 0.4999 | 0.4994 |
127
+ | 1.553 | 73.0 | 584 | 0.7541 | 0.5145 | 0.3525 | 0.5017 | 0.5012 |
128
+ | 1.5503 | 74.0 | 592 | 0.7475 | 0.5193 | 0.3561 | 0.5052 | 0.5047 |
129
+ | 1.4653 | 75.0 | 600 | 0.7415 | 0.5151 | 0.3540 | 0.5020 | 0.5018 |
130
+ | 1.5387 | 76.0 | 608 | 0.7355 | 0.5267 | 0.3632 | 0.5126 | 0.5121 |
131
+ | 1.5706 | 77.0 | 616 | 0.7292 | 0.5232 | 0.3628 | 0.5101 | 0.5096 |
132
+ | 1.4442 | 78.0 | 624 | 0.7229 | 0.5208 | 0.3626 | 0.5086 | 0.5082 |
133
+ | 1.4816 | 79.0 | 632 | 0.7173 | 0.5193 | 0.3606 | 0.5070 | 0.5060 |
134
+ | 1.5228 | 80.0 | 640 | 0.7119 | 0.5180 | 0.3596 | 0.5057 | 0.5053 |
135
+ | 1.4623 | 81.0 | 648 | 0.7077 | 0.5228 | 0.3645 | 0.5104 | 0.5092 |
136
+ | 1.4077 | 82.0 | 656 | 0.7025 | 0.5266 | 0.3699 | 0.5164 | 0.5156 |
137
+ | 1.4069 | 83.0 | 664 | 0.6977 | 0.5318 | 0.3749 | 0.5212 | 0.5203 |
138
+ | 1.4191 | 84.0 | 672 | 0.6934 | 0.5307 | 0.3732 | 0.5200 | 0.5192 |
139
+ | 1.4564 | 85.0 | 680 | 0.6898 | 0.5317 | 0.3764 | 0.5213 | 0.5202 |
140
+ | 1.4195 | 86.0 | 688 | 0.6872 | 0.5311 | 0.3751 | 0.5203 | 0.5186 |
141
+ | 1.422 | 87.0 | 696 | 0.6843 | 0.5319 | 0.3762 | 0.5212 | 0.5196 |
142
+ | 1.4821 | 88.0 | 704 | 0.6822 | 0.5355 | 0.3812 | 0.5254 | 0.5242 |
143
+ | 1.539 | 89.0 | 712 | 0.6809 | 0.5349 | 0.3792 | 0.5246 | 0.5234 |
144
+ | 1.4914 | 90.0 | 720 | 0.6793 | 0.5341 | 0.3785 | 0.5233 | 0.5221 |
145
+ | 1.4247 | 91.0 | 728 | 0.6774 | 0.5349 | 0.3795 | 0.5242 | 0.5229 |
146
+ | 1.4937 | 92.0 | 736 | 0.6757 | 0.5350 | 0.3788 | 0.5238 | 0.5226 |
147
+ | 1.3732 | 93.0 | 744 | 0.6741 | 0.5362 | 0.3809 | 0.5256 | 0.5243 |
148
+ | 1.3991 | 94.0 | 752 | 0.6729 | 0.5362 | 0.3816 | 0.5261 | 0.5249 |
149
+ | 1.481 | 95.0 | 760 | 0.6716 | 0.5384 | 0.3836 | 0.5280 | 0.5266 |
150
+ | 1.3902 | 96.0 | 768 | 0.6707 | 0.5384 | 0.3836 | 0.5280 | 0.5266 |
151
+ | 1.5239 | 97.0 | 776 | 0.6700 | 0.5388 | 0.3838 | 0.5283 | 0.5270 |
152
+ | 1.4486 | 98.0 | 784 | 0.6695 | 0.5388 | 0.3844 | 0.5290 | 0.5277 |
153
+ | 1.3551 | 99.0 | 792 | 0.6692 | 0.5388 | 0.3838 | 0.5283 | 0.5270 |
154
+ | 1.4213 | 100.0 | 800 | 0.6691 | 0.5388 | 0.3838 | 0.5283 | 0.5270 |
155
+
156
+
157
+ ### Framework versions
158
+
159
+ - Transformers 4.37.2
160
+ - Pytorch 2.1.0+cu121
161
+ - Datasets 2.17.1
162
+ - Tokenizers 0.15.2
config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/mt5-small",
3
+ "architectures": [
4
+ "MT5ForConditionalGeneration"
5
+ ],
6
+ "classifier_dropout": 0.0,
7
+ "d_ff": 1024,
8
+ "d_kv": 64,
9
+ "d_model": 512,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "gelu_new",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 1,
14
+ "feed_forward_proj": "gated-gelu",
15
+ "initializer_factor": 1.0,
16
+ "is_encoder_decoder": true,
17
+ "is_gated_act": true,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "model_type": "mt5",
20
+ "num_decoder_layers": 8,
21
+ "num_heads": 6,
22
+ "num_layers": 8,
23
+ "pad_token_id": 0,
24
+ "relative_attention_max_distance": 128,
25
+ "relative_attention_num_buckets": 32,
26
+ "tie_word_embeddings": false,
27
+ "tokenizer_class": "T5Tokenizer",
28
+ "torch_dtype": "float32",
29
+ "transformers_version": "4.37.2",
30
+ "use_cache": true,
31
+ "vocab_size": 250112
32
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "decoder_start_token_id": 0,
3
+ "eos_token_id": 1,
4
+ "pad_token_id": 0,
5
+ "transformers_version": "4.37.2"
6
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58826d299abbc62210309163dffd75155ece64a1561c156f95b6fdfb98fe2a9a
3
+ size 1200729512
runs/Feb23_06-43-14_58f3319be355/events.out.tfevents.1708670697.58f3319be355.5247.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efdde5633f9752d55577f088b222d8e9989012a83ea56039d543c8df5da880bc
3
+ size 4809
runs/Feb23_06-43-14_58f3319be355/events.out.tfevents.1708670764.58f3319be355.5247.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21833ead99b365fba0fc5936bcfdc119e2ca10b4fe8b085bb6c615744975d4cf
3
+ size 10109
runs/Feb23_06-47-47_58f3319be355/events.out.tfevents.1708670870.58f3319be355.5247.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec87c8e58d394d48ad323ddb7cc5c769d896dada650dd7709161e9c31d420a51
3
+ size 70120
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eos_token": {
3
+ "content": "</s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "pad_token": {
10
+ "content": "<pad>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "unk_token": {
17
+ "content": "<unk>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef78f86560d809067d12bac6c09f19a462cb3af3f54d2b8acbba26e1433125d6
3
+ size 4309802
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87a036c0dfd2d80e1202a7e2961aeee653ff63d67cd369b155c78a6e2003a390
3
+ size 16330562
tokenizer_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<pad>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "</s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "<unk>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ }
27
+ },
28
+ "additional_special_tokens": [],
29
+ "clean_up_tokenization_spaces": true,
30
+ "eos_token": "</s>",
31
+ "extra_ids": 0,
32
+ "legacy": true,
33
+ "model_max_length": 1000000000000000019884624838656,
34
+ "pad_token": "<pad>",
35
+ "sp_model_kwargs": {},
36
+ "tokenizer_class": "T5Tokenizer",
37
+ "unk_token": "<unk>"
38
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cd8b7309e3ac2cba237a3eead911ec4e29c550750533ac7ef7cf75f848664ed
3
+ size 4856