oSabre committed on
Commit
91b602b
1 Parent(s): 41c2bf6

End of training

Browse files
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  tags:
3
  - generated_from_trainer
4
  datasets:
@@ -14,13 +16,13 @@ model-index:
14
  dataset:
15
  name: opus_books
16
  type: opus_books
17
- config: es-pt
18
  split: train
19
- args: es-pt
20
  metrics:
21
  - name: Bleu
22
  type: bleu
23
- value: 1.5414
24
  ---
25
 
26
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -28,11 +30,11 @@ should probably proofread and complete it, then remove this comment. -->
28
 
29
  # opus_books_es_pt
30
 
31
- This model was trained from scratch on the opus_books dataset.
32
  It achieves the following results on the evaluation set:
33
- - Loss: 2.4043
34
- - Bleu: 1.5414
35
- - Gen Len: 18.3803
36
 
37
  ## Model description
38
 
@@ -57,113 +59,18 @@ The following hyperparameters were used during training:
57
  - seed: 42
58
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
59
  - lr_scheduler_type: linear
60
- - num_epochs: 100
61
  - mixed_precision_training: Native AMP
62
 
63
  ### Training results
64
 
65
  | Training Loss | Epoch | Step | Validation Loss | Bleu | Gen Len |
66
  |:-------------:|:-----:|:----:|:---------------:|:------:|:-------:|
67
- | No log | 1.0 | 53 | 2.8753 | 1.3055 | 18.3192 |
68
- | No log | 2.0 | 106 | 2.8517 | 1.3879 | 18.3239 |
69
- | No log | 3.0 | 159 | 2.8330 | 1.4455 | 18.3286 |
70
- | No log | 4.0 | 212 | 2.8172 | 1.4054 | 18.3803 |
71
- | No log | 5.0 | 265 | 2.8011 | 1.4365 | 18.3709 |
72
- | No log | 6.0 | 318 | 2.7803 | 1.4315 | 18.3474 |
73
- | No log | 7.0 | 371 | 2.7683 | 1.4768 | 18.3286 |
74
- | No log | 8.0 | 424 | 2.7552 | 1.5171 | 18.3192 |
75
- | No log | 9.0 | 477 | 2.7394 | 1.488 | 18.3474 |
76
- | 3.0631 | 10.0 | 530 | 2.7270 | 1.5307 | 18.385 |
77
- | 3.0631 | 11.0 | 583 | 2.7156 | 1.505 | 18.3005 |
78
- | 3.0631 | 12.0 | 636 | 2.7000 | 1.3708 | 18.3146 |
79
- | 3.0631 | 13.0 | 689 | 2.6914 | 1.3796 | 18.3192 |
80
- | 3.0631 | 14.0 | 742 | 2.6818 | 1.4616 | 18.3005 |
81
- | 3.0631 | 15.0 | 795 | 2.6728 | 1.4487 | 18.3005 |
82
- | 3.0631 | 16.0 | 848 | 2.6596 | 1.3979 | 18.2911 |
83
- | 3.0631 | 17.0 | 901 | 2.6506 | 1.4544 | 18.3099 |
84
- | 3.0631 | 18.0 | 954 | 2.6381 | 1.3779 | 18.3239 |
85
- | 2.9232 | 19.0 | 1007 | 2.6313 | 1.4275 | 18.3052 |
86
- | 2.9232 | 20.0 | 1060 | 2.6223 | 1.4489 | 18.3521 |
87
- | 2.9232 | 21.0 | 1113 | 2.6139 | 1.4473 | 18.3803 |
88
- | 2.9232 | 22.0 | 1166 | 2.6058 | 1.4407 | 18.3333 |
89
- | 2.9232 | 23.0 | 1219 | 2.5985 | 1.4594 | 18.3192 |
90
- | 2.9232 | 24.0 | 1272 | 2.5899 | 1.4473 | 18.2911 |
91
- | 2.9232 | 25.0 | 1325 | 2.5832 | 1.4717 | 18.3521 |
92
- | 2.9232 | 26.0 | 1378 | 2.5752 | 1.4282 | 18.3333 |
93
- | 2.9232 | 27.0 | 1431 | 2.5699 | 1.3598 | 18.3239 |
94
- | 2.9232 | 28.0 | 1484 | 2.5628 | 1.409 | 18.3286 |
95
- | 2.807 | 29.0 | 1537 | 2.5577 | 1.3461 | 18.3568 |
96
- | 2.807 | 30.0 | 1590 | 2.5524 | 1.425 | 18.3803 |
97
- | 2.807 | 31.0 | 1643 | 2.5449 | 1.3638 | 18.3615 |
98
- | 2.807 | 32.0 | 1696 | 2.5413 | 1.3604 | 18.3897 |
99
- | 2.807 | 33.0 | 1749 | 2.5380 | 1.5423 | 18.3991 |
100
- | 2.807 | 34.0 | 1802 | 2.5335 | 1.5392 | 18.3944 |
101
- | 2.807 | 35.0 | 1855 | 2.5266 | 1.4923 | 18.3474 |
102
- | 2.807 | 36.0 | 1908 | 2.5210 | 1.445 | 18.3192 |
103
- | 2.807 | 37.0 | 1961 | 2.5151 | 1.453 | 18.3521 |
104
- | 2.7147 | 38.0 | 2014 | 2.5113 | 1.4277 | 18.3286 |
105
- | 2.7147 | 39.0 | 2067 | 2.5093 | 1.4015 | 18.3568 |
106
- | 2.7147 | 40.0 | 2120 | 2.5033 | 1.4314 | 18.3615 |
107
- | 2.7147 | 41.0 | 2173 | 2.4992 | 1.3861 | 18.3803 |
108
- | 2.7147 | 42.0 | 2226 | 2.4961 | 1.4661 | 18.385 |
109
- | 2.7147 | 43.0 | 2279 | 2.4933 | 1.4569 | 18.3803 |
110
- | 2.7147 | 44.0 | 2332 | 2.4887 | 1.5818 | 18.3803 |
111
- | 2.7147 | 45.0 | 2385 | 2.4863 | 1.5672 | 18.3803 |
112
- | 2.7147 | 46.0 | 2438 | 2.4807 | 1.5475 | 18.3568 |
113
- | 2.7147 | 47.0 | 2491 | 2.4790 | 1.4686 | 18.3568 |
114
- | 2.6478 | 48.0 | 2544 | 2.4742 | 1.5072 | 18.3615 |
115
- | 2.6478 | 49.0 | 2597 | 2.4720 | 1.6371 | 18.3897 |
116
- | 2.6478 | 50.0 | 2650 | 2.4690 | 1.5358 | 18.3239 |
117
- | 2.6478 | 51.0 | 2703 | 2.4663 | 1.5322 | 18.3239 |
118
- | 2.6478 | 52.0 | 2756 | 2.4630 | 1.5193 | 18.3427 |
119
- | 2.6478 | 53.0 | 2809 | 2.4590 | 1.5162 | 18.3333 |
120
- | 2.6478 | 54.0 | 2862 | 2.4565 | 1.5365 | 18.3239 |
121
- | 2.6478 | 55.0 | 2915 | 2.4535 | 1.5086 | 18.3709 |
122
- | 2.6478 | 56.0 | 2968 | 2.4514 | 1.5211 | 18.3521 |
123
- | 2.5967 | 57.0 | 3021 | 2.4499 | 1.5442 | 18.3709 |
124
- | 2.5967 | 58.0 | 3074 | 2.4483 | 1.5441 | 18.3709 |
125
- | 2.5967 | 59.0 | 3127 | 2.4456 | 1.5288 | 18.3709 |
126
- | 2.5967 | 60.0 | 3180 | 2.4419 | 1.4669 | 18.3897 |
127
- | 2.5967 | 61.0 | 3233 | 2.4409 | 1.4707 | 18.3756 |
128
- | 2.5967 | 62.0 | 3286 | 2.4394 | 1.5037 | 18.385 |
129
- | 2.5967 | 63.0 | 3339 | 2.4371 | 1.5251 | 18.3709 |
130
- | 2.5967 | 64.0 | 3392 | 2.4334 | 1.4897 | 18.3991 |
131
- | 2.5967 | 65.0 | 3445 | 2.4326 | 1.5373 | 18.385 |
132
- | 2.5967 | 66.0 | 3498 | 2.4326 | 1.5174 | 18.3944 |
133
- | 2.5514 | 67.0 | 3551 | 2.4292 | 1.5326 | 18.3803 |
134
- | 2.5514 | 68.0 | 3604 | 2.4291 | 1.5224 | 18.3709 |
135
- | 2.5514 | 69.0 | 3657 | 2.4264 | 1.4945 | 18.3709 |
136
- | 2.5514 | 70.0 | 3710 | 2.4238 | 1.5155 | 18.385 |
137
- | 2.5514 | 71.0 | 3763 | 2.4220 | 1.556 | 18.3803 |
138
- | 2.5514 | 72.0 | 3816 | 2.4214 | 1.5782 | 18.385 |
139
- | 2.5514 | 73.0 | 3869 | 2.4197 | 1.6084 | 18.3709 |
140
- | 2.5514 | 74.0 | 3922 | 2.4184 | 1.5642 | 18.3709 |
141
- | 2.5514 | 75.0 | 3975 | 2.4185 | 1.6182 | 18.3897 |
142
- | 2.5176 | 76.0 | 4028 | 2.4169 | 1.5632 | 18.3756 |
143
- | 2.5176 | 77.0 | 4081 | 2.4139 | 1.5853 | 18.385 |
144
- | 2.5176 | 78.0 | 4134 | 2.4136 | 1.5852 | 18.3897 |
145
- | 2.5176 | 79.0 | 4187 | 2.4128 | 1.5608 | 18.3897 |
146
- | 2.5176 | 80.0 | 4240 | 2.4123 | 1.5707 | 18.3897 |
147
- | 2.5176 | 81.0 | 4293 | 2.4109 | 1.5622 | 18.3944 |
148
- | 2.5176 | 82.0 | 4346 | 2.4104 | 1.5608 | 18.3803 |
149
- | 2.5176 | 83.0 | 4399 | 2.4101 | 1.561 | 18.3803 |
150
- | 2.5176 | 84.0 | 4452 | 2.4097 | 1.56 | 18.3944 |
151
- | 2.497 | 85.0 | 4505 | 2.4096 | 1.5644 | 18.3944 |
152
- | 2.497 | 86.0 | 4558 | 2.4075 | 1.5636 | 18.4038 |
153
- | 2.497 | 87.0 | 4611 | 2.4073 | 1.5779 | 18.3944 |
154
- | 2.497 | 88.0 | 4664 | 2.4069 | 1.5611 | 18.3944 |
155
- | 2.497 | 89.0 | 4717 | 2.4068 | 1.5827 | 18.3944 |
156
- | 2.497 | 90.0 | 4770 | 2.4063 | 1.558 | 18.3944 |
157
- | 2.497 | 91.0 | 4823 | 2.4057 | 1.533 | 18.3944 |
158
- | 2.497 | 92.0 | 4876 | 2.4050 | 1.5271 | 18.3944 |
159
- | 2.497 | 93.0 | 4929 | 2.4048 | 1.5655 | 18.4038 |
160
- | 2.497 | 94.0 | 4982 | 2.4049 | 1.5351 | 18.3803 |
161
- | 2.4847 | 95.0 | 5035 | 2.4045 | 1.5411 | 18.3803 |
162
- | 2.4847 | 96.0 | 5088 | 2.4046 | 1.5468 | 18.3803 |
163
- | 2.4847 | 97.0 | 5141 | 2.4046 | 1.5474 | 18.3803 |
164
- | 2.4847 | 98.0 | 5194 | 2.4045 | 1.5468 | 18.3803 |
165
- | 2.4847 | 99.0 | 5247 | 2.4044 | 1.5468 | 18.3803 |
166
- | 2.4847 | 100.0 | 5300 | 2.4043 | 1.5414 | 18.3803 |
167
 
168
 
169
  ### Framework versions
 
1
  ---
2
+ license: apache-2.0
3
+ base_model: t5-small
4
  tags:
5
  - generated_from_trainer
6
  datasets:
 
16
  dataset:
17
  name: opus_books
18
  type: opus_books
19
+ config: en-pt
20
  split: train
21
+ args: en-pt
22
  metrics:
23
  - name: Bleu
24
  type: bleu
25
+ value: 0.3989
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
30
 
31
  # opus_books_es_pt
32
 
33
+ This model is a fine-tuned version of [t5-small](https://huggingface.co/t5-small) on the opus_books dataset.
34
  It achieves the following results on the evaluation set:
35
+ - Loss: 3.3303
36
+ - Bleu: 0.3989
37
+ - Gen Len: 17.5302
38
 
39
  ## Model description
40
 
 
59
  - seed: 42
60
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
61
  - lr_scheduler_type: linear
62
+ - num_epochs: 5
63
  - mixed_precision_training: Native AMP
64
 
65
  ### Training results
66
 
67
  | Training Loss | Epoch | Step | Validation Loss | Bleu | Gen Len |
68
  |:-------------:|:-----:|:----:|:---------------:|:------:|:-------:|
69
+ | No log | 1.0 | 71 | 3.7759 | 0.5559 | 16.9715 |
70
+ | No log | 2.0 | 142 | 3.5343 | 0.517 | 17.2776 |
71
+ | No log | 3.0 | 213 | 3.4102 | 0.4355 | 17.4448 |
72
+ | No log | 4.0 | 284 | 3.3491 | 0.4057 | 17.516 |
73
+ | No log | 5.0 | 355 | 3.3303 | 0.3989 | 17.5302 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
 
76
  ### Framework versions
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "oSabre/opus_books_es_pt",
3
  "architectures": [
4
  "T5ForConditionalGeneration"
5
  ],
 
1
  {
2
+ "_name_or_path": "t5-small",
3
  "architectures": [
4
  "T5ForConditionalGeneration"
5
  ],
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f878daf8d74eca3e410b8c45213378200375cc12295a3c6839083669380ae77
3
  size 242041896
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ea81fa77c07165703229f52950f0ab0f4eab20619bb67807874b148f425c26a
3
  size 242041896
runs/Dec17_13-15-55_9e3f0cc9b6b3/events.out.tfevents.1702818955.9e3f0cc9b6b3.175.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49856aa7a871afa2711a2a013892e7a6e5914f57fe4320d1add0045c55268849
3
+ size 7551
tokenizer.json CHANGED
@@ -1,6 +1,11 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 400,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
  "padding": null,
10
  "added_tokens": [
11
  {
tokenizer_config.json CHANGED
@@ -930,12 +930,8 @@
930
  "clean_up_tokenization_spaces": true,
931
  "eos_token": "</s>",
932
  "extra_ids": 100,
933
- "max_length": 400,
934
  "model_max_length": 512,
935
  "pad_token": "<pad>",
936
- "stride": 0,
937
  "tokenizer_class": "T5Tokenizer",
938
- "truncation_side": "right",
939
- "truncation_strategy": "longest_first",
940
  "unk_token": "<unk>"
941
  }
 
930
  "clean_up_tokenization_spaces": true,
931
  "eos_token": "</s>",
932
  "extra_ids": 100,
 
933
  "model_max_length": 512,
934
  "pad_token": "<pad>",
 
935
  "tokenizer_class": "T5Tokenizer",
 
 
936
  "unk_token": "<unk>"
937
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe24fec043cde9c4ec339566c8c4002d748fc59fe259c6d3dcd8491781147203
3
  size 4856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c79aa1e0f618ef1bd8c80b60267698992bb097c30a070d16cacdbb49290e65e
3
  size 4856