End of training
Browse files- README.md +15 -108
- config.json +1 -1
- model.safetensors +1 -1
- runs/Dec17_13-15-55_9e3f0cc9b6b3/events.out.tfevents.1702818955.9e3f0cc9b6b3.175.0 +3 -0
- tokenizer.json +6 -1
- tokenizer_config.json +0 -4
- training_args.bin +1 -1
README.md
CHANGED
@@ -1,4 +1,6 @@
|
|
1 |
---
|
|
|
|
|
2 |
tags:
|
3 |
- generated_from_trainer
|
4 |
datasets:
|
@@ -14,13 +16,13 @@ model-index:
|
|
14 |
dataset:
|
15 |
name: opus_books
|
16 |
type: opus_books
|
17 |
-
config:
|
18 |
split: train
|
19 |
-
args:
|
20 |
metrics:
|
21 |
- name: Bleu
|
22 |
type: bleu
|
23 |
-
value:
|
24 |
---
|
25 |
|
26 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
@@ -28,11 +30,11 @@ should probably proofread and complete it, then remove this comment. -->
|
|
28 |
|
29 |
# opus_books_es_pt
|
30 |
|
31 |
-
This model
|
32 |
It achieves the following results on the evaluation set:
|
33 |
-
- Loss:
|
34 |
-
- Bleu:
|
35 |
-
- Gen Len:
|
36 |
|
37 |
## Model description
|
38 |
|
@@ -57,113 +59,18 @@ The following hyperparameters were used during training:
|
|
57 |
- seed: 42
|
58 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
59 |
- lr_scheduler_type: linear
|
60 |
-
- num_epochs:
|
61 |
- mixed_precision_training: Native AMP
|
62 |
|
63 |
### Training results
|
64 |
|
65 |
| Training Loss | Epoch | Step | Validation Loss | Bleu | Gen Len |
|
66 |
|:-------------:|:-----:|:----:|:---------------:|:------:|:-------:|
|
67 |
-
| No log | 1.0 |
|
68 |
-
| No log | 2.0 |
|
69 |
-
| No log | 3.0 |
|
70 |
-
| No log | 4.0 |
|
71 |
-
| No log | 5.0 |
|
72 |
-
| No log | 6.0 | 318 | 2.7803 | 1.4315 | 18.3474 |
|
73 |
-
| No log | 7.0 | 371 | 2.7683 | 1.4768 | 18.3286 |
|
74 |
-
| No log | 8.0 | 424 | 2.7552 | 1.5171 | 18.3192 |
|
75 |
-
| No log | 9.0 | 477 | 2.7394 | 1.488 | 18.3474 |
|
76 |
-
| 3.0631 | 10.0 | 530 | 2.7270 | 1.5307 | 18.385 |
|
77 |
-
| 3.0631 | 11.0 | 583 | 2.7156 | 1.505 | 18.3005 |
|
78 |
-
| 3.0631 | 12.0 | 636 | 2.7000 | 1.3708 | 18.3146 |
|
79 |
-
| 3.0631 | 13.0 | 689 | 2.6914 | 1.3796 | 18.3192 |
|
80 |
-
| 3.0631 | 14.0 | 742 | 2.6818 | 1.4616 | 18.3005 |
|
81 |
-
| 3.0631 | 15.0 | 795 | 2.6728 | 1.4487 | 18.3005 |
|
82 |
-
| 3.0631 | 16.0 | 848 | 2.6596 | 1.3979 | 18.2911 |
|
83 |
-
| 3.0631 | 17.0 | 901 | 2.6506 | 1.4544 | 18.3099 |
|
84 |
-
| 3.0631 | 18.0 | 954 | 2.6381 | 1.3779 | 18.3239 |
|
85 |
-
| 2.9232 | 19.0 | 1007 | 2.6313 | 1.4275 | 18.3052 |
|
86 |
-
| 2.9232 | 20.0 | 1060 | 2.6223 | 1.4489 | 18.3521 |
|
87 |
-
| 2.9232 | 21.0 | 1113 | 2.6139 | 1.4473 | 18.3803 |
|
88 |
-
| 2.9232 | 22.0 | 1166 | 2.6058 | 1.4407 | 18.3333 |
|
89 |
-
| 2.9232 | 23.0 | 1219 | 2.5985 | 1.4594 | 18.3192 |
|
90 |
-
| 2.9232 | 24.0 | 1272 | 2.5899 | 1.4473 | 18.2911 |
|
91 |
-
| 2.9232 | 25.0 | 1325 | 2.5832 | 1.4717 | 18.3521 |
|
92 |
-
| 2.9232 | 26.0 | 1378 | 2.5752 | 1.4282 | 18.3333 |
|
93 |
-
| 2.9232 | 27.0 | 1431 | 2.5699 | 1.3598 | 18.3239 |
|
94 |
-
| 2.9232 | 28.0 | 1484 | 2.5628 | 1.409 | 18.3286 |
|
95 |
-
| 2.807 | 29.0 | 1537 | 2.5577 | 1.3461 | 18.3568 |
|
96 |
-
| 2.807 | 30.0 | 1590 | 2.5524 | 1.425 | 18.3803 |
|
97 |
-
| 2.807 | 31.0 | 1643 | 2.5449 | 1.3638 | 18.3615 |
|
98 |
-
| 2.807 | 32.0 | 1696 | 2.5413 | 1.3604 | 18.3897 |
|
99 |
-
| 2.807 | 33.0 | 1749 | 2.5380 | 1.5423 | 18.3991 |
|
100 |
-
| 2.807 | 34.0 | 1802 | 2.5335 | 1.5392 | 18.3944 |
|
101 |
-
| 2.807 | 35.0 | 1855 | 2.5266 | 1.4923 | 18.3474 |
|
102 |
-
| 2.807 | 36.0 | 1908 | 2.5210 | 1.445 | 18.3192 |
|
103 |
-
| 2.807 | 37.0 | 1961 | 2.5151 | 1.453 | 18.3521 |
|
104 |
-
| 2.7147 | 38.0 | 2014 | 2.5113 | 1.4277 | 18.3286 |
|
105 |
-
| 2.7147 | 39.0 | 2067 | 2.5093 | 1.4015 | 18.3568 |
|
106 |
-
| 2.7147 | 40.0 | 2120 | 2.5033 | 1.4314 | 18.3615 |
|
107 |
-
| 2.7147 | 41.0 | 2173 | 2.4992 | 1.3861 | 18.3803 |
|
108 |
-
| 2.7147 | 42.0 | 2226 | 2.4961 | 1.4661 | 18.385 |
|
109 |
-
| 2.7147 | 43.0 | 2279 | 2.4933 | 1.4569 | 18.3803 |
|
110 |
-
| 2.7147 | 44.0 | 2332 | 2.4887 | 1.5818 | 18.3803 |
|
111 |
-
| 2.7147 | 45.0 | 2385 | 2.4863 | 1.5672 | 18.3803 |
|
112 |
-
| 2.7147 | 46.0 | 2438 | 2.4807 | 1.5475 | 18.3568 |
|
113 |
-
| 2.7147 | 47.0 | 2491 | 2.4790 | 1.4686 | 18.3568 |
|
114 |
-
| 2.6478 | 48.0 | 2544 | 2.4742 | 1.5072 | 18.3615 |
|
115 |
-
| 2.6478 | 49.0 | 2597 | 2.4720 | 1.6371 | 18.3897 |
|
116 |
-
| 2.6478 | 50.0 | 2650 | 2.4690 | 1.5358 | 18.3239 |
|
117 |
-
| 2.6478 | 51.0 | 2703 | 2.4663 | 1.5322 | 18.3239 |
|
118 |
-
| 2.6478 | 52.0 | 2756 | 2.4630 | 1.5193 | 18.3427 |
|
119 |
-
| 2.6478 | 53.0 | 2809 | 2.4590 | 1.5162 | 18.3333 |
|
120 |
-
| 2.6478 | 54.0 | 2862 | 2.4565 | 1.5365 | 18.3239 |
|
121 |
-
| 2.6478 | 55.0 | 2915 | 2.4535 | 1.5086 | 18.3709 |
|
122 |
-
| 2.6478 | 56.0 | 2968 | 2.4514 | 1.5211 | 18.3521 |
|
123 |
-
| 2.5967 | 57.0 | 3021 | 2.4499 | 1.5442 | 18.3709 |
|
124 |
-
| 2.5967 | 58.0 | 3074 | 2.4483 | 1.5441 | 18.3709 |
|
125 |
-
| 2.5967 | 59.0 | 3127 | 2.4456 | 1.5288 | 18.3709 |
|
126 |
-
| 2.5967 | 60.0 | 3180 | 2.4419 | 1.4669 | 18.3897 |
|
127 |
-
| 2.5967 | 61.0 | 3233 | 2.4409 | 1.4707 | 18.3756 |
|
128 |
-
| 2.5967 | 62.0 | 3286 | 2.4394 | 1.5037 | 18.385 |
|
129 |
-
| 2.5967 | 63.0 | 3339 | 2.4371 | 1.5251 | 18.3709 |
|
130 |
-
| 2.5967 | 64.0 | 3392 | 2.4334 | 1.4897 | 18.3991 |
|
131 |
-
| 2.5967 | 65.0 | 3445 | 2.4326 | 1.5373 | 18.385 |
|
132 |
-
| 2.5967 | 66.0 | 3498 | 2.4326 | 1.5174 | 18.3944 |
|
133 |
-
| 2.5514 | 67.0 | 3551 | 2.4292 | 1.5326 | 18.3803 |
|
134 |
-
| 2.5514 | 68.0 | 3604 | 2.4291 | 1.5224 | 18.3709 |
|
135 |
-
| 2.5514 | 69.0 | 3657 | 2.4264 | 1.4945 | 18.3709 |
|
136 |
-
| 2.5514 | 70.0 | 3710 | 2.4238 | 1.5155 | 18.385 |
|
137 |
-
| 2.5514 | 71.0 | 3763 | 2.4220 | 1.556 | 18.3803 |
|
138 |
-
| 2.5514 | 72.0 | 3816 | 2.4214 | 1.5782 | 18.385 |
|
139 |
-
| 2.5514 | 73.0 | 3869 | 2.4197 | 1.6084 | 18.3709 |
|
140 |
-
| 2.5514 | 74.0 | 3922 | 2.4184 | 1.5642 | 18.3709 |
|
141 |
-
| 2.5514 | 75.0 | 3975 | 2.4185 | 1.6182 | 18.3897 |
|
142 |
-
| 2.5176 | 76.0 | 4028 | 2.4169 | 1.5632 | 18.3756 |
|
143 |
-
| 2.5176 | 77.0 | 4081 | 2.4139 | 1.5853 | 18.385 |
|
144 |
-
| 2.5176 | 78.0 | 4134 | 2.4136 | 1.5852 | 18.3897 |
|
145 |
-
| 2.5176 | 79.0 | 4187 | 2.4128 | 1.5608 | 18.3897 |
|
146 |
-
| 2.5176 | 80.0 | 4240 | 2.4123 | 1.5707 | 18.3897 |
|
147 |
-
| 2.5176 | 81.0 | 4293 | 2.4109 | 1.5622 | 18.3944 |
|
148 |
-
| 2.5176 | 82.0 | 4346 | 2.4104 | 1.5608 | 18.3803 |
|
149 |
-
| 2.5176 | 83.0 | 4399 | 2.4101 | 1.561 | 18.3803 |
|
150 |
-
| 2.5176 | 84.0 | 4452 | 2.4097 | 1.56 | 18.3944 |
|
151 |
-
| 2.497 | 85.0 | 4505 | 2.4096 | 1.5644 | 18.3944 |
|
152 |
-
| 2.497 | 86.0 | 4558 | 2.4075 | 1.5636 | 18.4038 |
|
153 |
-
| 2.497 | 87.0 | 4611 | 2.4073 | 1.5779 | 18.3944 |
|
154 |
-
| 2.497 | 88.0 | 4664 | 2.4069 | 1.5611 | 18.3944 |
|
155 |
-
| 2.497 | 89.0 | 4717 | 2.4068 | 1.5827 | 18.3944 |
|
156 |
-
| 2.497 | 90.0 | 4770 | 2.4063 | 1.558 | 18.3944 |
|
157 |
-
| 2.497 | 91.0 | 4823 | 2.4057 | 1.533 | 18.3944 |
|
158 |
-
| 2.497 | 92.0 | 4876 | 2.4050 | 1.5271 | 18.3944 |
|
159 |
-
| 2.497 | 93.0 | 4929 | 2.4048 | 1.5655 | 18.4038 |
|
160 |
-
| 2.497 | 94.0 | 4982 | 2.4049 | 1.5351 | 18.3803 |
|
161 |
-
| 2.4847 | 95.0 | 5035 | 2.4045 | 1.5411 | 18.3803 |
|
162 |
-
| 2.4847 | 96.0 | 5088 | 2.4046 | 1.5468 | 18.3803 |
|
163 |
-
| 2.4847 | 97.0 | 5141 | 2.4046 | 1.5474 | 18.3803 |
|
164 |
-
| 2.4847 | 98.0 | 5194 | 2.4045 | 1.5468 | 18.3803 |
|
165 |
-
| 2.4847 | 99.0 | 5247 | 2.4044 | 1.5468 | 18.3803 |
|
166 |
-
| 2.4847 | 100.0 | 5300 | 2.4043 | 1.5414 | 18.3803 |
|
167 |
|
168 |
|
169 |
### Framework versions
|
|
|
1 |
---
|
2 |
+
license: apache-2.0
|
3 |
+
base_model: t5-small
|
4 |
tags:
|
5 |
- generated_from_trainer
|
6 |
datasets:
|
|
|
16 |
dataset:
|
17 |
name: opus_books
|
18 |
type: opus_books
|
19 |
+
config: en-pt
|
20 |
split: train
|
21 |
+
args: en-pt
|
22 |
metrics:
|
23 |
- name: Bleu
|
24 |
type: bleu
|
25 |
+
value: 0.3989
|
26 |
---
|
27 |
|
28 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
|
|
30 |
|
31 |
# opus_books_es_pt
|
32 |
|
33 |
+
This model is a fine-tuned version of [t5-small](https://huggingface.co/t5-small) on the opus_books dataset.
|
34 |
It achieves the following results on the evaluation set:
|
35 |
+
- Loss: 3.3303
|
36 |
+
- Bleu: 0.3989
|
37 |
+
- Gen Len: 17.5302
|
38 |
|
39 |
## Model description
|
40 |
|
|
|
59 |
- seed: 42
|
60 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
61 |
- lr_scheduler_type: linear
|
62 |
+
- num_epochs: 5
|
63 |
- mixed_precision_training: Native AMP
|
64 |
|
65 |
### Training results
|
66 |
|
67 |
| Training Loss | Epoch | Step | Validation Loss | Bleu | Gen Len |
|
68 |
|:-------------:|:-----:|:----:|:---------------:|:------:|:-------:|
|
69 |
+
| No log | 1.0 | 71 | 3.7759 | 0.5559 | 16.9715 |
|
70 |
+
| No log | 2.0 | 142 | 3.5343 | 0.517 | 17.2776 |
|
71 |
+
| No log | 3.0 | 213 | 3.4102 | 0.4355 | 17.4448 |
|
72 |
+
| No log | 4.0 | 284 | 3.3491 | 0.4057 | 17.516 |
|
73 |
+
| No log | 5.0 | 355 | 3.3303 | 0.3989 | 17.5302 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
|
75 |
|
76 |
### Framework versions
|
config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "
|
3 |
"architectures": [
|
4 |
"T5ForConditionalGeneration"
|
5 |
],
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "t5-small",
|
3 |
"architectures": [
|
4 |
"T5ForConditionalGeneration"
|
5 |
],
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 242041896
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ea81fa77c07165703229f52950f0ab0f4eab20619bb67807874b148f425c26a
|
3 |
size 242041896
|
runs/Dec17_13-15-55_9e3f0cc9b6b3/events.out.tfevents.1702818955.9e3f0cc9b6b3.175.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:49856aa7a871afa2711a2a013892e7a6e5914f57fe4320d1add0045c55268849
|
3 |
+
size 7551
|
tokenizer.json
CHANGED
@@ -1,6 +1,11 @@
|
|
1 |
{
|
2 |
"version": "1.0",
|
3 |
-
"truncation":
|
|
|
|
|
|
|
|
|
|
|
4 |
"padding": null,
|
5 |
"added_tokens": [
|
6 |
{
|
|
|
1 |
{
|
2 |
"version": "1.0",
|
3 |
+
"truncation": {
|
4 |
+
"direction": "Right",
|
5 |
+
"max_length": 400,
|
6 |
+
"strategy": "LongestFirst",
|
7 |
+
"stride": 0
|
8 |
+
},
|
9 |
"padding": null,
|
10 |
"added_tokens": [
|
11 |
{
|
tokenizer_config.json
CHANGED
@@ -930,12 +930,8 @@
|
|
930 |
"clean_up_tokenization_spaces": true,
|
931 |
"eos_token": "</s>",
|
932 |
"extra_ids": 100,
|
933 |
-
"max_length": 400,
|
934 |
"model_max_length": 512,
|
935 |
"pad_token": "<pad>",
|
936 |
-
"stride": 0,
|
937 |
"tokenizer_class": "T5Tokenizer",
|
938 |
-
"truncation_side": "right",
|
939 |
-
"truncation_strategy": "longest_first",
|
940 |
"unk_token": "<unk>"
|
941 |
}
|
|
|
930 |
"clean_up_tokenization_spaces": true,
|
931 |
"eos_token": "</s>",
|
932 |
"extra_ids": 100,
|
|
|
933 |
"model_max_length": 512,
|
934 |
"pad_token": "<pad>",
|
|
|
935 |
"tokenizer_class": "T5Tokenizer",
|
|
|
|
|
936 |
"unk_token": "<unk>"
|
937 |
}
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4856
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0c79aa1e0f618ef1bd8c80b60267698992bb097c30a070d16cacdbb49290e65e
|
3 |
size 4856
|