End of training
Browse files
- README.md +29 -7
- config.json +1 -1
- model.safetensors +1 -1
- runs/Jul24_14-41-57_f531c75ea080/events.out.tfevents.1721832321.f531c75ea080.1181.0 +3 -0
- tokenizer.json +2 -2
- tokenizer_config.json +2 -4
- training_args.bin +1 -1
README.md
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
---
|
2 |
-
base_model: ahmeddbahaa/mT5_multilingual_XLSum-finetuned-ar
|
3 |
tags:
|
4 |
- generated_from_trainer
|
5 |
model-index:
|
@@ -12,7 +11,14 @@ should probably proofread and complete it, then remove this comment. -->
|
|
12 |
|
13 |
# results_mt5_xl-sum
|
14 |
|
15 |
-
This model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
## Model description
|
18 |
|
@@ -32,16 +38,32 @@ More information needed
|
|
32 |
|
33 |
The following hyperparameters were used during training:
|
34 |
- learning_rate: 0.0005
|
35 |
-
- train_batch_size:
|
36 |
-
- eval_batch_size:
|
37 |
- seed: 42
|
38 |
-
- gradient_accumulation_steps:
|
39 |
-
- total_train_batch_size:
|
40 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
41 |
- lr_scheduler_type: linear
|
42 |
- lr_scheduler_warmup_steps: 250
|
43 |
- num_epochs: 10
|
44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
|
46 |
### Framework versions
|
47 |
|
|
|
1 |
---
|
|
|
2 |
tags:
|
3 |
- generated_from_trainer
|
4 |
model-index:
|
|
|
11 |
|
12 |
# results_mt5_xl-sum
|
13 |
|
14 |
+
This model was trained from scratch on an unknown dataset.
|
15 |
+
It achieves the following results on the evaluation set:
|
16 |
+
- Loss: 0.8431
|
17 |
+
- Rouge1 Fmeasure: 0.6139
|
18 |
+
- Rouge2 Fmeasure: 0.1189
|
19 |
+
- Rougel Fmeasure: 0.1997
|
20 |
+
- Meteor: 0.3315
|
21 |
+
- Bertscore F1: 0.8418
|
22 |
|
23 |
## Model description
|
24 |
|
|
|
38 |
|
39 |
The following hyperparameters were used during training:
|
40 |
- learning_rate: 0.0005
|
41 |
+
- train_batch_size: 4
|
42 |
+
- eval_batch_size: 4
|
43 |
- seed: 42
|
44 |
+
- gradient_accumulation_steps: 16
|
45 |
+
- total_train_batch_size: 64
|
46 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
47 |
- lr_scheduler_type: linear
|
48 |
- lr_scheduler_warmup_steps: 250
|
49 |
- num_epochs: 10
|
50 |
+
|
51 |
+
### Training results
|
52 |
+
|
53 |
+
| Training Loss | Epoch | Step | Validation Loss | Rouge1 Fmeasure | Rouge2 Fmeasure | Rougel Fmeasure | Meteor | Bertscore F1 |
|
54 |
+
|:-------------:|:------:|:----:|:---------------:|:---------------:|:---------------:|:---------------:|:------:|:------------:|
|
55 |
+
| 2.6516 | 0.8529 | 500 | 0.9710 | 0.2668 | 0.0484 | 0.1537 | 0.2745 | 0.8284 |
|
56 |
+
| 1.0475 | 1.7058 | 1000 | 0.8792 | 0.4289 | 0.0884 | 0.1737 | 0.2949 | 0.8278 |
|
57 |
+
| 0.9413 | 2.5586 | 1500 | 0.8457 | 0.4960 | 0.0865 | 0.1898 | 0.3141 | 0.8339 |
|
58 |
+
| 0.8711 | 3.4115 | 2000 | 0.8398 | 0.5400 | 0.1121 | 0.1941 | 0.3110 | 0.8397 |
|
59 |
+
| 0.8235 | 4.2644 | 2500 | 0.8345 | 0.5587 | 0.1022 | 0.2041 | 0.3160 | 0.8388 |
|
60 |
+
| 0.7797 | 5.1173 | 3000 | 0.8368 | 0.5735 | 0.1036 | 0.2044 | 0.3157 | 0.8344 |
|
61 |
+
| 0.7401 | 5.9701 | 3500 | 0.8217 | 0.5507 | 0.1133 | 0.1936 | 0.3186 | 0.8366 |
|
62 |
+
| 0.7022 | 6.8230 | 4000 | 0.8361 | 0.5808 | 0.1118 | 0.2008 | 0.3227 | 0.8406 |
|
63 |
+
| 0.6796 | 7.6759 | 4500 | 0.8344 | 0.6173 | 0.1277 | 0.1986 | 0.3260 | 0.8407 |
|
64 |
+
| 0.6523 | 8.5288 | 5000 | 0.8436 | 0.6232 | 0.1186 | 0.2024 | 0.3317 | 0.8398 |
|
65 |
+
| 0.6385 | 9.3817 | 5500 | 0.8431 | 0.6139 | 0.1189 | 0.1997 | 0.3315 | 0.8418 |
|
66 |
+
|
67 |
|
68 |
### Framework versions
|
69 |
|
config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "
|
3 |
"architectures": [
|
4 |
"MT5ForConditionalGeneration"
|
5 |
],
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "csebuetnlp/mT5_multilingual_XLSum",
|
3 |
"architectures": [
|
4 |
"MT5ForConditionalGeneration"
|
5 |
],
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2329601904
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c8a8586fae75d1a71505ff6a91cdbfe2f3835003f1230e9f90998a83deac3d4
|
3 |
size 2329601904
|
runs/Jul24_14-41-57_f531c75ea080/events.out.tfevents.1721832321.f531c75ea080.1181.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a700371ce49ecc5b891da5dff4ffd0d5eac40f720a91aa5e8981b5c2b50397ee
|
3 |
+
size 15493
|
tokenizer.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c75db174930c130a1b55e88f4ee57d5f706099adf12eb9c22bc9ef7c599da913
|
3 |
+
size 16330805
|
tokenizer_config.json
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
{
|
|
|
2 |
"added_tokens_decoder": {
|
3 |
"0": {
|
4 |
"content": "<pad>",
|
@@ -29,13 +30,10 @@
|
|
29 |
"clean_up_tokenization_spaces": true,
|
30 |
"eos_token": "</s>",
|
31 |
"extra_ids": 0,
|
32 |
-
"
|
33 |
"model_max_length": 1000000000000000019884624838656,
|
34 |
"pad_token": "<pad>",
|
35 |
"sp_model_kwargs": {},
|
36 |
-
"stride": 0,
|
37 |
"tokenizer_class": "T5Tokenizer",
|
38 |
-
"truncation_side": "right",
|
39 |
-
"truncation_strategy": "longest_first",
|
40 |
"unk_token": "<unk>"
|
41 |
}
|
|
|
1 |
{
|
2 |
+
"add_prefix_space": true,
|
3 |
"added_tokens_decoder": {
|
4 |
"0": {
|
5 |
"content": "<pad>",
|
|
|
30 |
"clean_up_tokenization_spaces": true,
|
31 |
"eos_token": "</s>",
|
32 |
"extra_ids": 0,
|
33 |
+
"legacy": true,
|
34 |
"model_max_length": 1000000000000000019884624838656,
|
35 |
"pad_token": "<pad>",
|
36 |
"sp_model_kwargs": {},
|
|
|
37 |
"tokenizer_class": "T5Tokenizer",
|
|
|
|
|
38 |
"unk_token": "<unk>"
|
39 |
}
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6712
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d415f66977db62e6e69262ed0fcbc54908e68fff0d6f1b3f0096c64c2ca3ef6d
|
3 |
size 6712
|