Yahiael1 committed on
Commit
f6ed394
1 Parent(s): 058ec30

Training in progress, epoch 1

.gitignore ADDED
@@ -0,0 +1 @@
+ checkpoint-*/
config.json ADDED
@@ -0,0 +1,75 @@
+ {
+   "_name_or_path": "facebook/bart-base",
+   "activation_dropout": 0.1,
+   "activation_function": "gelu",
+   "add_bias_logits": false,
+   "add_final_layer_norm": false,
+   "architectures": [
+     "BartForConditionalGeneration"
+   ],
+   "attention_dropout": 0.1,
+   "bos_token_id": 0,
+   "classif_dropout": 0.1,
+   "classifier_dropout": 0.0,
+   "d_model": 768,
+   "decoder_attention_heads": 12,
+   "decoder_ffn_dim": 3072,
+   "decoder_layerdrop": 0.0,
+   "decoder_layers": 6,
+   "decoder_start_token_id": 2,
+   "dropout": 0.1,
+   "early_stopping": true,
+   "encoder_attention_heads": 12,
+   "encoder_ffn_dim": 3072,
+   "encoder_layerdrop": 0.0,
+   "encoder_layers": 6,
+   "eos_token_id": 2,
+   "forced_bos_token_id": 0,
+   "forced_eos_token_id": 2,
+   "gradient_checkpointing": false,
+   "id2label": {
+     "0": "LABEL_0",
+     "1": "LABEL_1",
+     "2": "LABEL_2"
+   },
+   "init_std": 0.02,
+   "is_encoder_decoder": true,
+   "label2id": {
+     "LABEL_0": 0,
+     "LABEL_1": 1,
+     "LABEL_2": 2
+   },
+   "max_position_embeddings": 1024,
+   "model_type": "bart",
+   "no_repeat_ngram_size": 3,
+   "normalize_before": false,
+   "normalize_embedding": true,
+   "num_beams": 4,
+   "num_hidden_layers": 6,
+   "pad_token_id": 1,
+   "scale_embedding": false,
+   "task_specific_params": {
+     "summarization": {
+       "length_penalty": 1.0,
+       "max_length": 128,
+       "min_length": 12,
+       "num_beams": 4
+     },
+     "summarization_cnn": {
+       "length_penalty": 2.0,
+       "max_length": 142,
+       "min_length": 56,
+       "num_beams": 4
+     },
+     "summarization_xsum": {
+       "length_penalty": 1.0,
+       "max_length": 62,
+       "min_length": 11,
+       "num_beams": 6
+     }
+   },
+   "torch_dtype": "float32",
+   "transformers_version": "4.28.1",
+   "use_cache": true,
+   "vocab_size": 50265
+ }
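
The config above describes a standard BART-base encoder-decoder (6 encoder and 6 decoder layers, d_model 768, vocab_size 50265) fine-tuned for conditional generation. As a usage sketch only: the checkpoint can be loaded with the transformers library once the files are available locally or on the Hub; the repo id below is a placeholder, not the actual repository name.

```python
# Sketch: load the fine-tuned BART checkpoint and summarize one document.
# "Yahiael1/bart-base-summarizer" is a placeholder repo id; substitute the
# real repo id or a local directory containing config.json / pytorch_model.bin.
from transformers import BartForConditionalGeneration, BartTokenizer

model_id = "Yahiael1/bart-base-summarizer"  # placeholder
tokenizer = BartTokenizer.from_pretrained(model_id)
model = BartForConditionalGeneration.from_pretrained(model_id)

text = "Long input document to be summarized ..."
inputs = tokenizer(text, max_length=1024, truncation=True, return_tensors="pt")
# Defaults from task_specific_params["summarization"]: 4 beams, 12-128 tokens.
ids = model.generate(**inputs, num_beams=4, min_length=12, max_length=128)
print(tokenizer.decode(ids[0], skip_special_tokens=True))
```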
last-checkpoint/config.json ADDED
@@ -0,0 +1,75 @@
+ {
+   "_name_or_path": "facebook/bart-base",
+   "activation_dropout": 0.1,
+   "activation_function": "gelu",
+   "add_bias_logits": false,
+   "add_final_layer_norm": false,
+   "architectures": [
+     "BartForConditionalGeneration"
+   ],
+   "attention_dropout": 0.1,
+   "bos_token_id": 0,
+   "classif_dropout": 0.1,
+   "classifier_dropout": 0.0,
+   "d_model": 768,
+   "decoder_attention_heads": 12,
+   "decoder_ffn_dim": 3072,
+   "decoder_layerdrop": 0.0,
+   "decoder_layers": 6,
+   "decoder_start_token_id": 2,
+   "dropout": 0.1,
+   "early_stopping": true,
+   "encoder_attention_heads": 12,
+   "encoder_ffn_dim": 3072,
+   "encoder_layerdrop": 0.0,
+   "encoder_layers": 6,
+   "eos_token_id": 2,
+   "forced_bos_token_id": 0,
+   "forced_eos_token_id": 2,
+   "gradient_checkpointing": false,
+   "id2label": {
+     "0": "LABEL_0",
+     "1": "LABEL_1",
+     "2": "LABEL_2"
+   },
+   "init_std": 0.02,
+   "is_encoder_decoder": true,
+   "label2id": {
+     "LABEL_0": 0,
+     "LABEL_1": 1,
+     "LABEL_2": 2
+   },
+   "max_position_embeddings": 1024,
+   "model_type": "bart",
+   "no_repeat_ngram_size": 3,
+   "normalize_before": false,
+   "normalize_embedding": true,
+   "num_beams": 4,
+   "num_hidden_layers": 6,
+   "pad_token_id": 1,
+   "scale_embedding": false,
+   "task_specific_params": {
+     "summarization": {
+       "length_penalty": 1.0,
+       "max_length": 128,
+       "min_length": 12,
+       "num_beams": 4
+     },
+     "summarization_cnn": {
+       "length_penalty": 2.0,
+       "max_length": 142,
+       "min_length": 56,
+       "num_beams": 4
+     },
+     "summarization_xsum": {
+       "length_penalty": 1.0,
+       "max_length": 62,
+       "min_length": 11,
+       "num_beams": 6
+     }
+   },
+   "torch_dtype": "float32",
+   "transformers_version": "4.28.1",
+   "use_cache": true,
+   "vocab_size": 50265
+ }
last-checkpoint/generation_config.json ADDED
@@ -0,0 +1,12 @@
+ {
+   "bos_token_id": 0,
+   "decoder_start_token_id": 2,
+   "early_stopping": true,
+   "eos_token_id": 2,
+   "forced_bos_token_id": 0,
+   "forced_eos_token_id": 2,
+   "no_repeat_ngram_size": 3,
+   "num_beams": 4,
+   "pad_token_id": 1,
+   "transformers_version": "4.28.1"
+ }
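
These generation defaults (4-beam search, `no_repeat_ngram_size` of 3, forced BOS/EOS token ids) are applied automatically by `model.generate()` when the checkpoint is loaded. They can also be inspected on their own; a minimal sketch, assuming the checkpoint directory is available locally as `last-checkpoint/`:

```python
# Sketch: read the saved generation defaults from the checkpoint directory.
from transformers import GenerationConfig

gen_config = GenerationConfig.from_pretrained("last-checkpoint")
print(gen_config.num_beams)             # 4
print(gen_config.no_repeat_ngram_size)  # 3
print(gen_config.forced_bos_token_id)   # 0
```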
last-checkpoint/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
last-checkpoint/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:62af640d9257316aa7e0df1d54cc2ad0b08934ef57ffe1fdc3abf2b823297d68
+ size 1115579397
last-checkpoint/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a1c5289a63bf439de3568339bd081b734c56752900b28f5111ae7e395aabb087
+ size 557971229
last-checkpoint/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ac8fa45c1c9c80e32b4ab950e0fa6abb4c55eda437b8c409872ce7874a5a6455
+ size 15597
last-checkpoint/scaler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:786ba050f033778cbd2d0b326163925a6e269b1b789ffb02f138cfca2421065a
+ size 557
last-checkpoint/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e395ed78ce282b12f543cf478f7b69637fa7676c695ce68991b12bea8178ef1f
+ size 627
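
The binary files above (optimizer.pt, pytorch_model.bin, rng_state.pth, scaler.pt, scheduler.pt) are committed as Git LFS pointers: only the sha256 digest and byte size are stored in git, while the payload lives in LFS storage. One way to fetch an individual payload programmatically is via huggingface_hub; the repo id below is again a placeholder.

```python
# Sketch: download the real binary behind one of the LFS pointers above.
from huggingface_hub import hf_hub_download

path = hf_hub_download(
    repo_id="Yahiael1/bart-base-summarizer",       # placeholder repo id
    filename="last-checkpoint/pytorch_model.bin",  # 557,971,229 bytes per the pointer
    revision="f6ed394",                            # this commit
)
print(path)
```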
last-checkpoint/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
+ {
+   "bos_token": "<s>",
+   "cls_token": "<s>",
+   "eos_token": "</s>",
+   "mask_token": {
+     "content": "<mask>",
+     "lstrip": true,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "<pad>",
+   "sep_token": "</s>",
+   "unk_token": "<unk>"
+ }
last-checkpoint/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
last-checkpoint/tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
+ {
+   "add_prefix_space": false,
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "<s>",
+   "eos_token": "</s>",
+   "errors": "replace",
+   "mask_token": "<mask>",
+   "model_max_length": 1024,
+   "pad_token": "<pad>",
+   "sep_token": "</s>",
+   "tokenizer_class": "BartTokenizer",
+   "trim_offsets": true,
+   "unk_token": "<unk>"
+ }
last-checkpoint/trainer_state.json ADDED
@@ -0,0 +1,164 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 1.0,
+   "global_step": 8004,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.06,
+       "learning_rate": 0.00019938030984507747,
+       "loss": 2.3666,
+       "step": 500
+     },
+     {
+       "epoch": 0.12,
+       "learning_rate": 0.00019875562218890554,
+       "loss": 2.2401,
+       "step": 1000
+     },
+     {
+       "epoch": 0.19,
+       "learning_rate": 0.00019813093453273364,
+       "loss": 2.2594,
+       "step": 1500
+     },
+     {
+       "epoch": 0.25,
+       "learning_rate": 0.00019750624687656172,
+       "loss": 2.188,
+       "step": 2000
+     },
+     {
+       "epoch": 0.25,
+       "eval_gen_len": 19.7309,
+       "eval_loss": 1.732377529144287,
+       "eval_rouge1": 1.8382,
+       "eval_rouge2": 1.4953,
+       "eval_rougeL": 1.7944,
+       "eval_rougeLsum": 1.8021,
+       "eval_runtime": 5362.1484,
+       "eval_samples_per_second": 7.462,
+       "eval_steps_per_second": 0.373,
+       "step": 2000
+     },
+     {
+       "epoch": 0.31,
+       "learning_rate": 0.00019688155922038982,
+       "loss": 2.1778,
+       "step": 2500
+     },
+     {
+       "epoch": 0.37,
+       "learning_rate": 0.00019625812093953024,
+       "loss": 2.185,
+       "step": 3000
+     },
+     {
+       "epoch": 0.44,
+       "learning_rate": 0.00019563343328335832,
+       "loss": 2.1511,
+       "step": 3500
+     },
+     {
+       "epoch": 0.5,
+       "learning_rate": 0.00019500874562718642,
+       "loss": 2.1531,
+       "step": 4000
+     },
+     {
+       "epoch": 0.5,
+       "eval_gen_len": 19.7056,
+       "eval_loss": 1.7064495086669922,
+       "eval_rouge1": 1.6395,
+       "eval_rouge2": 1.3247,
+       "eval_rougeL": 1.6037,
+       "eval_rougeLsum": 1.6108,
+       "eval_runtime": 5328.3896,
+       "eval_samples_per_second": 7.51,
+       "eval_steps_per_second": 0.376,
+       "step": 4000
+     },
+     {
+       "epoch": 0.56,
+       "learning_rate": 0.00019438405797101452,
+       "loss": 2.192,
+       "step": 4500
+     },
+     {
+       "epoch": 0.62,
+       "learning_rate": 0.0001937593703148426,
+       "loss": 2.1542,
+       "step": 5000
+     },
+     {
+       "epoch": 0.69,
+       "learning_rate": 0.00019313468265867067,
+       "loss": 2.1115,
+       "step": 5500
+     },
+     {
+       "epoch": 0.75,
+       "learning_rate": 0.00019250999500249877,
+       "loss": 2.147,
+       "step": 6000
+     },
+     {
+       "epoch": 0.75,
+       "eval_gen_len": 19.4502,
+       "eval_loss": 1.7010632753372192,
+       "eval_rouge1": 1.3881,
+       "eval_rouge2": 1.1271,
+       "eval_rougeL": 1.3515,
+       "eval_rougeLsum": 1.3593,
+       "eval_runtime": 5340.8815,
+       "eval_samples_per_second": 7.492,
+       "eval_steps_per_second": 0.375,
+       "step": 6000
+     },
+     {
+       "epoch": 0.81,
+       "learning_rate": 0.00019188530734632684,
+       "loss": 2.1467,
+       "step": 6500
+     },
+     {
+       "epoch": 0.87,
+       "learning_rate": 0.00019126061969015494,
+       "loss": 2.1084,
+       "step": 7000
+     },
+     {
+       "epoch": 0.94,
+       "learning_rate": 0.000190635932033983,
+       "loss": 2.1552,
+       "step": 7500
+     },
+     {
+       "epoch": 1.0,
+       "learning_rate": 0.0001900112443778111,
+       "loss": 2.1152,
+       "step": 8000
+     },
+     {
+       "epoch": 1.0,
+       "eval_gen_len": 19.6288,
+       "eval_loss": 1.692185401916504,
+       "eval_rouge1": 1.4518,
+       "eval_rouge2": 1.1955,
+       "eval_rougeL": 1.4252,
+       "eval_rougeLsum": 1.4308,
+       "eval_runtime": 5347.9231,
+       "eval_samples_per_second": 7.482,
+       "eval_steps_per_second": 0.374,
+       "step": 8000
+     }
+   ],
+   "max_steps": 160080,
+   "num_train_epochs": 20,
+   "total_flos": 9.756207646488576e+16,
+   "trial_name": null,
+   "trial_params": null
+ }
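
The trainer state records one completed epoch: 8,004 of 160,080 optimizer steps (20 planned epochs), training loss falling from 2.37 to about 2.12, and an evaluation pass every 2,000 steps with eval loss improving from 1.73 to 1.69. A small, self-contained sketch for inspecting this progress from the checkpoint directory (resuming training itself would additionally need the original datasets and the settings in training_args.bin):

```python
# Sketch: summarize the training progress recorded in trainer_state.json.
import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(f"epoch {state['epoch']}, step {state['global_step']}/{state['max_steps']}")
for entry in state["log_history"]:
    if "eval_loss" in entry:  # the periodic evaluation entries
        print(entry["step"], entry["eval_loss"], entry["eval_rouge1"])
```

Passing `resume_from_checkpoint="last-checkpoint"` to `Trainer.train()` would continue from step 8,004 using these files.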
last-checkpoint/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7110166d952c969185f8550fd2df2892ce3dd876f742b4e7ff7738f01183fd80
+ size 3771
last-checkpoint/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a1c5289a63bf439de3568339bd081b734c56752900b28f5111ae7e395aabb087
+ size 557971229
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
+ {
+   "bos_token": "<s>",
+   "cls_token": "<s>",
+   "eos_token": "</s>",
+   "mask_token": {
+     "content": "<mask>",
+     "lstrip": true,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "<pad>",
+   "sep_token": "</s>",
+   "unk_token": "<unk>"
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
+ {
+   "add_prefix_space": false,
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "<s>",
+   "eos_token": "</s>",
+   "errors": "replace",
+   "mask_token": "<mask>",
+   "model_max_length": 1024,
+   "pad_token": "<pad>",
+   "sep_token": "</s>",
+   "tokenizer_class": "BartTokenizer",
+   "trim_offsets": true,
+   "unk_token": "<unk>"
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7110166d952c969185f8550fd2df2892ce3dd876f742b4e7ff7738f01183fd80
+ size 3771
vocab.json ADDED
The diff for this file is too large to render. See raw diff