Yahiael1 committed on
Commit
f6ed394
1 Parent(s): 058ec30

Training in progress, epoch 1

.gitignore ADDED
@@ -0,0 +1 @@
+ checkpoint-*/
config.json ADDED
@@ -0,0 +1,75 @@
+ {
+   "_name_or_path": "facebook/bart-base",
+   "activation_dropout": 0.1,
+   "activation_function": "gelu",
+   "add_bias_logits": false,
+   "add_final_layer_norm": false,
+   "architectures": [
+     "BartForConditionalGeneration"
+   ],
+   "attention_dropout": 0.1,
+   "bos_token_id": 0,
+   "classif_dropout": 0.1,
+   "classifier_dropout": 0.0,
+   "d_model": 768,
+   "decoder_attention_heads": 12,
+   "decoder_ffn_dim": 3072,
+   "decoder_layerdrop": 0.0,
+   "decoder_layers": 6,
+   "decoder_start_token_id": 2,
+   "dropout": 0.1,
+   "early_stopping": true,
+   "encoder_attention_heads": 12,
+   "encoder_ffn_dim": 3072,
+   "encoder_layerdrop": 0.0,
+   "encoder_layers": 6,
+   "eos_token_id": 2,
+   "forced_bos_token_id": 0,
+   "forced_eos_token_id": 2,
+   "gradient_checkpointing": false,
+   "id2label": {
+     "0": "LABEL_0",
+     "1": "LABEL_1",
+     "2": "LABEL_2"
+   },
+   "init_std": 0.02,
+   "is_encoder_decoder": true,
+   "label2id": {
+     "LABEL_0": 0,
+     "LABEL_1": 1,
+     "LABEL_2": 2
+   },
+   "max_position_embeddings": 1024,
+   "model_type": "bart",
+   "no_repeat_ngram_size": 3,
+   "normalize_before": false,
+   "normalize_embedding": true,
+   "num_beams": 4,
+   "num_hidden_layers": 6,
+   "pad_token_id": 1,
+   "scale_embedding": false,
+   "task_specific_params": {
+     "summarization": {
+       "length_penalty": 1.0,
+       "max_length": 128,
+       "min_length": 12,
+       "num_beams": 4
+     },
+     "summarization_cnn": {
+       "length_penalty": 2.0,
+       "max_length": 142,
+       "min_length": 56,
+       "num_beams": 4
+     },
+     "summarization_xsum": {
+       "length_penalty": 1.0,
+       "max_length": 62,
+       "min_length": 11,
+       "num_beams": 6
+     }
+   },
+   "torch_dtype": "float32",
+   "transformers_version": "4.28.1",
+   "use_cache": true,
+   "vocab_size": 50265
+ }
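
The config above describes a standard BART-base encoder-decoder (6 encoder and 6 decoder layers, d_model 768, vocab_size 50265) fine-tuned for conditional generation. As a usage sketch only: the checkpoint can be loaded with the transformers library once the files are available locally or on the Hub; the repo id below is a placeholder, not the actual repository name.

```python
# Sketch: load the fine-tuned BART checkpoint and summarize one document.
# "Yahiael1/bart-base-summarizer" is a placeholder repo id; substitute the
# real repo id or a local directory containing config.json / pytorch_model.bin.
from transformers import BartForConditionalGeneration, BartTokenizer

model_id = "Yahiael1/bart-base-summarizer"  # placeholder
tokenizer = BartTokenizer.from_pretrained(model_id)
model = BartForConditionalGeneration.from_pretrained(model_id)

text = "Long input document to be summarized ..."
inputs = tokenizer(text, max_length=1024, truncation=True, return_tensors="pt")
# Defaults from task_specific_params["summarization"]: 4 beams, 12-128 tokens.
ids = model.generate(**inputs, num_beams=4, min_length=12, max_length=128)
print(tokenizer.decode(ids[0], skip_special_tokens=True))
```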
last-checkpoint/config.json ADDED
@@ -0,0 +1,75 @@
+ {
+   "_name_or_path": "facebook/bart-base",
+   "activation_dropout": 0.1,
+   "activation_function": "gelu",
+   "add_bias_logits": false,
+   "add_final_layer_norm": false,
+   "architectures": [
+     "BartForConditionalGeneration"
+   ],
+   "attention_dropout": 0.1,
+   "bos_token_id": 0,
+   "classif_dropout": 0.1,
+   "classifier_dropout": 0.0,
+   "d_model": 768,
+   "decoder_attention_heads": 12,
+   "decoder_ffn_dim": 3072,
+   "decoder_layerdrop": 0.0,
+   "decoder_layers": 6,
+   "decoder_start_token_id": 2,
+   "dropout": 0.1,
+   "early_stopping": true,
+   "encoder_attention_heads": 12,
+   "encoder_ffn_dim": 3072,
+   "encoder_layerdrop": 0.0,
+   "encoder_layers": 6,
+   "eos_token_id": 2,
+   "forced_bos_token_id": 0,
+   "forced_eos_token_id": 2,
+   "gradient_checkpointing": false,
+   "id2label": {
+     "0": "LABEL_0",
+     "1": "LABEL_1",
+     "2": "LABEL_2"
+   },
+   "init_std": 0.02,
+   "is_encoder_decoder": true,
+   "label2id": {
+     "LABEL_0": 0,
+     "LABEL_1": 1,
+     "LABEL_2": 2
+   },
+   "max_position_embeddings": 1024,
+   "model_type": "bart",
+   "no_repeat_ngram_size": 3,
+   "normalize_before": false,
+   "normalize_embedding": true,
+   "num_beams": 4,
+   "num_hidden_layers": 6,
+   "pad_token_id": 1,
+   "scale_embedding": false,
+   "task_specific_params": {
+     "summarization": {
+       "length_penalty": 1.0,
+       "max_length": 128,
+       "min_length": 12,
+       "num_beams": 4
+     },
+     "summarization_cnn": {
+       "length_penalty": 2.0,
+       "max_length": 142,
+       "min_length": 56,
+       "num_beams": 4
+     },
+     "summarization_xsum": {
+       "length_penalty": 1.0,
+       "max_length": 62,
+       "min_length": 11,
+       "num_beams": 6
+     }
+   },
+   "torch_dtype": "float32",
+   "transformers_version": "4.28.1",
+   "use_cache": true,
+   "vocab_size": 50265
+ }
last-checkpoint/generation_config.json ADDED
@@ -0,0 +1,12 @@
+ {
+   "bos_token_id": 0,
+   "decoder_start_token_id": 2,
+   "early_stopping": true,
+   "eos_token_id": 2,
+   "forced_bos_token_id": 0,
+   "forced_eos_token_id": 2,
+   "no_repeat_ngram_size": 3,
+   "num_beams": 4,
+   "pad_token_id": 1,
+   "transformers_version": "4.28.1"
+ }
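
These generation defaults (4-beam search, `no_repeat_ngram_size` of 3, forced BOS/EOS token ids) are applied automatically by `model.generate()` when the checkpoint is loaded. They can also be inspected on their own; a minimal sketch, assuming the checkpoint directory is available locally as `last-checkpoint/`:

```python
# Sketch: read the saved generation defaults from the checkpoint directory.
from transformers import GenerationConfig

gen_config = GenerationConfig.from_pretrained("last-checkpoint")
print(gen_config.num_beams)             # 4
print(gen_config.no_repeat_ngram_size)  # 3
print(gen_config.forced_bos_token_id)   # 0
```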
last-checkpoint/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
last-checkpoint/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:62af640d9257316aa7e0df1d54cc2ad0b08934ef57ffe1fdc3abf2b823297d68
+ size 1115579397
last-checkpoint/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a1c5289a63bf439de3568339bd081b734c56752900b28f5111ae7e395aabb087
+ size 557971229
last-checkpoint/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ac8fa45c1c9c80e32b4ab950e0fa6abb4c55eda437b8c409872ce7874a5a6455
+ size 15597
last-checkpoint/scaler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:786ba050f033778cbd2d0b326163925a6e269b1b789ffb02f138cfca2421065a
+ size 557
last-checkpoint/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e395ed78ce282b12f543cf478f7b69637fa7676c695ce68991b12bea8178ef1f
+ size 627
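
The binary files above (optimizer.pt, pytorch_model.bin, rng_state.pth, scaler.pt, scheduler.pt) are committed as Git LFS pointers: only the sha256 digest and byte size are stored in git, while the payload lives in LFS storage. One way to fetch an individual payload programmatically is via huggingface_hub; the repo id below is again a placeholder.

```python
# Sketch: download the real binary behind one of the LFS pointers above.
from huggingface_hub import hf_hub_download

path = hf_hub_download(
    repo_id="Yahiael1/bart-base-summarizer",       # placeholder repo id
    filename="last-checkpoint/pytorch_model.bin",  # 557,971,229 bytes per the pointer
    revision="f6ed394",                            # this commit
)
print(path)
```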
last-checkpoint/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
+ {
+   "bos_token": "<s>",
+   "cls_token": "<s>",
+   "eos_token": "</s>",
+   "mask_token": {
+     "content": "<mask>",
+     "lstrip": true,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "<pad>",
+   "sep_token": "</s>",
+   "unk_token": "<unk>"
+ }
last-checkpoint/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
last-checkpoint/tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
+ {
+   "add_prefix_space": false,
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "<s>",
+   "eos_token": "</s>",
+   "errors": "replace",
+   "mask_token": "<mask>",
+   "model_max_length": 1024,
+   "pad_token": "<pad>",
+   "sep_token": "</s>",
+   "tokenizer_class": "BartTokenizer",
+   "trim_offsets": true,
+   "unk_token": "<unk>"
+ }
last-checkpoint/trainer_state.json ADDED
@@ -0,0 +1,164 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 1.0,
+   "global_step": 8004,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.06,
+       "learning_rate": 0.00019938030984507747,
+       "loss": 2.3666,
+       "step": 500
+     },
+     {
+       "epoch": 0.12,
+       "learning_rate": 0.00019875562218890554,
+       "loss": 2.2401,
+       "step": 1000
+     },
+     {
+       "epoch": 0.19,
+       "learning_rate": 0.00019813093453273364,
+       "loss": 2.2594,
+       "step": 1500
+     },
+     {
+       "epoch": 0.25,
+       "learning_rate": 0.00019750624687656172,
+       "loss": 2.188,
+       "step": 2000
+     },
+     {
+       "epoch": 0.25,
+       "eval_gen_len": 19.7309,
+       "eval_loss": 1.732377529144287,
+       "eval_rouge1": 1.8382,
+       "eval_rouge2": 1.4953,
+       "eval_rougeL": 1.7944,
+       "eval_rougeLsum": 1.8021,
+       "eval_runtime": 5362.1484,
+       "eval_samples_per_second": 7.462,
+       "eval_steps_per_second": 0.373,
+       "step": 2000
+     },
+     {
+       "epoch": 0.31,
+       "learning_rate": 0.00019688155922038982,
+       "loss": 2.1778,
+       "step": 2500
+     },
+     {
+       "epoch": 0.37,
+       "learning_rate": 0.00019625812093953024,
+       "loss": 2.185,
+       "step": 3000
+     },
+     {
+       "epoch": 0.44,
+       "learning_rate": 0.00019563343328335832,
+       "loss": 2.1511,
+       "step": 3500
+     },
+     {
+       "epoch": 0.5,
+       "learning_rate": 0.00019500874562718642,
+       "loss": 2.1531,
+       "step": 4000
+     },
+     {
+       "epoch": 0.5,
+       "eval_gen_len": 19.7056,
+       "eval_loss": 1.7064495086669922,
+       "eval_rouge1": 1.6395,
+       "eval_rouge2": 1.3247,
+       "eval_rougeL": 1.6037,
+       "eval_rougeLsum": 1.6108,
+       "eval_runtime": 5328.3896,
+       "eval_samples_per_second": 7.51,
+       "eval_steps_per_second": 0.376,
+       "step": 4000
+     },
+     {
+       "epoch": 0.56,
+       "learning_rate": 0.00019438405797101452,
+       "loss": 2.192,
+       "step": 4500
+     },
+     {
+       "epoch": 0.62,
+       "learning_rate": 0.0001937593703148426,
+       "loss": 2.1542,
+       "step": 5000
+     },
+     {
+       "epoch": 0.69,
+       "learning_rate": 0.00019313468265867067,
+       "loss": 2.1115,
+       "step": 5500
+     },
+     {
+       "epoch": 0.75,
+       "learning_rate": 0.00019250999500249877,
+       "loss": 2.147,
+       "step": 6000
+     },
+     {
+       "epoch": 0.75,
+       "eval_gen_len": 19.4502,
+       "eval_loss": 1.7010632753372192,
+       "eval_rouge1": 1.3881,
+       "eval_rouge2": 1.1271,
+       "eval_rougeL": 1.3515,
+       "eval_rougeLsum": 1.3593,
+       "eval_runtime": 5340.8815,
+       "eval_samples_per_second": 7.492,
+       "eval_steps_per_second": 0.375,
+       "step": 6000
+     },
+     {
+       "epoch": 0.81,
+       "learning_rate": 0.00019188530734632684,
+       "loss": 2.1467,
+       "step": 6500
+     },
+     {
+       "epoch": 0.87,
+       "learning_rate": 0.00019126061969015494,
+       "loss": 2.1084,
+       "step": 7000
+     },
+     {
+       "epoch": 0.94,
+       "learning_rate": 0.000190635932033983,
+       "loss": 2.1552,
+       "step": 7500
+     },
+     {
+       "epoch": 1.0,
+       "learning_rate": 0.0001900112443778111,
+       "loss": 2.1152,
+       "step": 8000
+     },
+     {
+       "epoch": 1.0,
+       "eval_gen_len": 19.6288,
+       "eval_loss": 1.692185401916504,
+       "eval_rouge1": 1.4518,
+       "eval_rouge2": 1.1955,
+       "eval_rougeL": 1.4252,
+       "eval_rougeLsum": 1.4308,
+       "eval_runtime": 5347.9231,
+       "eval_samples_per_second": 7.482,
+       "eval_steps_per_second": 0.374,
+       "step": 8000
+     }
+   ],
+   "max_steps": 160080,
+   "num_train_epochs": 20,
+   "total_flos": 9.756207646488576e+16,
+   "trial_name": null,
+   "trial_params": null
+ }
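
The trainer state records one completed epoch: 8,004 of 160,080 optimizer steps (20 planned epochs), training loss falling from 2.37 to about 2.12, and an evaluation pass every 2,000 steps with eval loss improving from 1.73 to 1.69. A small, self-contained sketch for inspecting this progress from the checkpoint directory (resuming training itself would additionally need the original datasets and the settings in training_args.bin):

```python
# Sketch: summarize the training progress recorded in trainer_state.json.
import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(f"epoch {state['epoch']}, step {state['global_step']}/{state['max_steps']}")
for entry in state["log_history"]:
    if "eval_loss" in entry:  # the periodic evaluation entries
        print(entry["step"], entry["eval_loss"], entry["eval_rouge1"])
```

Passing `resume_from_checkpoint="last-checkpoint"` to `Trainer.train()` would continue from step 8,004 using these files.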
last-checkpoint/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7110166d952c969185f8550fd2df2892ce3dd876f742b4e7ff7738f01183fd80
+ size 3771
last-checkpoint/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a1c5289a63bf439de3568339bd081b734c56752900b28f5111ae7e395aabb087
+ size 557971229
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
+ {
+   "bos_token": "<s>",
+   "cls_token": "<s>",
+   "eos_token": "</s>",
+   "mask_token": {
+     "content": "<mask>",
+     "lstrip": true,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "<pad>",
+   "sep_token": "</s>",
+   "unk_token": "<unk>"
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
+ {
+   "add_prefix_space": false,
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "<s>",
+   "eos_token": "</s>",
+   "errors": "replace",
+   "mask_token": "<mask>",
+   "model_max_length": 1024,
+   "pad_token": "<pad>",
+   "sep_token": "</s>",
+   "tokenizer_class": "BartTokenizer",
+   "trim_offsets": true,
+   "unk_token": "<unk>"
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7110166d952c969185f8550fd2df2892ce3dd876f742b4e7ff7738f01183fd80
+ size 3771
vocab.json ADDED
The diff for this file is too large to render. See raw diff