upload ckpt 3

Browse files

Files changed (5) hide show

outputs/.gitattributes +1 -0
outputs/ckpt_3/config.json +163 -0
outputs/ckpt_3/flax_model.msgpack +3 -0
outputs/events.out.tfevents.1626474479.t1v-n-cab111a8-w-0.878944.3.v2 +2 -2
outputs/summary.txt +2 -0

outputs/.gitattributes CHANGED Viewed

	@@ -1 +1,2 @@
1	ckpt_2/flax_model.msgpack filter=lfs diff=lfs merge=lfs -text


1	ckpt_2/flax_model.msgpack filter=lfs diff=lfs merge=lfs -text
2	+ ckpt_3/flax_model.msgpack filter=lfs diff=lfs merge=lfs -text

outputs/ckpt_3/config.json ADDED Viewed

	@@ -0,0 +1,163 @@

+{
+  "architectures": [
+    "ViTGPT2LMForConditionalGeneration"
+  ],
+  "bos_token_id": 0,
+  "decoder_start_token_id": 0,
+  "eos_token_id": 2,
+  "gpt2_config": {
+    "_name_or_path": "",
+    "activation_function": "gelu_new",
+    "add_cross_attention": true,
+    "architectures": null,
+    "attn_pdrop": 0.1,
+    "bad_words_ids": null,
+    "bos_token_id": 0,
+    "chunk_size_feed_forward": 0,
+    "decoder_start_token_id": null,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "early_stopping": false,
+    "embd_pdrop": 0.1,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": 2,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "gradient_checkpointing": false,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "initializer_range": 0.02,
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "layer_norm_epsilon": 1e-05,
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "min_length": 0,
+    "model_type": "gpt2",
+    "n_ctx": 1024,
+    "n_embd": 768,
+    "n_head": 12,
+    "n_inner": null,
+    "n_layer": 12,
+    "n_positions": 1024,
+    "no_repeat_ngram_size": 0,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": 1,
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "resid_pdrop": 0.1,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "scale_attn_weights": true,
+    "sep_token_id": null,
+    "summary_activation": null,
+    "summary_first_dropout": 0.1,
+    "summary_proj_to_labels": true,
+    "summary_type": "cls_index",
+    "summary_use_proj": true,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": true,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "transformers_version": "4.9.0.dev0",
+    "use_bfloat16": false,
+    "use_cache": true,
+    "vocab_size": 50000
+  },
+  "is_encoder_decoder": true,
+  "model_type": "vit-gpt2",
+  "pad_token_id": 1,
+  "transformers_version": null,
+  "vit_config": {
+    "_name_or_path": "",
+    "add_cross_attention": false,
+    "architectures": [
+      "ViTModel"
+    ],
+    "attention_probs_dropout_prob": 0.0,
+    "bad_words_ids": null,
+    "bos_token_id": null,
+    "chunk_size_feed_forward": 0,
+    "decoder_start_token_id": null,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "early_stopping": false,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "hidden_act": "gelu",
+    "hidden_dropout_prob": 0.0,
+    "hidden_size": 768,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "image_size": 224,
+    "initializer_range": 0.02,
+    "intermediate_size": 3072,
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "layer_norm_eps": 1e-12,
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "min_length": 0,
+    "model_type": "vit",
+    "no_repeat_ngram_size": 0,
+    "num_attention_heads": 12,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_channels": 3,
+    "num_hidden_layers": 12,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": null,
+    "patch_size": 16,
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "sep_token_id": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": true,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "transformers_version": "4.9.0.dev0",
+    "use_bfloat16": false
+  }
+}

outputs/ckpt_3/flax_model.msgpack ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1a5152a207ba30a963b32775047d0276b9cec79d9c7343eb01db4e8dab14bac2
+size 1012706583

outputs/events.out.tfevents.1626474479.t1v-n-cab111a8-w-0.878944.3.v2 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e15dc7bbd4cb6a4ae2efee9bc4eea09a11fe2e2ecca9468bc7c334112f88aeb3
-size 78845

 version https://git-lfs.github.com/spec/v1
+oid sha256:4e8a3d0a6269cbdb20e41469f2d609d57a48438b9fa13117dae18ce8aa723563
+size 116380

outputs/summary.txt CHANGED Viewed

@@ -2,3 +2,5 @@ Epoch... (1/10 | Loss: 2.3940577507019043, Learning Rate: 1.8007936887443066e-05
 Epoch... (1/10 | Eval Loss: 2.393078327178955 | Eval rouge1: 12.8071 | Eval rouge2: 2.0016 | Eval rougeL: 11.3098 | Eval rougeLsum: 11.3162 | Eval gen_len: 31.3255 |)
 Epoch... (2/10 | Loss: 2.1883292198181152, Learning Rate: 1.6007936210371554e-05)
 Epoch... (2/10 | Eval Loss: 2.2480881214141846 | Eval rouge1: 15.861 | Eval rouge2: 3.108 | Eval rougeL: 13.6457 | Eval rougeLsum: 13.6531 | Eval gen_len: 31.5794 |)

 Epoch... (1/10 | Eval Loss: 2.393078327178955 | Eval rouge1: 12.8071 | Eval rouge2: 2.0016 | Eval rougeL: 11.3098 | Eval rougeLsum: 11.3162 | Eval gen_len: 31.3255 |)
 Epoch... (2/10 | Loss: 2.1883292198181152, Learning Rate: 1.6007936210371554e-05)
 Epoch... (2/10 | Eval Loss: 2.2480881214141846 | Eval rouge1: 15.861 | Eval rouge2: 3.108 | Eval rougeL: 13.6457 | Eval rougeLsum: 13.6531 | Eval gen_len: 31.5794 |)
+Epoch... (3/10 | Loss: 2.1005117893218994, Learning Rate: 1.4007936442794744e-05)
+Epoch... (3/10 | Eval Loss: 2.182466506958008 | Eval rouge1: 18.7278 | Eval rouge2: 3.4425 | Eval rougeL: 15.3744 | Eval rougeLsum: 15.3757 | Eval gen_len: 31.9742 |)