xiaoxiaolin committed on Jun 9

Commit

ee17ccf

•

1 Parent(s): 48c0621

Upload folder using huggingface_hub

Browse files

Files changed (40) hide show

checkpoint-223/config.json +44 -0
checkpoint-223/model.safetensors +3 -0
checkpoint-223/optimizer.pt +3 -0
checkpoint-223/scheduler.pt +3 -0
checkpoint-223/special_tokens_map.json +37 -0
checkpoint-223/tokenizer.json +0 -0
checkpoint-223/tokenizer_config.json +62 -0
checkpoint-223/trainer_state.json +20 -0
checkpoint-223/training_args.bin +3 -0
checkpoint-223/vocab.txt +0 -0
checkpoint-446/config.json +44 -0
checkpoint-446/model.safetensors +3 -0
checkpoint-446/optimizer.pt +3 -0
checkpoint-446/scheduler.pt +3 -0
checkpoint-446/special_tokens_map.json +37 -0
checkpoint-446/tokenizer.json +0 -0
checkpoint-446/tokenizer_config.json +62 -0
checkpoint-446/trainer_state.json +20 -0
checkpoint-446/training_args.bin +3 -0
checkpoint-446/vocab.txt +0 -0
checkpoint-669/config.json +44 -0
checkpoint-669/model.safetensors +3 -0
checkpoint-669/optimizer.pt +3 -0
checkpoint-669/scheduler.pt +3 -0
checkpoint-669/special_tokens_map.json +37 -0
checkpoint-669/tokenizer.json +0 -0
checkpoint-669/tokenizer_config.json +62 -0
checkpoint-669/trainer_state.json +20 -0
checkpoint-669/training_args.bin +3 -0
checkpoint-669/vocab.txt +0 -0
config.json +44 -0
custom_info +9 -0
model.safetensors +3 -0
special_tokens_map.json +37 -0
tokenizer.json +0 -0
tokenizer_config.json +62 -0
train_results.txt +3 -0
trainer_state.json +28 -0
training_args.bin +3 -0
vocab.txt +0 -0

checkpoint-223/config.json ADDED Viewed

	@@ -0,0 +1,44 @@

+{
+  "_name_or_path": "Alibaba-NLP/gte-large-en-v1.5",
+  "architectures": [
+    "NewModelForCL"
+  ],
+  "attention_probs_dropout_prob": 0.0,
+  "auto_map": {
+    "AutoConfig": "Alibaba-NLP/new-impl--configuration.NewConfig",
+    "AutoModel": "Alibaba-NLP/new-impl--modeling.NewModel",
+    "AutoModelForMaskedLM": "Alibaba-NLP/new-impl--modeling.NewForMaskedLM",
+    "AutoModelForMultipleChoice": "Alibaba-NLP/new-impl--modeling.NewForMultipleChoice",
+    "AutoModelForQuestionAnswering": "Alibaba-NLP/new-impl--modeling.NewForQuestionAnswering",
+    "AutoModelForSequenceClassification": "Alibaba-NLP/new-impl--modeling.NewForSequenceClassification",
+    "AutoModelForTokenClassification": "Alibaba-NLP/new-impl--modeling.NewForTokenClassification"
+  },
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-12,
+  "layer_norm_type": "layer_norm",
+  "logn_attention_clip1": false,
+  "logn_attention_scale": false,
+  "max_position_embeddings": 8192,
+  "model_type": "new",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "pack_qkv": true,
+  "pad_token_id": 0,
+  "position_embedding_type": "rope",
+  "rope_scaling": {
+    "factor": 2.0,
+    "type": "ntk"
+  },
+  "rope_theta": 160000,
+  "torch_dtype": "float32",
+  "transformers_version": "4.36.2",
+  "type_vocab_size": 2,
+  "unpad_inputs": false,
+  "use_memory_efficient_attention": false,
+  "vocab_size": 30522
+}

checkpoint-223/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:baab41e5dab391c3ab0d26d3f612457810d455020402154e1f5560ea3a739c8d
+size 1736561104

checkpoint-223/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5a4fb675937460a9b17ad548dfc828bdbddda985766e4e017278766b511043d1
+size 3473287493

checkpoint-223/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cd7676efe65de30b30a67819d0925e6bca6c49f4e24571d28991416144f3b378
+size 627

checkpoint-223/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

checkpoint-223/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-223/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,62 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "max_length": 8000,
+  "model_max_length": 32768,
+  "pad_to_multiple_of": null,
+  "pad_token": "[PAD]",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
+  "sep_token": "[SEP]",
+  "stride": 0,
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": "[UNK]"
+}

checkpoint-223/trainer_state.json ADDED Viewed

	@@ -0,0 +1,20 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 223,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [],
+  "logging_steps": 500,
+  "max_steps": 669,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "total_flos": 0,
+  "train_batch_size": null,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-223/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c9d28c1554748c4e852ecb4292a6890e4f0b1fc576494b1f2b7a646ac50c07a2
+size 4091

checkpoint-223/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-446/config.json ADDED Viewed

	@@ -0,0 +1,44 @@

+{
+  "_name_or_path": "Alibaba-NLP/gte-large-en-v1.5",
+  "architectures": [
+    "NewModelForCL"
+  ],
+  "attention_probs_dropout_prob": 0.0,
+  "auto_map": {
+    "AutoConfig": "Alibaba-NLP/new-impl--configuration.NewConfig",
+    "AutoModel": "Alibaba-NLP/new-impl--modeling.NewModel",
+    "AutoModelForMaskedLM": "Alibaba-NLP/new-impl--modeling.NewForMaskedLM",
+    "AutoModelForMultipleChoice": "Alibaba-NLP/new-impl--modeling.NewForMultipleChoice",
+    "AutoModelForQuestionAnswering": "Alibaba-NLP/new-impl--modeling.NewForQuestionAnswering",
+    "AutoModelForSequenceClassification": "Alibaba-NLP/new-impl--modeling.NewForSequenceClassification",
+    "AutoModelForTokenClassification": "Alibaba-NLP/new-impl--modeling.NewForTokenClassification"
+  },
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-12,
+  "layer_norm_type": "layer_norm",
+  "logn_attention_clip1": false,
+  "logn_attention_scale": false,
+  "max_position_embeddings": 8192,
+  "model_type": "new",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "pack_qkv": true,
+  "pad_token_id": 0,
+  "position_embedding_type": "rope",
+  "rope_scaling": {
+    "factor": 2.0,
+    "type": "ntk"
+  },
+  "rope_theta": 160000,
+  "torch_dtype": "float32",
+  "transformers_version": "4.36.2",
+  "type_vocab_size": 2,
+  "unpad_inputs": false,
+  "use_memory_efficient_attention": false,
+  "vocab_size": 30522
+}

checkpoint-446/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ffac8d07d7d794e077b370f4a24a3a00ce4c4ef6c07a3943bc40a33f2dea0134
+size 1736561104

checkpoint-446/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:353dd026150b5608e76cebee925b6ccd55e7e8e58ec90d6b1637fa4e76c25edc
+size 3473287493

checkpoint-446/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e87a0295a20de0a7d18f617b38f6653507c08e0fc8b2cb6049c2a7531faba1f3
+size 627

checkpoint-446/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

checkpoint-446/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-446/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,62 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "max_length": 8000,
+  "model_max_length": 32768,
+  "pad_to_multiple_of": null,
+  "pad_token": "[PAD]",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
+  "sep_token": "[SEP]",
+  "stride": 0,
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": "[UNK]"
+}

checkpoint-446/trainer_state.json ADDED Viewed

	@@ -0,0 +1,20 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 446,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [],
+  "logging_steps": 500,
+  "max_steps": 669,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "total_flos": 0,
+  "train_batch_size": null,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-446/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c9d28c1554748c4e852ecb4292a6890e4f0b1fc576494b1f2b7a646ac50c07a2
+size 4091

checkpoint-446/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-669/config.json ADDED Viewed

	@@ -0,0 +1,44 @@

+{
+  "_name_or_path": "Alibaba-NLP/gte-large-en-v1.5",
+  "architectures": [
+    "NewModelForCL"
+  ],
+  "attention_probs_dropout_prob": 0.0,
+  "auto_map": {
+    "AutoConfig": "Alibaba-NLP/new-impl--configuration.NewConfig",
+    "AutoModel": "Alibaba-NLP/new-impl--modeling.NewModel",
+    "AutoModelForMaskedLM": "Alibaba-NLP/new-impl--modeling.NewForMaskedLM",
+    "AutoModelForMultipleChoice": "Alibaba-NLP/new-impl--modeling.NewForMultipleChoice",
+    "AutoModelForQuestionAnswering": "Alibaba-NLP/new-impl--modeling.NewForQuestionAnswering",
+    "AutoModelForSequenceClassification": "Alibaba-NLP/new-impl--modeling.NewForSequenceClassification",
+    "AutoModelForTokenClassification": "Alibaba-NLP/new-impl--modeling.NewForTokenClassification"
+  },
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-12,
+  "layer_norm_type": "layer_norm",
+  "logn_attention_clip1": false,
+  "logn_attention_scale": false,
+  "max_position_embeddings": 8192,
+  "model_type": "new",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "pack_qkv": true,
+  "pad_token_id": 0,
+  "position_embedding_type": "rope",
+  "rope_scaling": {
+    "factor": 2.0,
+    "type": "ntk"
+  },
+  "rope_theta": 160000,
+  "torch_dtype": "float32",
+  "transformers_version": "4.36.2",
+  "type_vocab_size": 2,
+  "unpad_inputs": false,
+  "use_memory_efficient_attention": false,
+  "vocab_size": 30522
+}

checkpoint-669/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1bd85c216db17223f4750727ba5bad9f8d86d1f9c6fffcb736dcda5a86223d8e
+size 1736561104

checkpoint-669/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f493a6bb3ff0ed9cb5909b94a2757d10560478e1ca0bb7259ef76a05ba374cca
+size 3473287493

checkpoint-669/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:82ec17673b3e7963e0c4f1af2ebfb57b440f40ce01f43680698e3b11a75f0f99
+size 627

checkpoint-669/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

checkpoint-669/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-669/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,62 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "max_length": 8000,
+  "model_max_length": 32768,
+  "pad_to_multiple_of": null,
+  "pad_token": "[PAD]",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
+  "sep_token": "[SEP]",
+  "stride": 0,
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": "[UNK]"
+}

checkpoint-669/trainer_state.json ADDED Viewed

	@@ -0,0 +1,20 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 669,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [],
+  "logging_steps": 500,
+  "max_steps": 669,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "total_flos": 0,
+  "train_batch_size": null,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-669/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c9d28c1554748c4e852ecb4292a6890e4f0b1fc576494b1f2b7a646ac50c07a2
+size 4091

checkpoint-669/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

config.json ADDED Viewed

	@@ -0,0 +1,44 @@

+{
+  "_name_or_path": "Alibaba-NLP/gte-large-en-v1.5",
+  "architectures": [
+    "NewModelForCL"
+  ],
+  "attention_probs_dropout_prob": 0.0,
+  "auto_map": {
+    "AutoConfig": "Alibaba-NLP/new-impl--configuration.NewConfig",
+    "AutoModel": "Alibaba-NLP/new-impl--modeling.NewModel",
+    "AutoModelForMaskedLM": "Alibaba-NLP/new-impl--modeling.NewForMaskedLM",
+    "AutoModelForMultipleChoice": "Alibaba-NLP/new-impl--modeling.NewForMultipleChoice",
+    "AutoModelForQuestionAnswering": "Alibaba-NLP/new-impl--modeling.NewForQuestionAnswering",
+    "AutoModelForSequenceClassification": "Alibaba-NLP/new-impl--modeling.NewForSequenceClassification",
+    "AutoModelForTokenClassification": "Alibaba-NLP/new-impl--modeling.NewForTokenClassification"
+  },
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-12,
+  "layer_norm_type": "layer_norm",
+  "logn_attention_clip1": false,
+  "logn_attention_scale": false,
+  "max_position_embeddings": 8192,
+  "model_type": "new",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "pack_qkv": true,
+  "pad_token_id": 0,
+  "position_embedding_type": "rope",
+  "rope_scaling": {
+    "factor": 2.0,
+    "type": "ntk"
+  },
+  "rope_theta": 160000,
+  "torch_dtype": "float32",
+  "transformers_version": "4.36.2",
+  "type_vocab_size": 2,
+  "unpad_inputs": false,
+  "use_memory_efficient_attention": false,
+  "vocab_size": 30522
+}

custom_info ADDED Viewed

	@@ -0,0 +1,9 @@

+epoch=0 step=74 progress=0.111 eval info {'sparsity_loss': 1.949, 'l1l2_ratio_z12': 22.631, 'l1l2_ratio_z13': 24.155, 'l1l2_ratio_z13_all': 25.045} train info {'sparsity_loss': 3.651, 'l1l2_ratio_z12': 24.58, 'l1l2_ratio_z13': 25.062, 'l1l2_ratio_z13_all': 25.343}
+epoch=0 step=148 progress=0.221 eval info {'sparsity_loss': 1.558, 'l1l2_ratio_z12': 22.342, 'l1l2_ratio_z13': 24.291, 'l1l2_ratio_z13_all': 25.219} train info {'sparsity_loss': 1.173, 'l1l2_ratio_z12': 22.703, 'l1l2_ratio_z13': 24.374, 'l1l2_ratio_z13_all': 25.265}
+epoch=0 step=222 progress=0.332 eval info {'sparsity_loss': 1.27, 'l1l2_ratio_z12': 22.035, 'l1l2_ratio_z13': 24.157, 'l1l2_ratio_z13_all': 25.208} train info {'sparsity_loss': 0.651, 'l1l2_ratio_z12': 22.266, 'l1l2_ratio_z13': 24.341, 'l1l2_ratio_z13_all': 25.342}
+epoch=1 step=74 progress=0.444 eval info {'sparsity_loss': 1.132, 'l1l2_ratio_z12': 22.216, 'l1l2_ratio_z13': 24.25, 'l1l2_ratio_z13_all': 25.28} train info {'sparsity_loss': 0.314, 'l1l2_ratio_z12': 21.793, 'l1l2_ratio_z13': 24.227, 'l1l2_ratio_z13_all': 25.301}
+epoch=1 step=148 progress=0.555 eval info {'sparsity_loss': 1.286, 'l1l2_ratio_z12': 21.864, 'l1l2_ratio_z13': 24.165, 'l1l2_ratio_z13_all': 25.198} train info {'sparsity_loss': 0.217, 'l1l2_ratio_z12': 21.591, 'l1l2_ratio_z13': 24.223, 'l1l2_ratio_z13_all': 25.317}
+epoch=1 step=222 progress=0.665 eval info {'sparsity_loss': 1.127, 'l1l2_ratio_z12': 21.936, 'l1l2_ratio_z13': 24.184, 'l1l2_ratio_z13_all': 25.2} train info {'sparsity_loss': 0.147, 'l1l2_ratio_z12': 21.386, 'l1l2_ratio_z13': 24.178, 'l1l2_ratio_z13_all': 25.285}
+epoch=2 step=74 progress=0.777 eval info {'sparsity_loss': 1.143, 'l1l2_ratio_z12': 21.521, 'l1l2_ratio_z13': 24.022, 'l1l2_ratio_z13_all': 25.143} train info {'sparsity_loss': 0.073, 'l1l2_ratio_z12': 20.703, 'l1l2_ratio_z13': 24.008, 'l1l2_ratio_z13_all': 25.209}
+epoch=2 step=148 progress=0.888 eval info {'sparsity_loss': 1.199, 'l1l2_ratio_z12': 21.347, 'l1l2_ratio_z13': 24.007, 'l1l2_ratio_z13_all': 25.138} train info {'sparsity_loss': 0.057, 'l1l2_ratio_z12': 20.624, 'l1l2_ratio_z13': 24.002, 'l1l2_ratio_z13_all': 25.216}
+epoch=2 step=222 progress=0.999 eval info {'sparsity_loss': 1.186, 'l1l2_ratio_z12': 21.136, 'l1l2_ratio_z13': 23.883, 'l1l2_ratio_z13_all': 25.085} train info {'sparsity_loss': 0.049, 'l1l2_ratio_z12': 20.424, 'l1l2_ratio_z13': 23.957, 'l1l2_ratio_z13_all': 25.19}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1bd85c216db17223f4750727ba5bad9f8d86d1f9c6fffcb736dcda5a86223d8e
+size 1736561104

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,62 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "max_length": 8000,
+  "model_max_length": 32768,
+  "pad_to_multiple_of": null,
+  "pad_token": "[PAD]",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
+  "sep_token": "[SEP]",
+  "stride": 0,
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": "[UNK]"
+}

train_results.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+epoch = 3.0
+train_runtime = 6614.8366
+train_samples_per_second = 0.101

trainer_state.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 669,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 3.0,
+      "step": 669,
+      "total_flos": 0,
+      "train_runtime": 6614.8366,
+      "train_samples_per_second": 0.101
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 669,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "total_flos": 0,
+  "train_batch_size": null,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c9d28c1554748c4e852ecb4292a6890e4f0b1fc576494b1f2b7a646ac50c07a2
+size 4091

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff