ff670 committed on Oct 23, 2023

Commit

82d6f0f

•

1 Parent(s): fd745e4

Upload folder using huggingface_hub

Browse files

Files changed (48) hide show

Evaluation.txt +0 -0
config.json +29 -0
generation_config.json +6 -0
pytorch_model-00001-of-00041.bin +3 -0
pytorch_model-00002-of-00041.bin +3 -0
pytorch_model-00003-of-00041.bin +3 -0
pytorch_model-00004-of-00041.bin +3 -0
pytorch_model-00005-of-00041.bin +3 -0
pytorch_model-00006-of-00041.bin +3 -0
pytorch_model-00007-of-00041.bin +3 -0
pytorch_model-00008-of-00041.bin +3 -0
pytorch_model-00009-of-00041.bin +3 -0
pytorch_model-00010-of-00041.bin +3 -0
pytorch_model-00011-of-00041.bin +3 -0
pytorch_model-00012-of-00041.bin +3 -0
pytorch_model-00013-of-00041.bin +3 -0
pytorch_model-00014-of-00041.bin +3 -0
pytorch_model-00015-of-00041.bin +3 -0
pytorch_model-00016-of-00041.bin +3 -0
pytorch_model-00017-of-00041.bin +3 -0
pytorch_model-00018-of-00041.bin +3 -0
pytorch_model-00019-of-00041.bin +3 -0
pytorch_model-00020-of-00041.bin +3 -0
pytorch_model-00021-of-00041.bin +3 -0
pytorch_model-00022-of-00041.bin +3 -0
pytorch_model-00023-of-00041.bin +3 -0
pytorch_model-00024-of-00041.bin +3 -0
pytorch_model-00025-of-00041.bin +3 -0
pytorch_model-00026-of-00041.bin +3 -0
pytorch_model-00027-of-00041.bin +3 -0
pytorch_model-00028-of-00041.bin +3 -0
pytorch_model-00029-of-00041.bin +3 -0
pytorch_model-00030-of-00041.bin +3 -0
pytorch_model-00031-of-00041.bin +3 -0
pytorch_model-00032-of-00041.bin +3 -0
pytorch_model-00033-of-00041.bin +3 -0
pytorch_model-00034-of-00041.bin +3 -0
pytorch_model-00035-of-00041.bin +3 -0
pytorch_model-00036-of-00041.bin +3 -0
pytorch_model-00037-of-00041.bin +3 -0
pytorch_model-00038-of-00041.bin +3 -0
pytorch_model-00039-of-00041.bin +3 -0
pytorch_model-00040-of-00041.bin +3 -0
pytorch_model-00041-of-00041.bin +3 -0
pytorch_model.bin.index.json +651 -0
special_tokens_map.json +16 -0
tokenizer.json +0 -0
tokenizer_config.json +7 -0

Evaluation.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+  "_name_or_path": "openbuddy-falcon-180b-v13-preview1",
+  "alibi": false,
+  "architectures": [
+    "FalconForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "bias": false,
+  "bos_token_id": 11,
+  "eos_token_id": 11,
+  "hidden_dropout": 0.0,
+  "hidden_size": 14848,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "max_position_embeddings": 2048,
+  "model_type": "falcon",
+  "multi_query": true,
+  "new_decoder_architecture": true,
+  "num_attention_heads": 232,
+  "num_hidden_layers": 80,
+  "num_kv_heads": 8,
+  "parallel_attn": true,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.35.0.dev0",
+  "use_cache": true,
+  "vocab_size": 70656
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 11,
+  "eos_token_id": 11,
+  "transformers_version": "4.35.0.dev0"
+}

pytorch_model-00001-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0d68607b9b80261b74dc638156834ab57ae6dc996bae771e78510b1306fbbbaa
+size 9213960603

pytorch_model-00002-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a5885fd2fc9b2b6861cd6d984018ba0696a442b05c7b20b8a7a7bee25bc4c532
+size 8879584803

pytorch_model-00003-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5138682e8736f612f7e85f6d5860b4e7bdf213cea3692e5c6366ab60c08731cf
+size 8879584803

pytorch_model-00004-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ef4e7401e7ff8563b50cf8c3b91cc76db1645f07beb555137600ee6a33ec1921
+size 8879584803

pytorch_model-00005-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ab475292cbe502e4c75ab9a937f344063fd598d8e1dd7b3fb0cc34d46dbd4808
+size 8879584803

pytorch_model-00006-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2bcfb89963d02589647c4f3a0607607a58eb1271969969ed411aa90bfa659015
+size 8879584803

pytorch_model-00007-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0c2f6de4add3586ba89d25f6278a69338f9fc13fa513f016806b3f0c6d550d09
+size 8879584803

pytorch_model-00008-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8ee0110815ca87db674f0c876c1d5b88d90f552c6494a7a9dffc0df58b54ffa6
+size 8879584803

pytorch_model-00009-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2b93e913b154e436f028c019fd3a6760c47f675520c52cf1b9b9a06c613c4e92
+size 8879584803

pytorch_model-00010-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6074b7bc0bf78aaf86042d16943e449049d64dba9ab405b71776e0fabd060b95
+size 8879584803

pytorch_model-00011-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aa90ec9118059b7482d7eb3061da3b4e50a5d01fe5b1c9ff3e63a10415997e9a
+size 8879584803

pytorch_model-00012-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:eaeb3cff49166411a5cfe37254f92052b3943914af74232ee6c3b369360cdb61
+size 8879584803

pytorch_model-00013-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7caf022bf5e4a71302968c082e2b0073a2ece8342dd30e9e316504a45f0fac0e
+size 8879584803

pytorch_model-00014-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a996458358a7188a893f11aadf2c0d1b54f6c424e20162c72d2a7ce6c7e83575
+size 8879584803

pytorch_model-00015-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2539082b6009ae5ea75146712c8c8485fc36dfe3589802eb1590ecbb9e29c3e9
+size 8879584803

pytorch_model-00016-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:01361e426eeca370696925f8b24582a9395411dcb23a60f746449ac8da6bc503
+size 8879584803

pytorch_model-00017-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:70c17b80d2feaab19b5a6f466367c6b32a95e03a47d718fb2793af6fcbbe3d1f
+size 8879584803

pytorch_model-00018-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c09247b11139c9f1f25ef538b97a4b9fc388b6da91146aecf2bfb9d0dd47f366
+size 8879584803

pytorch_model-00019-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:779f96795840026c70dfe127b89813084585e99843efadcd40712b881abad5f8
+size 8879584803

pytorch_model-00020-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:30ab55830f524fa81df3bd46ccdade32283eaf36f51855b6e982146c7aacb339
+size 8879584803

pytorch_model-00021-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c6524c0b1114f2eeb1fc16bc13d38eafdd93b866967093c3c5a9929398fcb0c7
+size 8879584803

pytorch_model-00022-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:da666af65e105e3595b8ed65f9f11304137b37a8d97f8f300e763cec8cc128da
+size 8879584803

pytorch_model-00023-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2506cdcd45c183cfe8c539aa29ec5c5fb5b58dbb741675b8cfc6acbe830e2df3
+size 8879584803

pytorch_model-00024-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8f168c6bc1a2fa6154ae8065962e7c8d13d512b6160f329761ea056a53cbd9c4
+size 8879584803

pytorch_model-00025-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:44475f754ced99f18182a6db633ed302a0cc485728d0e180d3dcf5885ef75175
+size 8879584803

pytorch_model-00026-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8fa2b08b845f5a12c5ca65c1a5ed7b3dec70e30a2bc435f2a79db10ebf00c300
+size 8879584803

pytorch_model-00027-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1976b339136b311791106fa594d7ed1171754b33f6c25af4196da155974785ba
+size 8879584803

pytorch_model-00028-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:32ffe5a51e1d311c1b22ba7c52a502e3fab9cd38bd2238b7634626e0c1a9cc3f
+size 8879584803

pytorch_model-00029-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e9299e258ed9d69c345b9af3a4bfc77c3324c445b0772a2dc5d178497f2f30f8
+size 8879584803

pytorch_model-00030-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d9c81d84b3df975fb22f85c708a38f9f1b1e92ed7fe5491ebcad93b9af565ae0
+size 8879584803

pytorch_model-00031-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5357fbd859b884bc611375093cb1cc9ea8d73c62d79334b654a2c339840a291e
+size 8879584803

pytorch_model-00032-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:90b97afef6a502cfa02be2664e0d8fd99d73e116963535edf4e542debf35597c
+size 8879584803

pytorch_model-00033-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:55fba07682639b078bdd89c9bb5bea7d2f8dbe561a96e18bf638c31d640ce5f5
+size 8879584803

pytorch_model-00034-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:38b06d651dd276c0e83a1789f5b10edec6a75952a8bd37bb1efd977a769f17f2
+size 8879584803

pytorch_model-00035-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1f912140e778e50de4743e0ac0ede11d5df36a48f498d15d1eaf9dfa886ccff9
+size 8879584803

pytorch_model-00036-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f458ff0754e2db42059685efcf075c512bcc5730dbe3262885458ca47df870b7
+size 8879584803

pytorch_model-00037-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6ea1a48f035a29840e9522ae64014f50b3f6381233efd7bf1d6130c771f3020c
+size 8879584803

pytorch_model-00038-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d5304c5ccbaf945bc9ee2857ef4e69de06eb5740206df18d11d5a771da4e5aa4
+size 8879584803

pytorch_model-00039-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:59167e9a986099acd02d5c709e8a9faae5d1c18cc2e9a407178d5ef05552661d
+size 8879584803

pytorch_model-00040-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ca80a744593763d3f1cc86d7a400ba1395b61b1be46ba207246af5f63aea8e20
+size 8879584803

pytorch_model-00041-of-00041.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a28398491ebcace3f9a204c3678deee7ab916f5bfed7a28c5c033590bef1eb26
+size 1763885712

pytorch_model.bin.index.json ADDED Viewed

	@@ -0,0 +1,651 @@

+{
+  "metadata": {
+    "total_size": 357281425408
+  },
+  "weight_map": {
+    "lm_head.weight": "pytorch_model-00001-of-00041.bin",
+    "transformer.h.0.ln_attn.bias": "pytorch_model-00001-of-00041.bin",
+    "transformer.h.0.ln_attn.weight": "pytorch_model-00001-of-00041.bin",
+    "transformer.h.0.ln_mlp.bias": "pytorch_model-00001-of-00041.bin",
+    "transformer.h.0.ln_mlp.weight": "pytorch_model-00001-of-00041.bin",
+    "transformer.h.0.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00041.bin",
+    "transformer.h.0.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00041.bin",
+    "transformer.h.0.self_attention.dense.weight": "pytorch_model-00001-of-00041.bin",
+    "transformer.h.0.self_attention.query_key_value.weight": "pytorch_model-00001-of-00041.bin",
+    "transformer.h.1.ln_attn.bias": "pytorch_model-00002-of-00041.bin",
+    "transformer.h.1.ln_attn.weight": "pytorch_model-00002-of-00041.bin",
+    "transformer.h.1.ln_mlp.bias": "pytorch_model-00002-of-00041.bin",
+    "transformer.h.1.ln_mlp.weight": "pytorch_model-00002-of-00041.bin",
+    "transformer.h.1.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00041.bin",
+    "transformer.h.1.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00041.bin",
+    "transformer.h.1.self_attention.dense.weight": "pytorch_model-00001-of-00041.bin",
+    "transformer.h.1.self_attention.query_key_value.weight": "pytorch_model-00001-of-00041.bin",
+    "transformer.h.10.ln_attn.bias": "pytorch_model-00006-of-00041.bin",
+    "transformer.h.10.ln_attn.weight": "pytorch_model-00006-of-00041.bin",
+    "transformer.h.10.ln_mlp.bias": "pytorch_model-00006-of-00041.bin",
+    "transformer.h.10.ln_mlp.weight": "pytorch_model-00006-of-00041.bin",
+    "transformer.h.10.mlp.dense_4h_to_h.weight": "pytorch_model-00006-of-00041.bin",
+    "transformer.h.10.mlp.dense_h_to_4h.weight": "pytorch_model-00006-of-00041.bin",
+    "transformer.h.10.self_attention.dense.weight": "pytorch_model-00006-of-00041.bin",
+    "transformer.h.10.self_attention.query_key_value.weight": "pytorch_model-00006-of-00041.bin",
+    "transformer.h.11.ln_attn.bias": "pytorch_model-00007-of-00041.bin",
+    "transformer.h.11.ln_attn.weight": "pytorch_model-00007-of-00041.bin",
+    "transformer.h.11.ln_mlp.bias": "pytorch_model-00007-of-00041.bin",
+    "transformer.h.11.ln_mlp.weight": "pytorch_model-00007-of-00041.bin",
+    "transformer.h.11.mlp.dense_4h_to_h.weight": "pytorch_model-00007-of-00041.bin",
+    "transformer.h.11.mlp.dense_h_to_4h.weight": "pytorch_model-00006-of-00041.bin",
+    "transformer.h.11.self_attention.dense.weight": "pytorch_model-00006-of-00041.bin",
+    "transformer.h.11.self_attention.query_key_value.weight": "pytorch_model-00006-of-00041.bin",
+    "transformer.h.12.ln_attn.bias": "pytorch_model-00007-of-00041.bin",
+    "transformer.h.12.ln_attn.weight": "pytorch_model-00007-of-00041.bin",
+    "transformer.h.12.ln_mlp.bias": "pytorch_model-00007-of-00041.bin",
+    "transformer.h.12.ln_mlp.weight": "pytorch_model-00007-of-00041.bin",
+    "transformer.h.12.mlp.dense_4h_to_h.weight": "pytorch_model-00007-of-00041.bin",
+    "transformer.h.12.mlp.dense_h_to_4h.weight": "pytorch_model-00007-of-00041.bin",
+    "transformer.h.12.self_attention.dense.weight": "pytorch_model-00007-of-00041.bin",
+    "transformer.h.12.self_attention.query_key_value.weight": "pytorch_model-00007-of-00041.bin",
+    "transformer.h.13.ln_attn.bias": "pytorch_model-00008-of-00041.bin",
+    "transformer.h.13.ln_attn.weight": "pytorch_model-00008-of-00041.bin",
+    "transformer.h.13.ln_mlp.bias": "pytorch_model-00008-of-00041.bin",
+    "transformer.h.13.ln_mlp.weight": "pytorch_model-00008-of-00041.bin",
+    "transformer.h.13.mlp.dense_4h_to_h.weight": "pytorch_model-00008-of-00041.bin",
+    "transformer.h.13.mlp.dense_h_to_4h.weight": "pytorch_model-00007-of-00041.bin",
+    "transformer.h.13.self_attention.dense.weight": "pytorch_model-00007-of-00041.bin",
+    "transformer.h.13.self_attention.query_key_value.weight": "pytorch_model-00007-of-00041.bin",
+    "transformer.h.14.ln_attn.bias": "pytorch_model-00008-of-00041.bin",
+    "transformer.h.14.ln_attn.weight": "pytorch_model-00008-of-00041.bin",
+    "transformer.h.14.ln_mlp.bias": "pytorch_model-00008-of-00041.bin",
+    "transformer.h.14.ln_mlp.weight": "pytorch_model-00008-of-00041.bin",
+    "transformer.h.14.mlp.dense_4h_to_h.weight": "pytorch_model-00008-of-00041.bin",
+    "transformer.h.14.mlp.dense_h_to_4h.weight": "pytorch_model-00008-of-00041.bin",
+    "transformer.h.14.self_attention.dense.weight": "pytorch_model-00008-of-00041.bin",
+    "transformer.h.14.self_attention.query_key_value.weight": "pytorch_model-00008-of-00041.bin",
+    "transformer.h.15.ln_attn.bias": "pytorch_model-00009-of-00041.bin",
+    "transformer.h.15.ln_attn.weight": "pytorch_model-00009-of-00041.bin",
+    "transformer.h.15.ln_mlp.bias": "pytorch_model-00009-of-00041.bin",
+    "transformer.h.15.ln_mlp.weight": "pytorch_model-00009-of-00041.bin",
+    "transformer.h.15.mlp.dense_4h_to_h.weight": "pytorch_model-00009-of-00041.bin",
+    "transformer.h.15.mlp.dense_h_to_4h.weight": "pytorch_model-00008-of-00041.bin",
+    "transformer.h.15.self_attention.dense.weight": "pytorch_model-00008-of-00041.bin",
+    "transformer.h.15.self_attention.query_key_value.weight": "pytorch_model-00008-of-00041.bin",
+    "transformer.h.16.ln_attn.bias": "pytorch_model-00009-of-00041.bin",
+    "transformer.h.16.ln_attn.weight": "pytorch_model-00009-of-00041.bin",
+    "transformer.h.16.ln_mlp.bias": "pytorch_model-00009-of-00041.bin",
+    "transformer.h.16.ln_mlp.weight": "pytorch_model-00009-of-00041.bin",
+    "transformer.h.16.mlp.dense_4h_to_h.weight": "pytorch_model-00009-of-00041.bin",
+    "transformer.h.16.mlp.dense_h_to_4h.weight": "pytorch_model-00009-of-00041.bin",
+    "transformer.h.16.self_attention.dense.weight": "pytorch_model-00009-of-00041.bin",
+    "transformer.h.16.self_attention.query_key_value.weight": "pytorch_model-00009-of-00041.bin",
+    "transformer.h.17.ln_attn.bias": "pytorch_model-00010-of-00041.bin",
+    "transformer.h.17.ln_attn.weight": "pytorch_model-00010-of-00041.bin",
+    "transformer.h.17.ln_mlp.bias": "pytorch_model-00010-of-00041.bin",
+    "transformer.h.17.ln_mlp.weight": "pytorch_model-00010-of-00041.bin",
+    "transformer.h.17.mlp.dense_4h_to_h.weight": "pytorch_model-00010-of-00041.bin",
+    "transformer.h.17.mlp.dense_h_to_4h.weight": "pytorch_model-00009-of-00041.bin",
+    "transformer.h.17.self_attention.dense.weight": "pytorch_model-00009-of-00041.bin",
+    "transformer.h.17.self_attention.query_key_value.weight": "pytorch_model-00009-of-00041.bin",
+    "transformer.h.18.ln_attn.bias": "pytorch_model-00010-of-00041.bin",
+    "transformer.h.18.ln_attn.weight": "pytorch_model-00010-of-00041.bin",
+    "transformer.h.18.ln_mlp.bias": "pytorch_model-00010-of-00041.bin",
+    "transformer.h.18.ln_mlp.weight": "pytorch_model-00010-of-00041.bin",
+    "transformer.h.18.mlp.dense_4h_to_h.weight": "pytorch_model-00010-of-00041.bin",
+    "transformer.h.18.mlp.dense_h_to_4h.weight": "pytorch_model-00010-of-00041.bin",
+    "transformer.h.18.self_attention.dense.weight": "pytorch_model-00010-of-00041.bin",
+    "transformer.h.18.self_attention.query_key_value.weight": "pytorch_model-00010-of-00041.bin",
+    "transformer.h.19.ln_attn.bias": "pytorch_model-00011-of-00041.bin",
+    "transformer.h.19.ln_attn.weight": "pytorch_model-00011-of-00041.bin",
+    "transformer.h.19.ln_mlp.bias": "pytorch_model-00011-of-00041.bin",
+    "transformer.h.19.ln_mlp.weight": "pytorch_model-00011-of-00041.bin",
+    "transformer.h.19.mlp.dense_4h_to_h.weight": "pytorch_model-00011-of-00041.bin",
+    "transformer.h.19.mlp.dense_h_to_4h.weight": "pytorch_model-00010-of-00041.bin",
+    "transformer.h.19.self_attention.dense.weight": "pytorch_model-00010-of-00041.bin",
+    "transformer.h.19.self_attention.query_key_value.weight": "pytorch_model-00010-of-00041.bin",
+    "transformer.h.2.ln_attn.bias": "pytorch_model-00002-of-00041.bin",
+    "transformer.h.2.ln_attn.weight": "pytorch_model-00002-of-00041.bin",
+    "transformer.h.2.ln_mlp.bias": "pytorch_model-00002-of-00041.bin",
+    "transformer.h.2.ln_mlp.weight": "pytorch_model-00002-of-00041.bin",
+    "transformer.h.2.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00041.bin",
+    "transformer.h.2.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00041.bin",
+    "transformer.h.2.self_attention.dense.weight": "pytorch_model-00002-of-00041.bin",
+    "transformer.h.2.self_attention.query_key_value.weight": "pytorch_model-00002-of-00041.bin",
+    "transformer.h.20.ln_attn.bias": "pytorch_model-00011-of-00041.bin",
+    "transformer.h.20.ln_attn.weight": "pytorch_model-00011-of-00041.bin",
+    "transformer.h.20.ln_mlp.bias": "pytorch_model-00011-of-00041.bin",
+    "transformer.h.20.ln_mlp.weight": "pytorch_model-00011-of-00041.bin",
+    "transformer.h.20.mlp.dense_4h_to_h.weight": "pytorch_model-00011-of-00041.bin",
+    "transformer.h.20.mlp.dense_h_to_4h.weight": "pytorch_model-00011-of-00041.bin",
+    "transformer.h.20.self_attention.dense.weight": "pytorch_model-00011-of-00041.bin",
+    "transformer.h.20.self_attention.query_key_value.weight": "pytorch_model-00011-of-00041.bin",
+    "transformer.h.21.ln_attn.bias": "pytorch_model-00012-of-00041.bin",
+    "transformer.h.21.ln_attn.weight": "pytorch_model-00012-of-00041.bin",
+    "transformer.h.21.ln_mlp.bias": "pytorch_model-00012-of-00041.bin",
+    "transformer.h.21.ln_mlp.weight": "pytorch_model-00012-of-00041.bin",
+    "transformer.h.21.mlp.dense_4h_to_h.weight": "pytorch_model-00012-of-00041.bin",
+    "transformer.h.21.mlp.dense_h_to_4h.weight": "pytorch_model-00011-of-00041.bin",
+    "transformer.h.21.self_attention.dense.weight": "pytorch_model-00011-of-00041.bin",
+    "transformer.h.21.self_attention.query_key_value.weight": "pytorch_model-00011-of-00041.bin",
+    "transformer.h.22.ln_attn.bias": "pytorch_model-00012-of-00041.bin",
+    "transformer.h.22.ln_attn.weight": "pytorch_model-00012-of-00041.bin",
+    "transformer.h.22.ln_mlp.bias": "pytorch_model-00012-of-00041.bin",
+    "transformer.h.22.ln_mlp.weight": "pytorch_model-00012-of-00041.bin",
+    "transformer.h.22.mlp.dense_4h_to_h.weight": "pytorch_model-00012-of-00041.bin",
+    "transformer.h.22.mlp.dense_h_to_4h.weight": "pytorch_model-00012-of-00041.bin",
+    "transformer.h.22.self_attention.dense.weight": "pytorch_model-00012-of-00041.bin",
+    "transformer.h.22.self_attention.query_key_value.weight": "pytorch_model-00012-of-00041.bin",
+    "transformer.h.23.ln_attn.bias": "pytorch_model-00013-of-00041.bin",
+    "transformer.h.23.ln_attn.weight": "pytorch_model-00013-of-00041.bin",
+    "transformer.h.23.ln_mlp.bias": "pytorch_model-00013-of-00041.bin",
+    "transformer.h.23.ln_mlp.weight": "pytorch_model-00013-of-00041.bin",
+    "transformer.h.23.mlp.dense_4h_to_h.weight": "pytorch_model-00013-of-00041.bin",
+    "transformer.h.23.mlp.dense_h_to_4h.weight": "pytorch_model-00012-of-00041.bin",
+    "transformer.h.23.self_attention.dense.weight": "pytorch_model-00012-of-00041.bin",
+    "transformer.h.23.self_attention.query_key_value.weight": "pytorch_model-00012-of-00041.bin",
+    "transformer.h.24.ln_attn.bias": "pytorch_model-00013-of-00041.bin",
+    "transformer.h.24.ln_attn.weight": "pytorch_model-00013-of-00041.bin",
+    "transformer.h.24.ln_mlp.bias": "pytorch_model-00013-of-00041.bin",
+    "transformer.h.24.ln_mlp.weight": "pytorch_model-00013-of-00041.bin",
+    "transformer.h.24.mlp.dense_4h_to_h.weight": "pytorch_model-00013-of-00041.bin",
+    "transformer.h.24.mlp.dense_h_to_4h.weight": "pytorch_model-00013-of-00041.bin",
+    "transformer.h.24.self_attention.dense.weight": "pytorch_model-00013-of-00041.bin",
+    "transformer.h.24.self_attention.query_key_value.weight": "pytorch_model-00013-of-00041.bin",
+    "transformer.h.25.ln_attn.bias": "pytorch_model-00014-of-00041.bin",
+    "transformer.h.25.ln_attn.weight": "pytorch_model-00014-of-00041.bin",
+    "transformer.h.25.ln_mlp.bias": "pytorch_model-00014-of-00041.bin",
+    "transformer.h.25.ln_mlp.weight": "pytorch_model-00014-of-00041.bin",
+    "transformer.h.25.mlp.dense_4h_to_h.weight": "pytorch_model-00014-of-00041.bin",
+    "transformer.h.25.mlp.dense_h_to_4h.weight": "pytorch_model-00013-of-00041.bin",
+    "transformer.h.25.self_attention.dense.weight": "pytorch_model-00013-of-00041.bin",
+    "transformer.h.25.self_attention.query_key_value.weight": "pytorch_model-00013-of-00041.bin",
+    "transformer.h.26.ln_attn.bias": "pytorch_model-00014-of-00041.bin",
+    "transformer.h.26.ln_attn.weight": "pytorch_model-00014-of-00041.bin",
+    "transformer.h.26.ln_mlp.bias": "pytorch_model-00014-of-00041.bin",
+    "transformer.h.26.ln_mlp.weight": "pytorch_model-00014-of-00041.bin",
+    "transformer.h.26.mlp.dense_4h_to_h.weight": "pytorch_model-00014-of-00041.bin",
+    "transformer.h.26.mlp.dense_h_to_4h.weight": "pytorch_model-00014-of-00041.bin",
+    "transformer.h.26.self_attention.dense.weight": "pytorch_model-00014-of-00041.bin",
+    "transformer.h.26.self_attention.query_key_value.weight": "pytorch_model-00014-of-00041.bin",
+    "transformer.h.27.ln_attn.bias": "pytorch_model-00015-of-00041.bin",
+    "transformer.h.27.ln_attn.weight": "pytorch_model-00015-of-00041.bin",
+    "transformer.h.27.ln_mlp.bias": "pytorch_model-00015-of-00041.bin",
+    "transformer.h.27.ln_mlp.weight": "pytorch_model-00015-of-00041.bin",
+    "transformer.h.27.mlp.dense_4h_to_h.weight": "pytorch_model-00015-of-00041.bin",
+    "transformer.h.27.mlp.dense_h_to_4h.weight": "pytorch_model-00014-of-00041.bin",
+    "transformer.h.27.self_attention.dense.weight": "pytorch_model-00014-of-00041.bin",
+    "transformer.h.27.self_attention.query_key_value.weight": "pytorch_model-00014-of-00041.bin",
+    "transformer.h.28.ln_attn.bias": "pytorch_model-00015-of-00041.bin",
+    "transformer.h.28.ln_attn.weight": "pytorch_model-00015-of-00041.bin",
+    "transformer.h.28.ln_mlp.bias": "pytorch_model-00015-of-00041.bin",
+    "transformer.h.28.ln_mlp.weight": "pytorch_model-00015-of-00041.bin",
+    "transformer.h.28.mlp.dense_4h_to_h.weight": "pytorch_model-00015-of-00041.bin",
+    "transformer.h.28.mlp.dense_h_to_4h.weight": "pytorch_model-00015-of-00041.bin",
+    "transformer.h.28.self_attention.dense.weight": "pytorch_model-00015-of-00041.bin",
+    "transformer.h.28.self_attention.query_key_value.weight": "pytorch_model-00015-of-00041.bin",
+    "transformer.h.29.ln_attn.bias": "pytorch_model-00016-of-00041.bin",
+    "transformer.h.29.ln_attn.weight": "pytorch_model-00016-of-00041.bin",
+    "transformer.h.29.ln_mlp.bias": "pytorch_model-00016-of-00041.bin",
+    "transformer.h.29.ln_mlp.weight": "pytorch_model-00016-of-00041.bin",
+    "transformer.h.29.mlp.dense_4h_to_h.weight": "pytorch_model-00016-of-00041.bin",
+    "transformer.h.29.mlp.dense_h_to_4h.weight": "pytorch_model-00015-of-00041.bin",
+    "transformer.h.29.self_attention.dense.weight": "pytorch_model-00015-of-00041.bin",
+    "transformer.h.29.self_attention.query_key_value.weight": "pytorch_model-00015-of-00041.bin",
+    "transformer.h.3.ln_attn.bias": "pytorch_model-00003-of-00041.bin",
+    "transformer.h.3.ln_attn.weight": "pytorch_model-00003-of-00041.bin",
+    "transformer.h.3.ln_mlp.bias": "pytorch_model-00003-of-00041.bin",
+    "transformer.h.3.ln_mlp.weight": "pytorch_model-00003-of-00041.bin",
+    "transformer.h.3.mlp.dense_4h_to_h.weight": "pytorch_model-00003-of-00041.bin",
+    "transformer.h.3.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00041.bin",
+    "transformer.h.3.self_attention.dense.weight": "pytorch_model-00002-of-00041.bin",
+    "transformer.h.3.self_attention.query_key_value.weight": "pytorch_model-00002-of-00041.bin",
+    "transformer.h.30.ln_attn.bias": "pytorch_model-00016-of-00041.bin",
+    "transformer.h.30.ln_attn.weight": "pytorch_model-00016-of-00041.bin",
+    "transformer.h.30.ln_mlp.bias": "pytorch_model-00016-of-00041.bin",
+    "transformer.h.30.ln_mlp.weight": "pytorch_model-00016-of-00041.bin",
+    "transformer.h.30.mlp.dense_4h_to_h.weight": "pytorch_model-00016-of-00041.bin",
+    "transformer.h.30.mlp.dense_h_to_4h.weight": "pytorch_model-00016-of-00041.bin",
+    "transformer.h.30.self_attention.dense.weight": "pytorch_model-00016-of-00041.bin",
+    "transformer.h.30.self_attention.query_key_value.weight": "pytorch_model-00016-of-00041.bin",
+    "transformer.h.31.ln_attn.bias": "pytorch_model-00017-of-00041.bin",
+    "transformer.h.31.ln_attn.weight": "pytorch_model-00017-of-00041.bin",
+    "transformer.h.31.ln_mlp.bias": "pytorch_model-00017-of-00041.bin",
+    "transformer.h.31.ln_mlp.weight": "pytorch_model-00017-of-00041.bin",
+    "transformer.h.31.mlp.dense_4h_to_h.weight": "pytorch_model-00017-of-00041.bin",
+    "transformer.h.31.mlp.dense_h_to_4h.weight": "pytorch_model-00016-of-00041.bin",
+    "transformer.h.31.self_attention.dense.weight": "pytorch_model-00016-of-00041.bin",
+    "transformer.h.31.self_attention.query_key_value.weight": "pytorch_model-00016-of-00041.bin",
+    "transformer.h.32.ln_attn.bias": "pytorch_model-00017-of-00041.bin",
+    "transformer.h.32.ln_attn.weight": "pytorch_model-00017-of-00041.bin",
+    "transformer.h.32.ln_mlp.bias": "pytorch_model-00017-of-00041.bin",
+    "transformer.h.32.ln_mlp.weight": "pytorch_model-00017-of-00041.bin",
+    "transformer.h.32.mlp.dense_4h_to_h.weight": "pytorch_model-00017-of-00041.bin",
+    "transformer.h.32.mlp.dense_h_to_4h.weight": "pytorch_model-00017-of-00041.bin",
+    "transformer.h.32.self_attention.dense.weight": "pytorch_model-00017-of-00041.bin",
+    "transformer.h.32.self_attention.query_key_value.weight": "pytorch_model-00017-of-00041.bin",
+    "transformer.h.33.ln_attn.bias": "pytorch_model-00018-of-00041.bin",
+    "transformer.h.33.ln_attn.weight": "pytorch_model-00018-of-00041.bin",
+    "transformer.h.33.ln_mlp.bias": "pytorch_model-00018-of-00041.bin",
+    "transformer.h.33.ln_mlp.weight": "pytorch_model-00018-of-00041.bin",
+    "transformer.h.33.mlp.dense_4h_to_h.weight": "pytorch_model-00018-of-00041.bin",
+    "transformer.h.33.mlp.dense_h_to_4h.weight": "pytorch_model-00017-of-00041.bin",
+    "transformer.h.33.self_attention.dense.weight": "pytorch_model-00017-of-00041.bin",
+    "transformer.h.33.self_attention.query_key_value.weight": "pytorch_model-00017-of-00041.bin",
+    "transformer.h.34.ln_attn.bias": "pytorch_model-00018-of-00041.bin",
+    "transformer.h.34.ln_attn.weight": "pytorch_model-00018-of-00041.bin",
+    "transformer.h.34.ln_mlp.bias": "pytorch_model-00018-of-00041.bin",
+    "transformer.h.34.ln_mlp.weight": "pytorch_model-00018-of-00041.bin",
+    "transformer.h.34.mlp.dense_4h_to_h.weight": "pytorch_model-00018-of-00041.bin",
+    "transformer.h.34.mlp.dense_h_to_4h.weight": "pytorch_model-00018-of-00041.bin",
+    "transformer.h.34.self_attention.dense.weight": "pytorch_model-00018-of-00041.bin",
+    "transformer.h.34.self_attention.query_key_value.weight": "pytorch_model-00018-of-00041.bin",
+    "transformer.h.35.ln_attn.bias": "pytorch_model-00019-of-00041.bin",
+    "transformer.h.35.ln_attn.weight": "pytorch_model-00019-of-00041.bin",
+    "transformer.h.35.ln_mlp.bias": "pytorch_model-00019-of-00041.bin",
+    "transformer.h.35.ln_mlp.weight": "pytorch_model-00019-of-00041.bin",
+    "transformer.h.35.mlp.dense_4h_to_h.weight": "pytorch_model-00019-of-00041.bin",
+    "transformer.h.35.mlp.dense_h_to_4h.weight": "pytorch_model-00018-of-00041.bin",
+    "transformer.h.35.self_attention.dense.weight": "pytorch_model-00018-of-00041.bin",
+    "transformer.h.35.self_attention.query_key_value.weight": "pytorch_model-00018-of-00041.bin",
+    "transformer.h.36.ln_attn.bias": "pytorch_model-00019-of-00041.bin",
+    "transformer.h.36.ln_attn.weight": "pytorch_model-00019-of-00041.bin",
+    "transformer.h.36.ln_mlp.bias": "pytorch_model-00019-of-00041.bin",
+    "transformer.h.36.ln_mlp.weight": "pytorch_model-00019-of-00041.bin",
+    "transformer.h.36.mlp.dense_4h_to_h.weight": "pytorch_model-00019-of-00041.bin",
+    "transformer.h.36.mlp.dense_h_to_4h.weight": "pytorch_model-00019-of-00041.bin",
+    "transformer.h.36.self_attention.dense.weight": "pytorch_model-00019-of-00041.bin",
+    "transformer.h.36.self_attention.query_key_value.weight": "pytorch_model-00019-of-00041.bin",
+    "transformer.h.37.ln_attn.bias": "pytorch_model-00020-of-00041.bin",
+    "transformer.h.37.ln_attn.weight": "pytorch_model-00020-of-00041.bin",
+    "transformer.h.37.ln_mlp.bias": "pytorch_model-00020-of-00041.bin",
+    "transformer.h.37.ln_mlp.weight": "pytorch_model-00020-of-00041.bin",
+    "transformer.h.37.mlp.dense_4h_to_h.weight": "pytorch_model-00020-of-00041.bin",
+    "transformer.h.37.mlp.dense_h_to_4h.weight": "pytorch_model-00019-of-00041.bin",
+    "transformer.h.37.self_attention.dense.weight": "pytorch_model-00019-of-00041.bin",
+    "transformer.h.37.self_attention.query_key_value.weight": "pytorch_model-00019-of-00041.bin",
+    "transformer.h.38.ln_attn.bias": "pytorch_model-00020-of-00041.bin",
+    "transformer.h.38.ln_attn.weight": "pytorch_model-00020-of-00041.bin",
+    "transformer.h.38.ln_mlp.bias": "pytorch_model-00020-of-00041.bin",
+    "transformer.h.38.ln_mlp.weight": "pytorch_model-00020-of-00041.bin",
+    "transformer.h.38.mlp.dense_4h_to_h.weight": "pytorch_model-00020-of-00041.bin",
+    "transformer.h.38.mlp.dense_h_to_4h.weight": "pytorch_model-00020-of-00041.bin",
+    "transformer.h.38.self_attention.dense.weight": "pytorch_model-00020-of-00041.bin",
+    "transformer.h.38.self_attention.query_key_value.weight": "pytorch_model-00020-of-00041.bin",
+    "transformer.h.39.ln_attn.bias": "pytorch_model-00021-of-00041.bin",
+    "transformer.h.39.ln_attn.weight": "pytorch_model-00021-of-00041.bin",
+    "transformer.h.39.ln_mlp.bias": "pytorch_model-00021-of-00041.bin",
+    "transformer.h.39.ln_mlp.weight": "pytorch_model-00021-of-00041.bin",
+    "transformer.h.39.mlp.dense_4h_to_h.weight": "pytorch_model-00021-of-00041.bin",
+    "transformer.h.39.mlp.dense_h_to_4h.weight": "pytorch_model-00020-of-00041.bin",
+    "transformer.h.39.self_attention.dense.weight": "pytorch_model-00020-of-00041.bin",
+    "transformer.h.39.self_attention.query_key_value.weight": "pytorch_model-00020-of-00041.bin",
+    "transformer.h.4.ln_attn.bias": "pytorch_model-00003-of-00041.bin",
+    "transformer.h.4.ln_attn.weight": "pytorch_model-00003-of-00041.bin",
+    "transformer.h.4.ln_mlp.bias": "pytorch_model-00003-of-00041.bin",
+    "transformer.h.4.ln_mlp.weight": "pytorch_model-00003-of-00041.bin",
+    "transformer.h.4.mlp.dense_4h_to_h.weight": "pytorch_model-00003-of-00041.bin",
+    "transformer.h.4.mlp.dense_h_to_4h.weight": "pytorch_model-00003-of-00041.bin",
+    "transformer.h.4.self_attention.dense.weight": "pytorch_model-00003-of-00041.bin",
+    "transformer.h.4.self_attention.query_key_value.weight": "pytorch_model-00003-of-00041.bin",
+    "transformer.h.40.ln_attn.bias": "pytorch_model-00021-of-00041.bin",
+    "transformer.h.40.ln_attn.weight": "pytorch_model-00021-of-00041.bin",
+    "transformer.h.40.ln_mlp.bias": "pytorch_model-00021-of-00041.bin",
+    "transformer.h.40.ln_mlp.weight": "pytorch_model-00021-of-00041.bin",
+    "transformer.h.40.mlp.dense_4h_to_h.weight": "pytorch_model-00021-of-00041.bin",
+    "transformer.h.40.mlp.dense_h_to_4h.weight": "pytorch_model-00021-of-00041.bin",
+    "transformer.h.40.self_attention.dense.weight": "pytorch_model-00021-of-00041.bin",
+    "transformer.h.40.self_attention.query_key_value.weight": "pytorch_model-00021-of-00041.bin",
+    "transformer.h.41.ln_attn.bias": "pytorch_model-00022-of-00041.bin",
+    "transformer.h.41.ln_attn.weight": "pytorch_model-00022-of-00041.bin",
+    "transformer.h.41.ln_mlp.bias": "pytorch_model-00022-of-00041.bin",
+    "transformer.h.41.ln_mlp.weight": "pytorch_model-00022-of-00041.bin",
+    "transformer.h.41.mlp.dense_4h_to_h.weight": "pytorch_model-00022-of-00041.bin",
+    "transformer.h.41.mlp.dense_h_to_4h.weight": "pytorch_model-00021-of-00041.bin",
+    "transformer.h.41.self_attention.dense.weight": "pytorch_model-00021-of-00041.bin",
+    "transformer.h.41.self_attention.query_key_value.weight": "pytorch_model-00021-of-00041.bin",
+    "transformer.h.42.ln_attn.bias": "pytorch_model-00022-of-00041.bin",
+    "transformer.h.42.ln_attn.weight": "pytorch_model-00022-of-00041.bin",
+    "transformer.h.42.ln_mlp.bias": "pytorch_model-00022-of-00041.bin",
+    "transformer.h.42.ln_mlp.weight": "pytorch_model-00022-of-00041.bin",
+    "transformer.h.42.mlp.dense_4h_to_h.weight": "pytorch_model-00022-of-00041.bin",
+    "transformer.h.42.mlp.dense_h_to_4h.weight": "pytorch_model-00022-of-00041.bin",
+    "transformer.h.42.self_attention.dense.weight": "pytorch_model-00022-of-00041.bin",
+    "transformer.h.42.self_attention.query_key_value.weight": "pytorch_model-00022-of-00041.bin",
+    "transformer.h.43.ln_attn.bias": "pytorch_model-00023-of-00041.bin",
+    "transformer.h.43.ln_attn.weight": "pytorch_model-00023-of-00041.bin",
+    "transformer.h.43.ln_mlp.bias": "pytorch_model-00023-of-00041.bin",
+    "transformer.h.43.ln_mlp.weight": "pytorch_model-00023-of-00041.bin",
+    "transformer.h.43.mlp.dense_4h_to_h.weight": "pytorch_model-00023-of-00041.bin",
+    "transformer.h.43.mlp.dense_h_to_4h.weight": "pytorch_model-00022-of-00041.bin",
+    "transformer.h.43.self_attention.dense.weight": "pytorch_model-00022-of-00041.bin",
+    "transformer.h.43.self_attention.query_key_value.weight": "pytorch_model-00022-of-00041.bin",
+    "transformer.h.44.ln_attn.bias": "pytorch_model-00023-of-00041.bin",
+    "transformer.h.44.ln_attn.weight": "pytorch_model-00023-of-00041.bin",
+    "transformer.h.44.ln_mlp.bias": "pytorch_model-00023-of-00041.bin",
+    "transformer.h.44.ln_mlp.weight": "pytorch_model-00023-of-00041.bin",
+    "transformer.h.44.mlp.dense_4h_to_h.weight": "pytorch_model-00023-of-00041.bin",
+    "transformer.h.44.mlp.dense_h_to_4h.weight": "pytorch_model-00023-of-00041.bin",
+    "transformer.h.44.self_attention.dense.weight": "pytorch_model-00023-of-00041.bin",
+    "transformer.h.44.self_attention.query_key_value.weight": "pytorch_model-00023-of-00041.bin",
+    "transformer.h.45.ln_attn.bias": "pytorch_model-00024-of-00041.bin",
+    "transformer.h.45.ln_attn.weight": "pytorch_model-00024-of-00041.bin",
+    "transformer.h.45.ln_mlp.bias": "pytorch_model-00024-of-00041.bin",
+    "transformer.h.45.ln_mlp.weight": "pytorch_model-00024-of-00041.bin",
+    "transformer.h.45.mlp.dense_4h_to_h.weight": "pytorch_model-00024-of-00041.bin",
+    "transformer.h.45.mlp.dense_h_to_4h.weight": "pytorch_model-00023-of-00041.bin",
+    "transformer.h.45.self_attention.dense.weight": "pytorch_model-00023-of-00041.bin",
+    "transformer.h.45.self_attention.query_key_value.weight": "pytorch_model-00023-of-00041.bin",
+    "transformer.h.46.ln_attn.bias": "pytorch_model-00024-of-00041.bin",
+    "transformer.h.46.ln_attn.weight": "pytorch_model-00024-of-00041.bin",
+    "transformer.h.46.ln_mlp.bias": "pytorch_model-00024-of-00041.bin",
+    "transformer.h.46.ln_mlp.weight": "pytorch_model-00024-of-00041.bin",
+    "transformer.h.46.mlp.dense_4h_to_h.weight": "pytorch_model-00024-of-00041.bin",
+    "transformer.h.46.mlp.dense_h_to_4h.weight": "pytorch_model-00024-of-00041.bin",
+    "transformer.h.46.self_attention.dense.weight": "pytorch_model-00024-of-00041.bin",
+    "transformer.h.46.self_attention.query_key_value.weight": "pytorch_model-00024-of-00041.bin",
+    "transformer.h.47.ln_attn.bias": "pytorch_model-00025-of-00041.bin",
+    "transformer.h.47.ln_attn.weight": "pytorch_model-00025-of-00041.bin",
+    "transformer.h.47.ln_mlp.bias": "pytorch_model-00025-of-00041.bin",
+    "transformer.h.47.ln_mlp.weight": "pytorch_model-00025-of-00041.bin",
+    "transformer.h.47.mlp.dense_4h_to_h.weight": "pytorch_model-00025-of-00041.bin",
+    "transformer.h.47.mlp.dense_h_to_4h.weight": "pytorch_model-00024-of-00041.bin",
+    "transformer.h.47.self_attention.dense.weight": "pytorch_model-00024-of-00041.bin",
+    "transformer.h.47.self_attention.query_key_value.weight": "pytorch_model-00024-of-00041.bin",
+    "transformer.h.48.ln_attn.bias": "pytorch_model-00025-of-00041.bin",
+    "transformer.h.48.ln_attn.weight": "pytorch_model-00025-of-00041.bin",
+    "transformer.h.48.ln_mlp.bias": "pytorch_model-00025-of-00041.bin",
+    "transformer.h.48.ln_mlp.weight": "pytorch_model-00025-of-00041.bin",
+    "transformer.h.48.mlp.dense_4h_to_h.weight": "pytorch_model-00025-of-00041.bin",
+    "transformer.h.48.mlp.dense_h_to_4h.weight": "pytorch_model-00025-of-00041.bin",
+    "transformer.h.48.self_attention.dense.weight": "pytorch_model-00025-of-00041.bin",
+    "transformer.h.48.self_attention.query_key_value.weight": "pytorch_model-00025-of-00041.bin",
+    "transformer.h.49.ln_attn.bias": "pytorch_model-00026-of-00041.bin",
+    "transformer.h.49.ln_attn.weight": "pytorch_model-00026-of-00041.bin",
+    "transformer.h.49.ln_mlp.bias": "pytorch_model-00026-of-00041.bin",
+    "transformer.h.49.ln_mlp.weight": "pytorch_model-00026-of-00041.bin",
+    "transformer.h.49.mlp.dense_4h_to_h.weight": "pytorch_model-00026-of-00041.bin",
+    "transformer.h.49.mlp.dense_h_to_4h.weight": "pytorch_model-00025-of-00041.bin",
+    "transformer.h.49.self_attention.dense.weight": "pytorch_model-00025-of-00041.bin",
+    "transformer.h.49.self_attention.query_key_value.weight": "pytorch_model-00025-of-00041.bin",
+    "transformer.h.5.ln_attn.bias": "pytorch_model-00004-of-00041.bin",
+    "transformer.h.5.ln_attn.weight": "pytorch_model-00004-of-00041.bin",
+    "transformer.h.5.ln_mlp.bias": "pytorch_model-00004-of-00041.bin",
+    "transformer.h.5.ln_mlp.weight": "pytorch_model-00004-of-00041.bin",
+    "transformer.h.5.mlp.dense_4h_to_h.weight": "pytorch_model-00004-of-00041.bin",
+    "transformer.h.5.mlp.dense_h_to_4h.weight": "pytorch_model-00003-of-00041.bin",
+    "transformer.h.5.self_attention.dense.weight": "pytorch_model-00003-of-00041.bin",
+    "transformer.h.5.self_attention.query_key_value.weight": "pytorch_model-00003-of-00041.bin",
+    "transformer.h.50.ln_attn.bias": "pytorch_model-00026-of-00041.bin",
+    "transformer.h.50.ln_attn.weight": "pytorch_model-00026-of-00041.bin",
+    "transformer.h.50.ln_mlp.bias": "pytorch_model-00026-of-00041.bin",
+    "transformer.h.50.ln_mlp.weight": "pytorch_model-00026-of-00041.bin",
+    "transformer.h.50.mlp.dense_4h_to_h.weight": "pytorch_model-00026-of-00041.bin",
+    "transformer.h.50.mlp.dense_h_to_4h.weight": "pytorch_model-00026-of-00041.bin",
+    "transformer.h.50.self_attention.dense.weight": "pytorch_model-00026-of-00041.bin",
+    "transformer.h.50.self_attention.query_key_value.weight": "pytorch_model-00026-of-00041.bin",
+    "transformer.h.51.ln_attn.bias": "pytorch_model-00027-of-00041.bin",
+    "transformer.h.51.ln_attn.weight": "pytorch_model-00027-of-00041.bin",
+    "transformer.h.51.ln_mlp.bias": "pytorch_model-00027-of-00041.bin",
+    "transformer.h.51.ln_mlp.weight": "pytorch_model-00027-of-00041.bin",
+    "transformer.h.51.mlp.dense_4h_to_h.weight": "pytorch_model-00027-of-00041.bin",
+    "transformer.h.51.mlp.dense_h_to_4h.weight": "pytorch_model-00026-of-00041.bin",
+    "transformer.h.51.self_attention.dense.weight": "pytorch_model-00026-of-00041.bin",
+    "transformer.h.51.self_attention.query_key_value.weight": "pytorch_model-00026-of-00041.bin",
+    "transformer.h.52.ln_attn.bias": "pytorch_model-00027-of-00041.bin",
+    "transformer.h.52.ln_attn.weight": "pytorch_model-00027-of-00041.bin",
+    "transformer.h.52.ln_mlp.bias": "pytorch_model-00027-of-00041.bin",
+    "transformer.h.52.ln_mlp.weight": "pytorch_model-00027-of-00041.bin",
+    "transformer.h.52.mlp.dense_4h_to_h.weight": "pytorch_model-00027-of-00041.bin",
+    "transformer.h.52.mlp.dense_h_to_4h.weight": "pytorch_model-00027-of-00041.bin",
+    "transformer.h.52.self_attention.dense.weight": "pytorch_model-00027-of-00041.bin",
+    "transformer.h.52.self_attention.query_key_value.weight": "pytorch_model-00027-of-00041.bin",
+    "transformer.h.53.ln_attn.bias": "pytorch_model-00028-of-00041.bin",
+    "transformer.h.53.ln_attn.weight": "pytorch_model-00028-of-00041.bin",
+    "transformer.h.53.ln_mlp.bias": "pytorch_model-00028-of-00041.bin",
+    "transformer.h.53.ln_mlp.weight": "pytorch_model-00028-of-00041.bin",
+    "transformer.h.53.mlp.dense_4h_to_h.weight": "pytorch_model-00028-of-00041.bin",
+    "transformer.h.53.mlp.dense_h_to_4h.weight": "pytorch_model-00027-of-00041.bin",
+    "transformer.h.53.self_attention.dense.weight": "pytorch_model-00027-of-00041.bin",
+    "transformer.h.53.self_attention.query_key_value.weight": "pytorch_model-00027-of-00041.bin",
+    "transformer.h.54.ln_attn.bias": "pytorch_model-00028-of-00041.bin",
+    "transformer.h.54.ln_attn.weight": "pytorch_model-00028-of-00041.bin",
+    "transformer.h.54.ln_mlp.bias": "pytorch_model-00028-of-00041.bin",
+    "transformer.h.54.ln_mlp.weight": "pytorch_model-00028-of-00041.bin",
+    "transformer.h.54.mlp.dense_4h_to_h.weight": "pytorch_model-00028-of-00041.bin",
+    "transformer.h.54.mlp.dense_h_to_4h.weight": "pytorch_model-00028-of-00041.bin",
+    "transformer.h.54.self_attention.dense.weight": "pytorch_model-00028-of-00041.bin",
+    "transformer.h.54.self_attention.query_key_value.weight": "pytorch_model-00028-of-00041.bin",
+    "transformer.h.55.ln_attn.bias": "pytorch_model-00029-of-00041.bin",
+    "transformer.h.55.ln_attn.weight": "pytorch_model-00029-of-00041.bin",
+    "transformer.h.55.ln_mlp.bias": "pytorch_model-00029-of-00041.bin",
+    "transformer.h.55.ln_mlp.weight": "pytorch_model-00029-of-00041.bin",
+    "transformer.h.55.mlp.dense_4h_to_h.weight": "pytorch_model-00029-of-00041.bin",
+    "transformer.h.55.mlp.dense_h_to_4h.weight": "pytorch_model-00028-of-00041.bin",
+    "transformer.h.55.self_attention.dense.weight": "pytorch_model-00028-of-00041.bin",
+    "transformer.h.55.self_attention.query_key_value.weight": "pytorch_model-00028-of-00041.bin",
+    "transformer.h.56.ln_attn.bias": "pytorch_model-00029-of-00041.bin",
+    "transformer.h.56.ln_attn.weight": "pytorch_model-00029-of-00041.bin",
+    "transformer.h.56.ln_mlp.bias": "pytorch_model-00029-of-00041.bin",
+    "transformer.h.56.ln_mlp.weight": "pytorch_model-00029-of-00041.bin",
+    "transformer.h.56.mlp.dense_4h_to_h.weight": "pytorch_model-00029-of-00041.bin",
+    "transformer.h.56.mlp.dense_h_to_4h.weight": "pytorch_model-00029-of-00041.bin",
+    "transformer.h.56.self_attention.dense.weight": "pytorch_model-00029-of-00041.bin",
+    "transformer.h.56.self_attention.query_key_value.weight": "pytorch_model-00029-of-00041.bin",
+    "transformer.h.57.ln_attn.bias": "pytorch_model-00030-of-00041.bin",
+    "transformer.h.57.ln_attn.weight": "pytorch_model-00030-of-00041.bin",
+    "transformer.h.57.ln_mlp.bias": "pytorch_model-00030-of-00041.bin",
+    "transformer.h.57.ln_mlp.weight": "pytorch_model-00030-of-00041.bin",
+    "transformer.h.57.mlp.dense_4h_to_h.weight": "pytorch_model-00030-of-00041.bin",
+    "transformer.h.57.mlp.dense_h_to_4h.weight": "pytorch_model-00029-of-00041.bin",
+    "transformer.h.57.self_attention.dense.weight": "pytorch_model-00029-of-00041.bin",
+    "transformer.h.57.self_attention.query_key_value.weight": "pytorch_model-00029-of-00041.bin",
+    "transformer.h.58.ln_attn.bias": "pytorch_model-00030-of-00041.bin",
+    "transformer.h.58.ln_attn.weight": "pytorch_model-00030-of-00041.bin",
+    "transformer.h.58.ln_mlp.bias": "pytorch_model-00030-of-00041.bin",
+    "transformer.h.58.ln_mlp.weight": "pytorch_model-00030-of-00041.bin",
+    "transformer.h.58.mlp.dense_4h_to_h.weight": "pytorch_model-00030-of-00041.bin",
+    "transformer.h.58.mlp.dense_h_to_4h.weight": "pytorch_model-00030-of-00041.bin",
+    "transformer.h.58.self_attention.dense.weight": "pytorch_model-00030-of-00041.bin",
+    "transformer.h.58.self_attention.query_key_value.weight": "pytorch_model-00030-of-00041.bin",
+    "transformer.h.59.ln_attn.bias": "pytorch_model-00031-of-00041.bin",
+    "transformer.h.59.ln_attn.weight": "pytorch_model-00031-of-00041.bin",
+    "transformer.h.59.ln_mlp.bias": "pytorch_model-00031-of-00041.bin",
+    "transformer.h.59.ln_mlp.weight": "pytorch_model-00031-of-00041.bin",
+    "transformer.h.59.mlp.dense_4h_to_h.weight": "pytorch_model-00031-of-00041.bin",
+    "transformer.h.59.mlp.dense_h_to_4h.weight": "pytorch_model-00030-of-00041.bin",
+    "transformer.h.59.self_attention.dense.weight": "pytorch_model-00030-of-00041.bin",
+    "transformer.h.59.self_attention.query_key_value.weight": "pytorch_model-00030-of-00041.bin",
+    "transformer.h.6.ln_attn.bias": "pytorch_model-00004-of-00041.bin",
+    "transformer.h.6.ln_attn.weight": "pytorch_model-00004-of-00041.bin",
+    "transformer.h.6.ln_mlp.bias": "pytorch_model-00004-of-00041.bin",
+    "transformer.h.6.ln_mlp.weight": "pytorch_model-00004-of-00041.bin",
+    "transformer.h.6.mlp.dense_4h_to_h.weight": "pytorch_model-00004-of-00041.bin",
+    "transformer.h.6.mlp.dense_h_to_4h.weight": "pytorch_model-00004-of-00041.bin",
+    "transformer.h.6.self_attention.dense.weight": "pytorch_model-00004-of-00041.bin",
+    "transformer.h.6.self_attention.query_key_value.weight": "pytorch_model-00004-of-00041.bin",
+    "transformer.h.60.ln_attn.bias": "pytorch_model-00031-of-00041.bin",
+    "transformer.h.60.ln_attn.weight": "pytorch_model-00031-of-00041.bin",
+    "transformer.h.60.ln_mlp.bias": "pytorch_model-00031-of-00041.bin",
+    "transformer.h.60.ln_mlp.weight": "pytorch_model-00031-of-00041.bin",
+    "transformer.h.60.mlp.dense_4h_to_h.weight": "pytorch_model-00031-of-00041.bin",
+    "transformer.h.60.mlp.dense_h_to_4h.weight": "pytorch_model-00031-of-00041.bin",
+    "transformer.h.60.self_attention.dense.weight": "pytorch_model-00031-of-00041.bin",
+    "transformer.h.60.self_attention.query_key_value.weight": "pytorch_model-00031-of-00041.bin",
+    "transformer.h.61.ln_attn.bias": "pytorch_model-00032-of-00041.bin",
+    "transformer.h.61.ln_attn.weight": "pytorch_model-00032-of-00041.bin",
+    "transformer.h.61.ln_mlp.bias": "pytorch_model-00032-of-00041.bin",
+    "transformer.h.61.ln_mlp.weight": "pytorch_model-00032-of-00041.bin",
+    "transformer.h.61.mlp.dense_4h_to_h.weight": "pytorch_model-00032-of-00041.bin",
+    "transformer.h.61.mlp.dense_h_to_4h.weight": "pytorch_model-00031-of-00041.bin",
+    "transformer.h.61.self_attention.dense.weight": "pytorch_model-00031-of-00041.bin",
+    "transformer.h.61.self_attention.query_key_value.weight": "pytorch_model-00031-of-00041.bin",
+    "transformer.h.62.ln_attn.bias": "pytorch_model-00032-of-00041.bin",
+    "transformer.h.62.ln_attn.weight": "pytorch_model-00032-of-00041.bin",
+    "transformer.h.62.ln_mlp.bias": "pytorch_model-00032-of-00041.bin",
+    "transformer.h.62.ln_mlp.weight": "pytorch_model-00032-of-00041.bin",
+    "transformer.h.62.mlp.dense_4h_to_h.weight": "pytorch_model-00032-of-00041.bin",
+    "transformer.h.62.mlp.dense_h_to_4h.weight": "pytorch_model-00032-of-00041.bin",
+    "transformer.h.62.self_attention.dense.weight": "pytorch_model-00032-of-00041.bin",
+    "transformer.h.62.self_attention.query_key_value.weight": "pytorch_model-00032-of-00041.bin",
+    "transformer.h.63.ln_attn.bias": "pytorch_model-00033-of-00041.bin",
+    "transformer.h.63.ln_attn.weight": "pytorch_model-00033-of-00041.bin",
+    "transformer.h.63.ln_mlp.bias": "pytorch_model-00033-of-00041.bin",
+    "transformer.h.63.ln_mlp.weight": "pytorch_model-00033-of-00041.bin",
+    "transformer.h.63.mlp.dense_4h_to_h.weight": "pytorch_model-00033-of-00041.bin",
+    "transformer.h.63.mlp.dense_h_to_4h.weight": "pytorch_model-00032-of-00041.bin",
+    "transformer.h.63.self_attention.dense.weight": "pytorch_model-00032-of-00041.bin",
+    "transformer.h.63.self_attention.query_key_value.weight": "pytorch_model-00032-of-00041.bin",
+    "transformer.h.64.ln_attn.bias": "pytorch_model-00033-of-00041.bin",
+    "transformer.h.64.ln_attn.weight": "pytorch_model-00033-of-00041.bin",
+    "transformer.h.64.ln_mlp.bias": "pytorch_model-00033-of-00041.bin",
+    "transformer.h.64.ln_mlp.weight": "pytorch_model-00033-of-00041.bin",
+    "transformer.h.64.mlp.dense_4h_to_h.weight": "pytorch_model-00033-of-00041.bin",
+    "transformer.h.64.mlp.dense_h_to_4h.weight": "pytorch_model-00033-of-00041.bin",
+    "transformer.h.64.self_attention.dense.weight": "pytorch_model-00033-of-00041.bin",
+    "transformer.h.64.self_attention.query_key_value.weight": "pytorch_model-00033-of-00041.bin",
+    "transformer.h.65.ln_attn.bias": "pytorch_model-00034-of-00041.bin",
+    "transformer.h.65.ln_attn.weight": "pytorch_model-00034-of-00041.bin",
+    "transformer.h.65.ln_mlp.bias": "pytorch_model-00034-of-00041.bin",
+    "transformer.h.65.ln_mlp.weight": "pytorch_model-00034-of-00041.bin",
+    "transformer.h.65.mlp.dense_4h_to_h.weight": "pytorch_model-00034-of-00041.bin",
+    "transformer.h.65.mlp.dense_h_to_4h.weight": "pytorch_model-00033-of-00041.bin",
+    "transformer.h.65.self_attention.dense.weight": "pytorch_model-00033-of-00041.bin",
+    "transformer.h.65.self_attention.query_key_value.weight": "pytorch_model-00033-of-00041.bin",
+    "transformer.h.66.ln_attn.bias": "pytorch_model-00034-of-00041.bin",
+    "transformer.h.66.ln_attn.weight": "pytorch_model-00034-of-00041.bin",
+    "transformer.h.66.ln_mlp.bias": "pytorch_model-00034-of-00041.bin",
+    "transformer.h.66.ln_mlp.weight": "pytorch_model-00034-of-00041.bin",
+    "transformer.h.66.mlp.dense_4h_to_h.weight": "pytorch_model-00034-of-00041.bin",
+    "transformer.h.66.mlp.dense_h_to_4h.weight": "pytorch_model-00034-of-00041.bin",
+    "transformer.h.66.self_attention.dense.weight": "pytorch_model-00034-of-00041.bin",
+    "transformer.h.66.self_attention.query_key_value.weight": "pytorch_model-00034-of-00041.bin",
+    "transformer.h.67.ln_attn.bias": "pytorch_model-00035-of-00041.bin",
+    "transformer.h.67.ln_attn.weight": "pytorch_model-00035-of-00041.bin",
+    "transformer.h.67.ln_mlp.bias": "pytorch_model-00035-of-00041.bin",
+    "transformer.h.67.ln_mlp.weight": "pytorch_model-00035-of-00041.bin",
+    "transformer.h.67.mlp.dense_4h_to_h.weight": "pytorch_model-00035-of-00041.bin",
+    "transformer.h.67.mlp.dense_h_to_4h.weight": "pytorch_model-00034-of-00041.bin",
+    "transformer.h.67.self_attention.dense.weight": "pytorch_model-00034-of-00041.bin",
+    "transformer.h.67.self_attention.query_key_value.weight": "pytorch_model-00034-of-00041.bin",
+    "transformer.h.68.ln_attn.bias": "pytorch_model-00035-of-00041.bin",
+    "transformer.h.68.ln_attn.weight": "pytorch_model-00035-of-00041.bin",
+    "transformer.h.68.ln_mlp.bias": "pytorch_model-00035-of-00041.bin",
+    "transformer.h.68.ln_mlp.weight": "pytorch_model-00035-of-00041.bin",
+    "transformer.h.68.mlp.dense_4h_to_h.weight": "pytorch_model-00035-of-00041.bin",
+    "transformer.h.68.mlp.dense_h_to_4h.weight": "pytorch_model-00035-of-00041.bin",
+    "transformer.h.68.self_attention.dense.weight": "pytorch_model-00035-of-00041.bin",
+    "transformer.h.68.self_attention.query_key_value.weight": "pytorch_model-00035-of-00041.bin",
+    "transformer.h.69.ln_attn.bias": "pytorch_model-00036-of-00041.bin",
+    "transformer.h.69.ln_attn.weight": "pytorch_model-00036-of-00041.bin",
+    "transformer.h.69.ln_mlp.bias": "pytorch_model-00036-of-00041.bin",
+    "transformer.h.69.ln_mlp.weight": "pytorch_model-00036-of-00041.bin",
+    "transformer.h.69.mlp.dense_4h_to_h.weight": "pytorch_model-00036-of-00041.bin",
+    "transformer.h.69.mlp.dense_h_to_4h.weight": "pytorch_model-00035-of-00041.bin",
+    "transformer.h.69.self_attention.dense.weight": "pytorch_model-00035-of-00041.bin",
+    "transformer.h.69.self_attention.query_key_value.weight": "pytorch_model-00035-of-00041.bin",
+    "transformer.h.7.ln_attn.bias": "pytorch_model-00005-of-00041.bin",
+    "transformer.h.7.ln_attn.weight": "pytorch_model-00005-of-00041.bin",
+    "transformer.h.7.ln_mlp.bias": "pytorch_model-00005-of-00041.bin",
+    "transformer.h.7.ln_mlp.weight": "pytorch_model-00005-of-00041.bin",
+    "transformer.h.7.mlp.dense_4h_to_h.weight": "pytorch_model-00005-of-00041.bin",
+    "transformer.h.7.mlp.dense_h_to_4h.weight": "pytorch_model-00004-of-00041.bin",
+    "transformer.h.7.self_attention.dense.weight": "pytorch_model-00004-of-00041.bin",
+    "transformer.h.7.self_attention.query_key_value.weight": "pytorch_model-00004-of-00041.bin",
+    "transformer.h.70.ln_attn.bias": "pytorch_model-00036-of-00041.bin",
+    "transformer.h.70.ln_attn.weight": "pytorch_model-00036-of-00041.bin",
+    "transformer.h.70.ln_mlp.bias": "pytorch_model-00036-of-00041.bin",
+    "transformer.h.70.ln_mlp.weight": "pytorch_model-00036-of-00041.bin",
+    "transformer.h.70.mlp.dense_4h_to_h.weight": "pytorch_model-00036-of-00041.bin",
+    "transformer.h.70.mlp.dense_h_to_4h.weight": "pytorch_model-00036-of-00041.bin",
+    "transformer.h.70.self_attention.dense.weight": "pytorch_model-00036-of-00041.bin",
+    "transformer.h.70.self_attention.query_key_value.weight": "pytorch_model-00036-of-00041.bin",
+    "transformer.h.71.ln_attn.bias": "pytorch_model-00037-of-00041.bin",
+    "transformer.h.71.ln_attn.weight": "pytorch_model-00037-of-00041.bin",
+    "transformer.h.71.ln_mlp.bias": "pytorch_model-00037-of-00041.bin",
+    "transformer.h.71.ln_mlp.weight": "pytorch_model-00037-of-00041.bin",
+    "transformer.h.71.mlp.dense_4h_to_h.weight": "pytorch_model-00037-of-00041.bin",
+    "transformer.h.71.mlp.dense_h_to_4h.weight": "pytorch_model-00036-of-00041.bin",
+    "transformer.h.71.self_attention.dense.weight": "pytorch_model-00036-of-00041.bin",
+    "transformer.h.71.self_attention.query_key_value.weight": "pytorch_model-00036-of-00041.bin",
+    "transformer.h.72.ln_attn.bias": "pytorch_model-00037-of-00041.bin",
+    "transformer.h.72.ln_attn.weight": "pytorch_model-00037-of-00041.bin",
+    "transformer.h.72.ln_mlp.bias": "pytorch_model-00037-of-00041.bin",
+    "transformer.h.72.ln_mlp.weight": "pytorch_model-00037-of-00041.bin",
+    "transformer.h.72.mlp.dense_4h_to_h.weight": "pytorch_model-00037-of-00041.bin",
+    "transformer.h.72.mlp.dense_h_to_4h.weight": "pytorch_model-00037-of-00041.bin",
+    "transformer.h.72.self_attention.dense.weight": "pytorch_model-00037-of-00041.bin",
+    "transformer.h.72.self_attention.query_key_value.weight": "pytorch_model-00037-of-00041.bin",
+    "transformer.h.73.ln_attn.bias": "pytorch_model-00038-of-00041.bin",
+    "transformer.h.73.ln_attn.weight": "pytorch_model-00038-of-00041.bin",
+    "transformer.h.73.ln_mlp.bias": "pytorch_model-00038-of-00041.bin",
+    "transformer.h.73.ln_mlp.weight": "pytorch_model-00038-of-00041.bin",
+    "transformer.h.73.mlp.dense_4h_to_h.weight": "pytorch_model-00038-of-00041.bin",
+    "transformer.h.73.mlp.dense_h_to_4h.weight": "pytorch_model-00037-of-00041.bin",
+    "transformer.h.73.self_attention.dense.weight": "pytorch_model-00037-of-00041.bin",
+    "transformer.h.73.self_attention.query_key_value.weight": "pytorch_model-00037-of-00041.bin",
+    "transformer.h.74.ln_attn.bias": "pytorch_model-00038-of-00041.bin",
+    "transformer.h.74.ln_attn.weight": "pytorch_model-00038-of-00041.bin",
+    "transformer.h.74.ln_mlp.bias": "pytorch_model-00038-of-00041.bin",
+    "transformer.h.74.ln_mlp.weight": "pytorch_model-00038-of-00041.bin",
+    "transformer.h.74.mlp.dense_4h_to_h.weight": "pytorch_model-00038-of-00041.bin",
+    "transformer.h.74.mlp.dense_h_to_4h.weight": "pytorch_model-00038-of-00041.bin",
+    "transformer.h.74.self_attention.dense.weight": "pytorch_model-00038-of-00041.bin",
+    "transformer.h.74.self_attention.query_key_value.weight": "pytorch_model-00038-of-00041.bin",
+    "transformer.h.75.ln_attn.bias": "pytorch_model-00039-of-00041.bin",
+    "transformer.h.75.ln_attn.weight": "pytorch_model-00039-of-00041.bin",
+    "transformer.h.75.ln_mlp.bias": "pytorch_model-00039-of-00041.bin",
+    "transformer.h.75.ln_mlp.weight": "pytorch_model-00039-of-00041.bin",
+    "transformer.h.75.mlp.dense_4h_to_h.weight": "pytorch_model-00039-of-00041.bin",
+    "transformer.h.75.mlp.dense_h_to_4h.weight": "pytorch_model-00038-of-00041.bin",
+    "transformer.h.75.self_attention.dense.weight": "pytorch_model-00038-of-00041.bin",
+    "transformer.h.75.self_attention.query_key_value.weight": "pytorch_model-00038-of-00041.bin",
+    "transformer.h.76.ln_attn.bias": "pytorch_model-00039-of-00041.bin",
+    "transformer.h.76.ln_attn.weight": "pytorch_model-00039-of-00041.bin",
+    "transformer.h.76.ln_mlp.bias": "pytorch_model-00039-of-00041.bin",
+    "transformer.h.76.ln_mlp.weight": "pytorch_model-00039-of-00041.bin",
+    "transformer.h.76.mlp.dense_4h_to_h.weight": "pytorch_model-00039-of-00041.bin",
+    "transformer.h.76.mlp.dense_h_to_4h.weight": "pytorch_model-00039-of-00041.bin",
+    "transformer.h.76.self_attention.dense.weight": "pytorch_model-00039-of-00041.bin",
+    "transformer.h.76.self_attention.query_key_value.weight": "pytorch_model-00039-of-00041.bin",
+    "transformer.h.77.ln_attn.bias": "pytorch_model-00040-of-00041.bin",
+    "transformer.h.77.ln_attn.weight": "pytorch_model-00040-of-00041.bin",
+    "transformer.h.77.ln_mlp.bias": "pytorch_model-00040-of-00041.bin",
+    "transformer.h.77.ln_mlp.weight": "pytorch_model-00040-of-00041.bin",
+    "transformer.h.77.mlp.dense_4h_to_h.weight": "pytorch_model-00040-of-00041.bin",
+    "transformer.h.77.mlp.dense_h_to_4h.weight": "pytorch_model-00039-of-00041.bin",
+    "transformer.h.77.self_attention.dense.weight": "pytorch_model-00039-of-00041.bin",
+    "transformer.h.77.self_attention.query_key_value.weight": "pytorch_model-00039-of-00041.bin",
+    "transformer.h.78.ln_attn.bias": "pytorch_model-00040-of-00041.bin",
+    "transformer.h.78.ln_attn.weight": "pytorch_model-00040-of-00041.bin",
+    "transformer.h.78.ln_mlp.bias": "pytorch_model-00040-of-00041.bin",
+    "transformer.h.78.ln_mlp.weight": "pytorch_model-00040-of-00041.bin",
+    "transformer.h.78.mlp.dense_4h_to_h.weight": "pytorch_model-00040-of-00041.bin",
+    "transformer.h.78.mlp.dense_h_to_4h.weight": "pytorch_model-00040-of-00041.bin",
+    "transformer.h.78.self_attention.dense.weight": "pytorch_model-00040-of-00041.bin",
+    "transformer.h.78.self_attention.query_key_value.weight": "pytorch_model-00040-of-00041.bin",
+    "transformer.h.79.ln_attn.bias": "pytorch_model-00041-of-00041.bin",
+    "transformer.h.79.ln_attn.weight": "pytorch_model-00041-of-00041.bin",
+    "transformer.h.79.ln_mlp.bias": "pytorch_model-00041-of-00041.bin",
+    "transformer.h.79.ln_mlp.weight": "pytorch_model-00041-of-00041.bin",
+    "transformer.h.79.mlp.dense_4h_to_h.weight": "pytorch_model-00041-of-00041.bin",
+    "transformer.h.79.mlp.dense_h_to_4h.weight": "pytorch_model-00040-of-00041.bin",
+    "transformer.h.79.self_attention.dense.weight": "pytorch_model-00040-of-00041.bin",
+    "transformer.h.79.self_attention.query_key_value.weight": "pytorch_model-00040-of-00041.bin",
+    "transformer.h.8.ln_attn.bias": "pytorch_model-00005-of-00041.bin",
+    "transformer.h.8.ln_attn.weight": "pytorch_model-00005-of-00041.bin",
+    "transformer.h.8.ln_mlp.bias": "pytorch_model-00005-of-00041.bin",
+    "transformer.h.8.ln_mlp.weight": "pytorch_model-00005-of-00041.bin",
+    "transformer.h.8.mlp.dense_4h_to_h.weight": "pytorch_model-00005-of-00041.bin",
+    "transformer.h.8.mlp.dense_h_to_4h.weight": "pytorch_model-00005-of-00041.bin",
+    "transformer.h.8.self_attention.dense.weight": "pytorch_model-00005-of-00041.bin",
+    "transformer.h.8.self_attention.query_key_value.weight": "pytorch_model-00005-of-00041.bin",
+    "transformer.h.9.ln_attn.bias": "pytorch_model-00006-of-00041.bin",
+    "transformer.h.9.ln_attn.weight": "pytorch_model-00006-of-00041.bin",
+    "transformer.h.9.ln_mlp.bias": "pytorch_model-00006-of-00041.bin",
+    "transformer.h.9.ln_mlp.weight": "pytorch_model-00006-of-00041.bin",
+    "transformer.h.9.mlp.dense_4h_to_h.weight": "pytorch_model-00006-of-00041.bin",
+    "transformer.h.9.mlp.dense_h_to_4h.weight": "pytorch_model-00005-of-00041.bin",
+    "transformer.h.9.self_attention.dense.weight": "pytorch_model-00005-of-00041.bin",
+    "transformer.h.9.self_attention.query_key_value.weight": "pytorch_model-00005-of-00041.bin",
+    "transformer.ln_f.bias": "pytorch_model-00041-of-00041.bin",
+    "transformer.ln_f.weight": "pytorch_model-00041-of-00041.bin",
+    "transformer.word_embeddings.weight": "pytorch_model-00001-of-00041.bin"
+  }
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+  "additional_special_tokens": [
+    ">>TITLE<<",
+    ">>ABSTRACT<<",
+    ">>INTRODUCTION<<",
+    ">>SUMMARY<<",
+    ">>COMMENT<<",
+    ">>ANSWER<<",
+    ">>QUESTION<<",
+    ">>DOMAIN<<",
+    ">>PREFIX<<",
+    ">>SUFFIX<<",
+    ">>MIDDLE<<"
+  ],
+  "eos_token": "<|endoftext|>"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "add_prefix_space": false,
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|endoftext|>",
+  "model_max_length": 2048,
+  "tokenizer_class": "PreTrainedTokenizerFast"
+}