yukiontheiceberg commited on
Commit
dfd72b0
1 Parent(s): 6f5866f

Add files using upload-large-folder tool

Browse files
added_tokens.json CHANGED
@@ -1,4 +1,7 @@
1
  {
 
 
 
2
  "<commit_after>": 32016,
3
  "<commit_before>": 32014,
4
  "<commit_msg>": 32015,
@@ -17,8 +20,11 @@
17
  "<jupyter_start>": 32009,
18
  "<jupyter_text>": 32010,
19
  "<reponame>": 32017,
20
- "<|beginofsystem|>": 32019,
21
- "<|beginofuser|>": 32020,
22
- "<|endofchat|>": 32021,
23
- "<|endofsystemprompt|>": 32018
 
 
 
24
  }
 
1
  {
2
+ "</tool_response>": 32021,
3
+ "</tool_call>": 32022,
4
+ "</tools>": 32023,
5
  "<commit_after>": 32016,
6
  "<commit_before>": 32014,
7
  "<commit_msg>": 32015,
 
20
  "<jupyter_start>": 32009,
21
  "<jupyter_text>": 32010,
22
  "<reponame>": 32017,
23
+ "<tool_response>": 32018,
24
+ "<tool_call>": 32019,
25
+ "<tools>": 32020,
26
+ "<|beginofsystem|>": 32025,
27
+ "<|beginofuser|>": 32026,
28
+ "<|endofchat|>": 32027,
29
+ "<|endofsystemprompt|>": 32024
30
  }
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "LLM360/K2-Chat",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
@@ -13,7 +13,6 @@
13
  "intermediate_size": 22016,
14
  "max_position_embeddings": 8192,
15
  "max_sequence_length": 8192,
16
- "mlp_bias": false,
17
  "model_type": "llama",
18
  "num_attention_heads": 64,
19
  "num_hidden_layers": 80,
@@ -24,8 +23,8 @@
24
  "rope_scaling": null,
25
  "rope_theta": 500000.0,
26
  "tie_word_embeddings": false,
27
- "torch_dtype": "float16",
28
- "transformers_version": "4.42.3",
29
  "use_cache": true,
30
  "vocab_size": 32032
31
  }
 
1
  {
2
+ "_name_or_path": "LLM360/K2",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
 
13
  "intermediate_size": 22016,
14
  "max_position_embeddings": 8192,
15
  "max_sequence_length": 8192,
 
16
  "model_type": "llama",
17
  "num_attention_heads": 64,
18
  "num_hidden_layers": 80,
 
23
  "rope_scaling": null,
24
  "rope_theta": 500000.0,
25
  "tie_word_embeddings": false,
26
+ "torch_dtype": "float32",
27
+ "transformers_version": "4.31.0",
28
  "use_cache": true,
29
  "vocab_size": 32032
30
  }
generation_config.json CHANGED
@@ -1,7 +1,6 @@
1
  {
2
  "_from_model_config": true,
3
- "bos_token_id": 1,
4
  "eos_token_id": 2,
5
  "pad_token_id": 0,
6
- "transformers_version": "4.42.3"
7
  }
 
1
  {
2
  "_from_model_config": true,
 
3
  "eos_token_id": 2,
4
  "pad_token_id": 0,
5
+ "transformers_version": "4.31.0"
6
  }
pytorch_model-00001-of-00027.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a585fd17236d809783282509a9014b0b79e2ae1cfade9aa72b56947052ed6d9
3
- size 4660470238
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad3d85de07d6a061c1db3c1c2572596d1e008eb87781d61d916da53861b1053f
3
+ size 9320933680
pytorch_model-00002-of-00027.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5676d9c45ee43df3b5cdfc11c912f58b8808d95610b287a01a704d2c97f9f72e
3
- size 4857111932
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50c62560ebe1bfec3b9b115aef48d3c0e849e20d7875257c23426f145d0d2ce9
3
+ size 9714216126
pytorch_model-00003-of-00027.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a3dd2cbb31406561715e19b55502d3b2e6626bdf0c0940dbe05739e96d1b65ba
3
- size 4857111932
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6405270d319c751c8931bd0ab8a626bd606a6b4f93972a83411b8c3d2c57a0cc
3
+ size 9714216126
pytorch_model-00004-of-00027.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:23a99b2ebe0a9c3a975c676135bf370955210bef71139d190b177aed5096bbd8
3
- size 4857111932
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26613094fa5702b3392db0cea4e25057a041d403d006521d2b4bd13f64aa6c9e
3
+ size 9714216190
pytorch_model-00005-of-00027.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba4d7298a56db5acb38b6e1b267aa5300b44b2095eb2a92483de4df988756921
3
- size 4857111932
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23294f55f421098841bee62a8b93dcaea51b784f2b75a882fb36863206188b3a
3
+ size 9714216190
pytorch_model-00006-of-00027.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f82c5fd719780b0838dada11fef3789fd4e3e65d7e4ba7385ed15ecd6c43763
3
- size 4857111932
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c583b276f7396a089c08abe2e31e78b4bf8ca3252a10aa856e58c1e764c7848
3
+ size 9714216190
pytorch_model-00007-of-00027.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9675937dedddf86d365fe166a369cd2cdfecd3324f7381535d92701e397f5bb1
3
- size 4857111932
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:885b901b1a9d40e4195b1958b807d9d181b3c91cba18560db6f7cc995a2ee9cb
3
+ size 9714216190
pytorch_model-00008-of-00027.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f5bb6b9cc543b659bf84cd1aa10fd13b3b41dc2cb747e750d5d372e8e897f26
3
- size 4857111932
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6dac93ed5ea2ab85c81bcf51920fa7d5cee297e8b72255d74554f131ce51867
3
+ size 9714216190
pytorch_model-00009-of-00027.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6cfcbea98a867e83ee8d9f18cf78de472dcd75e9026641c144b973ef121fbcaf
3
- size 4857111932
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24be29e727a4693ecb85785af1a5bd3ff69f5caf8c061a145459acee5253eab1
3
+ size 9714216190
pytorch_model-00010-of-00027.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:29639026ab3443e9b1fd4d4e9a3cc64779cfd2c1236c1796751c67bc12f44fb7
3
- size 4857111932
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bf1863c0a84b905b192f8db433640c3fec3386ecec25cfc1093eee30a0ccdad
3
+ size 9714216190
pytorch_model-00011-of-00027.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f0e9a1f0f8530c7bea9bccfb9bb59a7ce0ffae3bb1cbec284be4e582fb8a321
3
- size 4857111932
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3dd3451a1f8a040ff20305b1e2c8a5fccb4f4d95df92ff5f4fdfd69ab9d2d8f
3
+ size 9714216190
pytorch_model-00012-of-00027.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9cc379abb8e9489a952288609d6fe1f6f00ba90a0b44b79b429f11da7f3404dc
3
- size 4857111932
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ee258c2141f8d3cf39380d83eb68842fbfcc3695513c61cacb98035ee5af71d
3
+ size 9714216190
pytorch_model-00013-of-00027.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0065b56df7e8e65ebc20764eff6b5c0553d9b19761f48b2ebb42552a9838e438
3
- size 4857111932
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc50fce03cc8d9b6a38aff849b4561641d402441b86b3fa32a498aaa847d0324
3
+ size 9714216190
pytorch_model-00014-of-00027.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:91661a25fe6ed6b1215614ef3ccbc0ccea91ce3bee8ed167a4b4909f3c4aa822
3
- size 4857111932
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1a5063531cf56f96fbcf9d64b3b2ac959a82ee6214594cf5ad316acdf6e6766
3
+ size 9714216190
pytorch_model-00015-of-00027.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f31d226cdab9777484cdc75bb54867082feb6232809aacd7b28c1ab71a146a25
3
- size 4857111932
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dde029c616628b02b84f7ca4ba2ab63dd7a29317adfccbe693b0b03445d569df
3
+ size 9714216190
pytorch_model-00016-of-00027.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bcd66360e1185d2c0206263a0b904d2898d86de9159476ca7e4d040ae2a5306f
3
- size 4857111932
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de79433944783f21d8d66cc3bff60800ce8a7cde7b4c26e2d4ffb9e40bc9a842
3
+ size 9714216190
pytorch_model-00017-of-00027.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:73b69d3d99eab0b48307437205045cf81f2759306656e39e51beb7c38a54a745
3
- size 4857111932
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e8018f651de68885920875cd9f2e39140a2ee7139b2eeacffd3a2dcb38ea802
3
+ size 9714216190
pytorch_model-00018-of-00027.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d7564f8f626234af5d8301cb8a90bb5e8eab2e3ddccf55941abf52f224470e05
3
- size 4857111932
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efba8b9cf63e00e5db49dd60a29841b4582fabac21fbb26cf4b8335eecdd9e19
3
+ size 9714216190
pytorch_model-00019-of-00027.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1dea6b1c683c31876ea5f9f807940183d8e486b78312dfa9c478ae02b3beb897
3
- size 4857111932
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58c7464831b8a4e0215421af76c6d35b760ad7f5dbde2a4c6effc5a188f34f87
3
+ size 9714216190
pytorch_model-00020-of-00027.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b693c0031b86b1b31f5a1def7700efa2e3e9a585bd0f3d14acadea164b46722
3
- size 4857111932
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0d475e7d79ce0bdc57ec7234205ee6754591d44bfc17b67ccc008a6f9292178
3
+ size 9714216190
pytorch_model-00021-of-00027.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af5909dbd4604551efba00d5e22c918b88f5b19e0c2eceb7ddff6b4a9ae3746b
3
- size 4857111932
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfe1601ef1b0148581106885a840d9a8fb823cf11a057446776a874dd9787478
3
+ size 9714216190
pytorch_model-00022-of-00027.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21d430632dbd4a43828cae7aab66db914357430fc8f9b59c138f34af8b17c3bc
3
- size 4857111932
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8f25c0c74d21edcc1ee0d64676fb19b50e0467a5a60c29e268af51bf1312abe
3
+ size 9714216190
pytorch_model-00023-of-00027.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f16ce76b513a892f325bdb27b69a4d297f8b9c07328662eb110bb57bc1148704
3
- size 4857111932
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be6a096c8469c69c514e4ecc7aab421a7a3e58fd350a4753ceee0b37b7abbf9c
3
+ size 9714216190
pytorch_model-00024-of-00027.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df6bc4f1f2d0ec8de1d8e9f2cfce7aa3385e4a4406fe09661297c352a37da868
3
- size 4857111932
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb7c50a0bbf507184d9887c0596763a77bb879e09aecd69a0cf5ed8d1c3b998f
3
+ size 9714216190
pytorch_model-00025-of-00027.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:37d5afae9b2da032c7c138940722793af36a384d5fb632b8a52319277cb3d83e
3
- size 4857111932
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f945c50484c7bc2956cd0ae5d092728d4464f681fc35fb639781d03c2ba805b0
3
+ size 9714216190
pytorch_model-00026-of-00027.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5cc8ab0fc7e2b2751768793aa7bbf391d87b118fc929e37b357372819686f146
3
- size 4857111932
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:551d5be9b1508820fc9147f49ab992b9ad8fd946366d639b97d246e68946260c
3
+ size 9714216190
pytorch_model-00027-of-00027.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c44c2acb0c4e546addc9c11b8fa02d5fa60e24894055f2a103ed9b478ce6c7cd
3
- size 4484358522
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c5d19e2de894f58ee8e28c7dab0c97eaaa6a147acadb5209b45e2f6db60e2cd
3
+ size 8968709830
pytorch_model.bin.index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "total_size": 130572369920
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "pytorch_model-00027-of-00027.bin",
@@ -13,6 +13,7 @@
13
  "model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00027.bin",
14
  "model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00027.bin",
15
  "model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00027.bin",
 
16
  "model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00027.bin",
17
  "model.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00027.bin",
18
  "model.layers.1.mlp.down_proj.weight": "pytorch_model-00001-of-00027.bin",
@@ -22,6 +23,7 @@
22
  "model.layers.1.self_attn.k_proj.weight": "pytorch_model-00001-of-00027.bin",
23
  "model.layers.1.self_attn.o_proj.weight": "pytorch_model-00001-of-00027.bin",
24
  "model.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00027.bin",
 
25
  "model.layers.1.self_attn.v_proj.weight": "pytorch_model-00001-of-00027.bin",
26
  "model.layers.10.input_layernorm.weight": "pytorch_model-00004-of-00027.bin",
27
  "model.layers.10.mlp.down_proj.weight": "pytorch_model-00004-of-00027.bin",
@@ -31,6 +33,7 @@
31
  "model.layers.10.self_attn.k_proj.weight": "pytorch_model-00004-of-00027.bin",
32
  "model.layers.10.self_attn.o_proj.weight": "pytorch_model-00004-of-00027.bin",
33
  "model.layers.10.self_attn.q_proj.weight": "pytorch_model-00004-of-00027.bin",
 
34
  "model.layers.10.self_attn.v_proj.weight": "pytorch_model-00004-of-00027.bin",
35
  "model.layers.11.input_layernorm.weight": "pytorch_model-00005-of-00027.bin",
36
  "model.layers.11.mlp.down_proj.weight": "pytorch_model-00005-of-00027.bin",
@@ -40,6 +43,7 @@
40
  "model.layers.11.self_attn.k_proj.weight": "pytorch_model-00004-of-00027.bin",
41
  "model.layers.11.self_attn.o_proj.weight": "pytorch_model-00004-of-00027.bin",
42
  "model.layers.11.self_attn.q_proj.weight": "pytorch_model-00004-of-00027.bin",
 
43
  "model.layers.11.self_attn.v_proj.weight": "pytorch_model-00004-of-00027.bin",
44
  "model.layers.12.input_layernorm.weight": "pytorch_model-00005-of-00027.bin",
45
  "model.layers.12.mlp.down_proj.weight": "pytorch_model-00005-of-00027.bin",
@@ -49,6 +53,7 @@
49
  "model.layers.12.self_attn.k_proj.weight": "pytorch_model-00005-of-00027.bin",
50
  "model.layers.12.self_attn.o_proj.weight": "pytorch_model-00005-of-00027.bin",
51
  "model.layers.12.self_attn.q_proj.weight": "pytorch_model-00005-of-00027.bin",
 
52
  "model.layers.12.self_attn.v_proj.weight": "pytorch_model-00005-of-00027.bin",
53
  "model.layers.13.input_layernorm.weight": "pytorch_model-00005-of-00027.bin",
54
  "model.layers.13.mlp.down_proj.weight": "pytorch_model-00005-of-00027.bin",
@@ -58,6 +63,7 @@
58
  "model.layers.13.self_attn.k_proj.weight": "pytorch_model-00005-of-00027.bin",
59
  "model.layers.13.self_attn.o_proj.weight": "pytorch_model-00005-of-00027.bin",
60
  "model.layers.13.self_attn.q_proj.weight": "pytorch_model-00005-of-00027.bin",
 
61
  "model.layers.13.self_attn.v_proj.weight": "pytorch_model-00005-of-00027.bin",
62
  "model.layers.14.input_layernorm.weight": "pytorch_model-00006-of-00027.bin",
63
  "model.layers.14.mlp.down_proj.weight": "pytorch_model-00006-of-00027.bin",
@@ -67,6 +73,7 @@
67
  "model.layers.14.self_attn.k_proj.weight": "pytorch_model-00005-of-00027.bin",
68
  "model.layers.14.self_attn.o_proj.weight": "pytorch_model-00005-of-00027.bin",
69
  "model.layers.14.self_attn.q_proj.weight": "pytorch_model-00005-of-00027.bin",
 
70
  "model.layers.14.self_attn.v_proj.weight": "pytorch_model-00005-of-00027.bin",
71
  "model.layers.15.input_layernorm.weight": "pytorch_model-00006-of-00027.bin",
72
  "model.layers.15.mlp.down_proj.weight": "pytorch_model-00006-of-00027.bin",
@@ -76,6 +83,7 @@
76
  "model.layers.15.self_attn.k_proj.weight": "pytorch_model-00006-of-00027.bin",
77
  "model.layers.15.self_attn.o_proj.weight": "pytorch_model-00006-of-00027.bin",
78
  "model.layers.15.self_attn.q_proj.weight": "pytorch_model-00006-of-00027.bin",
 
79
  "model.layers.15.self_attn.v_proj.weight": "pytorch_model-00006-of-00027.bin",
80
  "model.layers.16.input_layernorm.weight": "pytorch_model-00006-of-00027.bin",
81
  "model.layers.16.mlp.down_proj.weight": "pytorch_model-00006-of-00027.bin",
@@ -85,6 +93,7 @@
85
  "model.layers.16.self_attn.k_proj.weight": "pytorch_model-00006-of-00027.bin",
86
  "model.layers.16.self_attn.o_proj.weight": "pytorch_model-00006-of-00027.bin",
87
  "model.layers.16.self_attn.q_proj.weight": "pytorch_model-00006-of-00027.bin",
 
88
  "model.layers.16.self_attn.v_proj.weight": "pytorch_model-00006-of-00027.bin",
89
  "model.layers.17.input_layernorm.weight": "pytorch_model-00007-of-00027.bin",
90
  "model.layers.17.mlp.down_proj.weight": "pytorch_model-00007-of-00027.bin",
@@ -94,6 +103,7 @@
94
  "model.layers.17.self_attn.k_proj.weight": "pytorch_model-00006-of-00027.bin",
95
  "model.layers.17.self_attn.o_proj.weight": "pytorch_model-00006-of-00027.bin",
96
  "model.layers.17.self_attn.q_proj.weight": "pytorch_model-00006-of-00027.bin",
 
97
  "model.layers.17.self_attn.v_proj.weight": "pytorch_model-00006-of-00027.bin",
98
  "model.layers.18.input_layernorm.weight": "pytorch_model-00007-of-00027.bin",
99
  "model.layers.18.mlp.down_proj.weight": "pytorch_model-00007-of-00027.bin",
@@ -103,6 +113,7 @@
103
  "model.layers.18.self_attn.k_proj.weight": "pytorch_model-00007-of-00027.bin",
104
  "model.layers.18.self_attn.o_proj.weight": "pytorch_model-00007-of-00027.bin",
105
  "model.layers.18.self_attn.q_proj.weight": "pytorch_model-00007-of-00027.bin",
 
106
  "model.layers.18.self_attn.v_proj.weight": "pytorch_model-00007-of-00027.bin",
107
  "model.layers.19.input_layernorm.weight": "pytorch_model-00007-of-00027.bin",
108
  "model.layers.19.mlp.down_proj.weight": "pytorch_model-00007-of-00027.bin",
@@ -112,6 +123,7 @@
112
  "model.layers.19.self_attn.k_proj.weight": "pytorch_model-00007-of-00027.bin",
113
  "model.layers.19.self_attn.o_proj.weight": "pytorch_model-00007-of-00027.bin",
114
  "model.layers.19.self_attn.q_proj.weight": "pytorch_model-00007-of-00027.bin",
 
115
  "model.layers.19.self_attn.v_proj.weight": "pytorch_model-00007-of-00027.bin",
116
  "model.layers.2.input_layernorm.weight": "pytorch_model-00002-of-00027.bin",
117
  "model.layers.2.mlp.down_proj.weight": "pytorch_model-00002-of-00027.bin",
@@ -121,6 +133,7 @@
121
  "model.layers.2.self_attn.k_proj.weight": "pytorch_model-00001-of-00027.bin",
122
  "model.layers.2.self_attn.o_proj.weight": "pytorch_model-00001-of-00027.bin",
123
  "model.layers.2.self_attn.q_proj.weight": "pytorch_model-00001-of-00027.bin",
 
124
  "model.layers.2.self_attn.v_proj.weight": "pytorch_model-00001-of-00027.bin",
125
  "model.layers.20.input_layernorm.weight": "pytorch_model-00008-of-00027.bin",
126
  "model.layers.20.mlp.down_proj.weight": "pytorch_model-00008-of-00027.bin",
@@ -130,6 +143,7 @@
130
  "model.layers.20.self_attn.k_proj.weight": "pytorch_model-00007-of-00027.bin",
131
  "model.layers.20.self_attn.o_proj.weight": "pytorch_model-00007-of-00027.bin",
132
  "model.layers.20.self_attn.q_proj.weight": "pytorch_model-00007-of-00027.bin",
 
133
  "model.layers.20.self_attn.v_proj.weight": "pytorch_model-00007-of-00027.bin",
134
  "model.layers.21.input_layernorm.weight": "pytorch_model-00008-of-00027.bin",
135
  "model.layers.21.mlp.down_proj.weight": "pytorch_model-00008-of-00027.bin",
@@ -139,6 +153,7 @@
139
  "model.layers.21.self_attn.k_proj.weight": "pytorch_model-00008-of-00027.bin",
140
  "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00008-of-00027.bin",
141
  "model.layers.21.self_attn.q_proj.weight": "pytorch_model-00008-of-00027.bin",
 
142
  "model.layers.21.self_attn.v_proj.weight": "pytorch_model-00008-of-00027.bin",
143
  "model.layers.22.input_layernorm.weight": "pytorch_model-00008-of-00027.bin",
144
  "model.layers.22.mlp.down_proj.weight": "pytorch_model-00008-of-00027.bin",
@@ -148,6 +163,7 @@
148
  "model.layers.22.self_attn.k_proj.weight": "pytorch_model-00008-of-00027.bin",
149
  "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00008-of-00027.bin",
150
  "model.layers.22.self_attn.q_proj.weight": "pytorch_model-00008-of-00027.bin",
 
151
  "model.layers.22.self_attn.v_proj.weight": "pytorch_model-00008-of-00027.bin",
152
  "model.layers.23.input_layernorm.weight": "pytorch_model-00009-of-00027.bin",
153
  "model.layers.23.mlp.down_proj.weight": "pytorch_model-00009-of-00027.bin",
@@ -157,6 +173,7 @@
157
  "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00008-of-00027.bin",
158
  "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00008-of-00027.bin",
159
  "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00008-of-00027.bin",
 
160
  "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00008-of-00027.bin",
161
  "model.layers.24.input_layernorm.weight": "pytorch_model-00009-of-00027.bin",
162
  "model.layers.24.mlp.down_proj.weight": "pytorch_model-00009-of-00027.bin",
@@ -166,6 +183,7 @@
166
  "model.layers.24.self_attn.k_proj.weight": "pytorch_model-00009-of-00027.bin",
167
  "model.layers.24.self_attn.o_proj.weight": "pytorch_model-00009-of-00027.bin",
168
  "model.layers.24.self_attn.q_proj.weight": "pytorch_model-00009-of-00027.bin",
 
169
  "model.layers.24.self_attn.v_proj.weight": "pytorch_model-00009-of-00027.bin",
170
  "model.layers.25.input_layernorm.weight": "pytorch_model-00009-of-00027.bin",
171
  "model.layers.25.mlp.down_proj.weight": "pytorch_model-00009-of-00027.bin",
@@ -175,6 +193,7 @@
175
  "model.layers.25.self_attn.k_proj.weight": "pytorch_model-00009-of-00027.bin",
176
  "model.layers.25.self_attn.o_proj.weight": "pytorch_model-00009-of-00027.bin",
177
  "model.layers.25.self_attn.q_proj.weight": "pytorch_model-00009-of-00027.bin",
 
178
  "model.layers.25.self_attn.v_proj.weight": "pytorch_model-00009-of-00027.bin",
179
  "model.layers.26.input_layernorm.weight": "pytorch_model-00010-of-00027.bin",
180
  "model.layers.26.mlp.down_proj.weight": "pytorch_model-00010-of-00027.bin",
@@ -184,6 +203,7 @@
184
  "model.layers.26.self_attn.k_proj.weight": "pytorch_model-00009-of-00027.bin",
185
  "model.layers.26.self_attn.o_proj.weight": "pytorch_model-00009-of-00027.bin",
186
  "model.layers.26.self_attn.q_proj.weight": "pytorch_model-00009-of-00027.bin",
 
187
  "model.layers.26.self_attn.v_proj.weight": "pytorch_model-00009-of-00027.bin",
188
  "model.layers.27.input_layernorm.weight": "pytorch_model-00010-of-00027.bin",
189
  "model.layers.27.mlp.down_proj.weight": "pytorch_model-00010-of-00027.bin",
@@ -193,6 +213,7 @@
193
  "model.layers.27.self_attn.k_proj.weight": "pytorch_model-00010-of-00027.bin",
194
  "model.layers.27.self_attn.o_proj.weight": "pytorch_model-00010-of-00027.bin",
195
  "model.layers.27.self_attn.q_proj.weight": "pytorch_model-00010-of-00027.bin",
 
196
  "model.layers.27.self_attn.v_proj.weight": "pytorch_model-00010-of-00027.bin",
197
  "model.layers.28.input_layernorm.weight": "pytorch_model-00010-of-00027.bin",
198
  "model.layers.28.mlp.down_proj.weight": "pytorch_model-00010-of-00027.bin",
@@ -202,6 +223,7 @@
202
  "model.layers.28.self_attn.k_proj.weight": "pytorch_model-00010-of-00027.bin",
203
  "model.layers.28.self_attn.o_proj.weight": "pytorch_model-00010-of-00027.bin",
204
  "model.layers.28.self_attn.q_proj.weight": "pytorch_model-00010-of-00027.bin",
 
205
  "model.layers.28.self_attn.v_proj.weight": "pytorch_model-00010-of-00027.bin",
206
  "model.layers.29.input_layernorm.weight": "pytorch_model-00011-of-00027.bin",
207
  "model.layers.29.mlp.down_proj.weight": "pytorch_model-00011-of-00027.bin",
@@ -211,6 +233,7 @@
211
  "model.layers.29.self_attn.k_proj.weight": "pytorch_model-00010-of-00027.bin",
212
  "model.layers.29.self_attn.o_proj.weight": "pytorch_model-00010-of-00027.bin",
213
  "model.layers.29.self_attn.q_proj.weight": "pytorch_model-00010-of-00027.bin",
 
214
  "model.layers.29.self_attn.v_proj.weight": "pytorch_model-00010-of-00027.bin",
215
  "model.layers.3.input_layernorm.weight": "pytorch_model-00002-of-00027.bin",
216
  "model.layers.3.mlp.down_proj.weight": "pytorch_model-00002-of-00027.bin",
@@ -220,6 +243,7 @@
220
  "model.layers.3.self_attn.k_proj.weight": "pytorch_model-00002-of-00027.bin",
221
  "model.layers.3.self_attn.o_proj.weight": "pytorch_model-00002-of-00027.bin",
222
  "model.layers.3.self_attn.q_proj.weight": "pytorch_model-00002-of-00027.bin",
 
223
  "model.layers.3.self_attn.v_proj.weight": "pytorch_model-00002-of-00027.bin",
224
  "model.layers.30.input_layernorm.weight": "pytorch_model-00011-of-00027.bin",
225
  "model.layers.30.mlp.down_proj.weight": "pytorch_model-00011-of-00027.bin",
@@ -229,6 +253,7 @@
229
  "model.layers.30.self_attn.k_proj.weight": "pytorch_model-00011-of-00027.bin",
230
  "model.layers.30.self_attn.o_proj.weight": "pytorch_model-00011-of-00027.bin",
231
  "model.layers.30.self_attn.q_proj.weight": "pytorch_model-00011-of-00027.bin",
 
232
  "model.layers.30.self_attn.v_proj.weight": "pytorch_model-00011-of-00027.bin",
233
  "model.layers.31.input_layernorm.weight": "pytorch_model-00011-of-00027.bin",
234
  "model.layers.31.mlp.down_proj.weight": "pytorch_model-00011-of-00027.bin",
@@ -238,6 +263,7 @@
238
  "model.layers.31.self_attn.k_proj.weight": "pytorch_model-00011-of-00027.bin",
239
  "model.layers.31.self_attn.o_proj.weight": "pytorch_model-00011-of-00027.bin",
240
  "model.layers.31.self_attn.q_proj.weight": "pytorch_model-00011-of-00027.bin",
 
241
  "model.layers.31.self_attn.v_proj.weight": "pytorch_model-00011-of-00027.bin",
242
  "model.layers.32.input_layernorm.weight": "pytorch_model-00012-of-00027.bin",
243
  "model.layers.32.mlp.down_proj.weight": "pytorch_model-00012-of-00027.bin",
@@ -247,6 +273,7 @@
247
  "model.layers.32.self_attn.k_proj.weight": "pytorch_model-00011-of-00027.bin",
248
  "model.layers.32.self_attn.o_proj.weight": "pytorch_model-00011-of-00027.bin",
249
  "model.layers.32.self_attn.q_proj.weight": "pytorch_model-00011-of-00027.bin",
 
250
  "model.layers.32.self_attn.v_proj.weight": "pytorch_model-00011-of-00027.bin",
251
  "model.layers.33.input_layernorm.weight": "pytorch_model-00012-of-00027.bin",
252
  "model.layers.33.mlp.down_proj.weight": "pytorch_model-00012-of-00027.bin",
@@ -256,6 +283,7 @@
256
  "model.layers.33.self_attn.k_proj.weight": "pytorch_model-00012-of-00027.bin",
257
  "model.layers.33.self_attn.o_proj.weight": "pytorch_model-00012-of-00027.bin",
258
  "model.layers.33.self_attn.q_proj.weight": "pytorch_model-00012-of-00027.bin",
 
259
  "model.layers.33.self_attn.v_proj.weight": "pytorch_model-00012-of-00027.bin",
260
  "model.layers.34.input_layernorm.weight": "pytorch_model-00012-of-00027.bin",
261
  "model.layers.34.mlp.down_proj.weight": "pytorch_model-00012-of-00027.bin",
@@ -265,6 +293,7 @@
265
  "model.layers.34.self_attn.k_proj.weight": "pytorch_model-00012-of-00027.bin",
266
  "model.layers.34.self_attn.o_proj.weight": "pytorch_model-00012-of-00027.bin",
267
  "model.layers.34.self_attn.q_proj.weight": "pytorch_model-00012-of-00027.bin",
 
268
  "model.layers.34.self_attn.v_proj.weight": "pytorch_model-00012-of-00027.bin",
269
  "model.layers.35.input_layernorm.weight": "pytorch_model-00013-of-00027.bin",
270
  "model.layers.35.mlp.down_proj.weight": "pytorch_model-00013-of-00027.bin",
@@ -274,6 +303,7 @@
274
  "model.layers.35.self_attn.k_proj.weight": "pytorch_model-00012-of-00027.bin",
275
  "model.layers.35.self_attn.o_proj.weight": "pytorch_model-00012-of-00027.bin",
276
  "model.layers.35.self_attn.q_proj.weight": "pytorch_model-00012-of-00027.bin",
 
277
  "model.layers.35.self_attn.v_proj.weight": "pytorch_model-00012-of-00027.bin",
278
  "model.layers.36.input_layernorm.weight": "pytorch_model-00013-of-00027.bin",
279
  "model.layers.36.mlp.down_proj.weight": "pytorch_model-00013-of-00027.bin",
@@ -283,6 +313,7 @@
283
  "model.layers.36.self_attn.k_proj.weight": "pytorch_model-00013-of-00027.bin",
284
  "model.layers.36.self_attn.o_proj.weight": "pytorch_model-00013-of-00027.bin",
285
  "model.layers.36.self_attn.q_proj.weight": "pytorch_model-00013-of-00027.bin",
 
286
  "model.layers.36.self_attn.v_proj.weight": "pytorch_model-00013-of-00027.bin",
287
  "model.layers.37.input_layernorm.weight": "pytorch_model-00013-of-00027.bin",
288
  "model.layers.37.mlp.down_proj.weight": "pytorch_model-00013-of-00027.bin",
@@ -292,6 +323,7 @@
292
  "model.layers.37.self_attn.k_proj.weight": "pytorch_model-00013-of-00027.bin",
293
  "model.layers.37.self_attn.o_proj.weight": "pytorch_model-00013-of-00027.bin",
294
  "model.layers.37.self_attn.q_proj.weight": "pytorch_model-00013-of-00027.bin",
 
295
  "model.layers.37.self_attn.v_proj.weight": "pytorch_model-00013-of-00027.bin",
296
  "model.layers.38.input_layernorm.weight": "pytorch_model-00014-of-00027.bin",
297
  "model.layers.38.mlp.down_proj.weight": "pytorch_model-00014-of-00027.bin",
@@ -301,6 +333,7 @@
301
  "model.layers.38.self_attn.k_proj.weight": "pytorch_model-00013-of-00027.bin",
302
  "model.layers.38.self_attn.o_proj.weight": "pytorch_model-00013-of-00027.bin",
303
  "model.layers.38.self_attn.q_proj.weight": "pytorch_model-00013-of-00027.bin",
 
304
  "model.layers.38.self_attn.v_proj.weight": "pytorch_model-00013-of-00027.bin",
305
  "model.layers.39.input_layernorm.weight": "pytorch_model-00014-of-00027.bin",
306
  "model.layers.39.mlp.down_proj.weight": "pytorch_model-00014-of-00027.bin",
@@ -310,6 +343,7 @@
310
  "model.layers.39.self_attn.k_proj.weight": "pytorch_model-00014-of-00027.bin",
311
  "model.layers.39.self_attn.o_proj.weight": "pytorch_model-00014-of-00027.bin",
312
  "model.layers.39.self_attn.q_proj.weight": "pytorch_model-00014-of-00027.bin",
 
313
  "model.layers.39.self_attn.v_proj.weight": "pytorch_model-00014-of-00027.bin",
314
  "model.layers.4.input_layernorm.weight": "pytorch_model-00002-of-00027.bin",
315
  "model.layers.4.mlp.down_proj.weight": "pytorch_model-00002-of-00027.bin",
@@ -319,6 +353,7 @@
319
  "model.layers.4.self_attn.k_proj.weight": "pytorch_model-00002-of-00027.bin",
320
  "model.layers.4.self_attn.o_proj.weight": "pytorch_model-00002-of-00027.bin",
321
  "model.layers.4.self_attn.q_proj.weight": "pytorch_model-00002-of-00027.bin",
 
322
  "model.layers.4.self_attn.v_proj.weight": "pytorch_model-00002-of-00027.bin",
323
  "model.layers.40.input_layernorm.weight": "pytorch_model-00014-of-00027.bin",
324
  "model.layers.40.mlp.down_proj.weight": "pytorch_model-00014-of-00027.bin",
@@ -328,6 +363,7 @@
328
  "model.layers.40.self_attn.k_proj.weight": "pytorch_model-00014-of-00027.bin",
329
  "model.layers.40.self_attn.o_proj.weight": "pytorch_model-00014-of-00027.bin",
330
  "model.layers.40.self_attn.q_proj.weight": "pytorch_model-00014-of-00027.bin",
 
331
  "model.layers.40.self_attn.v_proj.weight": "pytorch_model-00014-of-00027.bin",
332
  "model.layers.41.input_layernorm.weight": "pytorch_model-00015-of-00027.bin",
333
  "model.layers.41.mlp.down_proj.weight": "pytorch_model-00015-of-00027.bin",
@@ -337,6 +373,7 @@
337
  "model.layers.41.self_attn.k_proj.weight": "pytorch_model-00014-of-00027.bin",
338
  "model.layers.41.self_attn.o_proj.weight": "pytorch_model-00014-of-00027.bin",
339
  "model.layers.41.self_attn.q_proj.weight": "pytorch_model-00014-of-00027.bin",
 
340
  "model.layers.41.self_attn.v_proj.weight": "pytorch_model-00014-of-00027.bin",
341
  "model.layers.42.input_layernorm.weight": "pytorch_model-00015-of-00027.bin",
342
  "model.layers.42.mlp.down_proj.weight": "pytorch_model-00015-of-00027.bin",
@@ -346,6 +383,7 @@
346
  "model.layers.42.self_attn.k_proj.weight": "pytorch_model-00015-of-00027.bin",
347
  "model.layers.42.self_attn.o_proj.weight": "pytorch_model-00015-of-00027.bin",
348
  "model.layers.42.self_attn.q_proj.weight": "pytorch_model-00015-of-00027.bin",
 
349
  "model.layers.42.self_attn.v_proj.weight": "pytorch_model-00015-of-00027.bin",
350
  "model.layers.43.input_layernorm.weight": "pytorch_model-00015-of-00027.bin",
351
  "model.layers.43.mlp.down_proj.weight": "pytorch_model-00015-of-00027.bin",
@@ -355,6 +393,7 @@
355
  "model.layers.43.self_attn.k_proj.weight": "pytorch_model-00015-of-00027.bin",
356
  "model.layers.43.self_attn.o_proj.weight": "pytorch_model-00015-of-00027.bin",
357
  "model.layers.43.self_attn.q_proj.weight": "pytorch_model-00015-of-00027.bin",
 
358
  "model.layers.43.self_attn.v_proj.weight": "pytorch_model-00015-of-00027.bin",
359
  "model.layers.44.input_layernorm.weight": "pytorch_model-00016-of-00027.bin",
360
  "model.layers.44.mlp.down_proj.weight": "pytorch_model-00016-of-00027.bin",
@@ -364,6 +403,7 @@
364
  "model.layers.44.self_attn.k_proj.weight": "pytorch_model-00015-of-00027.bin",
365
  "model.layers.44.self_attn.o_proj.weight": "pytorch_model-00015-of-00027.bin",
366
  "model.layers.44.self_attn.q_proj.weight": "pytorch_model-00015-of-00027.bin",
 
367
  "model.layers.44.self_attn.v_proj.weight": "pytorch_model-00015-of-00027.bin",
368
  "model.layers.45.input_layernorm.weight": "pytorch_model-00016-of-00027.bin",
369
  "model.layers.45.mlp.down_proj.weight": "pytorch_model-00016-of-00027.bin",
@@ -373,6 +413,7 @@
373
  "model.layers.45.self_attn.k_proj.weight": "pytorch_model-00016-of-00027.bin",
374
  "model.layers.45.self_attn.o_proj.weight": "pytorch_model-00016-of-00027.bin",
375
  "model.layers.45.self_attn.q_proj.weight": "pytorch_model-00016-of-00027.bin",
 
376
  "model.layers.45.self_attn.v_proj.weight": "pytorch_model-00016-of-00027.bin",
377
  "model.layers.46.input_layernorm.weight": "pytorch_model-00016-of-00027.bin",
378
  "model.layers.46.mlp.down_proj.weight": "pytorch_model-00016-of-00027.bin",
@@ -382,6 +423,7 @@
382
  "model.layers.46.self_attn.k_proj.weight": "pytorch_model-00016-of-00027.bin",
383
  "model.layers.46.self_attn.o_proj.weight": "pytorch_model-00016-of-00027.bin",
384
  "model.layers.46.self_attn.q_proj.weight": "pytorch_model-00016-of-00027.bin",
 
385
  "model.layers.46.self_attn.v_proj.weight": "pytorch_model-00016-of-00027.bin",
386
  "model.layers.47.input_layernorm.weight": "pytorch_model-00017-of-00027.bin",
387
  "model.layers.47.mlp.down_proj.weight": "pytorch_model-00017-of-00027.bin",
@@ -391,6 +433,7 @@
391
  "model.layers.47.self_attn.k_proj.weight": "pytorch_model-00016-of-00027.bin",
392
  "model.layers.47.self_attn.o_proj.weight": "pytorch_model-00016-of-00027.bin",
393
  "model.layers.47.self_attn.q_proj.weight": "pytorch_model-00016-of-00027.bin",
 
394
  "model.layers.47.self_attn.v_proj.weight": "pytorch_model-00016-of-00027.bin",
395
  "model.layers.48.input_layernorm.weight": "pytorch_model-00017-of-00027.bin",
396
  "model.layers.48.mlp.down_proj.weight": "pytorch_model-00017-of-00027.bin",
@@ -400,6 +443,7 @@
400
  "model.layers.48.self_attn.k_proj.weight": "pytorch_model-00017-of-00027.bin",
401
  "model.layers.48.self_attn.o_proj.weight": "pytorch_model-00017-of-00027.bin",
402
  "model.layers.48.self_attn.q_proj.weight": "pytorch_model-00017-of-00027.bin",
 
403
  "model.layers.48.self_attn.v_proj.weight": "pytorch_model-00017-of-00027.bin",
404
  "model.layers.49.input_layernorm.weight": "pytorch_model-00017-of-00027.bin",
405
  "model.layers.49.mlp.down_proj.weight": "pytorch_model-00017-of-00027.bin",
@@ -409,6 +453,7 @@
409
  "model.layers.49.self_attn.k_proj.weight": "pytorch_model-00017-of-00027.bin",
410
  "model.layers.49.self_attn.o_proj.weight": "pytorch_model-00017-of-00027.bin",
411
  "model.layers.49.self_attn.q_proj.weight": "pytorch_model-00017-of-00027.bin",
 
412
  "model.layers.49.self_attn.v_proj.weight": "pytorch_model-00017-of-00027.bin",
413
  "model.layers.5.input_layernorm.weight": "pytorch_model-00003-of-00027.bin",
414
  "model.layers.5.mlp.down_proj.weight": "pytorch_model-00003-of-00027.bin",
@@ -418,6 +463,7 @@
418
  "model.layers.5.self_attn.k_proj.weight": "pytorch_model-00002-of-00027.bin",
419
  "model.layers.5.self_attn.o_proj.weight": "pytorch_model-00002-of-00027.bin",
420
  "model.layers.5.self_attn.q_proj.weight": "pytorch_model-00002-of-00027.bin",
 
421
  "model.layers.5.self_attn.v_proj.weight": "pytorch_model-00002-of-00027.bin",
422
  "model.layers.50.input_layernorm.weight": "pytorch_model-00018-of-00027.bin",
423
  "model.layers.50.mlp.down_proj.weight": "pytorch_model-00018-of-00027.bin",
@@ -427,6 +473,7 @@
427
  "model.layers.50.self_attn.k_proj.weight": "pytorch_model-00017-of-00027.bin",
428
  "model.layers.50.self_attn.o_proj.weight": "pytorch_model-00017-of-00027.bin",
429
  "model.layers.50.self_attn.q_proj.weight": "pytorch_model-00017-of-00027.bin",
 
430
  "model.layers.50.self_attn.v_proj.weight": "pytorch_model-00017-of-00027.bin",
431
  "model.layers.51.input_layernorm.weight": "pytorch_model-00018-of-00027.bin",
432
  "model.layers.51.mlp.down_proj.weight": "pytorch_model-00018-of-00027.bin",
@@ -436,6 +483,7 @@
436
  "model.layers.51.self_attn.k_proj.weight": "pytorch_model-00018-of-00027.bin",
437
  "model.layers.51.self_attn.o_proj.weight": "pytorch_model-00018-of-00027.bin",
438
  "model.layers.51.self_attn.q_proj.weight": "pytorch_model-00018-of-00027.bin",
 
439
  "model.layers.51.self_attn.v_proj.weight": "pytorch_model-00018-of-00027.bin",
440
  "model.layers.52.input_layernorm.weight": "pytorch_model-00018-of-00027.bin",
441
  "model.layers.52.mlp.down_proj.weight": "pytorch_model-00018-of-00027.bin",
@@ -445,6 +493,7 @@
445
  "model.layers.52.self_attn.k_proj.weight": "pytorch_model-00018-of-00027.bin",
446
  "model.layers.52.self_attn.o_proj.weight": "pytorch_model-00018-of-00027.bin",
447
  "model.layers.52.self_attn.q_proj.weight": "pytorch_model-00018-of-00027.bin",
 
448
  "model.layers.52.self_attn.v_proj.weight": "pytorch_model-00018-of-00027.bin",
449
  "model.layers.53.input_layernorm.weight": "pytorch_model-00019-of-00027.bin",
450
  "model.layers.53.mlp.down_proj.weight": "pytorch_model-00019-of-00027.bin",
@@ -454,6 +503,7 @@
454
  "model.layers.53.self_attn.k_proj.weight": "pytorch_model-00018-of-00027.bin",
455
  "model.layers.53.self_attn.o_proj.weight": "pytorch_model-00018-of-00027.bin",
456
  "model.layers.53.self_attn.q_proj.weight": "pytorch_model-00018-of-00027.bin",
 
457
  "model.layers.53.self_attn.v_proj.weight": "pytorch_model-00018-of-00027.bin",
458
  "model.layers.54.input_layernorm.weight": "pytorch_model-00019-of-00027.bin",
459
  "model.layers.54.mlp.down_proj.weight": "pytorch_model-00019-of-00027.bin",
@@ -463,6 +513,7 @@
463
  "model.layers.54.self_attn.k_proj.weight": "pytorch_model-00019-of-00027.bin",
464
  "model.layers.54.self_attn.o_proj.weight": "pytorch_model-00019-of-00027.bin",
465
  "model.layers.54.self_attn.q_proj.weight": "pytorch_model-00019-of-00027.bin",
 
466
  "model.layers.54.self_attn.v_proj.weight": "pytorch_model-00019-of-00027.bin",
467
  "model.layers.55.input_layernorm.weight": "pytorch_model-00019-of-00027.bin",
468
  "model.layers.55.mlp.down_proj.weight": "pytorch_model-00019-of-00027.bin",
@@ -472,6 +523,7 @@
472
  "model.layers.55.self_attn.k_proj.weight": "pytorch_model-00019-of-00027.bin",
473
  "model.layers.55.self_attn.o_proj.weight": "pytorch_model-00019-of-00027.bin",
474
  "model.layers.55.self_attn.q_proj.weight": "pytorch_model-00019-of-00027.bin",
 
475
  "model.layers.55.self_attn.v_proj.weight": "pytorch_model-00019-of-00027.bin",
476
  "model.layers.56.input_layernorm.weight": "pytorch_model-00020-of-00027.bin",
477
  "model.layers.56.mlp.down_proj.weight": "pytorch_model-00020-of-00027.bin",
@@ -481,6 +533,7 @@
481
  "model.layers.56.self_attn.k_proj.weight": "pytorch_model-00019-of-00027.bin",
482
  "model.layers.56.self_attn.o_proj.weight": "pytorch_model-00019-of-00027.bin",
483
  "model.layers.56.self_attn.q_proj.weight": "pytorch_model-00019-of-00027.bin",
 
484
  "model.layers.56.self_attn.v_proj.weight": "pytorch_model-00019-of-00027.bin",
485
  "model.layers.57.input_layernorm.weight": "pytorch_model-00020-of-00027.bin",
486
  "model.layers.57.mlp.down_proj.weight": "pytorch_model-00020-of-00027.bin",
@@ -490,6 +543,7 @@
490
  "model.layers.57.self_attn.k_proj.weight": "pytorch_model-00020-of-00027.bin",
491
  "model.layers.57.self_attn.o_proj.weight": "pytorch_model-00020-of-00027.bin",
492
  "model.layers.57.self_attn.q_proj.weight": "pytorch_model-00020-of-00027.bin",
 
493
  "model.layers.57.self_attn.v_proj.weight": "pytorch_model-00020-of-00027.bin",
494
  "model.layers.58.input_layernorm.weight": "pytorch_model-00020-of-00027.bin",
495
  "model.layers.58.mlp.down_proj.weight": "pytorch_model-00020-of-00027.bin",
@@ -499,6 +553,7 @@
499
  "model.layers.58.self_attn.k_proj.weight": "pytorch_model-00020-of-00027.bin",
500
  "model.layers.58.self_attn.o_proj.weight": "pytorch_model-00020-of-00027.bin",
501
  "model.layers.58.self_attn.q_proj.weight": "pytorch_model-00020-of-00027.bin",
 
502
  "model.layers.58.self_attn.v_proj.weight": "pytorch_model-00020-of-00027.bin",
503
  "model.layers.59.input_layernorm.weight": "pytorch_model-00021-of-00027.bin",
504
  "model.layers.59.mlp.down_proj.weight": "pytorch_model-00021-of-00027.bin",
@@ -508,6 +563,7 @@
508
  "model.layers.59.self_attn.k_proj.weight": "pytorch_model-00020-of-00027.bin",
509
  "model.layers.59.self_attn.o_proj.weight": "pytorch_model-00020-of-00027.bin",
510
  "model.layers.59.self_attn.q_proj.weight": "pytorch_model-00020-of-00027.bin",
 
511
  "model.layers.59.self_attn.v_proj.weight": "pytorch_model-00020-of-00027.bin",
512
  "model.layers.6.input_layernorm.weight": "pytorch_model-00003-of-00027.bin",
513
  "model.layers.6.mlp.down_proj.weight": "pytorch_model-00003-of-00027.bin",
@@ -517,6 +573,7 @@
517
  "model.layers.6.self_attn.k_proj.weight": "pytorch_model-00003-of-00027.bin",
518
  "model.layers.6.self_attn.o_proj.weight": "pytorch_model-00003-of-00027.bin",
519
  "model.layers.6.self_attn.q_proj.weight": "pytorch_model-00003-of-00027.bin",
 
520
  "model.layers.6.self_attn.v_proj.weight": "pytorch_model-00003-of-00027.bin",
521
  "model.layers.60.input_layernorm.weight": "pytorch_model-00021-of-00027.bin",
522
  "model.layers.60.mlp.down_proj.weight": "pytorch_model-00021-of-00027.bin",
@@ -526,6 +583,7 @@
526
  "model.layers.60.self_attn.k_proj.weight": "pytorch_model-00021-of-00027.bin",
527
  "model.layers.60.self_attn.o_proj.weight": "pytorch_model-00021-of-00027.bin",
528
  "model.layers.60.self_attn.q_proj.weight": "pytorch_model-00021-of-00027.bin",
 
529
  "model.layers.60.self_attn.v_proj.weight": "pytorch_model-00021-of-00027.bin",
530
  "model.layers.61.input_layernorm.weight": "pytorch_model-00021-of-00027.bin",
531
  "model.layers.61.mlp.down_proj.weight": "pytorch_model-00021-of-00027.bin",
@@ -535,6 +593,7 @@
535
  "model.layers.61.self_attn.k_proj.weight": "pytorch_model-00021-of-00027.bin",
536
  "model.layers.61.self_attn.o_proj.weight": "pytorch_model-00021-of-00027.bin",
537
  "model.layers.61.self_attn.q_proj.weight": "pytorch_model-00021-of-00027.bin",
 
538
  "model.layers.61.self_attn.v_proj.weight": "pytorch_model-00021-of-00027.bin",
539
  "model.layers.62.input_layernorm.weight": "pytorch_model-00022-of-00027.bin",
540
  "model.layers.62.mlp.down_proj.weight": "pytorch_model-00022-of-00027.bin",
@@ -544,6 +603,7 @@
544
  "model.layers.62.self_attn.k_proj.weight": "pytorch_model-00021-of-00027.bin",
545
  "model.layers.62.self_attn.o_proj.weight": "pytorch_model-00021-of-00027.bin",
546
  "model.layers.62.self_attn.q_proj.weight": "pytorch_model-00021-of-00027.bin",
 
547
  "model.layers.62.self_attn.v_proj.weight": "pytorch_model-00021-of-00027.bin",
548
  "model.layers.63.input_layernorm.weight": "pytorch_model-00022-of-00027.bin",
549
  "model.layers.63.mlp.down_proj.weight": "pytorch_model-00022-of-00027.bin",
@@ -553,6 +613,7 @@
553
  "model.layers.63.self_attn.k_proj.weight": "pytorch_model-00022-of-00027.bin",
554
  "model.layers.63.self_attn.o_proj.weight": "pytorch_model-00022-of-00027.bin",
555
  "model.layers.63.self_attn.q_proj.weight": "pytorch_model-00022-of-00027.bin",
 
556
  "model.layers.63.self_attn.v_proj.weight": "pytorch_model-00022-of-00027.bin",
557
  "model.layers.64.input_layernorm.weight": "pytorch_model-00022-of-00027.bin",
558
  "model.layers.64.mlp.down_proj.weight": "pytorch_model-00022-of-00027.bin",
@@ -562,6 +623,7 @@
562
  "model.layers.64.self_attn.k_proj.weight": "pytorch_model-00022-of-00027.bin",
563
  "model.layers.64.self_attn.o_proj.weight": "pytorch_model-00022-of-00027.bin",
564
  "model.layers.64.self_attn.q_proj.weight": "pytorch_model-00022-of-00027.bin",
 
565
  "model.layers.64.self_attn.v_proj.weight": "pytorch_model-00022-of-00027.bin",
566
  "model.layers.65.input_layernorm.weight": "pytorch_model-00023-of-00027.bin",
567
  "model.layers.65.mlp.down_proj.weight": "pytorch_model-00023-of-00027.bin",
@@ -571,6 +633,7 @@
571
  "model.layers.65.self_attn.k_proj.weight": "pytorch_model-00022-of-00027.bin",
572
  "model.layers.65.self_attn.o_proj.weight": "pytorch_model-00022-of-00027.bin",
573
  "model.layers.65.self_attn.q_proj.weight": "pytorch_model-00022-of-00027.bin",
 
574
  "model.layers.65.self_attn.v_proj.weight": "pytorch_model-00022-of-00027.bin",
575
  "model.layers.66.input_layernorm.weight": "pytorch_model-00023-of-00027.bin",
576
  "model.layers.66.mlp.down_proj.weight": "pytorch_model-00023-of-00027.bin",
@@ -580,6 +643,7 @@
580
  "model.layers.66.self_attn.k_proj.weight": "pytorch_model-00023-of-00027.bin",
581
  "model.layers.66.self_attn.o_proj.weight": "pytorch_model-00023-of-00027.bin",
582
  "model.layers.66.self_attn.q_proj.weight": "pytorch_model-00023-of-00027.bin",
 
583
  "model.layers.66.self_attn.v_proj.weight": "pytorch_model-00023-of-00027.bin",
584
  "model.layers.67.input_layernorm.weight": "pytorch_model-00023-of-00027.bin",
585
  "model.layers.67.mlp.down_proj.weight": "pytorch_model-00023-of-00027.bin",
@@ -589,6 +653,7 @@
589
  "model.layers.67.self_attn.k_proj.weight": "pytorch_model-00023-of-00027.bin",
590
  "model.layers.67.self_attn.o_proj.weight": "pytorch_model-00023-of-00027.bin",
591
  "model.layers.67.self_attn.q_proj.weight": "pytorch_model-00023-of-00027.bin",
 
592
  "model.layers.67.self_attn.v_proj.weight": "pytorch_model-00023-of-00027.bin",
593
  "model.layers.68.input_layernorm.weight": "pytorch_model-00024-of-00027.bin",
594
  "model.layers.68.mlp.down_proj.weight": "pytorch_model-00024-of-00027.bin",
@@ -598,6 +663,7 @@
598
  "model.layers.68.self_attn.k_proj.weight": "pytorch_model-00023-of-00027.bin",
599
  "model.layers.68.self_attn.o_proj.weight": "pytorch_model-00023-of-00027.bin",
600
  "model.layers.68.self_attn.q_proj.weight": "pytorch_model-00023-of-00027.bin",
 
601
  "model.layers.68.self_attn.v_proj.weight": "pytorch_model-00023-of-00027.bin",
602
  "model.layers.69.input_layernorm.weight": "pytorch_model-00024-of-00027.bin",
603
  "model.layers.69.mlp.down_proj.weight": "pytorch_model-00024-of-00027.bin",
@@ -607,6 +673,7 @@
607
  "model.layers.69.self_attn.k_proj.weight": "pytorch_model-00024-of-00027.bin",
608
  "model.layers.69.self_attn.o_proj.weight": "pytorch_model-00024-of-00027.bin",
609
  "model.layers.69.self_attn.q_proj.weight": "pytorch_model-00024-of-00027.bin",
 
610
  "model.layers.69.self_attn.v_proj.weight": "pytorch_model-00024-of-00027.bin",
611
  "model.layers.7.input_layernorm.weight": "pytorch_model-00003-of-00027.bin",
612
  "model.layers.7.mlp.down_proj.weight": "pytorch_model-00003-of-00027.bin",
@@ -616,6 +683,7 @@
616
  "model.layers.7.self_attn.k_proj.weight": "pytorch_model-00003-of-00027.bin",
617
  "model.layers.7.self_attn.o_proj.weight": "pytorch_model-00003-of-00027.bin",
618
  "model.layers.7.self_attn.q_proj.weight": "pytorch_model-00003-of-00027.bin",
 
619
  "model.layers.7.self_attn.v_proj.weight": "pytorch_model-00003-of-00027.bin",
620
  "model.layers.70.input_layernorm.weight": "pytorch_model-00024-of-00027.bin",
621
  "model.layers.70.mlp.down_proj.weight": "pytorch_model-00024-of-00027.bin",
@@ -625,6 +693,7 @@
625
  "model.layers.70.self_attn.k_proj.weight": "pytorch_model-00024-of-00027.bin",
626
  "model.layers.70.self_attn.o_proj.weight": "pytorch_model-00024-of-00027.bin",
627
  "model.layers.70.self_attn.q_proj.weight": "pytorch_model-00024-of-00027.bin",
 
628
  "model.layers.70.self_attn.v_proj.weight": "pytorch_model-00024-of-00027.bin",
629
  "model.layers.71.input_layernorm.weight": "pytorch_model-00025-of-00027.bin",
630
  "model.layers.71.mlp.down_proj.weight": "pytorch_model-00025-of-00027.bin",
@@ -634,6 +703,7 @@
634
  "model.layers.71.self_attn.k_proj.weight": "pytorch_model-00024-of-00027.bin",
635
  "model.layers.71.self_attn.o_proj.weight": "pytorch_model-00024-of-00027.bin",
636
  "model.layers.71.self_attn.q_proj.weight": "pytorch_model-00024-of-00027.bin",
 
637
  "model.layers.71.self_attn.v_proj.weight": "pytorch_model-00024-of-00027.bin",
638
  "model.layers.72.input_layernorm.weight": "pytorch_model-00025-of-00027.bin",
639
  "model.layers.72.mlp.down_proj.weight": "pytorch_model-00025-of-00027.bin",
@@ -643,6 +713,7 @@
643
  "model.layers.72.self_attn.k_proj.weight": "pytorch_model-00025-of-00027.bin",
644
  "model.layers.72.self_attn.o_proj.weight": "pytorch_model-00025-of-00027.bin",
645
  "model.layers.72.self_attn.q_proj.weight": "pytorch_model-00025-of-00027.bin",
 
646
  "model.layers.72.self_attn.v_proj.weight": "pytorch_model-00025-of-00027.bin",
647
  "model.layers.73.input_layernorm.weight": "pytorch_model-00025-of-00027.bin",
648
  "model.layers.73.mlp.down_proj.weight": "pytorch_model-00025-of-00027.bin",
@@ -652,6 +723,7 @@
652
  "model.layers.73.self_attn.k_proj.weight": "pytorch_model-00025-of-00027.bin",
653
  "model.layers.73.self_attn.o_proj.weight": "pytorch_model-00025-of-00027.bin",
654
  "model.layers.73.self_attn.q_proj.weight": "pytorch_model-00025-of-00027.bin",
 
655
  "model.layers.73.self_attn.v_proj.weight": "pytorch_model-00025-of-00027.bin",
656
  "model.layers.74.input_layernorm.weight": "pytorch_model-00026-of-00027.bin",
657
  "model.layers.74.mlp.down_proj.weight": "pytorch_model-00026-of-00027.bin",
@@ -661,6 +733,7 @@
661
  "model.layers.74.self_attn.k_proj.weight": "pytorch_model-00025-of-00027.bin",
662
  "model.layers.74.self_attn.o_proj.weight": "pytorch_model-00025-of-00027.bin",
663
  "model.layers.74.self_attn.q_proj.weight": "pytorch_model-00025-of-00027.bin",
 
664
  "model.layers.74.self_attn.v_proj.weight": "pytorch_model-00025-of-00027.bin",
665
  "model.layers.75.input_layernorm.weight": "pytorch_model-00026-of-00027.bin",
666
  "model.layers.75.mlp.down_proj.weight": "pytorch_model-00026-of-00027.bin",
@@ -670,6 +743,7 @@
670
  "model.layers.75.self_attn.k_proj.weight": "pytorch_model-00026-of-00027.bin",
671
  "model.layers.75.self_attn.o_proj.weight": "pytorch_model-00026-of-00027.bin",
672
  "model.layers.75.self_attn.q_proj.weight": "pytorch_model-00026-of-00027.bin",
 
673
  "model.layers.75.self_attn.v_proj.weight": "pytorch_model-00026-of-00027.bin",
674
  "model.layers.76.input_layernorm.weight": "pytorch_model-00026-of-00027.bin",
675
  "model.layers.76.mlp.down_proj.weight": "pytorch_model-00026-of-00027.bin",
@@ -679,6 +753,7 @@
679
  "model.layers.76.self_attn.k_proj.weight": "pytorch_model-00026-of-00027.bin",
680
  "model.layers.76.self_attn.o_proj.weight": "pytorch_model-00026-of-00027.bin",
681
  "model.layers.76.self_attn.q_proj.weight": "pytorch_model-00026-of-00027.bin",
 
682
  "model.layers.76.self_attn.v_proj.weight": "pytorch_model-00026-of-00027.bin",
683
  "model.layers.77.input_layernorm.weight": "pytorch_model-00027-of-00027.bin",
684
  "model.layers.77.mlp.down_proj.weight": "pytorch_model-00027-of-00027.bin",
@@ -688,6 +763,7 @@
688
  "model.layers.77.self_attn.k_proj.weight": "pytorch_model-00026-of-00027.bin",
689
  "model.layers.77.self_attn.o_proj.weight": "pytorch_model-00026-of-00027.bin",
690
  "model.layers.77.self_attn.q_proj.weight": "pytorch_model-00026-of-00027.bin",
 
691
  "model.layers.77.self_attn.v_proj.weight": "pytorch_model-00026-of-00027.bin",
692
  "model.layers.78.input_layernorm.weight": "pytorch_model-00027-of-00027.bin",
693
  "model.layers.78.mlp.down_proj.weight": "pytorch_model-00027-of-00027.bin",
@@ -697,6 +773,7 @@
697
  "model.layers.78.self_attn.k_proj.weight": "pytorch_model-00027-of-00027.bin",
698
  "model.layers.78.self_attn.o_proj.weight": "pytorch_model-00027-of-00027.bin",
699
  "model.layers.78.self_attn.q_proj.weight": "pytorch_model-00027-of-00027.bin",
 
700
  "model.layers.78.self_attn.v_proj.weight": "pytorch_model-00027-of-00027.bin",
701
  "model.layers.79.input_layernorm.weight": "pytorch_model-00027-of-00027.bin",
702
  "model.layers.79.mlp.down_proj.weight": "pytorch_model-00027-of-00027.bin",
@@ -706,6 +783,7 @@
706
  "model.layers.79.self_attn.k_proj.weight": "pytorch_model-00027-of-00027.bin",
707
  "model.layers.79.self_attn.o_proj.weight": "pytorch_model-00027-of-00027.bin",
708
  "model.layers.79.self_attn.q_proj.weight": "pytorch_model-00027-of-00027.bin",
 
709
  "model.layers.79.self_attn.v_proj.weight": "pytorch_model-00027-of-00027.bin",
710
  "model.layers.8.input_layernorm.weight": "pytorch_model-00004-of-00027.bin",
711
  "model.layers.8.mlp.down_proj.weight": "pytorch_model-00004-of-00027.bin",
@@ -715,6 +793,7 @@
715
  "model.layers.8.self_attn.k_proj.weight": "pytorch_model-00003-of-00027.bin",
716
  "model.layers.8.self_attn.o_proj.weight": "pytorch_model-00003-of-00027.bin",
717
  "model.layers.8.self_attn.q_proj.weight": "pytorch_model-00003-of-00027.bin",
 
718
  "model.layers.8.self_attn.v_proj.weight": "pytorch_model-00003-of-00027.bin",
719
  "model.layers.9.input_layernorm.weight": "pytorch_model-00004-of-00027.bin",
720
  "model.layers.9.mlp.down_proj.weight": "pytorch_model-00004-of-00027.bin",
@@ -724,6 +803,7 @@
724
  "model.layers.9.self_attn.k_proj.weight": "pytorch_model-00004-of-00027.bin",
725
  "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00004-of-00027.bin",
726
  "model.layers.9.self_attn.q_proj.weight": "pytorch_model-00004-of-00027.bin",
 
727
  "model.layers.9.self_attn.v_proj.weight": "pytorch_model-00004-of-00027.bin",
728
  "model.norm.weight": "pytorch_model-00027-of-00027.bin"
729
  }
 
1
  {
2
  "metadata": {
3
+ "total_size": 261144760320
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "pytorch_model-00027-of-00027.bin",
 
13
  "model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00027.bin",
14
  "model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00027.bin",
15
  "model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00027.bin",
16
+ "model.layers.0.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00027.bin",
17
  "model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00027.bin",
18
  "model.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00027.bin",
19
  "model.layers.1.mlp.down_proj.weight": "pytorch_model-00001-of-00027.bin",
 
23
  "model.layers.1.self_attn.k_proj.weight": "pytorch_model-00001-of-00027.bin",
24
  "model.layers.1.self_attn.o_proj.weight": "pytorch_model-00001-of-00027.bin",
25
  "model.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00027.bin",
26
+ "model.layers.1.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00027.bin",
27
  "model.layers.1.self_attn.v_proj.weight": "pytorch_model-00001-of-00027.bin",
28
  "model.layers.10.input_layernorm.weight": "pytorch_model-00004-of-00027.bin",
29
  "model.layers.10.mlp.down_proj.weight": "pytorch_model-00004-of-00027.bin",
 
33
  "model.layers.10.self_attn.k_proj.weight": "pytorch_model-00004-of-00027.bin",
34
  "model.layers.10.self_attn.o_proj.weight": "pytorch_model-00004-of-00027.bin",
35
  "model.layers.10.self_attn.q_proj.weight": "pytorch_model-00004-of-00027.bin",
36
+ "model.layers.10.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00027.bin",
37
  "model.layers.10.self_attn.v_proj.weight": "pytorch_model-00004-of-00027.bin",
38
  "model.layers.11.input_layernorm.weight": "pytorch_model-00005-of-00027.bin",
39
  "model.layers.11.mlp.down_proj.weight": "pytorch_model-00005-of-00027.bin",
 
43
  "model.layers.11.self_attn.k_proj.weight": "pytorch_model-00004-of-00027.bin",
44
  "model.layers.11.self_attn.o_proj.weight": "pytorch_model-00004-of-00027.bin",
45
  "model.layers.11.self_attn.q_proj.weight": "pytorch_model-00004-of-00027.bin",
46
+ "model.layers.11.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00027.bin",
47
  "model.layers.11.self_attn.v_proj.weight": "pytorch_model-00004-of-00027.bin",
48
  "model.layers.12.input_layernorm.weight": "pytorch_model-00005-of-00027.bin",
49
  "model.layers.12.mlp.down_proj.weight": "pytorch_model-00005-of-00027.bin",
 
53
  "model.layers.12.self_attn.k_proj.weight": "pytorch_model-00005-of-00027.bin",
54
  "model.layers.12.self_attn.o_proj.weight": "pytorch_model-00005-of-00027.bin",
55
  "model.layers.12.self_attn.q_proj.weight": "pytorch_model-00005-of-00027.bin",
56
+ "model.layers.12.self_attn.rotary_emb.inv_freq": "pytorch_model-00005-of-00027.bin",
57
  "model.layers.12.self_attn.v_proj.weight": "pytorch_model-00005-of-00027.bin",
58
  "model.layers.13.input_layernorm.weight": "pytorch_model-00005-of-00027.bin",
59
  "model.layers.13.mlp.down_proj.weight": "pytorch_model-00005-of-00027.bin",
 
63
  "model.layers.13.self_attn.k_proj.weight": "pytorch_model-00005-of-00027.bin",
64
  "model.layers.13.self_attn.o_proj.weight": "pytorch_model-00005-of-00027.bin",
65
  "model.layers.13.self_attn.q_proj.weight": "pytorch_model-00005-of-00027.bin",
66
+ "model.layers.13.self_attn.rotary_emb.inv_freq": "pytorch_model-00005-of-00027.bin",
67
  "model.layers.13.self_attn.v_proj.weight": "pytorch_model-00005-of-00027.bin",
68
  "model.layers.14.input_layernorm.weight": "pytorch_model-00006-of-00027.bin",
69
  "model.layers.14.mlp.down_proj.weight": "pytorch_model-00006-of-00027.bin",
 
73
  "model.layers.14.self_attn.k_proj.weight": "pytorch_model-00005-of-00027.bin",
74
  "model.layers.14.self_attn.o_proj.weight": "pytorch_model-00005-of-00027.bin",
75
  "model.layers.14.self_attn.q_proj.weight": "pytorch_model-00005-of-00027.bin",
76
+ "model.layers.14.self_attn.rotary_emb.inv_freq": "pytorch_model-00005-of-00027.bin",
77
  "model.layers.14.self_attn.v_proj.weight": "pytorch_model-00005-of-00027.bin",
78
  "model.layers.15.input_layernorm.weight": "pytorch_model-00006-of-00027.bin",
79
  "model.layers.15.mlp.down_proj.weight": "pytorch_model-00006-of-00027.bin",
 
83
  "model.layers.15.self_attn.k_proj.weight": "pytorch_model-00006-of-00027.bin",
84
  "model.layers.15.self_attn.o_proj.weight": "pytorch_model-00006-of-00027.bin",
85
  "model.layers.15.self_attn.q_proj.weight": "pytorch_model-00006-of-00027.bin",
86
+ "model.layers.15.self_attn.rotary_emb.inv_freq": "pytorch_model-00006-of-00027.bin",
87
  "model.layers.15.self_attn.v_proj.weight": "pytorch_model-00006-of-00027.bin",
88
  "model.layers.16.input_layernorm.weight": "pytorch_model-00006-of-00027.bin",
89
  "model.layers.16.mlp.down_proj.weight": "pytorch_model-00006-of-00027.bin",
 
93
  "model.layers.16.self_attn.k_proj.weight": "pytorch_model-00006-of-00027.bin",
94
  "model.layers.16.self_attn.o_proj.weight": "pytorch_model-00006-of-00027.bin",
95
  "model.layers.16.self_attn.q_proj.weight": "pytorch_model-00006-of-00027.bin",
96
+ "model.layers.16.self_attn.rotary_emb.inv_freq": "pytorch_model-00006-of-00027.bin",
97
  "model.layers.16.self_attn.v_proj.weight": "pytorch_model-00006-of-00027.bin",
98
  "model.layers.17.input_layernorm.weight": "pytorch_model-00007-of-00027.bin",
99
  "model.layers.17.mlp.down_proj.weight": "pytorch_model-00007-of-00027.bin",
 
103
  "model.layers.17.self_attn.k_proj.weight": "pytorch_model-00006-of-00027.bin",
104
  "model.layers.17.self_attn.o_proj.weight": "pytorch_model-00006-of-00027.bin",
105
  "model.layers.17.self_attn.q_proj.weight": "pytorch_model-00006-of-00027.bin",
106
+ "model.layers.17.self_attn.rotary_emb.inv_freq": "pytorch_model-00006-of-00027.bin",
107
  "model.layers.17.self_attn.v_proj.weight": "pytorch_model-00006-of-00027.bin",
108
  "model.layers.18.input_layernorm.weight": "pytorch_model-00007-of-00027.bin",
109
  "model.layers.18.mlp.down_proj.weight": "pytorch_model-00007-of-00027.bin",
 
113
  "model.layers.18.self_attn.k_proj.weight": "pytorch_model-00007-of-00027.bin",
114
  "model.layers.18.self_attn.o_proj.weight": "pytorch_model-00007-of-00027.bin",
115
  "model.layers.18.self_attn.q_proj.weight": "pytorch_model-00007-of-00027.bin",
116
+ "model.layers.18.self_attn.rotary_emb.inv_freq": "pytorch_model-00007-of-00027.bin",
117
  "model.layers.18.self_attn.v_proj.weight": "pytorch_model-00007-of-00027.bin",
118
  "model.layers.19.input_layernorm.weight": "pytorch_model-00007-of-00027.bin",
119
  "model.layers.19.mlp.down_proj.weight": "pytorch_model-00007-of-00027.bin",
 
123
  "model.layers.19.self_attn.k_proj.weight": "pytorch_model-00007-of-00027.bin",
124
  "model.layers.19.self_attn.o_proj.weight": "pytorch_model-00007-of-00027.bin",
125
  "model.layers.19.self_attn.q_proj.weight": "pytorch_model-00007-of-00027.bin",
126
+ "model.layers.19.self_attn.rotary_emb.inv_freq": "pytorch_model-00007-of-00027.bin",
127
  "model.layers.19.self_attn.v_proj.weight": "pytorch_model-00007-of-00027.bin",
128
  "model.layers.2.input_layernorm.weight": "pytorch_model-00002-of-00027.bin",
129
  "model.layers.2.mlp.down_proj.weight": "pytorch_model-00002-of-00027.bin",
 
133
  "model.layers.2.self_attn.k_proj.weight": "pytorch_model-00001-of-00027.bin",
134
  "model.layers.2.self_attn.o_proj.weight": "pytorch_model-00001-of-00027.bin",
135
  "model.layers.2.self_attn.q_proj.weight": "pytorch_model-00001-of-00027.bin",
136
+ "model.layers.2.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00027.bin",
137
  "model.layers.2.self_attn.v_proj.weight": "pytorch_model-00001-of-00027.bin",
138
  "model.layers.20.input_layernorm.weight": "pytorch_model-00008-of-00027.bin",
139
  "model.layers.20.mlp.down_proj.weight": "pytorch_model-00008-of-00027.bin",
 
143
  "model.layers.20.self_attn.k_proj.weight": "pytorch_model-00007-of-00027.bin",
144
  "model.layers.20.self_attn.o_proj.weight": "pytorch_model-00007-of-00027.bin",
145
  "model.layers.20.self_attn.q_proj.weight": "pytorch_model-00007-of-00027.bin",
146
+ "model.layers.20.self_attn.rotary_emb.inv_freq": "pytorch_model-00007-of-00027.bin",
147
  "model.layers.20.self_attn.v_proj.weight": "pytorch_model-00007-of-00027.bin",
148
  "model.layers.21.input_layernorm.weight": "pytorch_model-00008-of-00027.bin",
149
  "model.layers.21.mlp.down_proj.weight": "pytorch_model-00008-of-00027.bin",
 
153
  "model.layers.21.self_attn.k_proj.weight": "pytorch_model-00008-of-00027.bin",
154
  "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00008-of-00027.bin",
155
  "model.layers.21.self_attn.q_proj.weight": "pytorch_model-00008-of-00027.bin",
156
+ "model.layers.21.self_attn.rotary_emb.inv_freq": "pytorch_model-00008-of-00027.bin",
157
  "model.layers.21.self_attn.v_proj.weight": "pytorch_model-00008-of-00027.bin",
158
  "model.layers.22.input_layernorm.weight": "pytorch_model-00008-of-00027.bin",
159
  "model.layers.22.mlp.down_proj.weight": "pytorch_model-00008-of-00027.bin",
 
163
  "model.layers.22.self_attn.k_proj.weight": "pytorch_model-00008-of-00027.bin",
164
  "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00008-of-00027.bin",
165
  "model.layers.22.self_attn.q_proj.weight": "pytorch_model-00008-of-00027.bin",
166
+ "model.layers.22.self_attn.rotary_emb.inv_freq": "pytorch_model-00008-of-00027.bin",
167
  "model.layers.22.self_attn.v_proj.weight": "pytorch_model-00008-of-00027.bin",
168
  "model.layers.23.input_layernorm.weight": "pytorch_model-00009-of-00027.bin",
169
  "model.layers.23.mlp.down_proj.weight": "pytorch_model-00009-of-00027.bin",
 
173
  "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00008-of-00027.bin",
174
  "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00008-of-00027.bin",
175
  "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00008-of-00027.bin",
176
+ "model.layers.23.self_attn.rotary_emb.inv_freq": "pytorch_model-00008-of-00027.bin",
177
  "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00008-of-00027.bin",
178
  "model.layers.24.input_layernorm.weight": "pytorch_model-00009-of-00027.bin",
179
  "model.layers.24.mlp.down_proj.weight": "pytorch_model-00009-of-00027.bin",
 
183
  "model.layers.24.self_attn.k_proj.weight": "pytorch_model-00009-of-00027.bin",
184
  "model.layers.24.self_attn.o_proj.weight": "pytorch_model-00009-of-00027.bin",
185
  "model.layers.24.self_attn.q_proj.weight": "pytorch_model-00009-of-00027.bin",
186
+ "model.layers.24.self_attn.rotary_emb.inv_freq": "pytorch_model-00009-of-00027.bin",
187
  "model.layers.24.self_attn.v_proj.weight": "pytorch_model-00009-of-00027.bin",
188
  "model.layers.25.input_layernorm.weight": "pytorch_model-00009-of-00027.bin",
189
  "model.layers.25.mlp.down_proj.weight": "pytorch_model-00009-of-00027.bin",
 
193
  "model.layers.25.self_attn.k_proj.weight": "pytorch_model-00009-of-00027.bin",
194
  "model.layers.25.self_attn.o_proj.weight": "pytorch_model-00009-of-00027.bin",
195
  "model.layers.25.self_attn.q_proj.weight": "pytorch_model-00009-of-00027.bin",
196
+ "model.layers.25.self_attn.rotary_emb.inv_freq": "pytorch_model-00009-of-00027.bin",
197
  "model.layers.25.self_attn.v_proj.weight": "pytorch_model-00009-of-00027.bin",
198
  "model.layers.26.input_layernorm.weight": "pytorch_model-00010-of-00027.bin",
199
  "model.layers.26.mlp.down_proj.weight": "pytorch_model-00010-of-00027.bin",
 
203
  "model.layers.26.self_attn.k_proj.weight": "pytorch_model-00009-of-00027.bin",
204
  "model.layers.26.self_attn.o_proj.weight": "pytorch_model-00009-of-00027.bin",
205
  "model.layers.26.self_attn.q_proj.weight": "pytorch_model-00009-of-00027.bin",
206
+ "model.layers.26.self_attn.rotary_emb.inv_freq": "pytorch_model-00009-of-00027.bin",
207
  "model.layers.26.self_attn.v_proj.weight": "pytorch_model-00009-of-00027.bin",
208
  "model.layers.27.input_layernorm.weight": "pytorch_model-00010-of-00027.bin",
209
  "model.layers.27.mlp.down_proj.weight": "pytorch_model-00010-of-00027.bin",
 
213
  "model.layers.27.self_attn.k_proj.weight": "pytorch_model-00010-of-00027.bin",
214
  "model.layers.27.self_attn.o_proj.weight": "pytorch_model-00010-of-00027.bin",
215
  "model.layers.27.self_attn.q_proj.weight": "pytorch_model-00010-of-00027.bin",
216
+ "model.layers.27.self_attn.rotary_emb.inv_freq": "pytorch_model-00010-of-00027.bin",
217
  "model.layers.27.self_attn.v_proj.weight": "pytorch_model-00010-of-00027.bin",
218
  "model.layers.28.input_layernorm.weight": "pytorch_model-00010-of-00027.bin",
219
  "model.layers.28.mlp.down_proj.weight": "pytorch_model-00010-of-00027.bin",
 
223
  "model.layers.28.self_attn.k_proj.weight": "pytorch_model-00010-of-00027.bin",
224
  "model.layers.28.self_attn.o_proj.weight": "pytorch_model-00010-of-00027.bin",
225
  "model.layers.28.self_attn.q_proj.weight": "pytorch_model-00010-of-00027.bin",
226
+ "model.layers.28.self_attn.rotary_emb.inv_freq": "pytorch_model-00010-of-00027.bin",
227
  "model.layers.28.self_attn.v_proj.weight": "pytorch_model-00010-of-00027.bin",
228
  "model.layers.29.input_layernorm.weight": "pytorch_model-00011-of-00027.bin",
229
  "model.layers.29.mlp.down_proj.weight": "pytorch_model-00011-of-00027.bin",
 
233
  "model.layers.29.self_attn.k_proj.weight": "pytorch_model-00010-of-00027.bin",
234
  "model.layers.29.self_attn.o_proj.weight": "pytorch_model-00010-of-00027.bin",
235
  "model.layers.29.self_attn.q_proj.weight": "pytorch_model-00010-of-00027.bin",
236
+ "model.layers.29.self_attn.rotary_emb.inv_freq": "pytorch_model-00010-of-00027.bin",
237
  "model.layers.29.self_attn.v_proj.weight": "pytorch_model-00010-of-00027.bin",
238
  "model.layers.3.input_layernorm.weight": "pytorch_model-00002-of-00027.bin",
239
  "model.layers.3.mlp.down_proj.weight": "pytorch_model-00002-of-00027.bin",
 
243
  "model.layers.3.self_attn.k_proj.weight": "pytorch_model-00002-of-00027.bin",
244
  "model.layers.3.self_attn.o_proj.weight": "pytorch_model-00002-of-00027.bin",
245
  "model.layers.3.self_attn.q_proj.weight": "pytorch_model-00002-of-00027.bin",
246
+ "model.layers.3.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00027.bin",
247
  "model.layers.3.self_attn.v_proj.weight": "pytorch_model-00002-of-00027.bin",
248
  "model.layers.30.input_layernorm.weight": "pytorch_model-00011-of-00027.bin",
249
  "model.layers.30.mlp.down_proj.weight": "pytorch_model-00011-of-00027.bin",
 
253
  "model.layers.30.self_attn.k_proj.weight": "pytorch_model-00011-of-00027.bin",
254
  "model.layers.30.self_attn.o_proj.weight": "pytorch_model-00011-of-00027.bin",
255
  "model.layers.30.self_attn.q_proj.weight": "pytorch_model-00011-of-00027.bin",
256
+ "model.layers.30.self_attn.rotary_emb.inv_freq": "pytorch_model-00011-of-00027.bin",
257
  "model.layers.30.self_attn.v_proj.weight": "pytorch_model-00011-of-00027.bin",
258
  "model.layers.31.input_layernorm.weight": "pytorch_model-00011-of-00027.bin",
259
  "model.layers.31.mlp.down_proj.weight": "pytorch_model-00011-of-00027.bin",
 
263
  "model.layers.31.self_attn.k_proj.weight": "pytorch_model-00011-of-00027.bin",
264
  "model.layers.31.self_attn.o_proj.weight": "pytorch_model-00011-of-00027.bin",
265
  "model.layers.31.self_attn.q_proj.weight": "pytorch_model-00011-of-00027.bin",
266
+ "model.layers.31.self_attn.rotary_emb.inv_freq": "pytorch_model-00011-of-00027.bin",
267
  "model.layers.31.self_attn.v_proj.weight": "pytorch_model-00011-of-00027.bin",
268
  "model.layers.32.input_layernorm.weight": "pytorch_model-00012-of-00027.bin",
269
  "model.layers.32.mlp.down_proj.weight": "pytorch_model-00012-of-00027.bin",
 
273
  "model.layers.32.self_attn.k_proj.weight": "pytorch_model-00011-of-00027.bin",
274
  "model.layers.32.self_attn.o_proj.weight": "pytorch_model-00011-of-00027.bin",
275
  "model.layers.32.self_attn.q_proj.weight": "pytorch_model-00011-of-00027.bin",
276
+ "model.layers.32.self_attn.rotary_emb.inv_freq": "pytorch_model-00011-of-00027.bin",
277
  "model.layers.32.self_attn.v_proj.weight": "pytorch_model-00011-of-00027.bin",
278
  "model.layers.33.input_layernorm.weight": "pytorch_model-00012-of-00027.bin",
279
  "model.layers.33.mlp.down_proj.weight": "pytorch_model-00012-of-00027.bin",
 
283
  "model.layers.33.self_attn.k_proj.weight": "pytorch_model-00012-of-00027.bin",
284
  "model.layers.33.self_attn.o_proj.weight": "pytorch_model-00012-of-00027.bin",
285
  "model.layers.33.self_attn.q_proj.weight": "pytorch_model-00012-of-00027.bin",
286
+ "model.layers.33.self_attn.rotary_emb.inv_freq": "pytorch_model-00012-of-00027.bin",
287
  "model.layers.33.self_attn.v_proj.weight": "pytorch_model-00012-of-00027.bin",
288
  "model.layers.34.input_layernorm.weight": "pytorch_model-00012-of-00027.bin",
289
  "model.layers.34.mlp.down_proj.weight": "pytorch_model-00012-of-00027.bin",
 
293
  "model.layers.34.self_attn.k_proj.weight": "pytorch_model-00012-of-00027.bin",
294
  "model.layers.34.self_attn.o_proj.weight": "pytorch_model-00012-of-00027.bin",
295
  "model.layers.34.self_attn.q_proj.weight": "pytorch_model-00012-of-00027.bin",
296
+ "model.layers.34.self_attn.rotary_emb.inv_freq": "pytorch_model-00012-of-00027.bin",
297
  "model.layers.34.self_attn.v_proj.weight": "pytorch_model-00012-of-00027.bin",
298
  "model.layers.35.input_layernorm.weight": "pytorch_model-00013-of-00027.bin",
299
  "model.layers.35.mlp.down_proj.weight": "pytorch_model-00013-of-00027.bin",
 
303
  "model.layers.35.self_attn.k_proj.weight": "pytorch_model-00012-of-00027.bin",
304
  "model.layers.35.self_attn.o_proj.weight": "pytorch_model-00012-of-00027.bin",
305
  "model.layers.35.self_attn.q_proj.weight": "pytorch_model-00012-of-00027.bin",
306
+ "model.layers.35.self_attn.rotary_emb.inv_freq": "pytorch_model-00012-of-00027.bin",
307
  "model.layers.35.self_attn.v_proj.weight": "pytorch_model-00012-of-00027.bin",
308
  "model.layers.36.input_layernorm.weight": "pytorch_model-00013-of-00027.bin",
309
  "model.layers.36.mlp.down_proj.weight": "pytorch_model-00013-of-00027.bin",
 
313
  "model.layers.36.self_attn.k_proj.weight": "pytorch_model-00013-of-00027.bin",
314
  "model.layers.36.self_attn.o_proj.weight": "pytorch_model-00013-of-00027.bin",
315
  "model.layers.36.self_attn.q_proj.weight": "pytorch_model-00013-of-00027.bin",
316
+ "model.layers.36.self_attn.rotary_emb.inv_freq": "pytorch_model-00013-of-00027.bin",
317
  "model.layers.36.self_attn.v_proj.weight": "pytorch_model-00013-of-00027.bin",
318
  "model.layers.37.input_layernorm.weight": "pytorch_model-00013-of-00027.bin",
319
  "model.layers.37.mlp.down_proj.weight": "pytorch_model-00013-of-00027.bin",
 
323
  "model.layers.37.self_attn.k_proj.weight": "pytorch_model-00013-of-00027.bin",
324
  "model.layers.37.self_attn.o_proj.weight": "pytorch_model-00013-of-00027.bin",
325
  "model.layers.37.self_attn.q_proj.weight": "pytorch_model-00013-of-00027.bin",
326
+ "model.layers.37.self_attn.rotary_emb.inv_freq": "pytorch_model-00013-of-00027.bin",
327
  "model.layers.37.self_attn.v_proj.weight": "pytorch_model-00013-of-00027.bin",
328
  "model.layers.38.input_layernorm.weight": "pytorch_model-00014-of-00027.bin",
329
  "model.layers.38.mlp.down_proj.weight": "pytorch_model-00014-of-00027.bin",
 
333
  "model.layers.38.self_attn.k_proj.weight": "pytorch_model-00013-of-00027.bin",
334
  "model.layers.38.self_attn.o_proj.weight": "pytorch_model-00013-of-00027.bin",
335
  "model.layers.38.self_attn.q_proj.weight": "pytorch_model-00013-of-00027.bin",
336
+ "model.layers.38.self_attn.rotary_emb.inv_freq": "pytorch_model-00013-of-00027.bin",
337
  "model.layers.38.self_attn.v_proj.weight": "pytorch_model-00013-of-00027.bin",
338
  "model.layers.39.input_layernorm.weight": "pytorch_model-00014-of-00027.bin",
339
  "model.layers.39.mlp.down_proj.weight": "pytorch_model-00014-of-00027.bin",
 
343
  "model.layers.39.self_attn.k_proj.weight": "pytorch_model-00014-of-00027.bin",
344
  "model.layers.39.self_attn.o_proj.weight": "pytorch_model-00014-of-00027.bin",
345
  "model.layers.39.self_attn.q_proj.weight": "pytorch_model-00014-of-00027.bin",
346
+ "model.layers.39.self_attn.rotary_emb.inv_freq": "pytorch_model-00014-of-00027.bin",
347
  "model.layers.39.self_attn.v_proj.weight": "pytorch_model-00014-of-00027.bin",
348
  "model.layers.4.input_layernorm.weight": "pytorch_model-00002-of-00027.bin",
349
  "model.layers.4.mlp.down_proj.weight": "pytorch_model-00002-of-00027.bin",
 
353
  "model.layers.4.self_attn.k_proj.weight": "pytorch_model-00002-of-00027.bin",
354
  "model.layers.4.self_attn.o_proj.weight": "pytorch_model-00002-of-00027.bin",
355
  "model.layers.4.self_attn.q_proj.weight": "pytorch_model-00002-of-00027.bin",
356
+ "model.layers.4.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00027.bin",
357
  "model.layers.4.self_attn.v_proj.weight": "pytorch_model-00002-of-00027.bin",
358
  "model.layers.40.input_layernorm.weight": "pytorch_model-00014-of-00027.bin",
359
  "model.layers.40.mlp.down_proj.weight": "pytorch_model-00014-of-00027.bin",
 
363
  "model.layers.40.self_attn.k_proj.weight": "pytorch_model-00014-of-00027.bin",
364
  "model.layers.40.self_attn.o_proj.weight": "pytorch_model-00014-of-00027.bin",
365
  "model.layers.40.self_attn.q_proj.weight": "pytorch_model-00014-of-00027.bin",
366
+ "model.layers.40.self_attn.rotary_emb.inv_freq": "pytorch_model-00014-of-00027.bin",
367
  "model.layers.40.self_attn.v_proj.weight": "pytorch_model-00014-of-00027.bin",
368
  "model.layers.41.input_layernorm.weight": "pytorch_model-00015-of-00027.bin",
369
  "model.layers.41.mlp.down_proj.weight": "pytorch_model-00015-of-00027.bin",
 
373
  "model.layers.41.self_attn.k_proj.weight": "pytorch_model-00014-of-00027.bin",
374
  "model.layers.41.self_attn.o_proj.weight": "pytorch_model-00014-of-00027.bin",
375
  "model.layers.41.self_attn.q_proj.weight": "pytorch_model-00014-of-00027.bin",
376
+ "model.layers.41.self_attn.rotary_emb.inv_freq": "pytorch_model-00014-of-00027.bin",
377
  "model.layers.41.self_attn.v_proj.weight": "pytorch_model-00014-of-00027.bin",
378
  "model.layers.42.input_layernorm.weight": "pytorch_model-00015-of-00027.bin",
379
  "model.layers.42.mlp.down_proj.weight": "pytorch_model-00015-of-00027.bin",
 
383
  "model.layers.42.self_attn.k_proj.weight": "pytorch_model-00015-of-00027.bin",
384
  "model.layers.42.self_attn.o_proj.weight": "pytorch_model-00015-of-00027.bin",
385
  "model.layers.42.self_attn.q_proj.weight": "pytorch_model-00015-of-00027.bin",
386
+ "model.layers.42.self_attn.rotary_emb.inv_freq": "pytorch_model-00015-of-00027.bin",
387
  "model.layers.42.self_attn.v_proj.weight": "pytorch_model-00015-of-00027.bin",
388
  "model.layers.43.input_layernorm.weight": "pytorch_model-00015-of-00027.bin",
389
  "model.layers.43.mlp.down_proj.weight": "pytorch_model-00015-of-00027.bin",
 
393
  "model.layers.43.self_attn.k_proj.weight": "pytorch_model-00015-of-00027.bin",
394
  "model.layers.43.self_attn.o_proj.weight": "pytorch_model-00015-of-00027.bin",
395
  "model.layers.43.self_attn.q_proj.weight": "pytorch_model-00015-of-00027.bin",
396
+ "model.layers.43.self_attn.rotary_emb.inv_freq": "pytorch_model-00015-of-00027.bin",
397
  "model.layers.43.self_attn.v_proj.weight": "pytorch_model-00015-of-00027.bin",
398
  "model.layers.44.input_layernorm.weight": "pytorch_model-00016-of-00027.bin",
399
  "model.layers.44.mlp.down_proj.weight": "pytorch_model-00016-of-00027.bin",
 
403
  "model.layers.44.self_attn.k_proj.weight": "pytorch_model-00015-of-00027.bin",
404
  "model.layers.44.self_attn.o_proj.weight": "pytorch_model-00015-of-00027.bin",
405
  "model.layers.44.self_attn.q_proj.weight": "pytorch_model-00015-of-00027.bin",
406
+ "model.layers.44.self_attn.rotary_emb.inv_freq": "pytorch_model-00015-of-00027.bin",
407
  "model.layers.44.self_attn.v_proj.weight": "pytorch_model-00015-of-00027.bin",
408
  "model.layers.45.input_layernorm.weight": "pytorch_model-00016-of-00027.bin",
409
  "model.layers.45.mlp.down_proj.weight": "pytorch_model-00016-of-00027.bin",
 
413
  "model.layers.45.self_attn.k_proj.weight": "pytorch_model-00016-of-00027.bin",
414
  "model.layers.45.self_attn.o_proj.weight": "pytorch_model-00016-of-00027.bin",
415
  "model.layers.45.self_attn.q_proj.weight": "pytorch_model-00016-of-00027.bin",
416
+ "model.layers.45.self_attn.rotary_emb.inv_freq": "pytorch_model-00016-of-00027.bin",
417
  "model.layers.45.self_attn.v_proj.weight": "pytorch_model-00016-of-00027.bin",
418
  "model.layers.46.input_layernorm.weight": "pytorch_model-00016-of-00027.bin",
419
  "model.layers.46.mlp.down_proj.weight": "pytorch_model-00016-of-00027.bin",
 
423
  "model.layers.46.self_attn.k_proj.weight": "pytorch_model-00016-of-00027.bin",
424
  "model.layers.46.self_attn.o_proj.weight": "pytorch_model-00016-of-00027.bin",
425
  "model.layers.46.self_attn.q_proj.weight": "pytorch_model-00016-of-00027.bin",
426
+ "model.layers.46.self_attn.rotary_emb.inv_freq": "pytorch_model-00016-of-00027.bin",
427
  "model.layers.46.self_attn.v_proj.weight": "pytorch_model-00016-of-00027.bin",
428
  "model.layers.47.input_layernorm.weight": "pytorch_model-00017-of-00027.bin",
429
  "model.layers.47.mlp.down_proj.weight": "pytorch_model-00017-of-00027.bin",
 
433
  "model.layers.47.self_attn.k_proj.weight": "pytorch_model-00016-of-00027.bin",
434
  "model.layers.47.self_attn.o_proj.weight": "pytorch_model-00016-of-00027.bin",
435
  "model.layers.47.self_attn.q_proj.weight": "pytorch_model-00016-of-00027.bin",
436
+ "model.layers.47.self_attn.rotary_emb.inv_freq": "pytorch_model-00016-of-00027.bin",
437
  "model.layers.47.self_attn.v_proj.weight": "pytorch_model-00016-of-00027.bin",
438
  "model.layers.48.input_layernorm.weight": "pytorch_model-00017-of-00027.bin",
439
  "model.layers.48.mlp.down_proj.weight": "pytorch_model-00017-of-00027.bin",
 
443
  "model.layers.48.self_attn.k_proj.weight": "pytorch_model-00017-of-00027.bin",
444
  "model.layers.48.self_attn.o_proj.weight": "pytorch_model-00017-of-00027.bin",
445
  "model.layers.48.self_attn.q_proj.weight": "pytorch_model-00017-of-00027.bin",
446
+ "model.layers.48.self_attn.rotary_emb.inv_freq": "pytorch_model-00017-of-00027.bin",
447
  "model.layers.48.self_attn.v_proj.weight": "pytorch_model-00017-of-00027.bin",
448
  "model.layers.49.input_layernorm.weight": "pytorch_model-00017-of-00027.bin",
449
  "model.layers.49.mlp.down_proj.weight": "pytorch_model-00017-of-00027.bin",
 
453
  "model.layers.49.self_attn.k_proj.weight": "pytorch_model-00017-of-00027.bin",
454
  "model.layers.49.self_attn.o_proj.weight": "pytorch_model-00017-of-00027.bin",
455
  "model.layers.49.self_attn.q_proj.weight": "pytorch_model-00017-of-00027.bin",
456
+ "model.layers.49.self_attn.rotary_emb.inv_freq": "pytorch_model-00017-of-00027.bin",
457
  "model.layers.49.self_attn.v_proj.weight": "pytorch_model-00017-of-00027.bin",
458
  "model.layers.5.input_layernorm.weight": "pytorch_model-00003-of-00027.bin",
459
  "model.layers.5.mlp.down_proj.weight": "pytorch_model-00003-of-00027.bin",
 
463
  "model.layers.5.self_attn.k_proj.weight": "pytorch_model-00002-of-00027.bin",
464
  "model.layers.5.self_attn.o_proj.weight": "pytorch_model-00002-of-00027.bin",
465
  "model.layers.5.self_attn.q_proj.weight": "pytorch_model-00002-of-00027.bin",
466
+ "model.layers.5.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00027.bin",
467
  "model.layers.5.self_attn.v_proj.weight": "pytorch_model-00002-of-00027.bin",
468
  "model.layers.50.input_layernorm.weight": "pytorch_model-00018-of-00027.bin",
469
  "model.layers.50.mlp.down_proj.weight": "pytorch_model-00018-of-00027.bin",
 
473
  "model.layers.50.self_attn.k_proj.weight": "pytorch_model-00017-of-00027.bin",
474
  "model.layers.50.self_attn.o_proj.weight": "pytorch_model-00017-of-00027.bin",
475
  "model.layers.50.self_attn.q_proj.weight": "pytorch_model-00017-of-00027.bin",
476
+ "model.layers.50.self_attn.rotary_emb.inv_freq": "pytorch_model-00017-of-00027.bin",
477
  "model.layers.50.self_attn.v_proj.weight": "pytorch_model-00017-of-00027.bin",
478
  "model.layers.51.input_layernorm.weight": "pytorch_model-00018-of-00027.bin",
479
  "model.layers.51.mlp.down_proj.weight": "pytorch_model-00018-of-00027.bin",
 
483
  "model.layers.51.self_attn.k_proj.weight": "pytorch_model-00018-of-00027.bin",
484
  "model.layers.51.self_attn.o_proj.weight": "pytorch_model-00018-of-00027.bin",
485
  "model.layers.51.self_attn.q_proj.weight": "pytorch_model-00018-of-00027.bin",
486
+ "model.layers.51.self_attn.rotary_emb.inv_freq": "pytorch_model-00018-of-00027.bin",
487
  "model.layers.51.self_attn.v_proj.weight": "pytorch_model-00018-of-00027.bin",
488
  "model.layers.52.input_layernorm.weight": "pytorch_model-00018-of-00027.bin",
489
  "model.layers.52.mlp.down_proj.weight": "pytorch_model-00018-of-00027.bin",
 
493
  "model.layers.52.self_attn.k_proj.weight": "pytorch_model-00018-of-00027.bin",
494
  "model.layers.52.self_attn.o_proj.weight": "pytorch_model-00018-of-00027.bin",
495
  "model.layers.52.self_attn.q_proj.weight": "pytorch_model-00018-of-00027.bin",
496
+ "model.layers.52.self_attn.rotary_emb.inv_freq": "pytorch_model-00018-of-00027.bin",
497
  "model.layers.52.self_attn.v_proj.weight": "pytorch_model-00018-of-00027.bin",
498
  "model.layers.53.input_layernorm.weight": "pytorch_model-00019-of-00027.bin",
499
  "model.layers.53.mlp.down_proj.weight": "pytorch_model-00019-of-00027.bin",
 
503
  "model.layers.53.self_attn.k_proj.weight": "pytorch_model-00018-of-00027.bin",
504
  "model.layers.53.self_attn.o_proj.weight": "pytorch_model-00018-of-00027.bin",
505
  "model.layers.53.self_attn.q_proj.weight": "pytorch_model-00018-of-00027.bin",
506
+ "model.layers.53.self_attn.rotary_emb.inv_freq": "pytorch_model-00018-of-00027.bin",
507
  "model.layers.53.self_attn.v_proj.weight": "pytorch_model-00018-of-00027.bin",
508
  "model.layers.54.input_layernorm.weight": "pytorch_model-00019-of-00027.bin",
509
  "model.layers.54.mlp.down_proj.weight": "pytorch_model-00019-of-00027.bin",
 
513
  "model.layers.54.self_attn.k_proj.weight": "pytorch_model-00019-of-00027.bin",
514
  "model.layers.54.self_attn.o_proj.weight": "pytorch_model-00019-of-00027.bin",
515
  "model.layers.54.self_attn.q_proj.weight": "pytorch_model-00019-of-00027.bin",
516
+ "model.layers.54.self_attn.rotary_emb.inv_freq": "pytorch_model-00019-of-00027.bin",
517
  "model.layers.54.self_attn.v_proj.weight": "pytorch_model-00019-of-00027.bin",
518
  "model.layers.55.input_layernorm.weight": "pytorch_model-00019-of-00027.bin",
519
  "model.layers.55.mlp.down_proj.weight": "pytorch_model-00019-of-00027.bin",
 
523
  "model.layers.55.self_attn.k_proj.weight": "pytorch_model-00019-of-00027.bin",
524
  "model.layers.55.self_attn.o_proj.weight": "pytorch_model-00019-of-00027.bin",
525
  "model.layers.55.self_attn.q_proj.weight": "pytorch_model-00019-of-00027.bin",
526
+ "model.layers.55.self_attn.rotary_emb.inv_freq": "pytorch_model-00019-of-00027.bin",
527
  "model.layers.55.self_attn.v_proj.weight": "pytorch_model-00019-of-00027.bin",
528
  "model.layers.56.input_layernorm.weight": "pytorch_model-00020-of-00027.bin",
529
  "model.layers.56.mlp.down_proj.weight": "pytorch_model-00020-of-00027.bin",
 
533
  "model.layers.56.self_attn.k_proj.weight": "pytorch_model-00019-of-00027.bin",
534
  "model.layers.56.self_attn.o_proj.weight": "pytorch_model-00019-of-00027.bin",
535
  "model.layers.56.self_attn.q_proj.weight": "pytorch_model-00019-of-00027.bin",
536
+ "model.layers.56.self_attn.rotary_emb.inv_freq": "pytorch_model-00019-of-00027.bin",
537
  "model.layers.56.self_attn.v_proj.weight": "pytorch_model-00019-of-00027.bin",
538
  "model.layers.57.input_layernorm.weight": "pytorch_model-00020-of-00027.bin",
539
  "model.layers.57.mlp.down_proj.weight": "pytorch_model-00020-of-00027.bin",
 
543
  "model.layers.57.self_attn.k_proj.weight": "pytorch_model-00020-of-00027.bin",
544
  "model.layers.57.self_attn.o_proj.weight": "pytorch_model-00020-of-00027.bin",
545
  "model.layers.57.self_attn.q_proj.weight": "pytorch_model-00020-of-00027.bin",
546
+ "model.layers.57.self_attn.rotary_emb.inv_freq": "pytorch_model-00020-of-00027.bin",
547
  "model.layers.57.self_attn.v_proj.weight": "pytorch_model-00020-of-00027.bin",
548
  "model.layers.58.input_layernorm.weight": "pytorch_model-00020-of-00027.bin",
549
  "model.layers.58.mlp.down_proj.weight": "pytorch_model-00020-of-00027.bin",
 
553
  "model.layers.58.self_attn.k_proj.weight": "pytorch_model-00020-of-00027.bin",
554
  "model.layers.58.self_attn.o_proj.weight": "pytorch_model-00020-of-00027.bin",
555
  "model.layers.58.self_attn.q_proj.weight": "pytorch_model-00020-of-00027.bin",
556
+ "model.layers.58.self_attn.rotary_emb.inv_freq": "pytorch_model-00020-of-00027.bin",
557
  "model.layers.58.self_attn.v_proj.weight": "pytorch_model-00020-of-00027.bin",
558
  "model.layers.59.input_layernorm.weight": "pytorch_model-00021-of-00027.bin",
559
  "model.layers.59.mlp.down_proj.weight": "pytorch_model-00021-of-00027.bin",
 
563
  "model.layers.59.self_attn.k_proj.weight": "pytorch_model-00020-of-00027.bin",
564
  "model.layers.59.self_attn.o_proj.weight": "pytorch_model-00020-of-00027.bin",
565
  "model.layers.59.self_attn.q_proj.weight": "pytorch_model-00020-of-00027.bin",
566
+ "model.layers.59.self_attn.rotary_emb.inv_freq": "pytorch_model-00020-of-00027.bin",
567
  "model.layers.59.self_attn.v_proj.weight": "pytorch_model-00020-of-00027.bin",
568
  "model.layers.6.input_layernorm.weight": "pytorch_model-00003-of-00027.bin",
569
  "model.layers.6.mlp.down_proj.weight": "pytorch_model-00003-of-00027.bin",
 
573
  "model.layers.6.self_attn.k_proj.weight": "pytorch_model-00003-of-00027.bin",
574
  "model.layers.6.self_attn.o_proj.weight": "pytorch_model-00003-of-00027.bin",
575
  "model.layers.6.self_attn.q_proj.weight": "pytorch_model-00003-of-00027.bin",
576
+ "model.layers.6.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00027.bin",
577
  "model.layers.6.self_attn.v_proj.weight": "pytorch_model-00003-of-00027.bin",
578
  "model.layers.60.input_layernorm.weight": "pytorch_model-00021-of-00027.bin",
579
  "model.layers.60.mlp.down_proj.weight": "pytorch_model-00021-of-00027.bin",
 
583
  "model.layers.60.self_attn.k_proj.weight": "pytorch_model-00021-of-00027.bin",
584
  "model.layers.60.self_attn.o_proj.weight": "pytorch_model-00021-of-00027.bin",
585
  "model.layers.60.self_attn.q_proj.weight": "pytorch_model-00021-of-00027.bin",
586
+ "model.layers.60.self_attn.rotary_emb.inv_freq": "pytorch_model-00021-of-00027.bin",
587
  "model.layers.60.self_attn.v_proj.weight": "pytorch_model-00021-of-00027.bin",
588
  "model.layers.61.input_layernorm.weight": "pytorch_model-00021-of-00027.bin",
589
  "model.layers.61.mlp.down_proj.weight": "pytorch_model-00021-of-00027.bin",
 
593
  "model.layers.61.self_attn.k_proj.weight": "pytorch_model-00021-of-00027.bin",
594
  "model.layers.61.self_attn.o_proj.weight": "pytorch_model-00021-of-00027.bin",
595
  "model.layers.61.self_attn.q_proj.weight": "pytorch_model-00021-of-00027.bin",
596
+ "model.layers.61.self_attn.rotary_emb.inv_freq": "pytorch_model-00021-of-00027.bin",
597
  "model.layers.61.self_attn.v_proj.weight": "pytorch_model-00021-of-00027.bin",
598
  "model.layers.62.input_layernorm.weight": "pytorch_model-00022-of-00027.bin",
599
  "model.layers.62.mlp.down_proj.weight": "pytorch_model-00022-of-00027.bin",
 
603
  "model.layers.62.self_attn.k_proj.weight": "pytorch_model-00021-of-00027.bin",
604
  "model.layers.62.self_attn.o_proj.weight": "pytorch_model-00021-of-00027.bin",
605
  "model.layers.62.self_attn.q_proj.weight": "pytorch_model-00021-of-00027.bin",
606
+ "model.layers.62.self_attn.rotary_emb.inv_freq": "pytorch_model-00021-of-00027.bin",
607
  "model.layers.62.self_attn.v_proj.weight": "pytorch_model-00021-of-00027.bin",
608
  "model.layers.63.input_layernorm.weight": "pytorch_model-00022-of-00027.bin",
609
  "model.layers.63.mlp.down_proj.weight": "pytorch_model-00022-of-00027.bin",
 
613
  "model.layers.63.self_attn.k_proj.weight": "pytorch_model-00022-of-00027.bin",
614
  "model.layers.63.self_attn.o_proj.weight": "pytorch_model-00022-of-00027.bin",
615
  "model.layers.63.self_attn.q_proj.weight": "pytorch_model-00022-of-00027.bin",
616
+ "model.layers.63.self_attn.rotary_emb.inv_freq": "pytorch_model-00022-of-00027.bin",
617
  "model.layers.63.self_attn.v_proj.weight": "pytorch_model-00022-of-00027.bin",
618
  "model.layers.64.input_layernorm.weight": "pytorch_model-00022-of-00027.bin",
619
  "model.layers.64.mlp.down_proj.weight": "pytorch_model-00022-of-00027.bin",
 
623
  "model.layers.64.self_attn.k_proj.weight": "pytorch_model-00022-of-00027.bin",
624
  "model.layers.64.self_attn.o_proj.weight": "pytorch_model-00022-of-00027.bin",
625
  "model.layers.64.self_attn.q_proj.weight": "pytorch_model-00022-of-00027.bin",
626
+ "model.layers.64.self_attn.rotary_emb.inv_freq": "pytorch_model-00022-of-00027.bin",
627
  "model.layers.64.self_attn.v_proj.weight": "pytorch_model-00022-of-00027.bin",
628
  "model.layers.65.input_layernorm.weight": "pytorch_model-00023-of-00027.bin",
629
  "model.layers.65.mlp.down_proj.weight": "pytorch_model-00023-of-00027.bin",
 
633
  "model.layers.65.self_attn.k_proj.weight": "pytorch_model-00022-of-00027.bin",
634
  "model.layers.65.self_attn.o_proj.weight": "pytorch_model-00022-of-00027.bin",
635
  "model.layers.65.self_attn.q_proj.weight": "pytorch_model-00022-of-00027.bin",
636
+ "model.layers.65.self_attn.rotary_emb.inv_freq": "pytorch_model-00022-of-00027.bin",
637
  "model.layers.65.self_attn.v_proj.weight": "pytorch_model-00022-of-00027.bin",
638
  "model.layers.66.input_layernorm.weight": "pytorch_model-00023-of-00027.bin",
639
  "model.layers.66.mlp.down_proj.weight": "pytorch_model-00023-of-00027.bin",
 
643
  "model.layers.66.self_attn.k_proj.weight": "pytorch_model-00023-of-00027.bin",
644
  "model.layers.66.self_attn.o_proj.weight": "pytorch_model-00023-of-00027.bin",
645
  "model.layers.66.self_attn.q_proj.weight": "pytorch_model-00023-of-00027.bin",
646
+ "model.layers.66.self_attn.rotary_emb.inv_freq": "pytorch_model-00023-of-00027.bin",
647
  "model.layers.66.self_attn.v_proj.weight": "pytorch_model-00023-of-00027.bin",
648
  "model.layers.67.input_layernorm.weight": "pytorch_model-00023-of-00027.bin",
649
  "model.layers.67.mlp.down_proj.weight": "pytorch_model-00023-of-00027.bin",
 
653
  "model.layers.67.self_attn.k_proj.weight": "pytorch_model-00023-of-00027.bin",
654
  "model.layers.67.self_attn.o_proj.weight": "pytorch_model-00023-of-00027.bin",
655
  "model.layers.67.self_attn.q_proj.weight": "pytorch_model-00023-of-00027.bin",
656
+ "model.layers.67.self_attn.rotary_emb.inv_freq": "pytorch_model-00023-of-00027.bin",
657
  "model.layers.67.self_attn.v_proj.weight": "pytorch_model-00023-of-00027.bin",
658
  "model.layers.68.input_layernorm.weight": "pytorch_model-00024-of-00027.bin",
659
  "model.layers.68.mlp.down_proj.weight": "pytorch_model-00024-of-00027.bin",
 
663
  "model.layers.68.self_attn.k_proj.weight": "pytorch_model-00023-of-00027.bin",
664
  "model.layers.68.self_attn.o_proj.weight": "pytorch_model-00023-of-00027.bin",
665
  "model.layers.68.self_attn.q_proj.weight": "pytorch_model-00023-of-00027.bin",
666
+ "model.layers.68.self_attn.rotary_emb.inv_freq": "pytorch_model-00023-of-00027.bin",
667
  "model.layers.68.self_attn.v_proj.weight": "pytorch_model-00023-of-00027.bin",
668
  "model.layers.69.input_layernorm.weight": "pytorch_model-00024-of-00027.bin",
669
  "model.layers.69.mlp.down_proj.weight": "pytorch_model-00024-of-00027.bin",
 
673
  "model.layers.69.self_attn.k_proj.weight": "pytorch_model-00024-of-00027.bin",
674
  "model.layers.69.self_attn.o_proj.weight": "pytorch_model-00024-of-00027.bin",
675
  "model.layers.69.self_attn.q_proj.weight": "pytorch_model-00024-of-00027.bin",
676
+ "model.layers.69.self_attn.rotary_emb.inv_freq": "pytorch_model-00024-of-00027.bin",
677
  "model.layers.69.self_attn.v_proj.weight": "pytorch_model-00024-of-00027.bin",
678
  "model.layers.7.input_layernorm.weight": "pytorch_model-00003-of-00027.bin",
679
  "model.layers.7.mlp.down_proj.weight": "pytorch_model-00003-of-00027.bin",
 
683
  "model.layers.7.self_attn.k_proj.weight": "pytorch_model-00003-of-00027.bin",
684
  "model.layers.7.self_attn.o_proj.weight": "pytorch_model-00003-of-00027.bin",
685
  "model.layers.7.self_attn.q_proj.weight": "pytorch_model-00003-of-00027.bin",
686
+ "model.layers.7.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00027.bin",
687
  "model.layers.7.self_attn.v_proj.weight": "pytorch_model-00003-of-00027.bin",
688
  "model.layers.70.input_layernorm.weight": "pytorch_model-00024-of-00027.bin",
689
  "model.layers.70.mlp.down_proj.weight": "pytorch_model-00024-of-00027.bin",
 
693
  "model.layers.70.self_attn.k_proj.weight": "pytorch_model-00024-of-00027.bin",
694
  "model.layers.70.self_attn.o_proj.weight": "pytorch_model-00024-of-00027.bin",
695
  "model.layers.70.self_attn.q_proj.weight": "pytorch_model-00024-of-00027.bin",
696
+ "model.layers.70.self_attn.rotary_emb.inv_freq": "pytorch_model-00024-of-00027.bin",
697
  "model.layers.70.self_attn.v_proj.weight": "pytorch_model-00024-of-00027.bin",
698
  "model.layers.71.input_layernorm.weight": "pytorch_model-00025-of-00027.bin",
699
  "model.layers.71.mlp.down_proj.weight": "pytorch_model-00025-of-00027.bin",
 
703
  "model.layers.71.self_attn.k_proj.weight": "pytorch_model-00024-of-00027.bin",
704
  "model.layers.71.self_attn.o_proj.weight": "pytorch_model-00024-of-00027.bin",
705
  "model.layers.71.self_attn.q_proj.weight": "pytorch_model-00024-of-00027.bin",
706
+ "model.layers.71.self_attn.rotary_emb.inv_freq": "pytorch_model-00024-of-00027.bin",
707
  "model.layers.71.self_attn.v_proj.weight": "pytorch_model-00024-of-00027.bin",
708
  "model.layers.72.input_layernorm.weight": "pytorch_model-00025-of-00027.bin",
709
  "model.layers.72.mlp.down_proj.weight": "pytorch_model-00025-of-00027.bin",
 
713
  "model.layers.72.self_attn.k_proj.weight": "pytorch_model-00025-of-00027.bin",
714
  "model.layers.72.self_attn.o_proj.weight": "pytorch_model-00025-of-00027.bin",
715
  "model.layers.72.self_attn.q_proj.weight": "pytorch_model-00025-of-00027.bin",
716
+ "model.layers.72.self_attn.rotary_emb.inv_freq": "pytorch_model-00025-of-00027.bin",
717
  "model.layers.72.self_attn.v_proj.weight": "pytorch_model-00025-of-00027.bin",
718
  "model.layers.73.input_layernorm.weight": "pytorch_model-00025-of-00027.bin",
719
  "model.layers.73.mlp.down_proj.weight": "pytorch_model-00025-of-00027.bin",
 
723
  "model.layers.73.self_attn.k_proj.weight": "pytorch_model-00025-of-00027.bin",
724
  "model.layers.73.self_attn.o_proj.weight": "pytorch_model-00025-of-00027.bin",
725
  "model.layers.73.self_attn.q_proj.weight": "pytorch_model-00025-of-00027.bin",
726
+ "model.layers.73.self_attn.rotary_emb.inv_freq": "pytorch_model-00025-of-00027.bin",
727
  "model.layers.73.self_attn.v_proj.weight": "pytorch_model-00025-of-00027.bin",
728
  "model.layers.74.input_layernorm.weight": "pytorch_model-00026-of-00027.bin",
729
  "model.layers.74.mlp.down_proj.weight": "pytorch_model-00026-of-00027.bin",
 
733
  "model.layers.74.self_attn.k_proj.weight": "pytorch_model-00025-of-00027.bin",
734
  "model.layers.74.self_attn.o_proj.weight": "pytorch_model-00025-of-00027.bin",
735
  "model.layers.74.self_attn.q_proj.weight": "pytorch_model-00025-of-00027.bin",
736
+ "model.layers.74.self_attn.rotary_emb.inv_freq": "pytorch_model-00025-of-00027.bin",
737
  "model.layers.74.self_attn.v_proj.weight": "pytorch_model-00025-of-00027.bin",
738
  "model.layers.75.input_layernorm.weight": "pytorch_model-00026-of-00027.bin",
739
  "model.layers.75.mlp.down_proj.weight": "pytorch_model-00026-of-00027.bin",
 
743
  "model.layers.75.self_attn.k_proj.weight": "pytorch_model-00026-of-00027.bin",
744
  "model.layers.75.self_attn.o_proj.weight": "pytorch_model-00026-of-00027.bin",
745
  "model.layers.75.self_attn.q_proj.weight": "pytorch_model-00026-of-00027.bin",
746
+ "model.layers.75.self_attn.rotary_emb.inv_freq": "pytorch_model-00026-of-00027.bin",
747
  "model.layers.75.self_attn.v_proj.weight": "pytorch_model-00026-of-00027.bin",
748
  "model.layers.76.input_layernorm.weight": "pytorch_model-00026-of-00027.bin",
749
  "model.layers.76.mlp.down_proj.weight": "pytorch_model-00026-of-00027.bin",
 
753
  "model.layers.76.self_attn.k_proj.weight": "pytorch_model-00026-of-00027.bin",
754
  "model.layers.76.self_attn.o_proj.weight": "pytorch_model-00026-of-00027.bin",
755
  "model.layers.76.self_attn.q_proj.weight": "pytorch_model-00026-of-00027.bin",
756
+ "model.layers.76.self_attn.rotary_emb.inv_freq": "pytorch_model-00026-of-00027.bin",
757
  "model.layers.76.self_attn.v_proj.weight": "pytorch_model-00026-of-00027.bin",
758
  "model.layers.77.input_layernorm.weight": "pytorch_model-00027-of-00027.bin",
759
  "model.layers.77.mlp.down_proj.weight": "pytorch_model-00027-of-00027.bin",
 
763
  "model.layers.77.self_attn.k_proj.weight": "pytorch_model-00026-of-00027.bin",
764
  "model.layers.77.self_attn.o_proj.weight": "pytorch_model-00026-of-00027.bin",
765
  "model.layers.77.self_attn.q_proj.weight": "pytorch_model-00026-of-00027.bin",
766
+ "model.layers.77.self_attn.rotary_emb.inv_freq": "pytorch_model-00026-of-00027.bin",
767
  "model.layers.77.self_attn.v_proj.weight": "pytorch_model-00026-of-00027.bin",
768
  "model.layers.78.input_layernorm.weight": "pytorch_model-00027-of-00027.bin",
769
  "model.layers.78.mlp.down_proj.weight": "pytorch_model-00027-of-00027.bin",
 
773
  "model.layers.78.self_attn.k_proj.weight": "pytorch_model-00027-of-00027.bin",
774
  "model.layers.78.self_attn.o_proj.weight": "pytorch_model-00027-of-00027.bin",
775
  "model.layers.78.self_attn.q_proj.weight": "pytorch_model-00027-of-00027.bin",
776
+ "model.layers.78.self_attn.rotary_emb.inv_freq": "pytorch_model-00027-of-00027.bin",
777
  "model.layers.78.self_attn.v_proj.weight": "pytorch_model-00027-of-00027.bin",
778
  "model.layers.79.input_layernorm.weight": "pytorch_model-00027-of-00027.bin",
779
  "model.layers.79.mlp.down_proj.weight": "pytorch_model-00027-of-00027.bin",
 
783
  "model.layers.79.self_attn.k_proj.weight": "pytorch_model-00027-of-00027.bin",
784
  "model.layers.79.self_attn.o_proj.weight": "pytorch_model-00027-of-00027.bin",
785
  "model.layers.79.self_attn.q_proj.weight": "pytorch_model-00027-of-00027.bin",
786
+ "model.layers.79.self_attn.rotary_emb.inv_freq": "pytorch_model-00027-of-00027.bin",
787
  "model.layers.79.self_attn.v_proj.weight": "pytorch_model-00027-of-00027.bin",
788
  "model.layers.8.input_layernorm.weight": "pytorch_model-00004-of-00027.bin",
789
  "model.layers.8.mlp.down_proj.weight": "pytorch_model-00004-of-00027.bin",
 
793
  "model.layers.8.self_attn.k_proj.weight": "pytorch_model-00003-of-00027.bin",
794
  "model.layers.8.self_attn.o_proj.weight": "pytorch_model-00003-of-00027.bin",
795
  "model.layers.8.self_attn.q_proj.weight": "pytorch_model-00003-of-00027.bin",
796
+ "model.layers.8.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00027.bin",
797
  "model.layers.8.self_attn.v_proj.weight": "pytorch_model-00003-of-00027.bin",
798
  "model.layers.9.input_layernorm.weight": "pytorch_model-00004-of-00027.bin",
799
  "model.layers.9.mlp.down_proj.weight": "pytorch_model-00004-of-00027.bin",
 
803
  "model.layers.9.self_attn.k_proj.weight": "pytorch_model-00004-of-00027.bin",
804
  "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00004-of-00027.bin",
805
  "model.layers.9.self_attn.q_proj.weight": "pytorch_model-00004-of-00027.bin",
806
+ "model.layers.9.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00027.bin",
807
  "model.layers.9.self_attn.v_proj.weight": "pytorch_model-00004-of-00027.bin",
808
  "model.norm.weight": "pytorch_model-00027-of-00027.bin"
809
  }
special_tokens_map.json CHANGED
@@ -1,9 +1,93 @@
1
  {
2
  "additional_special_tokens": [
3
- "<|endofsystemprompt|>",
4
- "<|beginofsystem|>",
5
- "<|beginofuser|>",
6
- "<|endofchat|>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  ],
8
  "bos_token": {
9
  "content": "<|endoftext|>",
 
1
  {
2
  "additional_special_tokens": [
3
+ "<fim_prefix>",
4
+ "<fim_middle>",
5
+ "<fim_suffix>",
6
+ "<fim_pad>",
7
+ "<filename>",
8
+ "<gh_stars>",
9
+ "<issue_start>",
10
+ "<issue_comment>",
11
+ "<issue_closed>",
12
+ "<jupyter_start>",
13
+ "<jupyter_text>",
14
+ "<jupyter_code>",
15
+ "<jupyter_output>",
16
+ "<empty_output>",
17
+ "<commit_before>",
18
+ "<commit_msg>",
19
+ "<commit_after>",
20
+ "<reponame>",
21
+ {
22
+ "content": "<tool_call>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false
27
+ },
28
+ {
29
+ "content": "<tool_response>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false
34
+ },
35
+ {
36
+ "content": "<tools>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false
41
+ },
42
+ {
43
+ "content": "</tool_call>",
44
+ "lstrip": false,
45
+ "normalized": false,
46
+ "rstrip": false,
47
+ "single_word": false
48
+ },
49
+ {
50
+ "content": "</tool_response>",
51
+ "lstrip": false,
52
+ "normalized": false,
53
+ "rstrip": false,
54
+ "single_word": false
55
+ },
56
+ {
57
+ "content": "</tools>",
58
+ "lstrip": false,
59
+ "normalized": false,
60
+ "rstrip": false,
61
+ "single_word": false
62
+ },
63
+ {
64
+ "content": "<|endofsystemprompt|>",
65
+ "lstrip": false,
66
+ "normalized": false,
67
+ "rstrip": false,
68
+ "single_word": false
69
+ },
70
+ {
71
+ "content": "<|beginofsystem|>",
72
+ "lstrip": false,
73
+ "normalized": false,
74
+ "rstrip": false,
75
+ "single_word": false
76
+ },
77
+ {
78
+ "content": "<|beginofuser|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false
83
+ },
84
+ {
85
+ "content": "<|endofchat|>",
86
+ "lstrip": false,
87
+ "normalized": false,
88
+ "rstrip": false,
89
+ "single_word": false
90
+ }
91
  ],
92
  "bos_token": {
93
  "content": "<|endoftext|>",
tokenizer.json CHANGED
@@ -194,7 +194,7 @@
194
  },
195
  {
196
  "id": 32018,
197
- "content": "<|endofsystemprompt|>",
198
  "single_word": false,
199
  "lstrip": false,
200
  "rstrip": false,
@@ -203,7 +203,7 @@
203
  },
204
  {
205
  "id": 32019,
206
- "content": "<|beginofsystem|>",
207
  "single_word": false,
208
  "lstrip": false,
209
  "rstrip": false,
@@ -212,7 +212,7 @@
212
  },
213
  {
214
  "id": 32020,
215
- "content": "<|beginofuser|>",
216
  "single_word": false,
217
  "lstrip": false,
218
  "rstrip": false,
@@ -221,6 +221,60 @@
221
  },
222
  {
223
  "id": 32021,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
  "content": "<|endofchat|>",
225
  "single_word": false,
226
  "lstrip": false,
@@ -93559,4 +93613,4 @@
93559
  "▁ ▁"
93560
  ]
93561
  }
93562
- }
 
194
  },
195
  {
196
  "id": 32018,
197
+ "content": "<tool_response>",
198
  "single_word": false,
199
  "lstrip": false,
200
  "rstrip": false,
 
203
  },
204
  {
205
  "id": 32019,
206
+ "content": "<tool_call>",
207
  "single_word": false,
208
  "lstrip": false,
209
  "rstrip": false,
 
212
  },
213
  {
214
  "id": 32020,
215
+ "content": "<tools>",
216
  "single_word": false,
217
  "lstrip": false,
218
  "rstrip": false,
 
221
  },
222
  {
223
  "id": 32021,
224
+ "content": "</tool_response>",
225
+ "single_word": false,
226
+ "lstrip": false,
227
+ "rstrip": false,
228
+ "normalized": false,
229
+ "special": true
230
+ },
231
+ {
232
+ "id": 32022,
233
+ "content": "</tool_call>",
234
+ "single_word": false,
235
+ "lstrip": false,
236
+ "rstrip": false,
237
+ "normalized": false,
238
+ "special": true
239
+ },
240
+ {
241
+ "id": 32023,
242
+ "content": "</tools>",
243
+ "single_word": false,
244
+ "lstrip": false,
245
+ "rstrip": false,
246
+ "normalized": false,
247
+ "special": true
248
+ },
249
+ {
250
+ "id": 32024,
251
+ "content": "<|endofsystemprompt|>",
252
+ "single_word": false,
253
+ "lstrip": false,
254
+ "rstrip": false,
255
+ "normalized": false,
256
+ "special": true
257
+ },
258
+ {
259
+ "id": 32025,
260
+ "content": "<|beginofsystem|>",
261
+ "single_word": false,
262
+ "lstrip": false,
263
+ "rstrip": false,
264
+ "normalized": false,
265
+ "special": true
266
+ },
267
+ {
268
+ "id": 32026,
269
+ "content": "<|beginofuser|>",
270
+ "single_word": false,
271
+ "lstrip": false,
272
+ "rstrip": false,
273
+ "normalized": false,
274
+ "special": true
275
+ },
276
+ {
277
+ "id": 32027,
278
  "content": "<|endofchat|>",
279
  "single_word": false,
280
  "lstrip": false,
 
93613
  "▁ ▁"
93614
  ]
93615
  }
93616
+ }
tokenizer_config.json CHANGED
@@ -1,6 +1,7 @@
1
  {
2
  "add_bos_token": false,
3
  "add_eos_token": false,
 
4
  "added_tokens_decoder": {
5
  "0": {
6
  "content": "<unk>",
@@ -171,7 +172,7 @@
171
  "special": true
172
  },
173
  "32018": {
174
- "content": "<|endofsystemprompt|>",
175
  "lstrip": false,
176
  "normalized": false,
177
  "rstrip": false,
@@ -179,7 +180,7 @@
179
  "special": true
180
  },
181
  "32019": {
182
- "content": "<|beginofsystem|>",
183
  "lstrip": false,
184
  "normalized": false,
185
  "rstrip": false,
@@ -187,7 +188,7 @@
187
  "special": true
188
  },
189
  "32020": {
190
- "content": "<|beginofuser|>",
191
  "lstrip": false,
192
  "normalized": false,
193
  "rstrip": false,
@@ -195,6 +196,54 @@
195
  "special": true
196
  },
197
  "32021": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
  "content": "<|endofchat|>",
199
  "lstrip": false,
200
  "normalized": false,
@@ -204,13 +253,45 @@
204
  }
205
  },
206
  "additional_special_tokens": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
  "<|endofsystemprompt|>",
208
  "<|beginofsystem|>",
209
  "<|beginofuser|>",
210
  "<|endofchat|>"
211
  ],
212
  "bos_token": "<|endoftext|>",
213
- "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '<|beginofuser|>' + message['content'] }}{% elif message['role'] == 'system' %}{{ message['content'] + '<|endofsystemprompt|>' }}{% elif message['role'] == 'assistant' %}{{ '<|beginofsystem|>' + message['content'] }}{% endif %}{% if loop.last and add_generation_prompt %}{{ '<|beginofsystem|>' }}{% endif %}{% endfor %}",
 
 
 
 
 
 
 
 
214
  "clean_up_tokenization_spaces": false,
215
  "eos_token": "<|endoftext|>",
216
  "legacy": true,
 
1
  {
2
  "add_bos_token": false,
3
  "add_eos_token": false,
4
+ "add_prefix_space": null,
5
  "added_tokens_decoder": {
6
  "0": {
7
  "content": "<unk>",
 
172
  "special": true
173
  },
174
  "32018": {
175
+ "content": "<tool_response>",
176
  "lstrip": false,
177
  "normalized": false,
178
  "rstrip": false,
 
180
  "special": true
181
  },
182
  "32019": {
183
+ "content": "<tool_call>",
184
  "lstrip": false,
185
  "normalized": false,
186
  "rstrip": false,
 
188
  "special": true
189
  },
190
  "32020": {
191
+ "content": "<tools>",
192
  "lstrip": false,
193
  "normalized": false,
194
  "rstrip": false,
 
196
  "special": true
197
  },
198
  "32021": {
199
+ "content": "</tool_response>",
200
+ "lstrip": false,
201
+ "normalized": false,
202
+ "rstrip": false,
203
+ "single_word": false,
204
+ "special": true
205
+ },
206
+ "32022": {
207
+ "content": "</tool_call>",
208
+ "lstrip": false,
209
+ "normalized": false,
210
+ "rstrip": false,
211
+ "single_word": false,
212
+ "special": true
213
+ },
214
+ "32023": {
215
+ "content": "</tools>",
216
+ "lstrip": false,
217
+ "normalized": false,
218
+ "rstrip": false,
219
+ "single_word": false,
220
+ "special": true
221
+ },
222
+ "32024": {
223
+ "content": "<|endofsystemprompt|>",
224
+ "lstrip": false,
225
+ "normalized": false,
226
+ "rstrip": false,
227
+ "single_word": false,
228
+ "special": true
229
+ },
230
+ "32025": {
231
+ "content": "<|beginofsystem|>",
232
+ "lstrip": false,
233
+ "normalized": false,
234
+ "rstrip": false,
235
+ "single_word": false,
236
+ "special": true
237
+ },
238
+ "32026": {
239
+ "content": "<|beginofuser|>",
240
+ "lstrip": false,
241
+ "normalized": false,
242
+ "rstrip": false,
243
+ "single_word": false,
244
+ "special": true
245
+ },
246
+ "32027": {
247
  "content": "<|endofchat|>",
248
  "lstrip": false,
249
  "normalized": false,
 
253
  }
254
  },
255
  "additional_special_tokens": [
256
+ "<fim_prefix>",
257
+ "<fim_middle>",
258
+ "<fim_suffix>",
259
+ "<fim_pad>",
260
+ "<filename>",
261
+ "<gh_stars>",
262
+ "<issue_start>",
263
+ "<issue_comment>",
264
+ "<issue_closed>",
265
+ "<jupyter_start>",
266
+ "<jupyter_text>",
267
+ "<jupyter_code>",
268
+ "<jupyter_output>",
269
+ "<empty_output>",
270
+ "<commit_before>",
271
+ "<commit_msg>",
272
+ "<commit_after>",
273
+ "<reponame>",
274
+ "<tool_call>",
275
+ "<tool_response>",
276
+ "<tools>",
277
+ "</tool_call>",
278
+ "</tool_response>",
279
+ "</tools>",
280
  "<|endofsystemprompt|>",
281
  "<|beginofsystem|>",
282
  "<|beginofuser|>",
283
  "<|endofchat|>"
284
  ],
285
  "bos_token": "<|endoftext|>",
286
+ "chat_template": [
287
+ {
288
+ "name": "default",
289
+ "template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '<|beginofuser|>' + message['content'] }}{% elif message['role'] == 'system' %}{{ message['content'] + '<|endofsystemprompt|>' }}{% elif message['role'] == 'assistant' %}{{ '<|beginofsystem|>' + message['content'] }}{% endif %}{% if loop.last and add_generation_prompt %}{{ '<|beginofsystem|>' }}{% endif %}{% endfor %}" },
290
+ {
291
+ "name": "tool_use",
292
+ "template": "{%- macro json_to_python_type(json_spec) %}\n{%- set basic_type_map = {\n \"string\": \"str\",\n \"number\": \"float\",\n \"integer\": \"int\",\n \"boolean\": \"bool\"\n} %}\n\n{%- if basic_type_map[json_spec.type] is defined %}\n {{- basic_type_map[json_spec.type] }}\n{%- elif json_spec.type == \"array\" %}\n {{- \"list[\" + json_to_python_type(json_spec|items) + \"]\"}}\n{%- elif json_spec.type == \"object\" %}\n {%- if json_spec.additionalProperties is defined %}\n {{- \"dict[str, \" + json_to_python_type(json_spec.additionalProperties) + ']'}}\n {%- else %}\n {{- \"dict\" }}\n {%- endif %}\n{%- elif json_spec.type is iterable %}\n {{- \"Union[\" }}\n {%- for t in json_spec.type %}\n {{- json_to_python_type({\"type\": t}) }}\n {%- if not loop.last %}\n {{- \",\" }} \n {%- endif %}\n {%- endfor %}\n {{- \"]\" }}\n{%- else %}\n {{- \"Any\" }}\n{%- endif %}\n{%- endmacro %}\n\n\n{{- \"You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. 
Here are the available tools\\n<tools>\\n\" }}\n{%- for tool in tools %}\n {%- if tool.function is defined %}\n {%- set tool = tool.function %}\n {%- endif %}\n {{ '{\"type\": \"function\", \"function\": ' }}\n {{- '{\"name\": \"' + tool.name + '\", ' }}\n {{- '\"description\": \"' + tool.name + '(' }}\n {%- for param_name, param_fields in tool.parameters.properties|items %}\n {{- param_name + \": \" + json_to_python_type(param_fields) }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- if tool.return is defined %}\n {{- \" -> \" + json_to_python_type(tool.return) }}\n {%- endif %}\n {{- \" - \" + tool.description + \"\\n\\n\" }}\n {%- for param_name, param_fields in tool.parameters.properties|items %}\n {%- if loop.first %}\n {{- \" Args:\" }}\n {%- endif %}\n {{- \"\\n \" + param_name + \"(\" + json_to_python_type(param_fields) + \"): \" + param_fields.description|trim }}\n {%- endfor %}\n {%- if tool.return is defined and tool.return.description is defined %}\n {{- \"\\n Returns:\\n \" + tool.return.description }}\n {%- endif %}\n {{- '\"' }}\n {{- ', \"parameters\": ' }}\n {%- if tool.parameters.properties | length == 0 %}\n {{- \"{}\" }}\n {%- else %}\n {{- tool.parameters|tojson }}\n {%- endif %}\n {{- \"}\" }}\n {%- if not loop.last %}\n {{- \"\\n\" }}\n {%- endif %}\n{%- endfor %}\n{{- \"\\n</tools>\\n\" }}\n{{- 'Use the following pydantic model json schema for each tool call you will make: {\"properties\": {\"arguments\": {\"title\": \"Arguments\", \"type\": \"object\"}, \"name\": {\"title\": \"Name\", \"type\": \"string\"}}, \"required\": [\"arguments\", \"name\"], \"title\": \"FunctionCall\", \"type\": \"object\"}\n' }}\n{{- \"For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:\n\" }}\n{{- \"<tool_call>\n\" }}\n{{- '{\"arguments\": <args-dict>, \"name\": <function-name>}\n' }}\n{{- '</tool_call><|endofsystemprompt|>' }}\n{%- for message 
in messages %}{%- if message.role == \"user\" %}{{- '<|beginofuser|>' + message.content }}{%- elif (message.role == \"assistant\" and message.tool_calls is not defined) %} {{- '<|beginofsystem|>' + message.content }}{%- elif message.role == \"assistant\" %}{{- '<tool_call>\\n' }}{%- for tool_call in message.tool_calls %}{%- if tool_call.function is defined %}{%- set tool_call = tool_call.function %}{%- endif %}{{- '{ ' }}{%- if tool_call.arguments is defined %}{{- '\"arguments\": ' }}{{- tool_call.arguments|tojson }}{{- ', '}}{%- endif %}{{- '\"name\": \"' }}{{- tool_call.name }}{{- '\"}' }}{{- '\\n</tool_call> ' }}\n {%- endfor %}{%- elif message.role == \"tool\" %}{%- if not message.name is defined %}{{- raise_exception(\"Tool response dicts require a 'name' key indicating the name of the called function!\") }}{%- endif %}{{- '<tool_response>\\n' }}{{- '{\"name\": \"' }}{{- message.name }}{{- '\", \"content\": ' }}{{- message.content|tojson + '}' }}{{- '\\n</tool_response>\\n' }}{%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}{{- '<|beginofsystem|>' }}{%- endif %}\n"
293
+ }
294
+ ],
295
  "clean_up_tokenization_spaces": false,
296
  "eos_token": "<|endoftext|>",
297
  "legacy": true,
tokenizer_config_bk.json ADDED
@@ -0,0 +1,304 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": null,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": true,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": false
21
+ },
22
+ "2": {
23
+ "content": "<|endoftext|>",
24
+ "lstrip": false,
25
+ "normalized": true,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ },
30
+ "32000": {
31
+ "content": "<fim_prefix>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false,
36
+ "special": true
37
+ },
38
+ "32001": {
39
+ "content": "<fim_middle>",
40
+ "lstrip": false,
41
+ "normalized": false,
42
+ "rstrip": false,
43
+ "single_word": false,
44
+ "special": true
45
+ },
46
+ "32002": {
47
+ "content": "<fim_suffix>",
48
+ "lstrip": false,
49
+ "normalized": false,
50
+ "rstrip": false,
51
+ "single_word": false,
52
+ "special": true
53
+ },
54
+ "32003": {
55
+ "content": "<fim_pad>",
56
+ "lstrip": false,
57
+ "normalized": false,
58
+ "rstrip": false,
59
+ "single_word": false,
60
+ "special": true
61
+ },
62
+ "32004": {
63
+ "content": "<filename>",
64
+ "lstrip": false,
65
+ "normalized": false,
66
+ "rstrip": false,
67
+ "single_word": false,
68
+ "special": true
69
+ },
70
+ "32005": {
71
+ "content": "<gh_stars>",
72
+ "lstrip": false,
73
+ "normalized": false,
74
+ "rstrip": false,
75
+ "single_word": false,
76
+ "special": true
77
+ },
78
+ "32006": {
79
+ "content": "<issue_start>",
80
+ "lstrip": false,
81
+ "normalized": false,
82
+ "rstrip": false,
83
+ "single_word": false,
84
+ "special": true
85
+ },
86
+ "32007": {
87
+ "content": "<issue_comment>",
88
+ "lstrip": false,
89
+ "normalized": false,
90
+ "rstrip": false,
91
+ "single_word": false,
92
+ "special": true
93
+ },
94
+ "32008": {
95
+ "content": "<issue_closed>",
96
+ "lstrip": false,
97
+ "normalized": false,
98
+ "rstrip": false,
99
+ "single_word": false,
100
+ "special": true
101
+ },
102
+ "32009": {
103
+ "content": "<jupyter_start>",
104
+ "lstrip": false,
105
+ "normalized": false,
106
+ "rstrip": false,
107
+ "single_word": false,
108
+ "special": true
109
+ },
110
+ "32010": {
111
+ "content": "<jupyter_text>",
112
+ "lstrip": false,
113
+ "normalized": false,
114
+ "rstrip": false,
115
+ "single_word": false,
116
+ "special": true
117
+ },
118
+ "32011": {
119
+ "content": "<jupyter_code>",
120
+ "lstrip": false,
121
+ "normalized": false,
122
+ "rstrip": false,
123
+ "single_word": false,
124
+ "special": true
125
+ },
126
+ "32012": {
127
+ "content": "<jupyter_output>",
128
+ "lstrip": false,
129
+ "normalized": false,
130
+ "rstrip": false,
131
+ "single_word": false,
132
+ "special": true
133
+ },
134
+ "32013": {
135
+ "content": "<empty_output>",
136
+ "lstrip": false,
137
+ "normalized": false,
138
+ "rstrip": false,
139
+ "single_word": false,
140
+ "special": true
141
+ },
142
+ "32014": {
143
+ "content": "<commit_before>",
144
+ "lstrip": false,
145
+ "normalized": false,
146
+ "rstrip": false,
147
+ "single_word": false,
148
+ "special": true
149
+ },
150
+ "32015": {
151
+ "content": "<commit_msg>",
152
+ "lstrip": false,
153
+ "normalized": false,
154
+ "rstrip": false,
155
+ "single_word": false,
156
+ "special": true
157
+ },
158
+ "32016": {
159
+ "content": "<commit_after>",
160
+ "lstrip": false,
161
+ "normalized": false,
162
+ "rstrip": false,
163
+ "single_word": false,
164
+ "special": true
165
+ },
166
+ "32017": {
167
+ "content": "<reponame>",
168
+ "lstrip": false,
169
+ "normalized": false,
170
+ "rstrip": false,
171
+ "single_word": false,
172
+ "special": true
173
+ },
174
+ "32018": {
175
+ "content": "<tool_response>",
176
+ "lstrip": false,
177
+ "normalized": false,
178
+ "rstrip": false,
179
+ "single_word": false,
180
+ "special": true
181
+ },
182
+ "32019": {
183
+ "content": "<tool_call>",
184
+ "lstrip": false,
185
+ "normalized": false,
186
+ "rstrip": false,
187
+ "single_word": false,
188
+ "special": true
189
+ },
190
+ "32020": {
191
+ "content": "<tools>",
192
+ "lstrip": false,
193
+ "normalized": false,
194
+ "rstrip": false,
195
+ "single_word": false,
196
+ "special": true
197
+ },
198
+ "32021": {
199
+ "content": "</tool_response>",
200
+ "lstrip": false,
201
+ "normalized": false,
202
+ "rstrip": false,
203
+ "single_word": false,
204
+ "special": true
205
+ },
206
+ "32022": {
207
+ "content": "</tool_call>",
208
+ "lstrip": false,
209
+ "normalized": false,
210
+ "rstrip": false,
211
+ "single_word": false,
212
+ "special": true
213
+ },
214
+ "32023": {
215
+ "content": "</tools>",
216
+ "lstrip": false,
217
+ "normalized": false,
218
+ "rstrip": false,
219
+ "single_word": false,
220
+ "special": true
221
+ },
222
+ "32024": {
223
+ "content": "<|endofsystemprompt|>",
224
+ "lstrip": false,
225
+ "normalized": false,
226
+ "rstrip": false,
227
+ "single_word": false,
228
+ "special": true
229
+ },
230
+ "32025": {
231
+ "content": "<|beginofsystem|>",
232
+ "lstrip": false,
233
+ "normalized": false,
234
+ "rstrip": false,
235
+ "single_word": false,
236
+ "special": true
237
+ },
238
+ "32026": {
239
+ "content": "<|beginofuser|>",
240
+ "lstrip": false,
241
+ "normalized": false,
242
+ "rstrip": false,
243
+ "single_word": false,
244
+ "special": true
245
+ },
246
+ "32027": {
247
+ "content": "<|endofchat|>",
248
+ "lstrip": false,
249
+ "normalized": false,
250
+ "rstrip": false,
251
+ "single_word": false,
252
+ "special": true
253
+ }
254
+ },
255
+ "additional_special_tokens": [
256
+ "<fim_prefix>",
257
+ "<fim_middle>",
258
+ "<fim_suffix>",
259
+ "<fim_pad>",
260
+ "<filename>",
261
+ "<gh_stars>",
262
+ "<issue_start>",
263
+ "<issue_comment>",
264
+ "<issue_closed>",
265
+ "<jupyter_start>",
266
+ "<jupyter_text>",
267
+ "<jupyter_code>",
268
+ "<jupyter_output>",
269
+ "<empty_output>",
270
+ "<commit_before>",
271
+ "<commit_msg>",
272
+ "<commit_after>",
273
+ "<reponame>",
274
+ "<tool_call>",
275
+ "<tool_response>",
276
+ "<tools>",
277
+ "</tool_call>",
278
+ "</tool_response>",
279
+ "</tools>",
280
+ "<|endofsystemprompt|>",
281
+ "<|beginofsystem|>",
282
+ "<|beginofuser|>",
283
+ "<|endofchat|>"
284
+ ],
285
+ "bos_token": "<|endoftext|>",
286
+ "chat_template": [
287
+ {
288
+ "name": "default",
289
+ "template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '<|beginofuser|>' + message['content'] }}{% elif message['role'] == 'system' %}{{ message['content'] + '<|endofsystemprompt|>' }}{% elif message['role'] == 'assistant' %}{{ '<|beginofsystem|>' + message['content'] }}{% endif %}{% if loop.last and add_generation_prompt %}{{ '<|beginofsystem|>' }}{% endif %}{% endfor %}" },
290
+ {
291
+ "name": "tool_use",
292
+ "template": "{%- macro json_to_python_type(json_spec) %}\n{%- set basic_type_map = {\n \"string\": \"str\",\n \"number\": \"float\",\n \"integer\": \"int\",\n \"boolean\": \"bool\"\n} %}\n\n{%- if basic_type_map[json_spec.type] is defined %}\n {{- basic_type_map[json_spec.type] }}\n{%- elif json_spec.type == \"array\" %}\n {{- \"list[\" + json_to_python_type(json_spec|items) + \"]\"}}\n{%- elif json_spec.type == \"object\" %}\n {%- if json_spec.additionalProperties is defined %}\n {{- \"dict[str, \" + json_to_python_type(json_spec.additionalProperties) + ']'}}\n {%- else %}\n {{- \"dict\" }}\n {%- endif %}\n{%- elif json_spec.type is iterable %}\n {{- \"Union[\" }}\n {%- for t in json_spec.type %}\n {{- json_to_python_type({\"type\": t}) }}\n {%- if not loop.last %}\n {{- \",\" }} \n {%- endif %}\n {%- endfor %}\n {{- \"]\" }}\n{%- else %}\n {{- \"Any\" }}\n{%- endif %}\n{%- endmacro %}\n\n\n{{- \"You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. 
Here are the available tools: <tools> \" }}\n{%- for tool in tools %}\n {%- if tool.function is defined %}\n {%- set tool = tool.function %}\n {%- endif %}\n {{- '{\"type\": \"function\", \"function\": ' }}\n {{- '{\"name\": \"' + tool.name + '\", ' }}\n {{- '\"description\": \"' + tool.name + '(' }}\n {%- for param_name, param_fields in tool.parameters.properties|items %}\n {{- param_name + \": \" + json_to_python_type(param_fields) }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- if tool.return is defined %}\n {{- \" -> \" + json_to_python_type(tool.return) }}\n {%- endif %}\n {{- \" - \" + tool.description + \"\\n\\n\" }}\n {%- for param_name, param_fields in tool.parameters.properties|items %}\n {%- if loop.first %}\n {{- \" Args:\\n\" }}\n {%- endif %}\n {{- \" \" + param_name + \"(\" + json_to_python_type(param_fields) + \"): \" + param_fields.description|trim }}\n {%- endfor %}\n {%- if tool.return is defined and tool.return.description is defined %}\n {{- \"\\n Returns:\\n \" + tool.return.description }}\n {%- endif %}\n {{- '\"' }}\n {{- ', \"parameters\": ' }}\n {%- if tool.parameters.properties | length == 0 %}\n {{- \"{}\" }}\n {%- else %}\n {{- tool.parameters|tojson }}\n {%- endif %}\n {{- \"}\" }}\n {%- if not loop.last %}\n {{- \"\\n\" }}\n {%- endif %}\n{%- endfor %}\n{{- \" </tools>\" }}\n{{- 'Use the following pydantic model json schema for each tool call you will make: {\"properties\": {\"arguments\": {\"title\": \"Arguments\", \"type\": \"object\"}, \"name\": {\"title\": \"Name\", \"type\": \"string\"}}, \"required\": [\"arguments\", \"name\"], \"title\": \"FunctionCall\", \"type\": \"object\"}\n' }}\n{{- \"For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:\n\" }}\n{{- \"<tool_call>\n\" }}\n{{- '{\"arguments\": <args-dict>, \"name\": <function-name>}\n' }}\n{{- '</tool_call><|endofsystemprompt|>' }}\n{%- for message in 
messages %}{%- if message.role == \"user\" %}{{- '<|beginofuser|>' + message.content }}{%- elif (message.role == \"assistant\" and message.tool_calls is not defined) %} {{- '<|beginofsystem|>' + message.content }}{%- elif message.role == \"assistant\" %}{{- '<tool_call>\n' }}{%- for tool_call in message.tool_calls %}{%- if tool_call.function is defined %}{%- set tool_call = tool_call.function %}{%- endif %}{{- '{ ' }}{%- if tool_call.arguments is defined %}{{- '\"arguments\": ' }}{{- tool_call.arguments|tojson }}{{- ', '}}{%- endif %}{{- '\"name\": \"' }}{{- tool_call.name }}{{- '\"}' }}{{- '\n</tool_call> ' }}\n {%- endfor %}{%- elif message.role == \"tool\" %}{%- if not message.name is defined %}{{- raise_exception(\"Tool response dicts require a 'name' key indicating the name of the called function!\") }}{%- endif %}{{- '<tool_response>\n' }}{{- '{\"name\": \"' }}{{- message.name }}{{- '\", \"content\": ' }}{{- message.content|tojson + '}' }}{{- '\n</tool_response>\n' }}{%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}{{- '<|beginofsystem|>' }}{%- endif %}\n"
293
+ }
294
+ ],
295
+ "clean_up_tokenization_spaces": false,
296
+ "eos_token": "<|endoftext|>",
297
+ "legacy": true,
298
+ "model_max_length": 8192,
299
+ "pad_token": null,
300
+ "sp_model_kwargs": {},
301
+ "tokenizer_class": "LlamaTokenizer",
302
+ "unk_token": "<unk>",
303
+ "use_default_system_prompt": false
304
+ }