ArturBaranowskiAA
committed on
Commit
•
10c9a0b
1
Parent(s):
694a2a9
Release Pharia-1-LLM-7B-control-aligned
Browse files
- config.yml +44 -0
- model_state_layer_0_EmbeddingInput.pt +3 -0
- model_state_layer_10_TransformerLayer.pt +3 -0
- model_state_layer_11_TransformerLayer.pt +3 -0
- model_state_layer_12_TransformerLayer.pt +3 -0
- model_state_layer_13_TransformerLayer.pt +3 -0
- model_state_layer_14_TransformerLayer.pt +3 -0
- model_state_layer_15_TransformerLayer.pt +3 -0
- model_state_layer_16_TransformerLayer.pt +3 -0
- model_state_layer_17_TransformerLayer.pt +3 -0
- model_state_layer_18_TransformerLayer.pt +3 -0
- model_state_layer_19_TransformerLayer.pt +3 -0
- model_state_layer_1_TransformerLayer.pt +3 -0
- model_state_layer_20_TransformerLayer.pt +3 -0
- model_state_layer_21_TransformerLayer.pt +3 -0
- model_state_layer_22_TransformerLayer.pt +3 -0
- model_state_layer_23_TransformerLayer.pt +3 -0
- model_state_layer_24_TransformerLayer.pt +3 -0
- model_state_layer_25_TransformerLayer.pt +3 -0
- model_state_layer_26_TransformerLayer.pt +3 -0
- model_state_layer_27_TransformerLayer.pt +3 -0
- model_state_layer_28_LayerNormWrapper.pt +3 -0
- model_state_layer_29_TransformerLMHead.pt +3 -0
- model_state_layer_2_TransformerLayer.pt +3 -0
- model_state_layer_3_TransformerLayer.pt +3 -0
- model_state_layer_4_TransformerLayer.pt +3 -0
- model_state_layer_5_TransformerLayer.pt +3 -0
- model_state_layer_6_TransformerLayer.pt +3 -0
- model_state_layer_7_TransformerLayer.pt +3 -0
- model_state_layer_8_TransformerLayer.pt +3 -0
- model_state_layer_9_TransformerLayer.pt +3 -0
- vocab.json +0 -0
config.yml
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"luminous_architecture": {
|
3 |
+
"vocab_size": 128000,
|
4 |
+
"vocab_file": "vocab.json",
|
5 |
+
"hidden_size": 4608,
|
6 |
+
"num_layers": 27,
|
7 |
+
"num_attention_heads": 36,
|
8 |
+
"num_local_attention_heads": 0,
|
9 |
+
"local_attention_window_size": null,
|
10 |
+
"rotary_embedding_base": 1000000,
|
11 |
+
"rotary_percentage": 1.0,
|
12 |
+
"sequence_length": 8192,
|
13 |
+
"norm_type": "layernorm",
|
14 |
+
"relative_position_embedding_type": "rotary_complex",
|
15 |
+
"mlp_type": "default",
|
16 |
+
"mlp_factor": 4.0,
|
17 |
+
"attention_bias": true,
|
18 |
+
"attention_qkv_in_one": false,
|
19 |
+
"attention_num_kv_heads": 4,
|
20 |
+
"attention_use_matmul": false,
|
21 |
+
"mlp_bias": true,
|
22 |
+
"key_query_norm": false,
|
23 |
+
"weight_tying": false,
|
24 |
+
"masked_softmax": {
|
25 |
+
"kernel": "torch",
|
26 |
+
"softmax_in_fp32": true,
|
27 |
+
"scale": 1.0,
|
28 |
+
"deterministic_flash_attn_bwd": false
|
29 |
+
},
|
30 |
+
"layernorm": {
|
31 |
+
"optimization_type": "torch",
|
32 |
+
"layernorm_epsilon": 1e-05
|
33 |
+
},
|
34 |
+
"precision": "bfloat16",
|
35 |
+
"dropout_embedding": 0.0,
|
36 |
+
"dropout_attention_probs": 0.0,
|
37 |
+
"dropout_after_attention": 0.0,
|
38 |
+
"dropout_after_mlp": 0.0,
|
39 |
+
"finetunable_token_ids": [],
|
40 |
+
"image_encoder": false,
|
41 |
+
"dropout_image_encoder": 0.0,
|
42 |
+
"lora_config": null
|
43 |
+
}
|
44 |
+
}
|
model_state_layer_0_EmbeddingInput.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:944575547a8f40de0468abedfcb3a67e823ae2e6dee8cc9913ab7ce3139d2d11
|
3 |
+
size 1179649483
|
model_state_layer_10_TransformerLayer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3326b40056f8bef2f170ae36a395aa95df9d41919f1b55bc421aa28ba1672287
|
3 |
+
size 434219842
|
model_state_layer_11_TransformerLayer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0a2b27f899309f51a3a2e1babd385c0448fc2da2d07287ef9460bc412e1813eb
|
3 |
+
size 434219842
|
model_state_layer_12_TransformerLayer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3d492be6c62ac54151536f146f610eee326cbd5b44f47db3731e18ab6a28c9f7
|
3 |
+
size 434219842
|
model_state_layer_13_TransformerLayer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a0ab604522d1fcbbf1f0ea3cc584ff45274b1ec49ecd7cfa17ffceee9bc3c46e
|
3 |
+
size 434219842
|
model_state_layer_14_TransformerLayer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0c5841264d139d63495512848a58e7dda74111c9a06fd448377ebae2153814a
|
3 |
+
size 434219842
|
model_state_layer_15_TransformerLayer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e99a49074e983328fed12365744432eef3faa7d436e25557398b94d543fab268
|
3 |
+
size 434219842
|
model_state_layer_16_TransformerLayer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9927876f812755e84fe23f0f04d490f830a1e570c46c645c278b317b64d273da
|
3 |
+
size 434219842
|
model_state_layer_17_TransformerLayer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6517aec4734c54a0ebe5c3a34db56b4fd0319c51699bc3581cb115906e6f5ddd
|
3 |
+
size 434219842
|
model_state_layer_18_TransformerLayer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:40b0b908e961b266089175de2c6b0e00fbd68c9e4af48eec174ffc4f4b0e9115
|
3 |
+
size 434219842
|
model_state_layer_19_TransformerLayer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e0acb652c0d858188e52bf0f3ab28e3839439608253773bb41cd7bfc222320b7
|
3 |
+
size 434219842
|
model_state_layer_1_TransformerLayer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:06e979172c717bf58e75173dfaca906a3f0dca58dc7540e5c8e707e912937574
|
3 |
+
size 434219822
|
model_state_layer_20_TransformerLayer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:21bc2ebcdcd23925278c52a5e3582f3e4827c14a9473dfa19850cde497d78e26
|
3 |
+
size 434219842
|
model_state_layer_21_TransformerLayer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e0afdf95ac6e5c49308375c318fec5212469398c6a92d76eff29aa42159f5b74
|
3 |
+
size 434219842
|
model_state_layer_22_TransformerLayer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b023d1ad0752422a30a49cb8914a909408b227490ffc572892aa0c43f120af1f
|
3 |
+
size 434219842
|
model_state_layer_23_TransformerLayer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:91f7a51afc64cd8514c28497b8f668a88b776aa826d5003b80aeb201ee0c6bde
|
3 |
+
size 434219842
|
model_state_layer_24_TransformerLayer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a9738644818fbe5822f06418c252b89d279efc7b809c845597250b287508ff2f
|
3 |
+
size 434219842
|
model_state_layer_25_TransformerLayer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ec5831c9afcddadcc93f9f453a90eb454c991f7dc05d5702705d317bf2d887e2
|
3 |
+
size 434219842
|
model_state_layer_26_TransformerLayer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d62c3eed8d99092efea7d7cd6567a739a3448d71da827dce125609aeb47b62ba
|
3 |
+
size 434219842
|
model_state_layer_27_TransformerLayer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac7b9275de32430ba744d35510b0aa823e0ea988ca5239c55ae17a1fc08ea5c7
|
3 |
+
size 434219842
|
model_state_layer_28_LayerNormWrapper.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c43ee90f5370a66010b7aa690e4a1d855cfef85fe2627772ccc01f0a5e468d48
|
3 |
+
size 20172
|
model_state_layer_29_TransformerLMHead.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0b5aa89e032b95749a0a11f931477d8605d5223160396e92dd8cae4abe12b63
|
3 |
+
size 1179649448
|
model_state_layer_2_TransformerLayer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:03c1d967b97a16bc4cf11130dca957daddd95608198b03d7a742090c574904b9
|
3 |
+
size 434219822
|
model_state_layer_3_TransformerLayer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:00110ff21e16c28d166ae7737f0eb9c3702348e9b09d34280908fd153c679fcc
|
3 |
+
size 434219822
|
model_state_layer_4_TransformerLayer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:976b2ce91395c252c00717e19e52143e045fec139ceb4c564fab2028e5c2cf99
|
3 |
+
size 434219822
|
model_state_layer_5_TransformerLayer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:46cdef8b748a158e47891a5934f089bb32d708fb7bf22bc262a0b336eade2aaa
|
3 |
+
size 434219822
|
model_state_layer_6_TransformerLayer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d1747434ad1a7f760b7ec0a33065d270368e903eec36698b714f425659969029
|
3 |
+
size 434219822
|
model_state_layer_7_TransformerLayer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6d333b2c506093cf6329c6430b76553868dc1c15c7bfaef5d4d1c878097f17ba
|
3 |
+
size 434219822
|
model_state_layer_8_TransformerLayer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6e454bcb6cf09ea985c72317c9df9fc3a666e5fb7c95ec7363aeef325a23151
|
3 |
+
size 434219822
|
model_state_layer_9_TransformerLayer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2889738c8a166aa6048478c1513517237b7b18a8d863eb4686b644239a285ead
|
3 |
+
size 434219822
|
vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|