medardif committed
Commit 189b434
1 Parent(s): be04d1b

Upload LlamaForCausalLM

model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e7283282983332016596234d8a287a9c361cda16b0d690835e8680dbba30bd84
-size 4983490992
+oid sha256:7d41473ac2c2bf03fee50a52fc202e433fabb9c9c2f32a66f7d290634d59a0fa
+size 4994513900
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:951c3157949280fe7dd99f9a85023dd3845b50384eaedb5840fcef552a393e25
-size 2988978829
+oid sha256:2afe22afc39607165c7dc0c62dc135f8558d0a0406f9ea25133fd445c0690b11
+size 2894068673
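The two pointer diffs above record the new sha256 oid and byte size of each shard. To verify a local download against them, here is a minimal sketch using only the Python standard library; it assumes the shards were saved under the same file names in the current directory:

```python
import hashlib
import os

# Expected (oid, size) pairs taken from the updated LFS pointers above.
EXPECTED = {
    "model-00001-of-00002.safetensors": (
        "7d41473ac2c2bf03fee50a52fc202e433fabb9c9c2f32a66f7d290634d59a0fa",
        4994513900,
    ),
    "model-00002-of-00002.safetensors": (
        "2afe22afc39607165c7dc0c62dc135f8558d0a0406f9ea25133fd445c0690b11",
        2894068673,
    ),
}

for name, (oid, size) in EXPECTED.items():
    # Cheap check first: the file size must match the pointer exactly.
    assert os.path.getsize(name) == size, f"{name}: size mismatch"
    # Then hash the file in 1 MiB chunks to avoid loading it into memory.
    h = hashlib.sha256()
    with open(name, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    assert h.hexdigest() == oid, f"{name}: sha256 mismatch"
    print(f"{name}: OK")
```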
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size": 7972228693
+    "total_size": 7888342613
   },
   "weight_map": {
     "lm_head.modules_to_save.default.weight": "model-00002-of-00002.safetensors",
@@ -993,15 +993,15 @@
     "model.layers.23.self_attn.v_proj.base_layer.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00002.safetensors",
     "model.layers.23.self_attn.v_proj.lora_A.default.weight": "model-00001-of-00002.safetensors",
     "model.layers.23.self_attn.v_proj.lora_B.default.weight": "model-00001-of-00002.safetensors",
-    "model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors",
-    "model.layers.24.mlp.down_proj.base_layer.weight": "model-00002-of-00002.safetensors",
-    "model.layers.24.mlp.down_proj.base_layer.weight.absmax": "model-00002-of-00002.safetensors",
-    "model.layers.24.mlp.down_proj.base_layer.weight.nested_absmax": "model-00002-of-00002.safetensors",
-    "model.layers.24.mlp.down_proj.base_layer.weight.nested_quant_map": "model-00002-of-00002.safetensors",
-    "model.layers.24.mlp.down_proj.base_layer.weight.quant_map": "model-00002-of-00002.safetensors",
-    "model.layers.24.mlp.down_proj.base_layer.weight.quant_state.bitsandbytes__nf4": "model-00002-of-00002.safetensors",
-    "model.layers.24.mlp.down_proj.lora_A.default.weight": "model-00002-of-00002.safetensors",
-    "model.layers.24.mlp.down_proj.lora_B.default.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.24.mlp.down_proj.base_layer.weight": "model-00001-of-00002.safetensors",
+    "model.layers.24.mlp.down_proj.base_layer.weight.absmax": "model-00001-of-00002.safetensors",
+    "model.layers.24.mlp.down_proj.base_layer.weight.nested_absmax": "model-00001-of-00002.safetensors",
+    "model.layers.24.mlp.down_proj.base_layer.weight.nested_quant_map": "model-00001-of-00002.safetensors",
+    "model.layers.24.mlp.down_proj.base_layer.weight.quant_map": "model-00001-of-00002.safetensors",
+    "model.layers.24.mlp.down_proj.base_layer.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00002.safetensors",
+    "model.layers.24.mlp.down_proj.lora_A.default.weight": "model-00001-of-00002.safetensors",
+    "model.layers.24.mlp.down_proj.lora_B.default.weight": "model-00001-of-00002.safetensors",
     "model.layers.24.mlp.gate_proj.base_layer.weight": "model-00001-of-00002.safetensors",
     "model.layers.24.mlp.gate_proj.base_layer.weight.absmax": "model-00001-of-00002.safetensors",
     "model.layers.24.mlp.gate_proj.base_layer.weight.nested_absmax": "model-00001-of-00002.safetensors",
@@ -1010,15 +1010,15 @@
     "model.layers.24.mlp.gate_proj.base_layer.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00002.safetensors",
     "model.layers.24.mlp.gate_proj.lora_A.default.weight": "model-00001-of-00002.safetensors",
     "model.layers.24.mlp.gate_proj.lora_B.default.weight": "model-00001-of-00002.safetensors",
-    "model.layers.24.mlp.up_proj.base_layer.weight": "model-00002-of-00002.safetensors",
-    "model.layers.24.mlp.up_proj.base_layer.weight.absmax": "model-00002-of-00002.safetensors",
-    "model.layers.24.mlp.up_proj.base_layer.weight.nested_absmax": "model-00002-of-00002.safetensors",
-    "model.layers.24.mlp.up_proj.base_layer.weight.nested_quant_map": "model-00002-of-00002.safetensors",
-    "model.layers.24.mlp.up_proj.base_layer.weight.quant_map": "model-00002-of-00002.safetensors",
-    "model.layers.24.mlp.up_proj.base_layer.weight.quant_state.bitsandbytes__nf4": "model-00002-of-00002.safetensors",
-    "model.layers.24.mlp.up_proj.lora_A.default.weight": "model-00002-of-00002.safetensors",
-    "model.layers.24.mlp.up_proj.lora_B.default.weight": "model-00002-of-00002.safetensors",
-    "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.mlp.up_proj.base_layer.weight": "model-00001-of-00002.safetensors",
+    "model.layers.24.mlp.up_proj.base_layer.weight.absmax": "model-00001-of-00002.safetensors",
+    "model.layers.24.mlp.up_proj.base_layer.weight.nested_absmax": "model-00001-of-00002.safetensors",
+    "model.layers.24.mlp.up_proj.base_layer.weight.nested_quant_map": "model-00001-of-00002.safetensors",
+    "model.layers.24.mlp.up_proj.base_layer.weight.quant_map": "model-00001-of-00002.safetensors",
+    "model.layers.24.mlp.up_proj.base_layer.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00002.safetensors",
+    "model.layers.24.mlp.up_proj.lora_A.default.weight": "model-00001-of-00002.safetensors",
+    "model.layers.24.mlp.up_proj.lora_B.default.weight": "model-00001-of-00002.safetensors",
+    "model.layers.24.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
     "model.layers.24.self_attn.k_proj.base_layer.weight": "model-00001-of-00002.safetensors",
     "model.layers.24.self_attn.k_proj.base_layer.weight.absmax": "model-00001-of-00002.safetensors",
     "model.layers.24.self_attn.k_proj.base_layer.weight.nested_absmax": "model-00001-of-00002.safetensors",
@@ -1077,14 +1077,14 @@
     "model.layers.25.mlp.up_proj.lora_A.default.weight": "model-00002-of-00002.safetensors",
     "model.layers.25.mlp.up_proj.lora_B.default.weight": "model-00002-of-00002.safetensors",
     "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
-    "model.layers.25.self_attn.k_proj.base_layer.weight": "model-00002-of-00002.safetensors",
-    "model.layers.25.self_attn.k_proj.base_layer.weight.absmax": "model-00002-of-00002.safetensors",
-    "model.layers.25.self_attn.k_proj.base_layer.weight.nested_absmax": "model-00002-of-00002.safetensors",
-    "model.layers.25.self_attn.k_proj.base_layer.weight.nested_quant_map": "model-00002-of-00002.safetensors",
-    "model.layers.25.self_attn.k_proj.base_layer.weight.quant_map": "model-00002-of-00002.safetensors",
-    "model.layers.25.self_attn.k_proj.base_layer.weight.quant_state.bitsandbytes__nf4": "model-00002-of-00002.safetensors",
-    "model.layers.25.self_attn.k_proj.lora_A.default.weight": "model-00002-of-00002.safetensors",
-    "model.layers.25.self_attn.k_proj.lora_B.default.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.self_attn.k_proj.base_layer.weight": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.k_proj.base_layer.weight.absmax": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.k_proj.base_layer.weight.nested_absmax": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.k_proj.base_layer.weight.nested_quant_map": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.k_proj.base_layer.weight.quant_map": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.k_proj.base_layer.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.k_proj.lora_A.default.weight": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.k_proj.lora_B.default.weight": "model-00001-of-00002.safetensors",
     "model.layers.25.self_attn.o_proj.base_layer.weight": "model-00002-of-00002.safetensors",
     "model.layers.25.self_attn.o_proj.base_layer.weight.absmax": "model-00002-of-00002.safetensors",
     "model.layers.25.self_attn.o_proj.base_layer.weight.nested_absmax": "model-00002-of-00002.safetensors",
@@ -1093,22 +1093,22 @@
     "model.layers.25.self_attn.o_proj.base_layer.weight.quant_state.bitsandbytes__nf4": "model-00002-of-00002.safetensors",
     "model.layers.25.self_attn.o_proj.lora_A.default.weight": "model-00002-of-00002.safetensors",
     "model.layers.25.self_attn.o_proj.lora_B.default.weight": "model-00002-of-00002.safetensors",
-    "model.layers.25.self_attn.q_proj.base_layer.weight": "model-00002-of-00002.safetensors",
-    "model.layers.25.self_attn.q_proj.base_layer.weight.absmax": "model-00002-of-00002.safetensors",
-    "model.layers.25.self_attn.q_proj.base_layer.weight.nested_absmax": "model-00002-of-00002.safetensors",
-    "model.layers.25.self_attn.q_proj.base_layer.weight.nested_quant_map": "model-00002-of-00002.safetensors",
-    "model.layers.25.self_attn.q_proj.base_layer.weight.quant_map": "model-00002-of-00002.safetensors",
-    "model.layers.25.self_attn.q_proj.base_layer.weight.quant_state.bitsandbytes__nf4": "model-00002-of-00002.safetensors",
-    "model.layers.25.self_attn.q_proj.lora_A.default.weight": "model-00002-of-00002.safetensors",
-    "model.layers.25.self_attn.q_proj.lora_B.default.weight": "model-00002-of-00002.safetensors",
-    "model.layers.25.self_attn.v_proj.base_layer.weight": "model-00002-of-00002.safetensors",
-    "model.layers.25.self_attn.v_proj.base_layer.weight.absmax": "model-00002-of-00002.safetensors",
-    "model.layers.25.self_attn.v_proj.base_layer.weight.nested_absmax": "model-00002-of-00002.safetensors",
-    "model.layers.25.self_attn.v_proj.base_layer.weight.nested_quant_map": "model-00002-of-00002.safetensors",
-    "model.layers.25.self_attn.v_proj.base_layer.weight.quant_map": "model-00002-of-00002.safetensors",
-    "model.layers.25.self_attn.v_proj.base_layer.weight.quant_state.bitsandbytes__nf4": "model-00002-of-00002.safetensors",
-    "model.layers.25.self_attn.v_proj.lora_A.default.weight": "model-00002-of-00002.safetensors",
-    "model.layers.25.self_attn.v_proj.lora_B.default.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.self_attn.q_proj.base_layer.weight": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.q_proj.base_layer.weight.absmax": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.q_proj.base_layer.weight.nested_absmax": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.q_proj.base_layer.weight.nested_quant_map": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.q_proj.base_layer.weight.quant_map": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.q_proj.base_layer.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.q_proj.lora_A.default.weight": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.q_proj.lora_B.default.weight": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.v_proj.base_layer.weight": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.v_proj.base_layer.weight.absmax": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.v_proj.base_layer.weight.nested_absmax": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.v_proj.base_layer.weight.nested_quant_map": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.v_proj.base_layer.weight.quant_map": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.v_proj.base_layer.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.v_proj.lora_A.default.weight": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.v_proj.lora_B.default.weight": "model-00001-of-00002.safetensors",
     "model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors",
     "model.layers.26.mlp.down_proj.base_layer.weight": "model-00002-of-00002.safetensors",
     "model.layers.26.mlp.down_proj.base_layer.weight.absmax": "model-00002-of-00002.safetensors",