Upload JambaForCausalLM
Browse files
- config.json +6 -23
- model-00001-of-00008.safetensors +3 -0
- model-00002-of-00008.safetensors +3 -0
- model-00003-of-00008.safetensors +3 -0
- model-00004-of-00008.safetensors +3 -0
- model-00005-of-00008.safetensors +3 -0
- model-00006-of-00008.safetensors +3 -0
- model-00007-of-00008.safetensors +3 -0
- model-00008-of-00008.safetensors +3 -0
- model.safetensors.index.json +0 -0
config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "
|
3 |
"architectures": [
|
4 |
"JambaForCausalLM"
|
5 |
],
|
@@ -7,10 +7,10 @@
|
|
7 |
"attn_layer_offset": 4,
|
8 |
"attn_layer_period": 8,
|
9 |
"auto_map": {
|
10 |
-
"AutoConfig": "
|
11 |
-
"AutoModel": "
|
12 |
-
"AutoModelForCausalLM": "
|
13 |
-
"AutoModelForSequenceClassification": "
|
14 |
},
|
15 |
"bos_token_id": 1,
|
16 |
"calc_logits_for_entire_prompt": false,
|
@@ -31,29 +31,12 @@
|
|
31 |
"model_type": "jamba",
|
32 |
"n_ctx": 262144,
|
33 |
"num_attention_heads": 32,
|
34 |
-
"num_experts":
|
35 |
"num_experts_per_tok": 2,
|
36 |
"num_hidden_layers": 32,
|
37 |
"num_key_value_heads": 8,
|
38 |
"output_router_logits": false,
|
39 |
"pad_token_id": 0,
|
40 |
-
"quantization_config": {
|
41 |
-
"_load_in_4bit": true,
|
42 |
-
"_load_in_8bit": false,
|
43 |
-
"bnb_4bit_compute_dtype": "float32",
|
44 |
-
"bnb_4bit_quant_storage": "uint8",
|
45 |
-
"bnb_4bit_quant_type": "fp4",
|
46 |
-
"bnb_4bit_use_double_quant": false,
|
47 |
-
"llm_int8_enable_fp32_cpu_offload": false,
|
48 |
-
"llm_int8_has_fp16_weight": false,
|
49 |
-
"llm_int8_skip_modules": [
|
50 |
-
"mamba"
|
51 |
-
],
|
52 |
-
"llm_int8_threshold": 6.0,
|
53 |
-
"load_in_4bit": true,
|
54 |
-
"load_in_8bit": false,
|
55 |
-
"quant_method": "bitsandbytes"
|
56 |
-
},
|
57 |
"rms_norm_eps": 1e-06,
|
58 |
"router_aux_loss_coef": 0.001,
|
59 |
"sliding_window": null,
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "isemmanuelolowe/Jamba-4xMoE_slerp",
|
3 |
"architectures": [
|
4 |
"JambaForCausalLM"
|
5 |
],
|
|
|
7 |
"attn_layer_offset": 4,
|
8 |
"attn_layer_period": 8,
|
9 |
"auto_map": {
|
10 |
+
"AutoConfig": "isemmanuelolowe/Jamba-4xMoE_slerp--configuration_jamba.JambaConfig",
|
11 |
+
"AutoModel": "isemmanuelolowe/Jamba-4xMoE_slerp--modeling_jamba.JambaModel",
|
12 |
+
"AutoModelForCausalLM": "isemmanuelolowe/Jamba-4xMoE_slerp--modeling_jamba.JambaForCausalLM",
|
13 |
+
"AutoModelForSequenceClassification": "isemmanuelolowe/Jamba-4xMoE_slerp--model.JambaForSequenceClassification"
|
14 |
},
|
15 |
"bos_token_id": 1,
|
16 |
"calc_logits_for_entire_prompt": false,
|
|
|
31 |
"model_type": "jamba",
|
32 |
"n_ctx": 262144,
|
33 |
"num_attention_heads": 32,
|
34 |
+
"num_experts": 4,
|
35 |
"num_experts_per_tok": 2,
|
36 |
"num_hidden_layers": 32,
|
37 |
"num_key_value_heads": 8,
|
38 |
"output_router_logits": false,
|
39 |
"pad_token_id": 0,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
"rms_norm_eps": 1e-06,
|
41 |
"router_aux_loss_coef": 0.001,
|
42 |
"sliding_window": null,
|
model-00001-of-00008.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9222ed281e8f2cbf3b531e22ca1514267559e5c408668f3985e944850a8a61ba
|
3 |
+
size 4987630232
|
model-00002-of-00008.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9074b89993cb1c8a4df25b413c15ac811367ab34263e2cfc26aeb3f4b544ce15
|
3 |
+
size 4930124392
|
model-00003-of-00008.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b72f78ad801bde740c0860faf2d24e2423e0771faae1b791c5e023f9e798999
|
3 |
+
size 4944522688
|
model-00004-of-00008.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a49c767416c0f468462f6f25fcb49ac927647ac8117a248c5d862deff77a9171
|
3 |
+
size 4954075928
|
model-00005-of-00008.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:66d1521e9dfbf5479c2bc589ea1a202a5f5beae8a5c088cb47f2bf92fd3a8ef7
|
3 |
+
size 4921178040
|
model-00006-of-00008.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:87864b5e4d96a9a76d71bc798b67854ba567fbfdf77b9580a9adb58ca9efc19b
|
3 |
+
size 4929484872
|
model-00007-of-00008.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b73c561783801dc085949ac87eae658d122dd3c7826dc660e10d4b530be547e
|
3 |
+
size 4944522776
|
model-00008-of-00008.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f88e8de536daee5b7da10980e98b4d735efa5a5ab16bd671a4332143aac9a042
|
3 |
+
size 889217840
|
model.safetensors.index.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|