Llama-3.2-1B-Instruct-q0f32-MLC / ndarray-cache-b16.json
CharlieFRuan's picture
Upload folder using huggingface_hub
97c3b34 verified
raw
history blame
47.4 kB
{
"metadata": {
"ParamSize": 98,
"ParamBytes": 4943257600.0,
"BitsPerParam": 32.0
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 525336576,
"records": [
{
"name": "model.embed_tokens.weight",
"shape": [
128256,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 525336576,
"byteOffset": 0
}
],
"md5sum": "2097305f352cca56c3dc0ad1edb5bcbf"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.0.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "800df08c617c74598d4ca6af76553de0"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "eb3b214c8e085725718f03cf804164ae"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.1.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "6a9a2411562a216b805451d914163a8c"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "32b4b83389df3b212ef853f8f19fc2d1"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 20987904,
"records": [
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 0
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 4096
},
{
"name": "model.layers.0.self_attn.qkv_proj.weight",
"shape": [
3072,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 8192
},
{
"name": "model.layers.0.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 12591104
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 20979712
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 20983808
}
],
"md5sum": "35b8ff220ef5e2e6627dcfb621fb832d"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.10.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "9d8db30764216ea077a052dcd20fb2c4"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "0856245486dd0d049581359e670f33e0"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.weight",
"shape": [
3072,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 20971520
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 20975616
}
],
"md5sum": "af71427b909126ccc9a9635a8279e0a9"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.11.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "795af0ca2c531da387572abece860a7e"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "4c8d8ed56e53eb99608130db0c1de05d"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.weight",
"shape": [
3072,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 20971520
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 20975616
}
],
"md5sum": "f7628a591cb04c54546aaaa4f9398a68"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.12.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "64f4c86756397349e929cd1f2f849e5c"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "ea4ff7cec9e3c5ec3c805a6586bdd2ee"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.weight",
"shape": [
3072,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 20971520
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 20975616
}
],
"md5sum": "90f829661b428a1095ad0216de8f525e"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.13.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "fb6f9e666f95e8149da896c79bbb730c"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "097a18cf907839e45e7bceedcfeee09c"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.weight",
"shape": [
3072,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 20971520
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 20975616
}
],
"md5sum": "8c3f48c9a1fb32d02410baeb954f1bb6"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.14.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "ab16992964f5135c087f55a6c96eaf30"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "75e6fc44ba742deec3aea8e1bd310f22"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.weight",
"shape": [
3072,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 20971520
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 20975616
}
],
"md5sum": "a2c7db5bc8440c57f50b3c38a403c9f8"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.15.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "3fe8a1e2c9e49e659f3411c7e138cd9f"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "6929ff78c04aaf58088e2b0fd33c61c4"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.weight",
"shape": [
3072,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 20971520
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 20975616
}
],
"md5sum": "50aa881a0f07e3bf087ec22fd3c2c7f7"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.2.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "5814d5a86c183a63f441e473b96eac9f"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "66938847fe03b3b7beb0c34e0ea3d146"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.weight",
"shape": [
3072,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 20971520
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 20975616
}
],
"md5sum": "60e2dec95a7f652b11879a58814a0b72"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.3.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "daca4f0d1145ceba60aba195b6e4b717"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "a7d140e2aa8117b4e0251ab7d4f1e894"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.weight",
"shape": [
3072,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 20971520
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 20975616
}
],
"md5sum": "aa3290f5cbda1c5d43f00d0846fb3c5d"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.4.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "6f6f0eafe2557a8793292023a93fc44d"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "ac207948c6dd9f8c618249b5c8370101"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.weight",
"shape": [
3072,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 20971520
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 20975616
}
],
"md5sum": "5fd4c85aebd5f940fb7cb90a06bc0e3d"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.5.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "385fbfd45620a6a7c7d555b8cf7d35ff"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "9d34ce6dc3d3bcecc868378358d66bcb"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.weight",
"shape": [
3072,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 20971520
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 20975616
}
],
"md5sum": "24b869dad75eb377412439976e3ae012"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.6.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "0168cb04de60c94a82da930ef69dfc79"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "f38fe5a46d483f933a79ce6577bd5236"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.weight",
"shape": [
3072,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 20971520
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 20975616
}
],
"md5sum": "49401c003986778a26ab6e1d17dea5a9"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.7.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "c9edef3101ccb32cf23b9342315b1a38"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "c7b95c82540aad932bca9ebac8caa96e"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.weight",
"shape": [
3072,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 20971520
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 20975616
}
],
"md5sum": "e44904f0c432ad45b9919a22b590ae9a"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.8.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "3e337579a791f65389f4a5a93dc95633"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "79c0f7d1fa6519b778d30ef952522121"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.weight",
"shape": [
3072,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 20971520
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 20975616
}
],
"md5sum": "5a57388997fefe29f3788be628569150"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.9.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "e2d663fbf063a76d8845a1117834610f"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "50e4119a748b23e67bfa10d1ec8e1f00"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.weight",
"shape": [
3072,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 20971520
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 20975616
}
],
"md5sum": "c7f736b657c13231c2c106aab8827f06"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 20975616,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.weight",
"shape": [
3072,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.norm.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 20971520
}
],
"md5sum": "2484e69deb5afeb91287322eac29c4f8"
}
]
}