internlm2_5-1_8b-q0f16-MLC / ndarray-cache.json
riczhou's picture
Upload folder using huggingface_hub
c9b85b7 verified
{
"metadata": {
"ParamSize": 147,
"ParamBytes": 3778220032.0,
"BitsPerParam": 16.0
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 379060224,
"records": [
{
"name": "model.tok_embeddings.weight",
"shape": [
92544,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 379060224,
"byteOffset": 0
}
],
"md5sum": "c8f6d0d0c5f58f58b02af872f8519b94"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.0.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "e9e0ea543bb67a206b5d53f23b57a969"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.0.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "70cbda20b2adbebe5c23af69cd4e31ed"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.1.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "fed4a1284c4f23d3d9abca686cbcbf7b"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.0.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
},
{
"name": "model.layers.0.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 16777216
},
{
"name": "model.layers.0.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25165824
},
{
"name": "model.layers.0.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25169920
}
],
"md5sum": "028257deb68100714c861c2561a92894"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.1.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "ed9044dffe359958caed045919b36188"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.1.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "682f08a479bbeacd53b109382cbf4182"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.1.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.1.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.1.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.2.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8396800
}
],
"md5sum": "e8b9621a13bc64aad4377f5300cdd68a"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.2.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "23f4d64d37703ceeb250249e9cbf574a"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.2.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "8b9163b046b254bcae6b6ce69e5367d5"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.2.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.2.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.2.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.3.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8396800
}
],
"md5sum": "77d70dee602590a2472593b4b08ac350"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.3.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "d268c66d207209508f136c5f2dbad373"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.3.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "86561d0d004aa36e77c3eaf5531b6b20"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.3.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.3.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.3.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.4.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8396800
}
],
"md5sum": "532707f8381833e717c516147071ae5a"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.4.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "f639c7f948770c385f8a947e89f83080"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.4.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "2aded3a887e46dcf74e0e59556092811"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.4.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.4.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.4.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.5.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8396800
}
],
"md5sum": "4d0f3ccc94d472357c2551878619478a"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.5.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "fde2165f55fcce1cd07339890ad0953c"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.5.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "e952acf155de30cb5bd4418a2605c38e"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.5.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.5.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.5.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.6.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8396800
}
],
"md5sum": "57285e9b285c039e68ba931ff2c38abf"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.6.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "1ec9ea556ac6318d477f27a9ffccc1b4"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.6.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "e4961b543fc0540d53355fadb5202997"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.6.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.6.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.6.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.7.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8396800
}
],
"md5sum": "01d1a8f29dff55d3e5106ae2fd2bf92b"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.7.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "ce15bfcf9d4c007ff198bbac8311d275"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.7.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "3b848263184d5f655c63131eaa9aa7c5"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.7.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.7.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.7.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.8.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8396800
}
],
"md5sum": "2e00fe5598e7974e3558325e16933b05"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.8.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "62d160d27fa806ee042a2d5d1a2d7f9c"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.8.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "4291c51e797a54603f2918f85bba0f28"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.8.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.8.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.8.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.9.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8396800
}
],
"md5sum": "1456d6dce1538c10ba035f37f028b6f9"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.9.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "e8423d504e5a1f07794fa6a19c6516c1"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.9.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "8622070d4f372b7748b17907528d575a"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.9.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.9.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.9.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.10.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8396800
}
],
"md5sum": "004f0aeb8769bb482560a71def97dc0b"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.10.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "d4f80971ed13439e41517b52d4a6740e"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.10.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "0d855105d7a746718a1b391637b5c3fb"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.10.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.10.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.10.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.11.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8396800
}
],
"md5sum": "003e068d64993094f976be6fca62343a"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.11.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "928d5b31f74ed716f8a94bee96eac465"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.11.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "ce1376dfdf59e8b06aca43aafbd95d70"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.11.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.11.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.11.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.12.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8396800
}
],
"md5sum": "002539101d27a72e45c3ef2fe6d35bca"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.12.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "3994a0500df53882d6e8e7f58c709192"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.12.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "d9b0377aee207f260351bb243c039f1c"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.12.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.12.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.12.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.13.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8396800
}
],
"md5sum": "cca43e273ed1b32466550de2f273c9f2"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.13.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "b1ab553bb92ee1bb7cf15820f5d94143"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.13.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "0c77b71d9d73936cf23ffc1c0c9b9399"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.13.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.13.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.13.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.14.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8396800
}
],
"md5sum": "f4b038503aa58e5641edf4587a862307"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.14.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "fc95094dd77fd22b71cc741ca2a23646"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.14.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "0779d46f31a7dbe1de153ed47078dba2"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.14.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.14.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.14.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.15.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8396800
}
],
"md5sum": "cb9ac262924e1f6e5f7afac1ce9dda54"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.15.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "b21a9db197211f02e2453cd8aac1a5f9"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.15.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "66d68e0373d2d3dc8ed25e161d6d817e"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.15.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.15.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.15.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.16.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8396800
}
],
"md5sum": "7ea0c9c7f983448b8ae224c47f447198"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.16.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "ba5262dce77d42c47e6c50632ba264ef"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.16.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "ef98e9f936328631b1e07249d359caca"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.16.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.16.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.16.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.17.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8396800
}
],
"md5sum": "bdc69580a1be1538f4b8860b1b095f4e"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.17.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "01502631503072992ee7b6ba289f9607"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.17.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "49d9a61e33dcdf8a054187b1049eba40"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.17.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.17.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.17.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.18.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8396800
}
],
"md5sum": "fcf5b35d38ed1317876899d604989ff5"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.18.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "a2e5924cf0ac9933abc7e76a5907ecc8"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.18.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "283384e32b930f44d91f41786ce8766d"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.18.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.18.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.18.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.19.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8396800
}
],
"md5sum": "a34e69f01562020c50d436658b643204"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.19.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "6ab434fab1ed92442c4d50568ddbffd4"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.19.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "fede46845f6e20a3387630cc52583457"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.19.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.19.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.19.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.20.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8396800
}
],
"md5sum": "ba467d7ddd7ec9238c6463091e80c9ab"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.20.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "b331223ab22e514ba30311898a491a9f"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.20.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "922de5be8f5163e2437b95d8f7abfba3"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.20.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.20.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.20.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.21.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8396800
}
],
"md5sum": "7e81d6ec7d477fa616946139388ddc08"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.21.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "82ac1735c6afb9be52902c700aea7a12"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.21.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "12ce1c76efe436f02473a58282bb9133"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.21.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.21.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.21.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.22.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8396800
}
],
"md5sum": "28c1604c347cab66d2927f2b459defed"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.22.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "32a91a4d279fc93278dcfbfb15dc0d5d"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.22.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "9b551d0279e7ea93eb08cb68b29bf71a"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.22.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.22.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.22.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.23.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8396800
}
],
"md5sum": "56eb834474d9be0bb245e3fd3a012950"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.23.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "ff6264f5e5d1279434606ce9d6041084"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.23.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "002fbfadeee34e731aa3393b7be3ef2c"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 379060224,
"records": [
{
"name": "output.weight",
"shape": [
92544,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 379060224,
"byteOffset": 0
}
],
"md5sum": "44dadfc1e5ec5a1e920d17a727bb730a"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 8400896,
"records": [
{
"name": "model.layers.23.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.23.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.23.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8396800
}
],
"md5sum": "6e98b51d4e6355c1bc64a23646a583c9"
}
]
}