|
{ |
|
"metadata": { |
|
"ParamSize": 147, |
|
"ParamBytes": 3778220032.0, |
|
"BitsPerParam": 16.0 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 379060224, |
|
"records": [ |
|
{ |
|
"name": "model.tok_embeddings.weight", |
|
"shape": [ |
|
92544, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 379060224, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c8f6d0d0c5f58f58b02af872f8519b94" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e9e0ea543bb67a206b5d53f23b57a969" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "70cbda20b2adbebe5c23af69cd4e31ed" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 16777216, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fed4a1284c4f23d3d9abca686cbcbf7b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 16777216 |
|
}, |
|
{ |
|
"name": "model.layers.0.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25165824 |
|
}, |
|
{ |
|
"name": "model.layers.0.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25169920 |
|
} |
|
], |
|
"md5sum": "028257deb68100714c861c2561a92894" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ed9044dffe359958caed045919b36188" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "682f08a479bbeacd53b109382cbf4182" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.1.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.1.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392704 |
|
}, |
|
{ |
|
"name": "model.layers.2.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8396800 |
|
} |
|
], |
|
"md5sum": "e8b9621a13bc64aad4377f5300cdd68a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "23f4d64d37703ceeb250249e9cbf574a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8b9163b046b254bcae6b6ce69e5367d5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.2.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.2.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392704 |
|
}, |
|
{ |
|
"name": "model.layers.3.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8396800 |
|
} |
|
], |
|
"md5sum": "77d70dee602590a2472593b4b08ac350" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d268c66d207209508f136c5f2dbad373" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "86561d0d004aa36e77c3eaf5531b6b20" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.3.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.3.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392704 |
|
}, |
|
{ |
|
"name": "model.layers.4.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8396800 |
|
} |
|
], |
|
"md5sum": "532707f8381833e717c516147071ae5a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f639c7f948770c385f8a947e89f83080" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2aded3a887e46dcf74e0e59556092811" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.4.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.4.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392704 |
|
}, |
|
{ |
|
"name": "model.layers.5.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8396800 |
|
} |
|
], |
|
"md5sum": "4d0f3ccc94d472357c2551878619478a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fde2165f55fcce1cd07339890ad0953c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e952acf155de30cb5bd4418a2605c38e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.5.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.5.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392704 |
|
}, |
|
{ |
|
"name": "model.layers.6.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8396800 |
|
} |
|
], |
|
"md5sum": "57285e9b285c039e68ba931ff2c38abf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1ec9ea556ac6318d477f27a9ffccc1b4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e4961b543fc0540d53355fadb5202997" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.6.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.6.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392704 |
|
}, |
|
{ |
|
"name": "model.layers.7.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8396800 |
|
} |
|
], |
|
"md5sum": "01d1a8f29dff55d3e5106ae2fd2bf92b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ce15bfcf9d4c007ff198bbac8311d275" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3b848263184d5f655c63131eaa9aa7c5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.7.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.7.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392704 |
|
}, |
|
{ |
|
"name": "model.layers.8.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8396800 |
|
} |
|
], |
|
"md5sum": "2e00fe5598e7974e3558325e16933b05" |
|
}, |
|
{ |
|
"dataPath": "params_shard_26.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "62d160d27fa806ee042a2d5d1a2d7f9c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_27.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4291c51e797a54603f2918f85bba0f28" |
|
}, |
|
{ |
|
"dataPath": "params_shard_28.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.8.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.8.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392704 |
|
}, |
|
{ |
|
"name": "model.layers.9.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8396800 |
|
} |
|
], |
|
"md5sum": "1456d6dce1538c10ba035f37f028b6f9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_29.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e8423d504e5a1f07794fa6a19c6516c1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_30.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8622070d4f372b7748b17907528d575a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_31.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.9.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.9.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392704 |
|
}, |
|
{ |
|
"name": "model.layers.10.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8396800 |
|
} |
|
], |
|
"md5sum": "004f0aeb8769bb482560a71def97dc0b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_32.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d4f80971ed13439e41517b52d4a6740e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_33.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0d855105d7a746718a1b391637b5c3fb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_34.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.10.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.10.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392704 |
|
}, |
|
{ |
|
"name": "model.layers.11.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8396800 |
|
} |
|
], |
|
"md5sum": "003e068d64993094f976be6fca62343a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_35.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "928d5b31f74ed716f8a94bee96eac465" |
|
}, |
|
{ |
|
"dataPath": "params_shard_36.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ce1376dfdf59e8b06aca43aafbd95d70" |
|
}, |
|
{ |
|
"dataPath": "params_shard_37.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.11.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.11.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392704 |
|
}, |
|
{ |
|
"name": "model.layers.12.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8396800 |
|
} |
|
], |
|
"md5sum": "002539101d27a72e45c3ef2fe6d35bca" |
|
}, |
|
{ |
|
"dataPath": "params_shard_38.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3994a0500df53882d6e8e7f58c709192" |
|
}, |
|
{ |
|
"dataPath": "params_shard_39.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d9b0377aee207f260351bb243c039f1c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_40.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.12.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.12.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392704 |
|
}, |
|
{ |
|
"name": "model.layers.13.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8396800 |
|
} |
|
], |
|
"md5sum": "cca43e273ed1b32466550de2f273c9f2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_41.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b1ab553bb92ee1bb7cf15820f5d94143" |
|
}, |
|
{ |
|
"dataPath": "params_shard_42.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0c77b71d9d73936cf23ffc1c0c9b9399" |
|
}, |
|
{ |
|
"dataPath": "params_shard_43.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.13.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.13.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392704 |
|
}, |
|
{ |
|
"name": "model.layers.14.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8396800 |
|
} |
|
], |
|
"md5sum": "f4b038503aa58e5641edf4587a862307" |
|
}, |
|
{ |
|
"dataPath": "params_shard_44.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fc95094dd77fd22b71cc741ca2a23646" |
|
}, |
|
{ |
|
"dataPath": "params_shard_45.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0779d46f31a7dbe1de153ed47078dba2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_46.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.14.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.14.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392704 |
|
}, |
|
{ |
|
"name": "model.layers.15.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8396800 |
|
} |
|
], |
|
"md5sum": "cb9ac262924e1f6e5f7afac1ce9dda54" |
|
}, |
|
{ |
|
"dataPath": "params_shard_47.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b21a9db197211f02e2453cd8aac1a5f9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_48.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "66d68e0373d2d3dc8ed25e161d6d817e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_49.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.15.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.15.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392704 |
|
}, |
|
{ |
|
"name": "model.layers.16.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8396800 |
|
} |
|
], |
|
"md5sum": "7ea0c9c7f983448b8ae224c47f447198" |
|
}, |
|
{ |
|
"dataPath": "params_shard_50.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ba5262dce77d42c47e6c50632ba264ef" |
|
}, |
|
{ |
|
"dataPath": "params_shard_51.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ef98e9f936328631b1e07249d359caca" |
|
}, |
|
{ |
|
"dataPath": "params_shard_52.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.16.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.16.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392704 |
|
}, |
|
{ |
|
"name": "model.layers.17.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8396800 |
|
} |
|
], |
|
"md5sum": "bdc69580a1be1538f4b8860b1b095f4e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_53.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "01502631503072992ee7b6ba289f9607" |
|
}, |
|
{ |
|
"dataPath": "params_shard_54.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "49d9a61e33dcdf8a054187b1049eba40" |
|
}, |
|
{ |
|
"dataPath": "params_shard_55.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.17.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.17.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392704 |
|
}, |
|
{ |
|
"name": "model.layers.18.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8396800 |
|
} |
|
], |
|
"md5sum": "fcf5b35d38ed1317876899d604989ff5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_56.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a2e5924cf0ac9933abc7e76a5907ecc8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_57.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "283384e32b930f44d91f41786ce8766d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_58.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.18.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.18.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392704 |
|
}, |
|
{ |
|
"name": "model.layers.19.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8396800 |
|
} |
|
], |
|
"md5sum": "a34e69f01562020c50d436658b643204" |
|
}, |
|
{ |
|
"dataPath": "params_shard_59.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6ab434fab1ed92442c4d50568ddbffd4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_60.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fede46845f6e20a3387630cc52583457" |
|
}, |
|
{ |
|
"dataPath": "params_shard_61.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.19.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.19.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392704 |
|
}, |
|
{ |
|
"name": "model.layers.20.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8396800 |
|
} |
|
], |
|
"md5sum": "ba467d7ddd7ec9238c6463091e80c9ab" |
|
}, |
|
{ |
|
"dataPath": "params_shard_62.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b331223ab22e514ba30311898a491a9f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_63.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "922de5be8f5163e2437b95d8f7abfba3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_64.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.20.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.20.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392704 |
|
}, |
|
{ |
|
"name": "model.layers.21.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8396800 |
|
} |
|
], |
|
"md5sum": "7e81d6ec7d477fa616946139388ddc08" |
|
}, |
|
{ |
|
"dataPath": "params_shard_65.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "82ac1735c6afb9be52902c700aea7a12" |
|
}, |
|
{ |
|
"dataPath": "params_shard_66.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "12ce1c76efe436f02473a58282bb9133" |
|
}, |
|
{ |
|
"dataPath": "params_shard_67.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.21.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.21.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392704 |
|
}, |
|
{ |
|
"name": "model.layers.22.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8396800 |
|
} |
|
], |
|
"md5sum": "28c1604c347cab66d2927f2b459defed" |
|
}, |
|
{ |
|
"dataPath": "params_shard_68.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "32a91a4d279fc93278dcfbfb15dc0d5d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_69.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9b551d0279e7ea93eb08cb68b29bf71a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_70.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.22.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.22.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392704 |
|
}, |
|
{ |
|
"name": "model.layers.23.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8396800 |
|
} |
|
], |
|
"md5sum": "56eb834474d9be0bb245e3fd3a012950" |
|
}, |
|
{ |
|
"dataPath": "params_shard_71.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ff6264f5e5d1279434606ce9d6041084" |
|
}, |
|
{ |
|
"dataPath": "params_shard_72.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "002fbfadeee34e731aa3393b7be3ef2c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_73.bin", |
|
"format": "raw-shard", |
|
"nbytes": 379060224, |
|
"records": [ |
|
{ |
|
"name": "output.weight", |
|
"shape": [ |
|
92544, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 379060224, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "44dadfc1e5ec5a1e920d17a727bb730a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_74.bin", |
|
"format": "raw-shard", |
|
"nbytes": 8400896, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.23.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.23.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392704 |
|
}, |
|
{ |
|
"name": "model.norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8396800 |
|
} |
|
], |
|
"md5sum": "6e98b51d4e6355c1bc64a23646a583c9" |
|
} |
|
] |
|
} |