{ "metadata": { "ParamSize": 147, "ParamBytes": 3778220032.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 379060224, "records": [ { "name": "model.tok_embeddings.weight", "shape": [ 92544, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 379060224, "byteOffset": 0 } ], "md5sum": "c8f6d0d0c5f58f58b02af872f8519b94" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.0.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "e9e0ea543bb67a206b5d53f23b57a969" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.0.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "70cbda20b2adbebe5c23af69cd4e31ed" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.1.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "fed4a1284c4f23d3d9abca686cbcbf7b" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.0.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 }, { "name": "model.layers.0.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16777216 }, { "name": "model.layers.0.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25165824 }, { "name": "model.layers.0.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25169920 } ], "md5sum": "028257deb68100714c861c2561a92894" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.1.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "ed9044dffe359958caed045919b36188" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.1.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "682f08a479bbeacd53b109382cbf4182" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.1.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.1.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.1.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.2.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8396800 } ], "md5sum": "e8b9621a13bc64aad4377f5300cdd68a" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.2.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "23f4d64d37703ceeb250249e9cbf574a" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.2.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8b9163b046b254bcae6b6ce69e5367d5" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.2.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.2.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.2.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.3.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8396800 } ], "md5sum": "77d70dee602590a2472593b4b08ac350" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.3.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "d268c66d207209508f136c5f2dbad373" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.3.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "86561d0d004aa36e77c3eaf5531b6b20" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.3.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.3.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.3.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.4.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8396800 } ], "md5sum": "532707f8381833e717c516147071ae5a" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.4.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "f639c7f948770c385f8a947e89f83080" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.4.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2aded3a887e46dcf74e0e59556092811" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.4.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.4.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.4.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.5.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8396800 } ], "md5sum": "4d0f3ccc94d472357c2551878619478a" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.5.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "fde2165f55fcce1cd07339890ad0953c" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.5.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e952acf155de30cb5bd4418a2605c38e" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.5.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.5.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.5.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.6.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8396800 } ], "md5sum": "57285e9b285c039e68ba931ff2c38abf" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.6.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "1ec9ea556ac6318d477f27a9ffccc1b4" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.6.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e4961b543fc0540d53355fadb5202997" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.6.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.6.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.6.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.7.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8396800 } ], "md5sum": "01d1a8f29dff55d3e5106ae2fd2bf92b" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.7.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "ce15bfcf9d4c007ff198bbac8311d275" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.7.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3b848263184d5f655c63131eaa9aa7c5" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.7.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.7.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.7.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.8.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8396800 } ], "md5sum": "2e00fe5598e7974e3558325e16933b05" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.8.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "62d160d27fa806ee042a2d5d1a2d7f9c" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.8.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4291c51e797a54603f2918f85bba0f28" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.8.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.8.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.8.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.9.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8396800 } ], "md5sum": "1456d6dce1538c10ba035f37f028b6f9" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.9.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "e8423d504e5a1f07794fa6a19c6516c1" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.9.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8622070d4f372b7748b17907528d575a" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.9.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.9.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.9.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.10.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8396800 } ], "md5sum": "004f0aeb8769bb482560a71def97dc0b" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.10.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "d4f80971ed13439e41517b52d4a6740e" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.10.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0d855105d7a746718a1b391637b5c3fb" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.10.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.10.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.10.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.11.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8396800 } ], "md5sum": "003e068d64993094f976be6fca62343a" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.11.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "928d5b31f74ed716f8a94bee96eac465" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.11.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ce1376dfdf59e8b06aca43aafbd95d70" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.11.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.11.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.11.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.12.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8396800 } ], "md5sum": "002539101d27a72e45c3ef2fe6d35bca" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.12.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "3994a0500df53882d6e8e7f58c709192" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.12.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d9b0377aee207f260351bb243c039f1c" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.12.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.12.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.12.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.13.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8396800 } ], "md5sum": "cca43e273ed1b32466550de2f273c9f2" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.13.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "b1ab553bb92ee1bb7cf15820f5d94143" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.13.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0c77b71d9d73936cf23ffc1c0c9b9399" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.13.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.13.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.13.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.14.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8396800 } ], "md5sum": "f4b038503aa58e5641edf4587a862307" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.14.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "fc95094dd77fd22b71cc741ca2a23646" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.14.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0779d46f31a7dbe1de153ed47078dba2" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.14.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.14.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.14.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.15.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8396800 } ], "md5sum": "cb9ac262924e1f6e5f7afac1ce9dda54" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.15.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "b21a9db197211f02e2453cd8aac1a5f9" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.15.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "66d68e0373d2d3dc8ed25e161d6d817e" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.15.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.15.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.15.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.16.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8396800 } ], "md5sum": "7ea0c9c7f983448b8ae224c47f447198" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.16.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "ba5262dce77d42c47e6c50632ba264ef" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.16.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ef98e9f936328631b1e07249d359caca" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.16.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.16.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.16.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.17.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8396800 } ], "md5sum": "bdc69580a1be1538f4b8860b1b095f4e" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.17.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "01502631503072992ee7b6ba289f9607" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.17.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "49d9a61e33dcdf8a054187b1049eba40" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.17.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.17.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.17.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.18.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8396800 } ], "md5sum": "fcf5b35d38ed1317876899d604989ff5" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.18.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "a2e5924cf0ac9933abc7e76a5907ecc8" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.18.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "283384e32b930f44d91f41786ce8766d" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.18.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.18.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.18.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.19.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8396800 } ], "md5sum": "a34e69f01562020c50d436658b643204" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.19.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "6ab434fab1ed92442c4d50568ddbffd4" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.19.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "fede46845f6e20a3387630cc52583457" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.19.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.19.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.19.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.20.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8396800 } ], "md5sum": "ba467d7ddd7ec9238c6463091e80c9ab" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.20.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "b331223ab22e514ba30311898a491a9f" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.20.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "922de5be8f5163e2437b95d8f7abfba3" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.20.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.20.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.20.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.21.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8396800 } ], "md5sum": "7e81d6ec7d477fa616946139388ddc08" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.21.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "82ac1735c6afb9be52902c700aea7a12" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.21.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "12ce1c76efe436f02473a58282bb9133" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.21.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.21.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.21.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.22.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8396800 } ], "md5sum": "28c1604c347cab66d2927f2b459defed" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.22.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "32a91a4d279fc93278dcfbfb15dc0d5d" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.22.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9b551d0279e7ea93eb08cb68b29bf71a" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.22.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.22.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.22.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.23.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8396800 } ], "md5sum": "56eb834474d9be0bb245e3fd3a012950" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.23.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "ff6264f5e5d1279434606ce9d6041084" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.23.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "002fbfadeee34e731aa3393b7be3ef2c" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 379060224, "records": [ { "name": "output.weight", "shape": [ 92544, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 379060224, "byteOffset": 0 } ], "md5sum": "44dadfc1e5ec5a1e920d17a727bb730a" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 8400896, "records": [ { "name": "model.layers.23.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.23.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.23.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8396800 } ], "md5sum": "6e98b51d4e6355c1bc64a23646a583c9" } ] }