{ "metadata": { "ParamSize": 803, "ParamBytes": 72656372992.0, "BitsPerParam": 7.974819159215532 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 2101346304, "records": [ { "name": "lm_head.weight", "shape": [ 128256, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2101346304, "byteOffset": 0 } ], "md5sum": "cfa949870a3a6251bff1b4be3ca29c79" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 2101346304, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 128256, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2101346304, "byteOffset": 0 } ], "md5sum": "6e717a7b7eaa6b4d6408f6b3ee52be9f" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "5923b604bd7f410793867b0233870976" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "7290109f3f9ffb07baec66b9c20a4fbd" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "2fbb70fcc586918ea1f2f4005b8e3fec" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "9f1585054cdbed9d3721a1e57108d69f" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "71dd307dd814fbd66ce1be8f067c9a89" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "1ced405415c3dc4e2adc83f46da5d59e" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "86946544f6db993078e95edd9b7d0780" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d9ecdef3457dfece37d1a9ca76b304d6" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "6af151c36ac483048cb1adfa9444b1ac" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "99820308cb7b7459a14022d133af2038" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "d3d2ef365d915a16b1608041c583582a" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "9dc39341471aec46b5df02829e726892" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "6a26eb88fc02b37039ec3277507ca456" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "fdc84b3e451c9ec79630ad358f9e1433" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "4b6a96d8e848c66ada1baae33942f25d" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "19f185715521fcdf69387ba5695d2692" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "a4de6dd53493ea3712ecc0af8e9acebf" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "cf43723ec590ba8df9bd546991d0c742" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "e0b53c2e9e64ee09f2aa9404972cec89" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "80955bd4cd0c46e2a201d2cee07a8543" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "7fd85a8cf2c32005ada21536ef692ab2" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "0e8584b44d0847f1c96c2b3de65fad64" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "280e8d1d5fc19b01bc54a36ffe721d6e" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "a9ffaf86f658928bb5c2dade31f8dd82" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "f8a04dfd0781644eaf0236ced1fd6557" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "e20ffd77d2c85114d27c47465c8ef874" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "dc7a16bb573853dd613a3430c8439cab" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "b99e6d9ed9e89c6caa2bfcdda8ff9aee" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "e47ef92e077f7e63613b9cb4fee52ad6" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "7794d246b2b8acba6eba3a372775b93e" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f4446af1a09c32865589ad4910b0c908" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "8d9f0a6c3444a144b90f81fa05a88484" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "66206577c0afd99c8a3a6ca497a708ea" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "c8d347f3f568b817f9c39c8ba1833b96" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "6603050f0d40465c6e8f50bf7a90c675" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "bb4fe970087d0ab4c412d62fdeb10803" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "b0227e85ee2605213954ce3769a05c81" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "5a701d378b1746314080fb4d263a9f81" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "2f6e932a451ef649686ccdc75e0a028c" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "1c481e3b603c3f9040d03e804d5d0e8f" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "0980af196fa9a7acb044553e00881fd6" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "056629225a63780449a82f8ab454db71" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "8f11096808f084dea8c69b687f06f664" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "faeb1a2c538a32c07c76a1e9b73e4520" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "635964181666a49a6771bd5863d02c59" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "93cb18190e4f3b7d809b70c5a7e30833" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "e7234e9dfb6386b7236984d2f9d76680" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "3ef0d7513014648f51a8a021ea7d8759" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "300d8f55ed436983a3967666bf81feef" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "315b42c740c87da95f03bb1873a54a7b" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "d4e7084ed03504c158ea7eec342de6ba" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "8fecb1ff25b245d28e60beb0a2b45e92" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ca70bb47ad1c9895ed82d9c1a5161e40" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "5548823e29713e87ecb5267a56c64151" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2ae0932c276f2091302cd1581b1ece57" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "2e9e113d542a5991b0ece79b9e5a217e" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "070e49476ffaba98b146a2ac50d774e0" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "df534ec6299f604bf2a5b561ac69d0d3" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "e68b05fa21b4d80e2258c2727fe28937" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "89bbf0a78c9cba4ac66e4bb3178059e4" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "c5525185298f9c01ee1af6046eff5563" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "7d75bf9226f24869d7943631fdacbd52" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "923fdf4792dc0d423b7c0a90bd502e5e" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "b379e157a437af852f652f3d56df4cb4" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "f29848a0ea9c39231c84da956520187a" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "32ce963ba20f6d129e3799b8f20cbeb4" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "8cef2d9ba637e6dafeb2aef45bb2cd71" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "fcb57f2c6cf99fe019b0e4e412d5104f" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "af06798f7df22f757b6b81efdfe70911" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "0165f911f2b466d4d3d3274f23f20f34" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "185f8cb7f0335e81d7b83e5a45697f57" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "5b18a472d4d617aec3dc29d12b989cad" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "7b67862555921ae82dbb76e6161859c1" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "15cbb47130949f5edf2511f567f2d6af" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d66ad6d49d1338807012205c31cd0acd" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "802649fe7f6206bbea91615568ddd30b" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "b1fe25ce4c77795da6625e6854bd1524" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "f687c573746b5d417bc09bbe0ae70d58" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2b004480120b5f88de5d5e63a26c3ceb" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "0fbfe34cb9d39a35f5ff54f36b0532bc" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "933777c1ba15165e61bf1824047a2d3d" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "137e3f760b375f873e6167d843eed4ab" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "a3bc53a34ea937b87c821cf92c601ae8" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "2ba4176c287627ba6e8f7788af1f6b5d" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "b34c13445813e579666ff315381e204b" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "71b321d1d0d1affb64da0b5bf273e777" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "adda71eaf91b5f979b19c2ba8a0e48de" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "0f7718553e55928fadcf0b84127668cd" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "e90822eba121cf72b0d5984c6e77284e" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "7d33628d198ce793bf26a79797e61444" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "fd25f9bbe8568d3eb28be91ffa3ed0af" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "e4a642336ea5fb8a756bbfedfb3d30d0" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "def52924d20fbab64d7900b183fca065" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "4210769f43e1a8324727531141a3e9a9" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "8d1575d3f380baea8c672f7bec5049ef" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "d74ceee2a95c1d26835b9f582ddc989e" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "4781069b9b5863ab598692cd5185f092" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "e2c4925b3666676e630d426b2d8e16ce" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "3abf8a530e5d282b399b5d23a2f810fc" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "8162226f8712782d6011248c9c7f70fc" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "5811e5cd150c9dc2c00dac358f163622" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "e99b5269a18350410dc7198f2bece253" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "3fe0df87f648cdbfab653f3ab2405dc6" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "4f48a7fad9ffdfda8d0c8b9b0a214b9b" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "b036afe0ebaaab15e8adf2eb81511948" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "112c7dcdf2aa75b56498b7df4b52dda6" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "a485ed117034da32e1f8dc0751d5eab9" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "705479a8791f8fa99c9fb121bd71ed67" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ea373543816ba54b79a32cb4a594dd39" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "aac8bc1dd5ac15fbed322a61d59c4c1d" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ed72d41f9608e93579e1eb5666c2622b" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "9104d06edc06c7b5b3d06575bed3ead5" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "afbad1dcca4662de59c6b1dacbe77af2" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "3af96c9c2ef33a278c3970da8806e863" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c5435d03a3161d19541dc7f7c533b825" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "2200b77244fc8dbc026a59c7eed0c815" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "c1c2df3d1f936830ef93b56226022c83" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "0e9fcda6311597e41518ceb39ade4020" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "f3c20edfac5de0c7970c314946ab5cf7" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f08b2c10df593d1d608d0320f20d3e47" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "6aa3da97bb81eb474deaef0ec6e73d8a" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "d13af68c90fd7926e978e6ccdd560c5b" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "1448f6a54d2600adb670fa313c16b688" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "7e29f081123351d7561128efeecaebd3" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "e4a84d1325f1030ad289da4c208358f1" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "8304f242e7b70854011515e80d9dba5f" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "557374ac932dc91bcf2f37c4d80f78aa" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "c628a717b0e3d4c7817571ac6892b0d2" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "f660db9981790ac064bb8110ab1bde0c" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "9c41eca7a1083b9ad73fd23554fa67ed" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "9cfa786d295cc28c779363d7a9823ad6" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "4b0b565d8c9e7288c66e0020dbba373a" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "5cb23db1ef58f8d82cbee7ac5631a78b" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "1a1c7865c027aa2f655401ba72632aa5" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "33c98081399e29548154de67bfa78af7" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "7897ea3f06cbbddb08d4970cb8326126" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "65859bd62c59e9eace710890d4a6ffc2" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "875eb3353205281e4272ea96763af7fe" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "3571b602347c25976d9d1855fa617b07" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.40.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "d9c49a8467fd7e7857f3865d33cec748" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "9245377758d04e0e47846de4755a402c" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "5e45c729d8230ea44e45fea80de29429" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "96e347b1c9c5a484c5cb2b7560690545" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "1810428317922edb0093e57666e0744c" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "2baeab419b8f3912530e376a7feb4d37" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "eb7eade47cccbe78824e8a293647ec1e" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "2317119609c0c204f9ab84c4f8a1524e" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "228ae5243a6d520261035251b236553d" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "d2baf494cb9c9569f06cdce900e9c2f8" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "c828ecaec84796deb17e9e85022b786b" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "3982123564b985d44c303c46ecf9ceb4" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "e6e8f889d857fa619297448e8612561b" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "624b6712c714bcda5ded259079d0b79a" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "963921d496f3e011eb738e851eb29105" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "0227397243cf8c300227080bec7e9db3" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.41.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "5feca76f6934f6d7b0a49d401a1ab5ea" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "2d2033bb4ad12d3b597dfa3e219d98bb" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "b8ec00b2cb2682f4fbc903a1eb5ac183" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "94811e62d14a17f97b1274850dbd1df8" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.42.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "5b4d05b8cdb29d1f8d743995c214028b" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "54f584d9880f5d6a35d6146303c4d406" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "99109fd32b407aff729b3c689e412b8b" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.43.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "317f8d6145777fd676c88509e6e6fab6" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "9992befb5d2a0cee7ca1c320527ae1d9" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "0e36778233ccf542f52eab944b4465ee" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "9786b3eb0ea09db22bbe249e4636f5f3" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "995489ba81728c86e5446baf1c037684" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.44.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "2792994d41dc7effae24e80cb3e380d7" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "2bf0c3f00d4980f49c499852c57b0e7b" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4e78ecb7348965709d29d4cf8aba7d90" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "0ad3e6e6481a34ddf21325b5647feb63" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.45.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "9c0752c45f0026d13ed8b5d1710498d0" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "8705225ece7200a8f0912af1a6a68a1d" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.46.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "ebfadb68f23ff5df9e57f87b52a871a3" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "63685633a3c981996f6f79115dae5c9e" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "dcdf1e6b65bcc63df5d58f1855b66eac" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "ae58b944065a2b41cdaa3129c8f621ad" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "cfffd25c0baee86727406991b10171e3" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "c6e50c818e9c23655359bf8a3dc6da05" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.47.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "a273567d03aa6b2fe20f624c864bbdb0" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "e1e06accfa9a57dd98357a4de38f95ff" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.48.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "3708304ee393cf59e5376fae26641585" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "e58322244011c323b442ae543ee0c0d9" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.48.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "1727a56975039cb87d39a7100a3d4eea" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.48.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "23a9ff8b6fed81ade5220f9e9a27ce94" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.49.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "06cf2758b0e03e6e446483acdc298c89" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.49.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "6f2c21392fff513507da79c674a901ca" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "8d9ef1415621fbf1554758c04cdc8b0f" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.49.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "e9a6ad9f0a084beabfdf66b8179dca80" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.50.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4ab743b2a9a6e13aec91d080ea592586" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "879dd0e7e3a61e39be3c26839f00054a" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.50.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "da9a3741bcfafd1d20b8d552365746f5" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "abb52d0121a3e386a9b2abaf14920d13" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.51.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "86a75ae32458f18817c96406bae8a74c" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "6c8ed58458a91e8c3b762da4e07a4977" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.51.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "7634cd7069557c3065f6d42961cf3620" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.51.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "b66ec39e6279e767925ca066576073ae" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.52.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "1ec4ce32b9adc3888c6c8cfac6113e14" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "239d14367c325842a259d408a0d45d88" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.52.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "7bd7a144b4bcd143919c05c382f748bc" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "983a989c2dca7b994e9d6af093b455eb" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.53.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "cde6007f465274135b6b4ddb7f86909b" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "c6a4096d2c585ac604959f85a18e2e77" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.53.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "f3cd44ecb06f901c2644e66ca27c2113" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.53.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "9b963a0ade5bc014a024a43d46b71f32" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "817f7af33066232376d9f499da7c08f4" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.54.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "40af3d60b50afd52d0ff7035813fc15c" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "6bb61fd0c95f35bb1109509155790d34" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.54.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "753bb2a58ee32a4aa5c9c4f0f519ec78" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.55.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d0c97fa924499abe1a9d4031fee83e1e" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "5f12d0bb1e4f804a65de63f6fb958c25" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.55.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "396579f2b79786885b48ffe83145fbd5" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "75e2b6daa8d355d1af933c04a9d64d3b" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.56.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4e7449fd0e5392092e489fea1064eee9" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "f9d81045749d64c47b43f598cacfc0cc" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.56.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "5d8721098384107336083f9d0656f9d3" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.56.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "fb2fa427407e2a0e68d95e1f4266935b" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "dfcbcaa371c1693ae2b4aa83c8921e6b" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.57.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "5d228d83dce1e3a6d577ca742f2070b6" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.57.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "87b42de4bc684d596452b6395e6807eb" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.57.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ff7ef9629d99300da4a1749962c7913d" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.58.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ea7dfa4edaea7d399fa20bd386e749c0" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "f7243d23edc7b23570797e27d3a4b64d" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.58.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "574509186184aacd9147978e89ee4728" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.58.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "556495ca06f67be6299e6d629627832d" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.59.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "3cea65ebb2b2d3e3ebc6e2c1ea8c08a8" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "17d2d9a0a673f26a153f14e22396b771" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.59.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "1eaeca4c8eab7cc9c65bbcd3eb78e2ab" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "8f0c8c74b8907f35a97d96e7afbdd219" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.60.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "1190fd219018f069a505e0e002415712" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.60.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "c1eb3a841ac8f5559c8d9bd35b9a856e" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.60.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ee1c9c85685a77bb142cd1a501d894ad" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "f590966865bdb63a486cc671918233e4" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.61.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "32888f916f39333ec5eb4f7ecf43092f" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "6a3b1b662c470c6adf7c0242ffe5a715" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.61.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "c403857578391c2146f8454291f357fc" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "9ab25e4495885dc0a60ef96b140e4ec5" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.62.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "60d6df12019a5b4dce6b013a55dc7141" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "cacdfb4f4761784caf8d4c38d49c66df" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.62.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "182109677948e9b3d2dbdb337d615c26" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.62.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "5c42e487721f36c959b0b40cd344020a" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.63.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "6c55258fa688f508f6691f7860356397" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.63.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d5cf67665417a84581769697195c1a0a" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "6aa0921d06033184b73e40b46c83e438" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.63.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "5c7a4b2d6be7ce722362047d3f1279c5" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.64.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "11817e2ef7daae0cbdbbabf52515b9dc" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "8ef8f8f632f32d32249aa3393721a963" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.64.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "d5efd79d7c9fbc361fdaec35bb429db2" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.64.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "205029cb7ec384dacbdf26ed10f2f934" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.65.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "fcfc3c4d4db6f97bcca79216a3512e7d" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "bec222f7d288ddc5fff0c9977ba96b50" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.65.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "04a4fc321c158388abf46a426e1973e9" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.65.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "3745ca0964e946619f2b44055e637d64" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.66.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "83c426a0cb20a623aa7ede7237a96207" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "e9762bcc9f13440c2379af87480e4b57" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.66.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "bb22ce5b49cc94e63ff24b9fd24ce08f" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.66.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "a6aaa8aca96afb78e122ac8e55cc1608" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.67.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "0e4bf11b97390d80bbbb213c36b072a7" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "e4e6916359411ef9faa662f888910035" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.67.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "75c566bdfc3d1ebff81be07c88fa18d1" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.67.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "b514ee525f23ac75225899b372a392f5" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "5cc5ad6d303226a739aec4c1e3c1ecf6" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.68.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "fed21e1ac7e6957b85add7d3e7d7f8ce" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.68.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "7dbcdeb9d4848d0f5b1cb4a62a3de0b4" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.68.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "979050891843708ce3d534e114c47d64" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.69.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "5d2c22afd122951a90afb0f1fecf7689" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "daddf03b7d7f2111ae272387d119466b" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.69.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "028891c5e81ec5ad575fe5689de39b17" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.69.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "6f45fcdc21b976f4d77e0c75166e2af0" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.70.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "5116e44f460f8f3dd71dfe5c54b9516a" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "c67c8e32989370119216d6bbac6b4636" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.70.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "c110be61996209729fd72c7f34a13478" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.70.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "ef3d7cce7f5f6caf5e884fbbc28d69bb" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "1d957036ada7c76bbe74f7064b74e4be" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.71.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "f7d5b3dc4342a3c2e7c976a8788243ba" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.71.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "7b92ea8bad519f3828d0d4fe75eef8b6" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a46b2c747530ddc0bac266ed74f52a99" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "500f6b333ee89ce0105102963524746f" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "a222ce01828ec20965bb216dd45ae4ba" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "dce66a81f6dfc8e85c0ec75c47e1ed35" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "f5961e89190bc3402e2c0a50727eafbe" }, { "dataPath": "params_shard_283.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "28cd11851b861351fe78801f6e488697" }, { "dataPath": "params_shard_284.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "9653ee73b2962943f941c9f2b4e79621" }, { "dataPath": "params_shard_285.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "0910a4324e041f1b1a82c8b2d12ed714" }, { "dataPath": "params_shard_286.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "315568abde3e0f034d52ce573dc068e4" }, { "dataPath": "params_shard_287.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "58f0585aa7288faabab17e3a717dbc8a" }, { "dataPath": "params_shard_288.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "be550dd01eb43e0d22de2126e11cbe17" }, { "dataPath": "params_shard_289.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.71.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "1791aa2ee1f7ce3d5880b27c0fb73de1" }, { "dataPath": "params_shard_290.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.72.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "6221bef1390b472761987ce5d7fb52b6" }, { "dataPath": "params_shard_291.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "968f934b644baf5c76f05d656e7731e8" }, { "dataPath": "params_shard_292.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.72.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "d818d1bd085daf4e83495a45433beec3" }, { "dataPath": "params_shard_293.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.72.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "c7da5eeac983f486ae4cae000f3ed60d" }, { "dataPath": "params_shard_294.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.73.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "bd494dc38ff71d2bdd0ea3923b7f42c3" }, { "dataPath": "params_shard_295.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "5a1b15896af686a5cfe88172599e652f" }, { "dataPath": "params_shard_296.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.73.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "4cffe2c3d318cd8c17df6033cf8d3094" }, { "dataPath": "params_shard_297.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.73.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "a870d4785e519fadb9748087bce9dee6" }, { "dataPath": "params_shard_298.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.74.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "740a4faad3496bc181f3da784b33190d" }, { "dataPath": "params_shard_299.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.74.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "e79ccb2376ff3ecd12efd4c170d7f5f6" }, { "dataPath": "params_shard_300.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.74.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "81e045cc7268a5795a7dda5be769aeea" }, { "dataPath": "params_shard_301.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "b1e02dc648b4643ffa24f3c1fbcb17f9" }, { "dataPath": "params_shard_302.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.75.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "947da87dfb92b3e3c307cf2e09308b70" }, { "dataPath": "params_shard_303.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "2ab70196e5000a951a1a733047dcf91a" }, { "dataPath": "params_shard_304.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.75.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "4aebca1af0a3021e1b824aa28fee731b" }, { "dataPath": "params_shard_305.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.75.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "b26f943697305937200564cc7f04fd6e" }, { "dataPath": "params_shard_306.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.76.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "bc6adb68840bcf3ba3ea257e1fd28a22" }, { "dataPath": "params_shard_307.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "700fee362cd0dda656d27f16547f5a18" }, { "dataPath": "params_shard_308.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.76.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "ad0314adb5b0aee252935b744c2b1c22" }, { "dataPath": "params_shard_309.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.76.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "2fcbce4a52ad6f55fe3e80090c29d7e5" }, { "dataPath": "params_shard_310.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.77.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "ef044893843da4ffc167383e2cbca0b4" }, { "dataPath": "params_shard_311.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.77.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4661af697e7d28ca2117d2678087c50e" }, { "dataPath": "params_shard_312.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "17feee64f5eb47d72ac049b5f8763890" }, { "dataPath": "params_shard_313.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.77.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "159a360c6b06ffd946e68626a41c7974" }, { "dataPath": "params_shard_314.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.78.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "fe91e1c8e232d86cd243e70ff37e8399" }, { "dataPath": "params_shard_315.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "9a1e035f8f8947cbb7c66f98dfcf5c64" }, { "dataPath": "params_shard_316.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.78.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "69d69f76f9b96676ec0c42251cd9ff66" }, { "dataPath": "params_shard_317.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.78.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "15e990c8af54c913d1ee4184b683ed5c" }, { "dataPath": "params_shard_318.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.79.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "06609ee2952beb903cbe0b7022e1e3df" }, { "dataPath": "params_shard_319.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "20a133168707156e9407d1d7b9c3da18" }, { "dataPath": "params_shard_320.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.79.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "4be85fd7f2fde4c8f6e8bd2e2a4feb85" }, { "dataPath": "params_shard_321.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.79.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "46519d04717882fa9954f293075627f4" }, { "dataPath": "params_shard_322.bin", "format": "raw-shard", "nbytes": 2638464, "records": [ { "name": "model.layers.0.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 0 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 16384 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 16386 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16388 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 32772 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 32774 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 32776 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 32778 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 32780 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32782 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 49166 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 49168 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 65552 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 81936 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 81938 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 81940 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 98324 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 98326 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 98328 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 114712 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 114714 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 114716 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 131100 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 131102 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 131104 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 131106 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 131108 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 147492 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 147494 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 147496 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 163880 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 163882 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 163884 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 180268 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 180270 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 180272 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 196656 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 196658 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 196660 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 196662 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 196664 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 196666 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 213050 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 213052 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 229436 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 245820 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 245822 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 245824 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 262208 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 262210 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 262212 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 278596 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 278598 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 278600 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 294984 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 294986 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 294988 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 294990 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 294992 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 294994 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 311378 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 311380 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 327764 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 344148 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 344150 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 344152 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 360536 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 360538 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 360540 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 376924 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 376926 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 376928 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 393312 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 393314 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 393316 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 393318 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 393320 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 409704 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 409706 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 409708 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 426092 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 442476 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 442478 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 442480 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 458864 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 458866 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 458868 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 475252 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 475254 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 475256 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 491640 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 491642 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 491644 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 491646 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 508030 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 508032 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 508034 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 524418 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 524420 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 540804 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 540806 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 540808 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 557192 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 557194 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 557196 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 573580 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 573582 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 573584 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 589968 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 589970 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 589972 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 606356 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 606358 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 606360 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 622744 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 622746 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 622748 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 639132 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 639134 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 639136 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 655520 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 655522 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 655524 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 655526 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 655528 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 655530 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 671914 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 671916 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 688300 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 704684 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 704686 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 704688 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 721072 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 721074 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 721076 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 737460 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 737462 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 737464 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 753848 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 753850 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 753852 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 753854 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 753856 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 753858 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 770242 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 770244 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 786628 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 803012 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 803014 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 803016 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 819400 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 819402 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 819404 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 835788 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 835790 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 835792 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 852176 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 852178 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 852180 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 852182 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 852184 }, { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 868568 }, { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 868570 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 868572 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 884956 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 901340 }, { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 901342 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 901344 }, { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 917728 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 917730 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 917732 }, { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 934116 }, { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 934118 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 934120 }, { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 950504 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 950506 }, { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 950508 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 950510 }, { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 966894 }, { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 966896 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 966898 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 983282 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 983284 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 999668 }, { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 999670 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 999672 }, { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1016056 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1016058 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1016060 }, { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1032444 }, { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1032446 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1032448 }, { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1048832 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1048834 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1048836 }, { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1065220 }, { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1065222 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1065224 }, { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1081608 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1081610 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1081612 }, { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1097996 }, { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1097998 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1098000 }, { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1114384 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1114386 }, { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1114388 }, { "name": "model.layers.40.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1114390 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1114392 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1114394 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1130778 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1130780 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1130782 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1147166 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1163550 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1163552 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1163554 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1179938 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1179940 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1179942 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1196326 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1196328 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1196330 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1212714 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1212716 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1212718 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1212720 }, { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1229104 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1229106 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1245490 }, { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1261874 }, { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1261876 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1261878 }, { "name": "model.layers.41.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1278262 }, { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1278264 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1278266 }, { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1294650 }, { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1294652 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1294654 }, { "name": "model.layers.42.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1311038 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1311040 }, { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1311042 }, { "name": "model.layers.43.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1311044 }, { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1311046 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1311048 }, { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1327432 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1327434 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1343818 }, { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1360202 }, { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1360204 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1360206 }, { "name": "model.layers.44.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1376590 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1376592 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1376594 }, { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1392978 }, { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1392980 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1392982 }, { "name": "model.layers.45.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1409366 }, { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1409368 }, { "name": "model.layers.46.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1409370 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1409372 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1409374 }, { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1425758 }, { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1425760 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1425762 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1442146 }, { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1458530 }, { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1458532 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1458534 }, { "name": "model.layers.47.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1474918 }, { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1474920 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1474922 }, { "name": "model.layers.48.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1491306 }, { "name": "model.layers.48.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1491308 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1491310 }, { "name": "model.layers.48.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1507694 }, { "name": "model.layers.48.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1507696 }, { "name": "model.layers.49.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1507698 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1507700 }, { "name": "model.layers.49.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1524084 }, { "name": "model.layers.49.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1524086 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1524088 }, { "name": "model.layers.49.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1540472 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1540474 }, { "name": "model.layers.50.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1556858 }, { "name": "model.layers.50.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1556860 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1556862 }, { "name": "model.layers.50.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1573246 }, { "name": "model.layers.50.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1573248 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1573250 }, { "name": "model.layers.51.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1589634 }, { "name": "model.layers.51.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1589636 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1589638 }, { "name": "model.layers.51.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1606022 }, { "name": "model.layers.51.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1606024 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1606026 }, { "name": "model.layers.52.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1622410 }, { "name": "model.layers.52.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1622412 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1622414 }, { "name": "model.layers.52.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1638798 }, { "name": "model.layers.52.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1638800 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1638802 }, { "name": "model.layers.53.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1655186 }, { "name": "model.layers.53.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1655188 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1655190 }, { "name": "model.layers.53.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1671574 }, { "name": "model.layers.53.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1671576 }, { "name": "model.layers.54.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1671578 }, { "name": "model.layers.54.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1671580 }, { "name": "model.layers.54.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1671582 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1671584 }, { "name": "model.layers.54.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1687968 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1687970 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1704354 }, { "name": "model.layers.55.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1720738 }, { "name": "model.layers.55.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1720740 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1720742 }, { "name": "model.layers.55.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1737126 }, { "name": "model.layers.55.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1737128 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1737130 }, { "name": "model.layers.56.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1753514 }, { "name": "model.layers.56.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1753516 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1753518 }, { "name": "model.layers.56.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1769902 }, { "name": "model.layers.56.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1769904 }, { "name": "model.layers.57.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1769906 }, { "name": "model.layers.57.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1769908 }, { "name": "model.layers.57.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1769910 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1769912 }, { "name": "model.layers.57.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1786296 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1786298 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1802682 }, { "name": "model.layers.58.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1819066 }, { "name": "model.layers.58.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1819068 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1819070 }, { "name": "model.layers.58.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1835454 }, { "name": "model.layers.58.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1835456 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1835458 }, { "name": "model.layers.59.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1851842 }, { "name": "model.layers.59.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1851844 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1851846 }, { "name": "model.layers.59.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1868230 }, { "name": "model.layers.59.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1868232 }, { "name": "model.layers.60.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1868234 }, { "name": "model.layers.60.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1868236 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1868238 }, { "name": "model.layers.60.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1884622 }, { "name": "model.layers.60.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1884624 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1884626 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1901010 }, { "name": "model.layers.61.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1917394 }, { "name": "model.layers.61.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1917396 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1917398 }, { "name": "model.layers.61.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1933782 }, { "name": "model.layers.61.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1933784 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1933786 }, { "name": "model.layers.62.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1950170 }, { "name": "model.layers.62.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1950172 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1950174 }, { "name": "model.layers.62.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1966558 }, { "name": "model.layers.62.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1966560 }, { "name": "model.layers.63.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1966562 }, { "name": "model.layers.63.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1966564 }, { "name": "model.layers.63.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1982948 }, { "name": "model.layers.63.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1982950 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1982952 }, { "name": "model.layers.63.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 1999336 }, { "name": "model.layers.64.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1999338 }, { "name": "model.layers.64.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2015722 }, { "name": "model.layers.64.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2015724 }, { "name": "model.layers.64.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2015726 }, { "name": "model.layers.64.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2032110 }, { "name": "model.layers.64.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2032112 }, { "name": "model.layers.65.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2032114 }, { "name": "model.layers.65.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2048498 }, { "name": "model.layers.65.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2048500 }, { "name": "model.layers.65.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2048502 }, { "name": "model.layers.65.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2064886 }, { "name": "model.layers.65.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2064888 }, { "name": "model.layers.66.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2064890 }, { "name": "model.layers.66.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2081274 }, { "name": "model.layers.66.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2081276 }, { "name": "model.layers.66.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2081278 }, { "name": "model.layers.66.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2097662 }, { "name": "model.layers.66.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2097664 }, { "name": "model.layers.67.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2097666 }, { "name": "model.layers.67.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2114050 }, { "name": "model.layers.67.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2114052 }, { "name": "model.layers.67.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2114054 }, { "name": "model.layers.67.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2130438 }, { "name": "model.layers.67.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2130440 }, { "name": "model.layers.68.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2130442 }, { "name": "model.layers.68.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2130444 }, { "name": "model.layers.68.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2130446 }, { "name": "model.layers.68.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2130448 }, { "name": "model.layers.68.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2146832 }, { "name": "model.layers.68.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2146834 }, { "name": "model.layers.69.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2163218 }, { "name": "model.layers.69.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2179602 }, { "name": "model.layers.69.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2179604 }, { "name": "model.layers.69.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2179606 }, { "name": "model.layers.69.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2195990 }, { "name": "model.layers.69.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2195992 }, { "name": "model.layers.70.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2195994 }, { "name": "model.layers.70.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2212378 }, { "name": "model.layers.70.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2212380 }, { "name": "model.layers.70.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2212382 }, { "name": "model.layers.70.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2228766 }, { "name": "model.layers.70.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2228768 }, { "name": "model.layers.71.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2228770 }, { "name": "model.layers.71.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2228772 }, { "name": "model.layers.71.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2228774 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2228776 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2245160 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2245162 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2245164 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2261548 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2261550 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2277934 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2277936 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2277938 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2294322 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2294324 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2294326 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2310710 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2310712 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2310714 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2327098 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2327100 }, { "name": "model.layers.71.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2327102 }, { "name": "model.layers.71.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2343486 }, { "name": "model.layers.71.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2343488 }, { "name": "model.layers.72.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2359872 }, { "name": "model.layers.72.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2376256 }, { "name": "model.layers.72.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2376258 }, { "name": "model.layers.72.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2376260 }, { "name": "model.layers.72.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2392644 }, { "name": "model.layers.72.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2392646 }, { "name": "model.layers.73.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2392648 }, { "name": "model.layers.73.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2409032 }, { "name": "model.layers.73.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2409034 }, { "name": "model.layers.73.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2409036 }, { "name": "model.layers.73.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2425420 }, { "name": "model.layers.73.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2425422 }, { "name": "model.layers.74.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2425424 }, { "name": "model.layers.74.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2425426 }, { "name": "model.layers.74.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2425428 }, { "name": "model.layers.74.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2441812 }, { "name": "model.layers.74.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2441814 }, { "name": "model.layers.74.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2441816 }, { "name": "model.layers.75.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2458200 }, { "name": "model.layers.75.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2474584 }, { "name": "model.layers.75.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2474586 }, { "name": "model.layers.75.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2474588 }, { "name": "model.layers.75.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2490972 }, { "name": "model.layers.75.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2490974 }, { "name": "model.layers.76.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2490976 }, { "name": "model.layers.76.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2507360 }, { "name": "model.layers.76.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2507362 }, { "name": "model.layers.76.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2507364 }, { "name": "model.layers.76.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2523748 }, { "name": "model.layers.76.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2523750 }, { "name": "model.layers.77.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2523752 }, { "name": "model.layers.77.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2523754 }, { "name": "model.layers.77.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2540138 }, { "name": "model.layers.77.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2540140 }, { "name": "model.layers.77.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2540142 }, { "name": "model.layers.77.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2556526 }, { "name": "model.layers.78.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2556528 }, { "name": "model.layers.78.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2572912 }, { "name": "model.layers.78.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2572914 }, { "name": "model.layers.78.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2572916 }, { "name": "model.layers.78.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2589300 }, { "name": "model.layers.78.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2589302 }, { "name": "model.layers.79.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2589304 }, { "name": "model.layers.79.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2605688 }, { "name": "model.layers.79.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2605690 }, { "name": "model.layers.79.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2605692 }, { "name": "model.layers.79.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2622076 }, { "name": "model.layers.79.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2622078 }, { "name": "model.norm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2622080 } ], "md5sum": "0652ccb1955f446ae8c22bdaf66076bf" }, { "dataPath": "params_shard_323.bin", "format": "raw-shard", "nbytes": 640, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 2 }, { "name": "model.layers.0.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 4 }, { "name": "model.layers.0.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 6 }, { "name": "model.layers.1.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 8 }, { "name": "model.layers.1.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 10 }, { "name": "model.layers.1.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 12 }, { "name": "model.layers.1.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 14 }, { "name": "model.layers.2.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 16 }, { "name": "model.layers.2.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 18 }, { "name": "model.layers.2.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 20 }, { "name": "model.layers.2.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 22 }, { "name": "model.layers.3.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 24 }, { "name": "model.layers.3.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 26 }, { "name": "model.layers.3.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 28 }, { "name": "model.layers.3.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 30 }, { "name": "model.layers.4.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 32 }, { "name": "model.layers.4.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 34 }, { "name": "model.layers.4.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 36 }, { "name": "model.layers.4.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 38 }, { "name": "model.layers.5.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 40 }, { "name": "model.layers.5.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 42 }, { "name": "model.layers.5.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 44 }, { "name": "model.layers.5.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 46 }, { "name": "model.layers.6.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 48 }, { "name": "model.layers.6.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 50 }, { "name": "model.layers.6.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 52 }, { "name": "model.layers.6.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 54 }, { "name": "model.layers.7.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 56 }, { "name": "model.layers.7.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 58 }, { "name": "model.layers.7.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 60 }, { "name": "model.layers.7.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 62 }, { "name": "model.layers.8.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 64 }, { "name": "model.layers.8.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 66 }, { "name": "model.layers.8.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 68 }, { "name": "model.layers.8.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 70 }, { "name": "model.layers.9.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 72 }, { "name": "model.layers.9.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 74 }, { "name": "model.layers.9.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 76 }, { "name": "model.layers.9.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 78 }, { "name": "model.layers.10.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 80 }, { "name": "model.layers.10.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 82 }, { "name": "model.layers.10.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 84 }, { "name": "model.layers.10.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 86 }, { "name": "model.layers.11.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 88 }, { "name": "model.layers.11.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 90 }, { "name": "model.layers.11.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 92 }, { "name": "model.layers.11.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 94 }, { "name": "model.layers.12.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 96 }, { "name": "model.layers.12.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 98 }, { "name": "model.layers.12.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 100 }, { "name": "model.layers.12.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 102 }, { "name": "model.layers.13.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 104 }, { "name": "model.layers.13.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 106 }, { "name": "model.layers.13.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 108 }, { "name": "model.layers.13.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 110 }, { "name": "model.layers.14.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 112 }, { "name": "model.layers.14.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 114 }, { "name": "model.layers.14.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 116 }, { "name": "model.layers.14.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 118 }, { "name": "model.layers.15.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 120 }, { "name": "model.layers.15.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 122 }, { "name": "model.layers.15.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 124 }, { "name": "model.layers.15.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 126 }, { "name": "model.layers.16.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 128 }, { "name": "model.layers.16.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 130 }, { "name": "model.layers.16.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 132 }, { "name": "model.layers.16.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 134 }, { "name": "model.layers.17.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 136 }, { "name": "model.layers.17.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 138 }, { "name": "model.layers.17.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 140 }, { "name": "model.layers.17.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 142 }, { "name": "model.layers.18.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 144 }, { "name": "model.layers.18.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 146 }, { "name": "model.layers.18.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 148 }, { "name": "model.layers.18.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 150 }, { "name": "model.layers.19.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 152 }, { "name": "model.layers.19.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 154 }, { "name": "model.layers.19.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 156 }, { "name": "model.layers.19.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 158 }, { "name": "model.layers.20.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 160 }, { "name": "model.layers.20.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 162 }, { "name": "model.layers.20.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 164 }, { "name": "model.layers.20.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 166 }, { "name": "model.layers.21.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 168 }, { "name": "model.layers.21.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 170 }, { "name": "model.layers.21.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 172 }, { "name": "model.layers.21.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 174 }, { "name": "model.layers.22.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 176 }, { "name": "model.layers.22.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 178 }, { "name": "model.layers.22.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 180 }, { "name": "model.layers.22.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 182 }, { "name": "model.layers.23.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 184 }, { "name": "model.layers.23.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 186 }, { "name": "model.layers.23.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 188 }, { "name": "model.layers.23.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 190 }, { "name": "model.layers.24.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 192 }, { "name": "model.layers.24.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 194 }, { "name": "model.layers.24.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 196 }, { "name": "model.layers.24.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 198 }, { "name": "model.layers.25.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 200 }, { "name": "model.layers.25.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 202 }, { "name": "model.layers.25.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 204 }, { "name": "model.layers.25.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 206 }, { "name": "model.layers.26.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 208 }, { "name": "model.layers.26.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 210 }, { "name": "model.layers.26.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 212 }, { "name": "model.layers.26.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 214 }, { "name": "model.layers.27.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 216 }, { "name": "model.layers.27.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 218 }, { "name": "model.layers.27.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 220 }, { "name": "model.layers.27.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 222 }, { "name": "model.layers.28.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 224 }, { "name": "model.layers.28.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 226 }, { "name": "model.layers.28.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 228 }, { "name": "model.layers.28.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 230 }, { "name": "model.layers.29.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 232 }, { "name": "model.layers.29.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 234 }, { "name": "model.layers.29.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 236 }, { "name": "model.layers.29.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 238 }, { "name": "model.layers.30.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 240 }, { "name": "model.layers.30.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 242 }, { "name": "model.layers.30.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 244 }, { "name": "model.layers.30.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 246 }, { "name": "model.layers.31.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 248 }, { "name": "model.layers.31.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 250 }, { "name": "model.layers.31.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 252 }, { "name": "model.layers.31.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 254 }, { "name": "model.layers.32.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 256 }, { "name": "model.layers.32.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 258 }, { "name": "model.layers.32.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 260 }, { "name": "model.layers.32.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 262 }, { "name": "model.layers.33.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 264 }, { "name": "model.layers.33.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 266 }, { "name": "model.layers.33.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 268 }, { "name": "model.layers.33.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 270 }, { "name": "model.layers.34.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 272 }, { "name": "model.layers.34.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 274 }, { "name": "model.layers.34.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 276 }, { "name": "model.layers.34.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 278 }, { "name": "model.layers.35.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 280 }, { "name": "model.layers.35.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 282 }, { "name": "model.layers.35.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 284 }, { "name": "model.layers.35.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 286 }, { "name": "model.layers.36.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 288 }, { "name": "model.layers.36.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 290 }, { "name": "model.layers.36.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 292 }, { "name": "model.layers.36.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 294 }, { "name": "model.layers.37.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 296 }, { "name": "model.layers.37.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 298 }, { "name": "model.layers.37.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 300 }, { "name": "model.layers.37.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 302 }, { "name": "model.layers.38.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 304 }, { "name": "model.layers.38.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 306 }, { "name": "model.layers.38.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 308 }, { "name": "model.layers.38.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 310 }, { "name": "model.layers.39.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 312 }, { "name": "model.layers.39.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 314 }, { "name": "model.layers.39.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 316 }, { "name": "model.layers.39.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 318 }, { "name": "model.layers.40.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 320 }, { "name": "model.layers.40.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 322 }, { "name": "model.layers.40.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 324 }, { "name": "model.layers.40.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 326 }, { "name": "model.layers.41.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 328 }, { "name": "model.layers.41.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 330 }, { "name": "model.layers.41.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 332 }, { "name": "model.layers.41.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 334 }, { "name": "model.layers.42.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 336 }, { "name": "model.layers.42.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 338 }, { "name": "model.layers.42.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 340 }, { "name": "model.layers.42.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 342 }, { "name": "model.layers.43.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 344 }, { "name": "model.layers.43.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 346 }, { "name": "model.layers.43.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 348 }, { "name": "model.layers.43.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 350 }, { "name": "model.layers.44.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 352 }, { "name": "model.layers.44.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 354 }, { "name": "model.layers.44.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 356 }, { "name": "model.layers.44.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 358 }, { "name": "model.layers.45.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 360 }, { "name": "model.layers.45.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 362 }, { "name": "model.layers.45.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 364 }, { "name": "model.layers.45.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 366 }, { "name": "model.layers.46.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 368 }, { "name": "model.layers.46.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 370 }, { "name": "model.layers.46.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 372 }, { "name": "model.layers.46.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 374 }, { "name": "model.layers.47.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 376 }, { "name": "model.layers.47.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 378 }, { "name": "model.layers.47.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 380 }, { "name": "model.layers.47.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 382 }, { "name": "model.layers.48.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 384 }, { "name": "model.layers.48.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 386 }, { "name": "model.layers.48.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 388 }, { "name": "model.layers.48.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 390 }, { "name": "model.layers.49.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 392 }, { "name": "model.layers.49.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 394 }, { "name": "model.layers.49.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 396 }, { "name": "model.layers.49.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 398 }, { "name": "model.layers.50.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 400 }, { "name": "model.layers.50.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 402 }, { "name": "model.layers.50.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 404 }, { "name": "model.layers.50.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 406 }, { "name": "model.layers.51.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 408 }, { "name": "model.layers.51.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 410 }, { "name": "model.layers.51.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 412 }, { "name": "model.layers.51.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 414 }, { "name": "model.layers.52.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 416 }, { "name": "model.layers.52.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 418 }, { "name": "model.layers.52.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 420 }, { "name": "model.layers.52.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 422 }, { "name": "model.layers.53.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 424 }, { "name": "model.layers.53.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 426 }, { "name": "model.layers.53.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 428 }, { "name": "model.layers.53.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 430 }, { "name": "model.layers.54.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 432 }, { "name": "model.layers.54.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 434 }, { "name": "model.layers.54.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 436 }, { "name": "model.layers.54.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 438 }, { "name": "model.layers.55.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 440 }, { "name": "model.layers.55.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 442 }, { "name": "model.layers.55.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 444 }, { "name": "model.layers.55.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 446 }, { "name": "model.layers.56.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 448 }, { "name": "model.layers.56.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 450 }, { "name": "model.layers.56.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 452 }, { "name": "model.layers.56.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 454 }, { "name": "model.layers.57.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 456 }, { "name": "model.layers.57.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 458 }, { "name": "model.layers.57.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 460 }, { "name": "model.layers.57.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 462 }, { "name": "model.layers.58.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 464 }, { "name": "model.layers.58.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 466 }, { "name": "model.layers.58.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 468 }, { "name": "model.layers.58.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 470 }, { "name": "model.layers.59.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 472 }, { "name": "model.layers.59.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 474 }, { "name": "model.layers.59.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 476 }, { "name": "model.layers.59.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 478 }, { "name": "model.layers.60.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 480 }, { "name": "model.layers.60.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 482 }, { "name": "model.layers.60.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 484 }, { "name": "model.layers.60.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 486 }, { "name": "model.layers.61.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 488 }, { "name": "model.layers.61.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 490 }, { "name": "model.layers.61.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 492 }, { "name": "model.layers.61.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 494 }, { "name": "model.layers.62.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 496 }, { "name": "model.layers.62.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 498 }, { "name": "model.layers.62.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 500 }, { "name": "model.layers.62.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 502 }, { "name": "model.layers.63.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 504 }, { "name": "model.layers.63.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 506 }, { "name": "model.layers.63.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 508 }, { "name": "model.layers.63.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 510 }, { "name": "model.layers.64.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 512 }, { "name": "model.layers.64.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 514 }, { "name": "model.layers.64.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 516 }, { "name": "model.layers.64.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 518 }, { "name": "model.layers.65.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 520 }, { "name": "model.layers.65.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 522 }, { "name": "model.layers.65.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 524 }, { "name": "model.layers.65.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 526 }, { "name": "model.layers.66.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 528 }, { "name": "model.layers.66.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 530 }, { "name": "model.layers.66.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 532 }, { "name": "model.layers.66.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 534 }, { "name": "model.layers.67.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 536 }, { "name": "model.layers.67.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 538 }, { "name": "model.layers.67.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 540 }, { "name": "model.layers.67.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 542 }, { "name": "model.layers.68.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 544 }, { "name": "model.layers.68.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 546 }, { "name": "model.layers.68.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 548 }, { "name": "model.layers.68.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 550 }, { "name": "model.layers.69.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 552 }, { "name": "model.layers.69.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 554 }, { "name": "model.layers.69.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 556 }, { "name": "model.layers.69.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 558 }, { "name": "model.layers.70.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 560 }, { "name": "model.layers.70.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 562 }, { "name": "model.layers.70.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 564 }, { "name": "model.layers.70.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 566 }, { "name": "model.layers.71.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 568 }, { "name": "model.layers.71.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 570 }, { "name": "model.layers.71.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 572 }, { "name": "model.layers.71.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 574 }, { "name": "model.layers.72.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 576 }, { "name": "model.layers.72.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 578 }, { "name": "model.layers.72.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 580 }, { "name": "model.layers.72.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 582 }, { "name": "model.layers.73.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 584 }, { "name": "model.layers.73.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 586 }, { "name": "model.layers.73.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 588 }, { "name": "model.layers.73.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 590 }, { "name": "model.layers.74.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 592 }, { "name": "model.layers.74.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 594 }, { "name": "model.layers.74.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 596 }, { "name": "model.layers.74.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 598 }, { "name": "model.layers.75.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 600 }, { "name": "model.layers.75.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 602 }, { "name": "model.layers.75.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 604 }, { "name": "model.layers.75.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 606 }, { "name": "model.layers.76.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 608 }, { "name": "model.layers.76.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 610 }, { "name": "model.layers.76.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 612 }, { "name": "model.layers.76.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 614 }, { "name": "model.layers.77.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 616 }, { "name": "model.layers.77.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 618 }, { "name": "model.layers.77.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 620 }, { "name": "model.layers.77.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 622 }, { "name": "model.layers.78.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 624 }, { "name": "model.layers.78.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 626 }, { "name": "model.layers.78.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 628 }, { "name": "model.layers.78.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 630 }, { "name": "model.layers.79.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 632 }, { "name": "model.layers.79.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 634 }, { "name": "model.layers.79.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 636 }, { "name": "model.layers.79.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2, "byteOffset": 638 } ], "md5sum": "da1be33a2091194cf554da743c4f1db3" } ] }