|
{ |
|
"metadata": { |
|
"ParamSize": 195, |
|
"ParamBytes": 7642159104.0, |
|
"BitsPerParam": 16.0 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 197001216, |
|
"records": [ |
|
{ |
|
"name": "lm_head.weight", |
|
"shape": [ |
|
32064, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 197001216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "355cbdc18645ac1e5a7abdb8e0b4f555" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.21.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cf28a1cacc4df6daa4f2ec3d5411ec8b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.21.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8fd160898eedfe8502008427b1789f67" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.21.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7eb060da1eb7a76488c6ad88bb55be68" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.22.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e43ba8574ee0bde28797ac6e1f4290cd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.22.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "978b0567eaf6723e96528dfb9f34ae2c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.22.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6a6a3e4847b7604add5ebee94398d615" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.23.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d8ae3a2ad6177babf2e4f3004c3c1501" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.23.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b9c61a69bea764d8665bd1625c9b003f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.23.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "91ef5f839a886ffd0474465618b5379f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.23.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "268530f14d16de93a92bec1c9f18284a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.24.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cdfdc76d4737aadfa851a741f46e7acd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.24.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bed3c3a48dfbc1a5abcb39f89bf7a023" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.24.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "17f2ace615c304ac4abac920e340c996" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.24.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "129bd9f4ed4d4fc7d24c8529aa95246f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.25.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e280a5b59a72f35c8437aa893feb74f0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.25.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8539094db79c9fd29dfc411a79dba026" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.25.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "faa2883b38c085ca646fc6ab81abd39d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.25.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5591a612e538ff92e1543a71a58eb4b6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.26.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "418024ea7732acd521e1ab189f5c7f82" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.26.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ad28f5efd45052104574d7a743178b10" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.26.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "62d0ca66c268c7f91df5c4aed581e0d1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.26.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f1a779e7c2019bf5ae85d7706e2f084e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.27.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "35352b375985a227aa6840b106026012" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.27.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "738d864fe39149129b85f69c0c0c635e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.27.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "022bb7ede68c0f9ff23735a9fedb4f32" |
|
}, |
|
{ |
|
"dataPath": "params_shard_26.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.27.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0ad69d77d578753787e2fed8df17de42" |
|
}, |
|
{ |
|
"dataPath": "params_shard_27.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.28.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "96422f4a2384d546df2c5f5221e4bcf2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_28.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.28.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5ee3c5fba261ad1ee080be9050b30d8d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_29.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.28.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bfe56c0deb2c00c6036e42be6409c8fc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_30.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.28.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "062da8b2bb613a6a5af09ef1c793b62a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_31.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.29.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "19435e3da71e0f7d237a2e6401d92d78" |
|
}, |
|
{ |
|
"dataPath": "params_shard_32.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.29.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f4fa5b358c8a574c36e5ee466f71c3f7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_33.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.29.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fedd1781bcda6d8cb8a0f216b0491661" |
|
}, |
|
{ |
|
"dataPath": "params_shard_34.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.29.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "807c0c0e0dc33c078780c826c65412f5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_35.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.30.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a8917bfd3a8295471ea8c3d080380700" |
|
}, |
|
{ |
|
"dataPath": "params_shard_36.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.30.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "623da9d2482c4a0aeb3a40a66480e3aa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_37.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.30.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2b595136a8328888c183d11b401b7d41" |
|
}, |
|
{ |
|
"dataPath": "params_shard_38.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.30.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "43ac5afec0fd1d91f6898cdb1e7fa3cb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_39.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.31.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bc50d0f973ffaa22757de43d4d1fbbab" |
|
}, |
|
{ |
|
"dataPath": "params_shard_40.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.31.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1c3934bd451a66c703cf1dce6deee213" |
|
}, |
|
{ |
|
"dataPath": "params_shard_41.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.31.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "10d95c7d01f1bb5ca1e4412a9ed2c2ef" |
|
}, |
|
{ |
|
"dataPath": "params_shard_42.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.31.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ffad92466030bc43e685bc879737b37a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_43.bin", |
|
"format": "raw-shard", |
|
"nbytes": 197001216, |
|
"records": [ |
|
{ |
|
"name": "transformer.embd.weight", |
|
"shape": [ |
|
32064, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 197001216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4ff3016fcb146b8e981591475c80fd55" |
|
}, |
|
{ |
|
"dataPath": "params_shard_44.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.0.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d2089117ea980d37a17b97c1c2480824" |
|
}, |
|
{ |
|
"dataPath": "params_shard_45.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.0.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "08fcdeed7a735b3e54e4d751270702f0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_46.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.0.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a0a39f19ff0ecef7594769e0dde21b14" |
|
}, |
|
{ |
|
"dataPath": "params_shard_47.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.0.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ec47cf48c288af63a08f5e420b0382d0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_48.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.1.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6b520b0fa670029862be7e33ccb1e20c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_49.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.1.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d698683d9729d3dc1f7bd46db350f0e6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_50.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.1.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8dac2a9e07c546b2a03d51d7cd6a6cb3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_51.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.1.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c9b539e646a5ef9c0f33d6622281fc78" |
|
}, |
|
{ |
|
"dataPath": "params_shard_52.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.10.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "185306cd725f9fcdc1d7e219a235de3d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_53.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.10.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5c275cf6a9f185a75990a21b8d636ee0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_54.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.10.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "66a324483e76960da9f8483e09193479" |
|
}, |
|
{ |
|
"dataPath": "params_shard_55.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.10.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ad55826479c758a41057a783bb5cf768" |
|
}, |
|
{ |
|
"dataPath": "params_shard_56.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.11.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8fdcc5d0f096e522c2e997ccc2f9b66f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_57.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.11.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9ba4e5bf1f85b55de33ec8d0fdced1f4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_58.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.11.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "816a461f025574b571ccecf0c502a957" |
|
}, |
|
{ |
|
"dataPath": "params_shard_59.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.11.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e101d259ecf64620fe505791e4e212d9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_60.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.12.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f9e4d509b0406e88f41dab0f1bea9da2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_61.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.12.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e4ea2dd00700fa17ca8347c498ccb6ac" |
|
}, |
|
{ |
|
"dataPath": "params_shard_62.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.12.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0d9a457299a8fb48735c118fa0d17244" |
|
}, |
|
{ |
|
"dataPath": "params_shard_63.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.12.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9c749f5f94b57db159c5e8f0f724db51" |
|
}, |
|
{ |
|
"dataPath": "params_shard_64.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.13.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f2449244ccfdfc157a163b3b203e92c0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_65.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.13.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "db2df24cf56b551bcc2548080570f4d8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_66.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.13.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "785e19ea294ce5b71cc2103be15677e8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_67.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.13.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bfb01523b72ef50058c7ca5419e94b8d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_68.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.14.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "30698dcede7faab88cc026e74edd3516" |
|
}, |
|
{ |
|
"dataPath": "params_shard_69.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.14.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "34ef8f55c49dc254792afaf5aabfccf5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_70.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.14.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6b7707a306c0f3d206d260084c351359" |
|
}, |
|
{ |
|
"dataPath": "params_shard_71.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.14.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "012b59f3bfe838bd356dda02f13a2cfe" |
|
}, |
|
{ |
|
"dataPath": "params_shard_72.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.15.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f467eb2377c883998559bf4aaaea8770" |
|
}, |
|
{ |
|
"dataPath": "params_shard_73.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.15.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "56278aba505322e53a1a3cd2220ae0df" |
|
}, |
|
{ |
|
"dataPath": "params_shard_74.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.15.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1693e38eb67f5348a21cc630262027b4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_75.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.15.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "736fc8cedf7555a03a1d3d3a1773a2b4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_76.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.16.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e625d66e9c304190540b7390fc994c44" |
|
}, |
|
{ |
|
"dataPath": "params_shard_77.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.16.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c67da25e62946eb1baaa539d9688d0f9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_78.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.16.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "98fc4443e34666093d1bb9c377aa5c8f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_79.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.16.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ab449f3f4a1b4d792fdbaa6127f1dc55" |
|
}, |
|
{ |
|
"dataPath": "params_shard_80.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.17.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "96c9762484d3c89e9f3367d63a20133b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_81.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.17.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8cee417759b02ecae8d39c5d82ec1013" |
|
}, |
|
{ |
|
"dataPath": "params_shard_82.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.17.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6a255ae12df2e8f93e706e45c5aa04cc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_83.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.17.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "85e048566ad49eccf1776a90b44b9ea7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_84.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.18.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3e8175b8ccd51257022b868c9b90949c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_85.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.18.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a4ba7d28548c799b45f3f1857f4eda9f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_86.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.18.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ef5b8db6abf1b78b17f13c4350901bd3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_87.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.18.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6fb3d130a34ebc35495cc88aebca0a26" |
|
}, |
|
{ |
|
"dataPath": "params_shard_88.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.19.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4891197d898f1cc23fa9a0eda31f6f86" |
|
}, |
|
{ |
|
"dataPath": "params_shard_89.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.19.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "adb238ba2608c9260149c5de1ade8d0c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_90.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.19.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2db87fb56a7880cf8976bc859388767d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_91.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.19.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "256ada9253db24a7ef0aaa9cff1b5245" |
|
}, |
|
{ |
|
"dataPath": "params_shard_92.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.2.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "44e503562401484d752aaccf40850fdd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_93.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.2.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "92b15620c753a0e3cfe818071a79a1a8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_94.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.2.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e72876345ed49f2200cf289e7ff670e3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_95.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.2.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "11638690d4019df7dbcb5d4ce42ebbd9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_96.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.20.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1e18403502a66158427c885a030b3012" |
|
}, |
|
{ |
|
"dataPath": "params_shard_97.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.20.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cef53d13f9f17f4daf3902459a2bf86e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_98.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.20.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "aca52f56ff0263682722eec9af60ddc5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_99.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.20.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b79439e9730eb2d9c95240fc31a766f3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_100.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.21.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4083534b0423d4cd92f64053a3ecf9a1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_101.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.3.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e05bdedb234537840669f5d402ab2114" |
|
}, |
|
{ |
|
"dataPath": "params_shard_102.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.3.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "869f30f4fcee3f58bae2cb13672f1aa7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_103.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.3.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6d2dca57ddfb9420cf81794a8ebedc67" |
|
}, |
|
{ |
|
"dataPath": "params_shard_104.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.3.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "77f51083eb8e430c4f51d3ff2a2aa150" |
|
}, |
|
{ |
|
"dataPath": "params_shard_105.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.4.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0673d8cc2dcb30f7c9bf8fe7f72fedcf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_106.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.4.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8d74d91f6f0dbce4361fb9bd4319e435" |
|
}, |
|
{ |
|
"dataPath": "params_shard_107.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.4.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a7280ca3d850c9e2c70798ad1827b753" |
|
}, |
|
{ |
|
"dataPath": "params_shard_108.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.4.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "feada6159342059b34ee9ee9f2b5a696" |
|
}, |
|
{ |
|
"dataPath": "params_shard_109.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.5.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c11c8f2bdc0dac4c24d73630a7bc29ce" |
|
}, |
|
{ |
|
"dataPath": "params_shard_110.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.5.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2f405cbb6f4bae8e3d0a6eb6c2247b8b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_111.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.5.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7235c4fff722c734b82860b8a0a9b7d9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_112.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.5.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "78506574176d25f78c32f912055a152d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_113.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.6.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "974f760704e76ebde4b5de88bf2c4907" |
|
}, |
|
{ |
|
"dataPath": "params_shard_114.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.6.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bfe07b2d96e1d263e33a6967393b3ff2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_115.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.6.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7f354a9bed45479ca9a1e00d02fe1b65" |
|
}, |
|
{ |
|
"dataPath": "params_shard_116.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.6.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "baae7b45e7aca08cb608530a819276db" |
|
}, |
|
{ |
|
"dataPath": "params_shard_117.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.7.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f8fe08779e3f816bc0081b3ef7f77126" |
|
}, |
|
{ |
|
"dataPath": "params_shard_118.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.7.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "156dd0541b9bfb11d07e724a38d10cc6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_119.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.7.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d25c30dbcd729ca38d6ec3dd48274825" |
|
}, |
|
{ |
|
"dataPath": "params_shard_120.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.7.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8fa83aeab434da353f36c7f4604958cc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_121.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.8.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cae6c26be7a1e1e6f91d1859e7a57211" |
|
}, |
|
{ |
|
"dataPath": "params_shard_122.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.8.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f1c9153abd2616db5ba5c825a3bebebf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_123.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.8.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3439e666077cd25f208166702d8bc0d6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_124.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.8.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7fc4f7e9e589c6e3cac14a2457d35da0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_125.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.9.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "004c33d05d98ef6d3242cad950e68831" |
|
}, |
|
{ |
|
"dataPath": "params_shard_126.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.9.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "60ef30f75f51b8148b779a4134ba86b3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_127.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.9.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6088d58197ec37d982808983e9b8c234" |
|
}, |
|
{ |
|
"dataPath": "params_shard_128.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.9.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6f2ea673e396f982a532b9bd7612996e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_129.bin", |
|
"format": "raw-shard", |
|
"nbytes": 19273728, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.21.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.21.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 6144 |
|
}, |
|
{ |
|
"name": "transformer.h.22.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 12288 |
|
}, |
|
{ |
|
"name": "transformer.h.22.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 18432 |
|
}, |
|
{ |
|
"name": "transformer.h.22.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 24576 |
|
}, |
|
{ |
|
"name": "transformer.h.23.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 18898944 |
|
}, |
|
{ |
|
"name": "transformer.h.23.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 18905088 |
|
}, |
|
{ |
|
"name": "transformer.h.24.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 18911232 |
|
}, |
|
{ |
|
"name": "transformer.h.24.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 18917376 |
|
}, |
|
{ |
|
"name": "transformer.h.25.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 18923520 |
|
}, |
|
{ |
|
"name": "transformer.h.25.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 18929664 |
|
}, |
|
{ |
|
"name": "transformer.h.26.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 18935808 |
|
}, |
|
{ |
|
"name": "transformer.h.26.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 18941952 |
|
}, |
|
{ |
|
"name": "transformer.h.27.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 18948096 |
|
}, |
|
{ |
|
"name": "transformer.h.27.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 18954240 |
|
}, |
|
{ |
|
"name": "transformer.h.28.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 18960384 |
|
}, |
|
{ |
|
"name": "transformer.h.28.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 18966528 |
|
}, |
|
{ |
|
"name": "transformer.h.29.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 18972672 |
|
}, |
|
{ |
|
"name": "transformer.h.29.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 18978816 |
|
}, |
|
{ |
|
"name": "transformer.h.30.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 18984960 |
|
}, |
|
{ |
|
"name": "transformer.h.30.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 18991104 |
|
}, |
|
{ |
|
"name": "transformer.h.31.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 18997248 |
|
}, |
|
{ |
|
"name": "transformer.h.31.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19003392 |
|
}, |
|
{ |
|
"name": "transformer.norm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19009536 |
|
}, |
|
{ |
|
"name": "transformer.h.0.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19015680 |
|
}, |
|
{ |
|
"name": "transformer.h.0.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19021824 |
|
}, |
|
{ |
|
"name": "transformer.h.1.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19027968 |
|
}, |
|
{ |
|
"name": "transformer.h.1.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19034112 |
|
}, |
|
{ |
|
"name": "transformer.h.10.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19040256 |
|
}, |
|
{ |
|
"name": "transformer.h.10.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19046400 |
|
}, |
|
{ |
|
"name": "transformer.h.11.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19052544 |
|
}, |
|
{ |
|
"name": "transformer.h.11.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19058688 |
|
}, |
|
{ |
|
"name": "transformer.h.12.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19064832 |
|
}, |
|
{ |
|
"name": "transformer.h.12.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19070976 |
|
}, |
|
{ |
|
"name": "transformer.h.13.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19077120 |
|
}, |
|
{ |
|
"name": "transformer.h.13.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19083264 |
|
}, |
|
{ |
|
"name": "transformer.h.14.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19089408 |
|
}, |
|
{ |
|
"name": "transformer.h.14.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19095552 |
|
}, |
|
{ |
|
"name": "transformer.h.15.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19101696 |
|
}, |
|
{ |
|
"name": "transformer.h.15.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19107840 |
|
}, |
|
{ |
|
"name": "transformer.h.16.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19113984 |
|
}, |
|
{ |
|
"name": "transformer.h.16.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19120128 |
|
}, |
|
{ |
|
"name": "transformer.h.17.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19126272 |
|
}, |
|
{ |
|
"name": "transformer.h.17.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19132416 |
|
}, |
|
{ |
|
"name": "transformer.h.18.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19138560 |
|
}, |
|
{ |
|
"name": "transformer.h.18.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19144704 |
|
}, |
|
{ |
|
"name": "transformer.h.19.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19150848 |
|
}, |
|
{ |
|
"name": "transformer.h.19.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19156992 |
|
}, |
|
{ |
|
"name": "transformer.h.2.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19163136 |
|
}, |
|
{ |
|
"name": "transformer.h.2.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19169280 |
|
}, |
|
{ |
|
"name": "transformer.h.20.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19175424 |
|
}, |
|
{ |
|
"name": "transformer.h.20.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19181568 |
|
}, |
|
{ |
|
"name": "transformer.h.3.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19187712 |
|
}, |
|
{ |
|
"name": "transformer.h.3.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19193856 |
|
}, |
|
{ |
|
"name": "transformer.h.4.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19200000 |
|
}, |
|
{ |
|
"name": "transformer.h.4.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19206144 |
|
}, |
|
{ |
|
"name": "transformer.h.5.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19212288 |
|
}, |
|
{ |
|
"name": "transformer.h.5.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19218432 |
|
}, |
|
{ |
|
"name": "transformer.h.6.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19224576 |
|
}, |
|
{ |
|
"name": "transformer.h.6.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19230720 |
|
}, |
|
{ |
|
"name": "transformer.h.7.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19236864 |
|
}, |
|
{ |
|
"name": "transformer.h.7.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19243008 |
|
}, |
|
{ |
|
"name": "transformer.h.8.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19249152 |
|
}, |
|
{ |
|
"name": "transformer.h.8.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19255296 |
|
}, |
|
{ |
|
"name": "transformer.h.9.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19261440 |
|
}, |
|
{ |
|
"name": "transformer.h.9.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19267584 |
|
} |
|
], |
|
"md5sum": "92b95946386cfea46d3fa66d3beb05de" |
|
} |
|
] |
|
} |