|
{ |
|
"metadata": { |
|
"ParamSize": 149, |
|
"ParamBytes": 326074368.0, |
|
"BitsPerParam": 16.0 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 77194752, |
|
"records": [ |
|
{ |
|
"name": "lm_head.weight", |
|
"shape": [ |
|
50257, |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 77194752, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8aa629e8739ff337f4983e485d0a145e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 77194752, |
|
"records": [ |
|
{ |
|
"name": "transformer.wte.weight", |
|
"shape": [ |
|
50257, |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 77194752, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8aa629e8739ff337f4983e485d0a145e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33470976, |
|
"records": [ |
|
{ |
|
"name": "transformer.wpe.weight", |
|
"shape": [ |
|
1024, |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1572864, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.0.ln_1.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 1572864 |
|
}, |
|
{ |
|
"name": "transformer.h.0.ln_1.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 1574400 |
|
}, |
|
{ |
|
"name": "transformer.h.0.attn.c_attn.weight", |
|
"shape": [ |
|
2304, |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3538944, |
|
"byteOffset": 1575936 |
|
}, |
|
{ |
|
"name": "transformer.h.0.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 5114880 |
|
}, |
|
{ |
|
"name": "transformer.h.0.attn.c_proj.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 5119488 |
|
}, |
|
{ |
|
"name": "transformer.h.0.attn.c_proj.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 6299136 |
|
}, |
|
{ |
|
"name": "transformer.h.0.ln_2.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 6300672 |
|
}, |
|
{ |
|
"name": "transformer.h.0.ln_2.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 6302208 |
|
}, |
|
{ |
|
"name": "transformer.h.0.mlp.c_fc.weight", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 6303744 |
|
}, |
|
{ |
|
"name": "transformer.h.0.mlp.c_fc.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 11022336 |
|
}, |
|
{ |
|
"name": "transformer.h.0.mlp.c_proj.weight", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 11028480 |
|
}, |
|
{ |
|
"name": "transformer.h.0.mlp.c_proj.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 15747072 |
|
}, |
|
{ |
|
"name": "transformer.h.1.ln_1.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 15748608 |
|
}, |
|
{ |
|
"name": "transformer.h.1.ln_1.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 15750144 |
|
}, |
|
{ |
|
"name": "transformer.h.1.attn.c_attn.weight", |
|
"shape": [ |
|
2304, |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3538944, |
|
"byteOffset": 15751680 |
|
}, |
|
{ |
|
"name": "transformer.h.1.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 19290624 |
|
}, |
|
{ |
|
"name": "transformer.h.1.attn.c_proj.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 19295232 |
|
}, |
|
{ |
|
"name": "transformer.h.1.attn.c_proj.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 20474880 |
|
}, |
|
{ |
|
"name": "transformer.h.1.ln_2.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 20476416 |
|
}, |
|
{ |
|
"name": "transformer.h.1.ln_2.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 20477952 |
|
}, |
|
{ |
|
"name": "transformer.h.1.mlp.c_fc.weight", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 20479488 |
|
}, |
|
{ |
|
"name": "transformer.h.1.mlp.c_fc.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 25198080 |
|
}, |
|
{ |
|
"name": "transformer.h.1.mlp.c_proj.weight", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 25204224 |
|
}, |
|
{ |
|
"name": "transformer.h.1.mlp.c_proj.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 29922816 |
|
}, |
|
{ |
|
"name": "transformer.h.2.ln_1.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 29924352 |
|
}, |
|
{ |
|
"name": "transformer.h.2.ln_1.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 29925888 |
|
}, |
|
{ |
|
"name": "transformer.h.2.attn.c_attn.weight", |
|
"shape": [ |
|
2304, |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3538944, |
|
"byteOffset": 29927424 |
|
}, |
|
{ |
|
"name": "transformer.h.2.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 33466368 |
|
} |
|
], |
|
"md5sum": "748d75aca928d18aac58d8f9301c3bcb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29535744, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.2.attn.c_proj.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.2.attn.c_proj.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 1179648 |
|
}, |
|
{ |
|
"name": "transformer.h.2.ln_2.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 1181184 |
|
}, |
|
{ |
|
"name": "transformer.h.2.ln_2.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 1182720 |
|
}, |
|
{ |
|
"name": "transformer.h.2.mlp.c_fc.weight", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 1184256 |
|
}, |
|
{ |
|
"name": "transformer.h.2.mlp.c_fc.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 5902848 |
|
}, |
|
{ |
|
"name": "transformer.h.2.mlp.c_proj.weight", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 5908992 |
|
}, |
|
{ |
|
"name": "transformer.h.2.mlp.c_proj.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 10627584 |
|
}, |
|
{ |
|
"name": "transformer.h.3.ln_1.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 10629120 |
|
}, |
|
{ |
|
"name": "transformer.h.3.ln_1.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 10630656 |
|
}, |
|
{ |
|
"name": "transformer.h.3.attn.c_attn.weight", |
|
"shape": [ |
|
2304, |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3538944, |
|
"byteOffset": 10632192 |
|
}, |
|
{ |
|
"name": "transformer.h.3.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 14171136 |
|
}, |
|
{ |
|
"name": "transformer.h.3.attn.c_proj.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 14175744 |
|
}, |
|
{ |
|
"name": "transformer.h.3.attn.c_proj.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 15355392 |
|
}, |
|
{ |
|
"name": "transformer.h.3.ln_2.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 15356928 |
|
}, |
|
{ |
|
"name": "transformer.h.3.ln_2.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 15358464 |
|
}, |
|
{ |
|
"name": "transformer.h.3.mlp.c_fc.weight", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 15360000 |
|
}, |
|
{ |
|
"name": "transformer.h.3.mlp.c_fc.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 20078592 |
|
}, |
|
{ |
|
"name": "transformer.h.3.mlp.c_proj.weight", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 20084736 |
|
}, |
|
{ |
|
"name": "transformer.h.3.mlp.c_proj.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 24803328 |
|
}, |
|
{ |
|
"name": "transformer.h.4.ln_1.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 24804864 |
|
}, |
|
{ |
|
"name": "transformer.h.4.ln_1.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 24806400 |
|
}, |
|
{ |
|
"name": "transformer.h.4.attn.c_attn.weight", |
|
"shape": [ |
|
2304, |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3538944, |
|
"byteOffset": 24807936 |
|
}, |
|
{ |
|
"name": "transformer.h.4.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 28346880 |
|
}, |
|
{ |
|
"name": "transformer.h.4.attn.c_proj.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 28351488 |
|
}, |
|
{ |
|
"name": "transformer.h.4.attn.c_proj.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 29531136 |
|
}, |
|
{ |
|
"name": "transformer.h.4.ln_2.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 29532672 |
|
}, |
|
{ |
|
"name": "transformer.h.4.ln_2.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 29534208 |
|
} |
|
], |
|
"md5sum": "94d1cea00ea6a1492cb7bb23eb5d64b5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33076224, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.4.mlp.c_fc.weight", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.4.mlp.c_fc.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 4718592 |
|
}, |
|
{ |
|
"name": "transformer.h.4.mlp.c_proj.weight", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 4724736 |
|
}, |
|
{ |
|
"name": "transformer.h.4.mlp.c_proj.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 9443328 |
|
}, |
|
{ |
|
"name": "transformer.h.5.ln_1.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 9444864 |
|
}, |
|
{ |
|
"name": "transformer.h.5.ln_1.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 9446400 |
|
}, |
|
{ |
|
"name": "transformer.h.5.attn.c_attn.weight", |
|
"shape": [ |
|
2304, |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3538944, |
|
"byteOffset": 9447936 |
|
}, |
|
{ |
|
"name": "transformer.h.5.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 12986880 |
|
}, |
|
{ |
|
"name": "transformer.h.5.attn.c_proj.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 12991488 |
|
}, |
|
{ |
|
"name": "transformer.h.5.attn.c_proj.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 14171136 |
|
}, |
|
{ |
|
"name": "transformer.h.5.ln_2.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 14172672 |
|
}, |
|
{ |
|
"name": "transformer.h.5.ln_2.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 14174208 |
|
}, |
|
{ |
|
"name": "transformer.h.5.mlp.c_fc.weight", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 14175744 |
|
}, |
|
{ |
|
"name": "transformer.h.5.mlp.c_fc.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 18894336 |
|
}, |
|
{ |
|
"name": "transformer.h.5.mlp.c_proj.weight", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 18900480 |
|
}, |
|
{ |
|
"name": "transformer.h.5.mlp.c_proj.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 23619072 |
|
}, |
|
{ |
|
"name": "transformer.h.6.ln_1.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 23620608 |
|
}, |
|
{ |
|
"name": "transformer.h.6.ln_1.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 23622144 |
|
}, |
|
{ |
|
"name": "transformer.h.6.attn.c_attn.weight", |
|
"shape": [ |
|
2304, |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3538944, |
|
"byteOffset": 23623680 |
|
}, |
|
{ |
|
"name": "transformer.h.6.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 27162624 |
|
}, |
|
{ |
|
"name": "transformer.h.6.attn.c_proj.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 27167232 |
|
}, |
|
{ |
|
"name": "transformer.h.6.attn.c_proj.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 28346880 |
|
}, |
|
{ |
|
"name": "transformer.h.6.ln_2.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 28348416 |
|
}, |
|
{ |
|
"name": "transformer.h.6.ln_2.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 28349952 |
|
}, |
|
{ |
|
"name": "transformer.h.6.mlp.c_fc.weight", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 28351488 |
|
}, |
|
{ |
|
"name": "transformer.h.6.mlp.c_fc.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 33070080 |
|
} |
|
], |
|
"md5sum": "43957cf0e0da3bc016aa35099e8a4e53" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33074688, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.6.mlp.c_proj.weight", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.6.mlp.c_proj.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 4718592 |
|
}, |
|
{ |
|
"name": "transformer.h.7.ln_1.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 4720128 |
|
}, |
|
{ |
|
"name": "transformer.h.7.ln_1.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 4721664 |
|
}, |
|
{ |
|
"name": "transformer.h.7.attn.c_attn.weight", |
|
"shape": [ |
|
2304, |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3538944, |
|
"byteOffset": 4723200 |
|
}, |
|
{ |
|
"name": "transformer.h.7.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 8262144 |
|
}, |
|
{ |
|
"name": "transformer.h.7.attn.c_proj.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 8266752 |
|
}, |
|
{ |
|
"name": "transformer.h.7.attn.c_proj.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 9446400 |
|
}, |
|
{ |
|
"name": "transformer.h.7.ln_2.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 9447936 |
|
}, |
|
{ |
|
"name": "transformer.h.7.ln_2.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 9449472 |
|
}, |
|
{ |
|
"name": "transformer.h.7.mlp.c_fc.weight", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 9451008 |
|
}, |
|
{ |
|
"name": "transformer.h.7.mlp.c_fc.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 14169600 |
|
}, |
|
{ |
|
"name": "transformer.h.7.mlp.c_proj.weight", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 14175744 |
|
}, |
|
{ |
|
"name": "transformer.h.7.mlp.c_proj.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 18894336 |
|
}, |
|
{ |
|
"name": "transformer.h.8.ln_1.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 18895872 |
|
}, |
|
{ |
|
"name": "transformer.h.8.ln_1.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 18897408 |
|
}, |
|
{ |
|
"name": "transformer.h.8.attn.c_attn.weight", |
|
"shape": [ |
|
2304, |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3538944, |
|
"byteOffset": 18898944 |
|
}, |
|
{ |
|
"name": "transformer.h.8.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 22437888 |
|
}, |
|
{ |
|
"name": "transformer.h.8.attn.c_proj.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 22442496 |
|
}, |
|
{ |
|
"name": "transformer.h.8.attn.c_proj.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 23622144 |
|
}, |
|
{ |
|
"name": "transformer.h.8.ln_2.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 23623680 |
|
}, |
|
{ |
|
"name": "transformer.h.8.ln_2.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 23625216 |
|
}, |
|
{ |
|
"name": "transformer.h.8.mlp.c_fc.weight", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 23626752 |
|
}, |
|
{ |
|
"name": "transformer.h.8.mlp.c_fc.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 28345344 |
|
}, |
|
{ |
|
"name": "transformer.h.8.mlp.c_proj.weight", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 28351488 |
|
}, |
|
{ |
|
"name": "transformer.h.8.mlp.c_proj.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 33070080 |
|
}, |
|
{ |
|
"name": "transformer.h.9.ln_1.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 33071616 |
|
}, |
|
{ |
|
"name": "transformer.h.9.ln_1.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 33073152 |
|
} |
|
], |
|
"md5sum": "4cbbe2cce00b37c72cd95d22b3aa03d3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33079296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.9.attn.c_attn.weight", |
|
"shape": [ |
|
2304, |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3538944, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.9.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 3538944 |
|
}, |
|
{ |
|
"name": "transformer.h.9.attn.c_proj.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 3543552 |
|
}, |
|
{ |
|
"name": "transformer.h.9.attn.c_proj.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 4723200 |
|
}, |
|
{ |
|
"name": "transformer.h.9.ln_2.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 4724736 |
|
}, |
|
{ |
|
"name": "transformer.h.9.ln_2.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 4726272 |
|
}, |
|
{ |
|
"name": "transformer.h.9.mlp.c_fc.weight", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 4727808 |
|
}, |
|
{ |
|
"name": "transformer.h.9.mlp.c_fc.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 9446400 |
|
}, |
|
{ |
|
"name": "transformer.h.9.mlp.c_proj.weight", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 9452544 |
|
}, |
|
{ |
|
"name": "transformer.h.9.mlp.c_proj.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 14171136 |
|
}, |
|
{ |
|
"name": "transformer.h.10.ln_1.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 14172672 |
|
}, |
|
{ |
|
"name": "transformer.h.10.ln_1.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 14174208 |
|
}, |
|
{ |
|
"name": "transformer.h.10.attn.c_attn.weight", |
|
"shape": [ |
|
2304, |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3538944, |
|
"byteOffset": 14175744 |
|
}, |
|
{ |
|
"name": "transformer.h.10.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 17714688 |
|
}, |
|
{ |
|
"name": "transformer.h.10.attn.c_proj.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 17719296 |
|
}, |
|
{ |
|
"name": "transformer.h.10.attn.c_proj.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 18898944 |
|
}, |
|
{ |
|
"name": "transformer.h.10.ln_2.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 18900480 |
|
}, |
|
{ |
|
"name": "transformer.h.10.ln_2.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 18902016 |
|
}, |
|
{ |
|
"name": "transformer.h.10.mlp.c_fc.weight", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 18903552 |
|
}, |
|
{ |
|
"name": "transformer.h.10.mlp.c_fc.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 23622144 |
|
}, |
|
{ |
|
"name": "transformer.h.10.mlp.c_proj.weight", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 23628288 |
|
}, |
|
{ |
|
"name": "transformer.h.10.mlp.c_proj.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 28346880 |
|
}, |
|
{ |
|
"name": "transformer.h.11.ln_1.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 28348416 |
|
}, |
|
{ |
|
"name": "transformer.h.11.ln_1.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 28349952 |
|
}, |
|
{ |
|
"name": "transformer.h.11.attn.c_attn.weight", |
|
"shape": [ |
|
2304, |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3538944, |
|
"byteOffset": 28351488 |
|
}, |
|
{ |
|
"name": "transformer.h.11.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 31890432 |
|
}, |
|
{ |
|
"name": "transformer.h.11.attn.c_proj.weight", |
|
"shape": [ |
|
768, |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1179648, |
|
"byteOffset": 31895040 |
|
}, |
|
{ |
|
"name": "transformer.h.11.attn.c_proj.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 33074688 |
|
}, |
|
{ |
|
"name": "transformer.h.11.ln_2.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 33076224 |
|
}, |
|
{ |
|
"name": "transformer.h.11.ln_2.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 33077760 |
|
} |
|
], |
|
"md5sum": "4f1aa298f8b0af477effc227d22f5ef1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 9447936, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.11.mlp.c_fc.weight", |
|
"shape": [ |
|
3072, |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.11.mlp.c_fc.bias", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 4718592 |
|
}, |
|
{ |
|
"name": "transformer.h.11.mlp.c_proj.weight", |
|
"shape": [ |
|
768, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 4724736 |
|
}, |
|
{ |
|
"name": "transformer.h.11.mlp.c_proj.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 9443328 |
|
}, |
|
{ |
|
"name": "transformer.ln_f.weight", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 9444864 |
|
}, |
|
{ |
|
"name": "transformer.ln_f.bias", |
|
"shape": [ |
|
768 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1536, |
|
"byteOffset": 9446400 |
|
} |
|
], |
|
"md5sum": "1532478e2b064b31091ff718a8c67188" |
|
} |
|
] |
|
} |