gpt2-q0f16-MLC / ndarray-cache.json
CharlieFRuan's picture
Initial commit
49b7cec
{
"metadata": {
"ParamSize": 149,
"ParamBytes": 326074368.0,
"BitsPerParam": 16.0
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 77194752,
"records": [
{
"name": "lm_head.weight",
"shape": [
50257,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 77194752,
"byteOffset": 0
}
],
"md5sum": "8aa629e8739ff337f4983e485d0a145e"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 77194752,
"records": [
{
"name": "transformer.wte.weight",
"shape": [
50257,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 77194752,
"byteOffset": 0
}
],
"md5sum": "8aa629e8739ff337f4983e485d0a145e"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 33470976,
"records": [
{
"name": "transformer.wpe.weight",
"shape": [
1024,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "transformer.h.0.ln_1.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 1572864
},
{
"name": "transformer.h.0.ln_1.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 1574400
},
{
"name": "transformer.h.0.attn.c_attn.weight",
"shape": [
2304,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 1575936
},
{
"name": "transformer.h.0.attn.c_attn.bias",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 5114880
},
{
"name": "transformer.h.0.attn.c_proj.weight",
"shape": [
768,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 5119488
},
{
"name": "transformer.h.0.attn.c_proj.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 6299136
},
{
"name": "transformer.h.0.ln_2.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 6300672
},
{
"name": "transformer.h.0.ln_2.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 6302208
},
{
"name": "transformer.h.0.mlp.c_fc.weight",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 6303744
},
{
"name": "transformer.h.0.mlp.c_fc.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 11022336
},
{
"name": "transformer.h.0.mlp.c_proj.weight",
"shape": [
768,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 11028480
},
{
"name": "transformer.h.0.mlp.c_proj.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 15747072
},
{
"name": "transformer.h.1.ln_1.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 15748608
},
{
"name": "transformer.h.1.ln_1.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 15750144
},
{
"name": "transformer.h.1.attn.c_attn.weight",
"shape": [
2304,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 15751680
},
{
"name": "transformer.h.1.attn.c_attn.bias",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 19290624
},
{
"name": "transformer.h.1.attn.c_proj.weight",
"shape": [
768,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 19295232
},
{
"name": "transformer.h.1.attn.c_proj.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 20474880
},
{
"name": "transformer.h.1.ln_2.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 20476416
},
{
"name": "transformer.h.1.ln_2.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 20477952
},
{
"name": "transformer.h.1.mlp.c_fc.weight",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 20479488
},
{
"name": "transformer.h.1.mlp.c_fc.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 25198080
},
{
"name": "transformer.h.1.mlp.c_proj.weight",
"shape": [
768,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 25204224
},
{
"name": "transformer.h.1.mlp.c_proj.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 29922816
},
{
"name": "transformer.h.2.ln_1.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 29924352
},
{
"name": "transformer.h.2.ln_1.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 29925888
},
{
"name": "transformer.h.2.attn.c_attn.weight",
"shape": [
2304,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 29927424
},
{
"name": "transformer.h.2.attn.c_attn.bias",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 33466368
}
],
"md5sum": "748d75aca928d18aac58d8f9301c3bcb"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 29535744,
"records": [
{
"name": "transformer.h.2.attn.c_proj.weight",
"shape": [
768,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 0
},
{
"name": "transformer.h.2.attn.c_proj.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 1179648
},
{
"name": "transformer.h.2.ln_2.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 1181184
},
{
"name": "transformer.h.2.ln_2.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 1182720
},
{
"name": "transformer.h.2.mlp.c_fc.weight",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 1184256
},
{
"name": "transformer.h.2.mlp.c_fc.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 5902848
},
{
"name": "transformer.h.2.mlp.c_proj.weight",
"shape": [
768,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 5908992
},
{
"name": "transformer.h.2.mlp.c_proj.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 10627584
},
{
"name": "transformer.h.3.ln_1.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 10629120
},
{
"name": "transformer.h.3.ln_1.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 10630656
},
{
"name": "transformer.h.3.attn.c_attn.weight",
"shape": [
2304,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 10632192
},
{
"name": "transformer.h.3.attn.c_attn.bias",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 14171136
},
{
"name": "transformer.h.3.attn.c_proj.weight",
"shape": [
768,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 14175744
},
{
"name": "transformer.h.3.attn.c_proj.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 15355392
},
{
"name": "transformer.h.3.ln_2.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 15356928
},
{
"name": "transformer.h.3.ln_2.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 15358464
},
{
"name": "transformer.h.3.mlp.c_fc.weight",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 15360000
},
{
"name": "transformer.h.3.mlp.c_fc.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 20078592
},
{
"name": "transformer.h.3.mlp.c_proj.weight",
"shape": [
768,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 20084736
},
{
"name": "transformer.h.3.mlp.c_proj.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 24803328
},
{
"name": "transformer.h.4.ln_1.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 24804864
},
{
"name": "transformer.h.4.ln_1.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 24806400
},
{
"name": "transformer.h.4.attn.c_attn.weight",
"shape": [
2304,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 24807936
},
{
"name": "transformer.h.4.attn.c_attn.bias",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 28346880
},
{
"name": "transformer.h.4.attn.c_proj.weight",
"shape": [
768,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 28351488
},
{
"name": "transformer.h.4.attn.c_proj.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 29531136
},
{
"name": "transformer.h.4.ln_2.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 29532672
},
{
"name": "transformer.h.4.ln_2.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 29534208
}
],
"md5sum": "94d1cea00ea6a1492cb7bb23eb5d64b5"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 33076224,
"records": [
{
"name": "transformer.h.4.mlp.c_fc.weight",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.4.mlp.c_fc.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 4718592
},
{
"name": "transformer.h.4.mlp.c_proj.weight",
"shape": [
768,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 4724736
},
{
"name": "transformer.h.4.mlp.c_proj.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 9443328
},
{
"name": "transformer.h.5.ln_1.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 9444864
},
{
"name": "transformer.h.5.ln_1.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 9446400
},
{
"name": "transformer.h.5.attn.c_attn.weight",
"shape": [
2304,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 9447936
},
{
"name": "transformer.h.5.attn.c_attn.bias",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 12986880
},
{
"name": "transformer.h.5.attn.c_proj.weight",
"shape": [
768,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 12991488
},
{
"name": "transformer.h.5.attn.c_proj.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 14171136
},
{
"name": "transformer.h.5.ln_2.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 14172672
},
{
"name": "transformer.h.5.ln_2.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 14174208
},
{
"name": "transformer.h.5.mlp.c_fc.weight",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 14175744
},
{
"name": "transformer.h.5.mlp.c_fc.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 18894336
},
{
"name": "transformer.h.5.mlp.c_proj.weight",
"shape": [
768,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 18900480
},
{
"name": "transformer.h.5.mlp.c_proj.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 23619072
},
{
"name": "transformer.h.6.ln_1.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 23620608
},
{
"name": "transformer.h.6.ln_1.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 23622144
},
{
"name": "transformer.h.6.attn.c_attn.weight",
"shape": [
2304,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 23623680
},
{
"name": "transformer.h.6.attn.c_attn.bias",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 27162624
},
{
"name": "transformer.h.6.attn.c_proj.weight",
"shape": [
768,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 27167232
},
{
"name": "transformer.h.6.attn.c_proj.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 28346880
},
{
"name": "transformer.h.6.ln_2.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 28348416
},
{
"name": "transformer.h.6.ln_2.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 28349952
},
{
"name": "transformer.h.6.mlp.c_fc.weight",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28351488
},
{
"name": "transformer.h.6.mlp.c_fc.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33070080
}
],
"md5sum": "43957cf0e0da3bc016aa35099e8a4e53"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 33074688,
"records": [
{
"name": "transformer.h.6.mlp.c_proj.weight",
"shape": [
768,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.6.mlp.c_proj.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 4718592
},
{
"name": "transformer.h.7.ln_1.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 4720128
},
{
"name": "transformer.h.7.ln_1.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 4721664
},
{
"name": "transformer.h.7.attn.c_attn.weight",
"shape": [
2304,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 4723200
},
{
"name": "transformer.h.7.attn.c_attn.bias",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 8262144
},
{
"name": "transformer.h.7.attn.c_proj.weight",
"shape": [
768,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 8266752
},
{
"name": "transformer.h.7.attn.c_proj.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 9446400
},
{
"name": "transformer.h.7.ln_2.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 9447936
},
{
"name": "transformer.h.7.ln_2.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 9449472
},
{
"name": "transformer.h.7.mlp.c_fc.weight",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 9451008
},
{
"name": "transformer.h.7.mlp.c_fc.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14169600
},
{
"name": "transformer.h.7.mlp.c_proj.weight",
"shape": [
768,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 14175744
},
{
"name": "transformer.h.7.mlp.c_proj.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 18894336
},
{
"name": "transformer.h.8.ln_1.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 18895872
},
{
"name": "transformer.h.8.ln_1.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 18897408
},
{
"name": "transformer.h.8.attn.c_attn.weight",
"shape": [
2304,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 18898944
},
{
"name": "transformer.h.8.attn.c_attn.bias",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 22437888
},
{
"name": "transformer.h.8.attn.c_proj.weight",
"shape": [
768,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 22442496
},
{
"name": "transformer.h.8.attn.c_proj.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 23622144
},
{
"name": "transformer.h.8.ln_2.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 23623680
},
{
"name": "transformer.h.8.ln_2.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 23625216
},
{
"name": "transformer.h.8.mlp.c_fc.weight",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 23626752
},
{
"name": "transformer.h.8.mlp.c_fc.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28345344
},
{
"name": "transformer.h.8.mlp.c_proj.weight",
"shape": [
768,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28351488
},
{
"name": "transformer.h.8.mlp.c_proj.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 33070080
},
{
"name": "transformer.h.9.ln_1.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 33071616
},
{
"name": "transformer.h.9.ln_1.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 33073152
}
],
"md5sum": "4cbbe2cce00b37c72cd95d22b3aa03d3"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 33079296,
"records": [
{
"name": "transformer.h.9.attn.c_attn.weight",
"shape": [
2304,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 0
},
{
"name": "transformer.h.9.attn.c_attn.bias",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 3538944
},
{
"name": "transformer.h.9.attn.c_proj.weight",
"shape": [
768,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 3543552
},
{
"name": "transformer.h.9.attn.c_proj.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 4723200
},
{
"name": "transformer.h.9.ln_2.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 4724736
},
{
"name": "transformer.h.9.ln_2.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 4726272
},
{
"name": "transformer.h.9.mlp.c_fc.weight",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 4727808
},
{
"name": "transformer.h.9.mlp.c_fc.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 9446400
},
{
"name": "transformer.h.9.mlp.c_proj.weight",
"shape": [
768,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 9452544
},
{
"name": "transformer.h.9.mlp.c_proj.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 14171136
},
{
"name": "transformer.h.10.ln_1.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 14172672
},
{
"name": "transformer.h.10.ln_1.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 14174208
},
{
"name": "transformer.h.10.attn.c_attn.weight",
"shape": [
2304,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 14175744
},
{
"name": "transformer.h.10.attn.c_attn.bias",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 17714688
},
{
"name": "transformer.h.10.attn.c_proj.weight",
"shape": [
768,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 17719296
},
{
"name": "transformer.h.10.attn.c_proj.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 18898944
},
{
"name": "transformer.h.10.ln_2.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 18900480
},
{
"name": "transformer.h.10.ln_2.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 18902016
},
{
"name": "transformer.h.10.mlp.c_fc.weight",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 18903552
},
{
"name": "transformer.h.10.mlp.c_fc.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23622144
},
{
"name": "transformer.h.10.mlp.c_proj.weight",
"shape": [
768,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 23628288
},
{
"name": "transformer.h.10.mlp.c_proj.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 28346880
},
{
"name": "transformer.h.11.ln_1.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 28348416
},
{
"name": "transformer.h.11.ln_1.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 28349952
},
{
"name": "transformer.h.11.attn.c_attn.weight",
"shape": [
2304,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 28351488
},
{
"name": "transformer.h.11.attn.c_attn.bias",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 31890432
},
{
"name": "transformer.h.11.attn.c_proj.weight",
"shape": [
768,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 31895040
},
{
"name": "transformer.h.11.attn.c_proj.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 33074688
},
{
"name": "transformer.h.11.ln_2.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 33076224
},
{
"name": "transformer.h.11.ln_2.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 33077760
}
],
"md5sum": "4f1aa298f8b0af477effc227d22f5ef1"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 9447936,
"records": [
{
"name": "transformer.h.11.mlp.c_fc.weight",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.11.mlp.c_fc.bias",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 4718592
},
{
"name": "transformer.h.11.mlp.c_proj.weight",
"shape": [
768,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 4724736
},
{
"name": "transformer.h.11.mlp.c_proj.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 9443328
},
{
"name": "transformer.ln_f.weight",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 9444864
},
{
"name": "transformer.ln_f.bias",
"shape": [
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1536,
"byteOffset": 9446400
}
],
"md5sum": "1532478e2b064b31091ff718a8c67188"
}
]
}