CharlieFRuan commited on
Commit
97c3b34
1 Parent(s): 165792a

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
ndarray-cache-b16.json ADDED
@@ -0,0 +1,1446 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "ParamSize": 98,
4
+ "ParamBytes": 4943257600.0,
5
+ "BitsPerParam": 32.0
6
+ },
7
+ "records": [
8
+ {
9
+ "dataPath": "params_shard_0.bin",
10
+ "format": "raw-shard",
11
+ "nbytes": 525336576,
12
+ "records": [
13
+ {
14
+ "name": "model.embed_tokens.weight",
15
+ "shape": [
16
+ 128256,
17
+ 2048
18
+ ],
19
+ "dtype": "bfloat16",
20
+ "format": "raw",
21
+ "nbytes": 525336576,
22
+ "byteOffset": 0
23
+ }
24
+ ],
25
+ "md5sum": "2097305f352cca56c3dc0ad1edb5bcbf"
26
+ },
27
+ {
28
+ "dataPath": "params_shard_1.bin",
29
+ "format": "raw-shard",
30
+ "nbytes": 33554432,
31
+ "records": [
32
+ {
33
+ "name": "model.layers.0.mlp.down_proj.weight",
34
+ "shape": [
35
+ 2048,
36
+ 8192
37
+ ],
38
+ "dtype": "bfloat16",
39
+ "format": "raw",
40
+ "nbytes": 33554432,
41
+ "byteOffset": 0
42
+ }
43
+ ],
44
+ "md5sum": "800df08c617c74598d4ca6af76553de0"
45
+ },
46
+ {
47
+ "dataPath": "params_shard_2.bin",
48
+ "format": "raw-shard",
49
+ "nbytes": 67108864,
50
+ "records": [
51
+ {
52
+ "name": "model.layers.0.mlp.gate_up_proj.weight",
53
+ "shape": [
54
+ 16384,
55
+ 2048
56
+ ],
57
+ "dtype": "bfloat16",
58
+ "format": "raw",
59
+ "nbytes": 67108864,
60
+ "byteOffset": 0
61
+ }
62
+ ],
63
+ "md5sum": "eb3b214c8e085725718f03cf804164ae"
64
+ },
65
+ {
66
+ "dataPath": "params_shard_3.bin",
67
+ "format": "raw-shard",
68
+ "nbytes": 33554432,
69
+ "records": [
70
+ {
71
+ "name": "model.layers.1.mlp.down_proj.weight",
72
+ "shape": [
73
+ 2048,
74
+ 8192
75
+ ],
76
+ "dtype": "bfloat16",
77
+ "format": "raw",
78
+ "nbytes": 33554432,
79
+ "byteOffset": 0
80
+ }
81
+ ],
82
+ "md5sum": "6a9a2411562a216b805451d914163a8c"
83
+ },
84
+ {
85
+ "dataPath": "params_shard_4.bin",
86
+ "format": "raw-shard",
87
+ "nbytes": 67108864,
88
+ "records": [
89
+ {
90
+ "name": "model.layers.1.mlp.gate_up_proj.weight",
91
+ "shape": [
92
+ 16384,
93
+ 2048
94
+ ],
95
+ "dtype": "bfloat16",
96
+ "format": "raw",
97
+ "nbytes": 67108864,
98
+ "byteOffset": 0
99
+ }
100
+ ],
101
+ "md5sum": "32b4b83389df3b212ef853f8f19fc2d1"
102
+ },
103
+ {
104
+ "dataPath": "params_shard_5.bin",
105
+ "format": "raw-shard",
106
+ "nbytes": 20987904,
107
+ "records": [
108
+ {
109
+ "name": "model.layers.0.input_layernorm.weight",
110
+ "shape": [
111
+ 2048
112
+ ],
113
+ "dtype": "bfloat16",
114
+ "format": "raw",
115
+ "nbytes": 4096,
116
+ "byteOffset": 0
117
+ },
118
+ {
119
+ "name": "model.layers.0.post_attention_layernorm.weight",
120
+ "shape": [
121
+ 2048
122
+ ],
123
+ "dtype": "bfloat16",
124
+ "format": "raw",
125
+ "nbytes": 4096,
126
+ "byteOffset": 4096
127
+ },
128
+ {
129
+ "name": "model.layers.0.self_attn.qkv_proj.weight",
130
+ "shape": [
131
+ 3072,
132
+ 2048
133
+ ],
134
+ "dtype": "bfloat16",
135
+ "format": "raw",
136
+ "nbytes": 12582912,
137
+ "byteOffset": 8192
138
+ },
139
+ {
140
+ "name": "model.layers.0.self_attn.o_proj.weight",
141
+ "shape": [
142
+ 2048,
143
+ 2048
144
+ ],
145
+ "dtype": "bfloat16",
146
+ "format": "raw",
147
+ "nbytes": 8388608,
148
+ "byteOffset": 12591104
149
+ },
150
+ {
151
+ "name": "model.layers.1.input_layernorm.weight",
152
+ "shape": [
153
+ 2048
154
+ ],
155
+ "dtype": "bfloat16",
156
+ "format": "raw",
157
+ "nbytes": 4096,
158
+ "byteOffset": 20979712
159
+ },
160
+ {
161
+ "name": "model.layers.1.post_attention_layernorm.weight",
162
+ "shape": [
163
+ 2048
164
+ ],
165
+ "dtype": "bfloat16",
166
+ "format": "raw",
167
+ "nbytes": 4096,
168
+ "byteOffset": 20983808
169
+ }
170
+ ],
171
+ "md5sum": "35b8ff220ef5e2e6627dcfb621fb832d"
172
+ },
173
+ {
174
+ "dataPath": "params_shard_6.bin",
175
+ "format": "raw-shard",
176
+ "nbytes": 33554432,
177
+ "records": [
178
+ {
179
+ "name": "model.layers.10.mlp.down_proj.weight",
180
+ "shape": [
181
+ 2048,
182
+ 8192
183
+ ],
184
+ "dtype": "bfloat16",
185
+ "format": "raw",
186
+ "nbytes": 33554432,
187
+ "byteOffset": 0
188
+ }
189
+ ],
190
+ "md5sum": "9d8db30764216ea077a052dcd20fb2c4"
191
+ },
192
+ {
193
+ "dataPath": "params_shard_7.bin",
194
+ "format": "raw-shard",
195
+ "nbytes": 67108864,
196
+ "records": [
197
+ {
198
+ "name": "model.layers.10.mlp.gate_up_proj.weight",
199
+ "shape": [
200
+ 16384,
201
+ 2048
202
+ ],
203
+ "dtype": "bfloat16",
204
+ "format": "raw",
205
+ "nbytes": 67108864,
206
+ "byteOffset": 0
207
+ }
208
+ ],
209
+ "md5sum": "0856245486dd0d049581359e670f33e0"
210
+ },
211
+ {
212
+ "dataPath": "params_shard_8.bin",
213
+ "format": "raw-shard",
214
+ "nbytes": 20979712,
215
+ "records": [
216
+ {
217
+ "name": "model.layers.1.self_attn.qkv_proj.weight",
218
+ "shape": [
219
+ 3072,
220
+ 2048
221
+ ],
222
+ "dtype": "bfloat16",
223
+ "format": "raw",
224
+ "nbytes": 12582912,
225
+ "byteOffset": 0
226
+ },
227
+ {
228
+ "name": "model.layers.1.self_attn.o_proj.weight",
229
+ "shape": [
230
+ 2048,
231
+ 2048
232
+ ],
233
+ "dtype": "bfloat16",
234
+ "format": "raw",
235
+ "nbytes": 8388608,
236
+ "byteOffset": 12582912
237
+ },
238
+ {
239
+ "name": "model.layers.10.input_layernorm.weight",
240
+ "shape": [
241
+ 2048
242
+ ],
243
+ "dtype": "bfloat16",
244
+ "format": "raw",
245
+ "nbytes": 4096,
246
+ "byteOffset": 20971520
247
+ },
248
+ {
249
+ "name": "model.layers.10.post_attention_layernorm.weight",
250
+ "shape": [
251
+ 2048
252
+ ],
253
+ "dtype": "bfloat16",
254
+ "format": "raw",
255
+ "nbytes": 4096,
256
+ "byteOffset": 20975616
257
+ }
258
+ ],
259
+ "md5sum": "af71427b909126ccc9a9635a8279e0a9"
260
+ },
261
+ {
262
+ "dataPath": "params_shard_9.bin",
263
+ "format": "raw-shard",
264
+ "nbytes": 33554432,
265
+ "records": [
266
+ {
267
+ "name": "model.layers.11.mlp.down_proj.weight",
268
+ "shape": [
269
+ 2048,
270
+ 8192
271
+ ],
272
+ "dtype": "bfloat16",
273
+ "format": "raw",
274
+ "nbytes": 33554432,
275
+ "byteOffset": 0
276
+ }
277
+ ],
278
+ "md5sum": "795af0ca2c531da387572abece860a7e"
279
+ },
280
+ {
281
+ "dataPath": "params_shard_10.bin",
282
+ "format": "raw-shard",
283
+ "nbytes": 67108864,
284
+ "records": [
285
+ {
286
+ "name": "model.layers.11.mlp.gate_up_proj.weight",
287
+ "shape": [
288
+ 16384,
289
+ 2048
290
+ ],
291
+ "dtype": "bfloat16",
292
+ "format": "raw",
293
+ "nbytes": 67108864,
294
+ "byteOffset": 0
295
+ }
296
+ ],
297
+ "md5sum": "4c8d8ed56e53eb99608130db0c1de05d"
298
+ },
299
+ {
300
+ "dataPath": "params_shard_11.bin",
301
+ "format": "raw-shard",
302
+ "nbytes": 20979712,
303
+ "records": [
304
+ {
305
+ "name": "model.layers.10.self_attn.qkv_proj.weight",
306
+ "shape": [
307
+ 3072,
308
+ 2048
309
+ ],
310
+ "dtype": "bfloat16",
311
+ "format": "raw",
312
+ "nbytes": 12582912,
313
+ "byteOffset": 0
314
+ },
315
+ {
316
+ "name": "model.layers.10.self_attn.o_proj.weight",
317
+ "shape": [
318
+ 2048,
319
+ 2048
320
+ ],
321
+ "dtype": "bfloat16",
322
+ "format": "raw",
323
+ "nbytes": 8388608,
324
+ "byteOffset": 12582912
325
+ },
326
+ {
327
+ "name": "model.layers.11.input_layernorm.weight",
328
+ "shape": [
329
+ 2048
330
+ ],
331
+ "dtype": "bfloat16",
332
+ "format": "raw",
333
+ "nbytes": 4096,
334
+ "byteOffset": 20971520
335
+ },
336
+ {
337
+ "name": "model.layers.11.post_attention_layernorm.weight",
338
+ "shape": [
339
+ 2048
340
+ ],
341
+ "dtype": "bfloat16",
342
+ "format": "raw",
343
+ "nbytes": 4096,
344
+ "byteOffset": 20975616
345
+ }
346
+ ],
347
+ "md5sum": "f7628a591cb04c54546aaaa4f9398a68"
348
+ },
349
+ {
350
+ "dataPath": "params_shard_12.bin",
351
+ "format": "raw-shard",
352
+ "nbytes": 33554432,
353
+ "records": [
354
+ {
355
+ "name": "model.layers.12.mlp.down_proj.weight",
356
+ "shape": [
357
+ 2048,
358
+ 8192
359
+ ],
360
+ "dtype": "bfloat16",
361
+ "format": "raw",
362
+ "nbytes": 33554432,
363
+ "byteOffset": 0
364
+ }
365
+ ],
366
+ "md5sum": "64f4c86756397349e929cd1f2f849e5c"
367
+ },
368
+ {
369
+ "dataPath": "params_shard_13.bin",
370
+ "format": "raw-shard",
371
+ "nbytes": 67108864,
372
+ "records": [
373
+ {
374
+ "name": "model.layers.12.mlp.gate_up_proj.weight",
375
+ "shape": [
376
+ 16384,
377
+ 2048
378
+ ],
379
+ "dtype": "bfloat16",
380
+ "format": "raw",
381
+ "nbytes": 67108864,
382
+ "byteOffset": 0
383
+ }
384
+ ],
385
+ "md5sum": "ea4ff7cec9e3c5ec3c805a6586bdd2ee"
386
+ },
387
+ {
388
+ "dataPath": "params_shard_14.bin",
389
+ "format": "raw-shard",
390
+ "nbytes": 20979712,
391
+ "records": [
392
+ {
393
+ "name": "model.layers.11.self_attn.qkv_proj.weight",
394
+ "shape": [
395
+ 3072,
396
+ 2048
397
+ ],
398
+ "dtype": "bfloat16",
399
+ "format": "raw",
400
+ "nbytes": 12582912,
401
+ "byteOffset": 0
402
+ },
403
+ {
404
+ "name": "model.layers.11.self_attn.o_proj.weight",
405
+ "shape": [
406
+ 2048,
407
+ 2048
408
+ ],
409
+ "dtype": "bfloat16",
410
+ "format": "raw",
411
+ "nbytes": 8388608,
412
+ "byteOffset": 12582912
413
+ },
414
+ {
415
+ "name": "model.layers.12.input_layernorm.weight",
416
+ "shape": [
417
+ 2048
418
+ ],
419
+ "dtype": "bfloat16",
420
+ "format": "raw",
421
+ "nbytes": 4096,
422
+ "byteOffset": 20971520
423
+ },
424
+ {
425
+ "name": "model.layers.12.post_attention_layernorm.weight",
426
+ "shape": [
427
+ 2048
428
+ ],
429
+ "dtype": "bfloat16",
430
+ "format": "raw",
431
+ "nbytes": 4096,
432
+ "byteOffset": 20975616
433
+ }
434
+ ],
435
+ "md5sum": "90f829661b428a1095ad0216de8f525e"
436
+ },
437
+ {
438
+ "dataPath": "params_shard_15.bin",
439
+ "format": "raw-shard",
440
+ "nbytes": 33554432,
441
+ "records": [
442
+ {
443
+ "name": "model.layers.13.mlp.down_proj.weight",
444
+ "shape": [
445
+ 2048,
446
+ 8192
447
+ ],
448
+ "dtype": "bfloat16",
449
+ "format": "raw",
450
+ "nbytes": 33554432,
451
+ "byteOffset": 0
452
+ }
453
+ ],
454
+ "md5sum": "fb6f9e666f95e8149da896c79bbb730c"
455
+ },
456
+ {
457
+ "dataPath": "params_shard_16.bin",
458
+ "format": "raw-shard",
459
+ "nbytes": 67108864,
460
+ "records": [
461
+ {
462
+ "name": "model.layers.13.mlp.gate_up_proj.weight",
463
+ "shape": [
464
+ 16384,
465
+ 2048
466
+ ],
467
+ "dtype": "bfloat16",
468
+ "format": "raw",
469
+ "nbytes": 67108864,
470
+ "byteOffset": 0
471
+ }
472
+ ],
473
+ "md5sum": "097a18cf907839e45e7bceedcfeee09c"
474
+ },
475
+ {
476
+ "dataPath": "params_shard_17.bin",
477
+ "format": "raw-shard",
478
+ "nbytes": 20979712,
479
+ "records": [
480
+ {
481
+ "name": "model.layers.12.self_attn.qkv_proj.weight",
482
+ "shape": [
483
+ 3072,
484
+ 2048
485
+ ],
486
+ "dtype": "bfloat16",
487
+ "format": "raw",
488
+ "nbytes": 12582912,
489
+ "byteOffset": 0
490
+ },
491
+ {
492
+ "name": "model.layers.12.self_attn.o_proj.weight",
493
+ "shape": [
494
+ 2048,
495
+ 2048
496
+ ],
497
+ "dtype": "bfloat16",
498
+ "format": "raw",
499
+ "nbytes": 8388608,
500
+ "byteOffset": 12582912
501
+ },
502
+ {
503
+ "name": "model.layers.13.input_layernorm.weight",
504
+ "shape": [
505
+ 2048
506
+ ],
507
+ "dtype": "bfloat16",
508
+ "format": "raw",
509
+ "nbytes": 4096,
510
+ "byteOffset": 20971520
511
+ },
512
+ {
513
+ "name": "model.layers.13.post_attention_layernorm.weight",
514
+ "shape": [
515
+ 2048
516
+ ],
517
+ "dtype": "bfloat16",
518
+ "format": "raw",
519
+ "nbytes": 4096,
520
+ "byteOffset": 20975616
521
+ }
522
+ ],
523
+ "md5sum": "8c3f48c9a1fb32d02410baeb954f1bb6"
524
+ },
525
+ {
526
+ "dataPath": "params_shard_18.bin",
527
+ "format": "raw-shard",
528
+ "nbytes": 33554432,
529
+ "records": [
530
+ {
531
+ "name": "model.layers.14.mlp.down_proj.weight",
532
+ "shape": [
533
+ 2048,
534
+ 8192
535
+ ],
536
+ "dtype": "bfloat16",
537
+ "format": "raw",
538
+ "nbytes": 33554432,
539
+ "byteOffset": 0
540
+ }
541
+ ],
542
+ "md5sum": "ab16992964f5135c087f55a6c96eaf30"
543
+ },
544
+ {
545
+ "dataPath": "params_shard_19.bin",
546
+ "format": "raw-shard",
547
+ "nbytes": 67108864,
548
+ "records": [
549
+ {
550
+ "name": "model.layers.14.mlp.gate_up_proj.weight",
551
+ "shape": [
552
+ 16384,
553
+ 2048
554
+ ],
555
+ "dtype": "bfloat16",
556
+ "format": "raw",
557
+ "nbytes": 67108864,
558
+ "byteOffset": 0
559
+ }
560
+ ],
561
+ "md5sum": "75e6fc44ba742deec3aea8e1bd310f22"
562
+ },
563
+ {
564
+ "dataPath": "params_shard_20.bin",
565
+ "format": "raw-shard",
566
+ "nbytes": 20979712,
567
+ "records": [
568
+ {
569
+ "name": "model.layers.13.self_attn.qkv_proj.weight",
570
+ "shape": [
571
+ 3072,
572
+ 2048
573
+ ],
574
+ "dtype": "bfloat16",
575
+ "format": "raw",
576
+ "nbytes": 12582912,
577
+ "byteOffset": 0
578
+ },
579
+ {
580
+ "name": "model.layers.13.self_attn.o_proj.weight",
581
+ "shape": [
582
+ 2048,
583
+ 2048
584
+ ],
585
+ "dtype": "bfloat16",
586
+ "format": "raw",
587
+ "nbytes": 8388608,
588
+ "byteOffset": 12582912
589
+ },
590
+ {
591
+ "name": "model.layers.14.input_layernorm.weight",
592
+ "shape": [
593
+ 2048
594
+ ],
595
+ "dtype": "bfloat16",
596
+ "format": "raw",
597
+ "nbytes": 4096,
598
+ "byteOffset": 20971520
599
+ },
600
+ {
601
+ "name": "model.layers.14.post_attention_layernorm.weight",
602
+ "shape": [
603
+ 2048
604
+ ],
605
+ "dtype": "bfloat16",
606
+ "format": "raw",
607
+ "nbytes": 4096,
608
+ "byteOffset": 20975616
609
+ }
610
+ ],
611
+ "md5sum": "a2c7db5bc8440c57f50b3c38a403c9f8"
612
+ },
613
+ {
614
+ "dataPath": "params_shard_21.bin",
615
+ "format": "raw-shard",
616
+ "nbytes": 33554432,
617
+ "records": [
618
+ {
619
+ "name": "model.layers.15.mlp.down_proj.weight",
620
+ "shape": [
621
+ 2048,
622
+ 8192
623
+ ],
624
+ "dtype": "bfloat16",
625
+ "format": "raw",
626
+ "nbytes": 33554432,
627
+ "byteOffset": 0
628
+ }
629
+ ],
630
+ "md5sum": "3fe8a1e2c9e49e659f3411c7e138cd9f"
631
+ },
632
+ {
633
+ "dataPath": "params_shard_22.bin",
634
+ "format": "raw-shard",
635
+ "nbytes": 67108864,
636
+ "records": [
637
+ {
638
+ "name": "model.layers.15.mlp.gate_up_proj.weight",
639
+ "shape": [
640
+ 16384,
641
+ 2048
642
+ ],
643
+ "dtype": "bfloat16",
644
+ "format": "raw",
645
+ "nbytes": 67108864,
646
+ "byteOffset": 0
647
+ }
648
+ ],
649
+ "md5sum": "6929ff78c04aaf58088e2b0fd33c61c4"
650
+ },
651
+ {
652
+ "dataPath": "params_shard_23.bin",
653
+ "format": "raw-shard",
654
+ "nbytes": 20979712,
655
+ "records": [
656
+ {
657
+ "name": "model.layers.14.self_attn.qkv_proj.weight",
658
+ "shape": [
659
+ 3072,
660
+ 2048
661
+ ],
662
+ "dtype": "bfloat16",
663
+ "format": "raw",
664
+ "nbytes": 12582912,
665
+ "byteOffset": 0
666
+ },
667
+ {
668
+ "name": "model.layers.14.self_attn.o_proj.weight",
669
+ "shape": [
670
+ 2048,
671
+ 2048
672
+ ],
673
+ "dtype": "bfloat16",
674
+ "format": "raw",
675
+ "nbytes": 8388608,
676
+ "byteOffset": 12582912
677
+ },
678
+ {
679
+ "name": "model.layers.15.input_layernorm.weight",
680
+ "shape": [
681
+ 2048
682
+ ],
683
+ "dtype": "bfloat16",
684
+ "format": "raw",
685
+ "nbytes": 4096,
686
+ "byteOffset": 20971520
687
+ },
688
+ {
689
+ "name": "model.layers.15.post_attention_layernorm.weight",
690
+ "shape": [
691
+ 2048
692
+ ],
693
+ "dtype": "bfloat16",
694
+ "format": "raw",
695
+ "nbytes": 4096,
696
+ "byteOffset": 20975616
697
+ }
698
+ ],
699
+ "md5sum": "50aa881a0f07e3bf087ec22fd3c2c7f7"
700
+ },
701
+ {
702
+ "dataPath": "params_shard_24.bin",
703
+ "format": "raw-shard",
704
+ "nbytes": 33554432,
705
+ "records": [
706
+ {
707
+ "name": "model.layers.2.mlp.down_proj.weight",
708
+ "shape": [
709
+ 2048,
710
+ 8192
711
+ ],
712
+ "dtype": "bfloat16",
713
+ "format": "raw",
714
+ "nbytes": 33554432,
715
+ "byteOffset": 0
716
+ }
717
+ ],
718
+ "md5sum": "5814d5a86c183a63f441e473b96eac9f"
719
+ },
720
+ {
721
+ "dataPath": "params_shard_25.bin",
722
+ "format": "raw-shard",
723
+ "nbytes": 67108864,
724
+ "records": [
725
+ {
726
+ "name": "model.layers.2.mlp.gate_up_proj.weight",
727
+ "shape": [
728
+ 16384,
729
+ 2048
730
+ ],
731
+ "dtype": "bfloat16",
732
+ "format": "raw",
733
+ "nbytes": 67108864,
734
+ "byteOffset": 0
735
+ }
736
+ ],
737
+ "md5sum": "66938847fe03b3b7beb0c34e0ea3d146"
738
+ },
739
+ {
740
+ "dataPath": "params_shard_26.bin",
741
+ "format": "raw-shard",
742
+ "nbytes": 20979712,
743
+ "records": [
744
+ {
745
+ "name": "model.layers.15.self_attn.qkv_proj.weight",
746
+ "shape": [
747
+ 3072,
748
+ 2048
749
+ ],
750
+ "dtype": "bfloat16",
751
+ "format": "raw",
752
+ "nbytes": 12582912,
753
+ "byteOffset": 0
754
+ },
755
+ {
756
+ "name": "model.layers.15.self_attn.o_proj.weight",
757
+ "shape": [
758
+ 2048,
759
+ 2048
760
+ ],
761
+ "dtype": "bfloat16",
762
+ "format": "raw",
763
+ "nbytes": 8388608,
764
+ "byteOffset": 12582912
765
+ },
766
+ {
767
+ "name": "model.layers.2.input_layernorm.weight",
768
+ "shape": [
769
+ 2048
770
+ ],
771
+ "dtype": "bfloat16",
772
+ "format": "raw",
773
+ "nbytes": 4096,
774
+ "byteOffset": 20971520
775
+ },
776
+ {
777
+ "name": "model.layers.2.post_attention_layernorm.weight",
778
+ "shape": [
779
+ 2048
780
+ ],
781
+ "dtype": "bfloat16",
782
+ "format": "raw",
783
+ "nbytes": 4096,
784
+ "byteOffset": 20975616
785
+ }
786
+ ],
787
+ "md5sum": "60e2dec95a7f652b11879a58814a0b72"
788
+ },
789
+ {
790
+ "dataPath": "params_shard_27.bin",
791
+ "format": "raw-shard",
792
+ "nbytes": 33554432,
793
+ "records": [
794
+ {
795
+ "name": "model.layers.3.mlp.down_proj.weight",
796
+ "shape": [
797
+ 2048,
798
+ 8192
799
+ ],
800
+ "dtype": "bfloat16",
801
+ "format": "raw",
802
+ "nbytes": 33554432,
803
+ "byteOffset": 0
804
+ }
805
+ ],
806
+ "md5sum": "daca4f0d1145ceba60aba195b6e4b717"
807
+ },
808
+ {
809
+ "dataPath": "params_shard_28.bin",
810
+ "format": "raw-shard",
811
+ "nbytes": 67108864,
812
+ "records": [
813
+ {
814
+ "name": "model.layers.3.mlp.gate_up_proj.weight",
815
+ "shape": [
816
+ 16384,
817
+ 2048
818
+ ],
819
+ "dtype": "bfloat16",
820
+ "format": "raw",
821
+ "nbytes": 67108864,
822
+ "byteOffset": 0
823
+ }
824
+ ],
825
+ "md5sum": "a7d140e2aa8117b4e0251ab7d4f1e894"
826
+ },
827
+ {
828
+ "dataPath": "params_shard_29.bin",
829
+ "format": "raw-shard",
830
+ "nbytes": 20979712,
831
+ "records": [
832
+ {
833
+ "name": "model.layers.2.self_attn.qkv_proj.weight",
834
+ "shape": [
835
+ 3072,
836
+ 2048
837
+ ],
838
+ "dtype": "bfloat16",
839
+ "format": "raw",
840
+ "nbytes": 12582912,
841
+ "byteOffset": 0
842
+ },
843
+ {
844
+ "name": "model.layers.2.self_attn.o_proj.weight",
845
+ "shape": [
846
+ 2048,
847
+ 2048
848
+ ],
849
+ "dtype": "bfloat16",
850
+ "format": "raw",
851
+ "nbytes": 8388608,
852
+ "byteOffset": 12582912
853
+ },
854
+ {
855
+ "name": "model.layers.3.input_layernorm.weight",
856
+ "shape": [
857
+ 2048
858
+ ],
859
+ "dtype": "bfloat16",
860
+ "format": "raw",
861
+ "nbytes": 4096,
862
+ "byteOffset": 20971520
863
+ },
864
+ {
865
+ "name": "model.layers.3.post_attention_layernorm.weight",
866
+ "shape": [
867
+ 2048
868
+ ],
869
+ "dtype": "bfloat16",
870
+ "format": "raw",
871
+ "nbytes": 4096,
872
+ "byteOffset": 20975616
873
+ }
874
+ ],
875
+ "md5sum": "aa3290f5cbda1c5d43f00d0846fb3c5d"
876
+ },
877
+ {
878
+ "dataPath": "params_shard_30.bin",
879
+ "format": "raw-shard",
880
+ "nbytes": 33554432,
881
+ "records": [
882
+ {
883
+ "name": "model.layers.4.mlp.down_proj.weight",
884
+ "shape": [
885
+ 2048,
886
+ 8192
887
+ ],
888
+ "dtype": "bfloat16",
889
+ "format": "raw",
890
+ "nbytes": 33554432,
891
+ "byteOffset": 0
892
+ }
893
+ ],
894
+ "md5sum": "6f6f0eafe2557a8793292023a93fc44d"
895
+ },
896
+ {
897
+ "dataPath": "params_shard_31.bin",
898
+ "format": "raw-shard",
899
+ "nbytes": 67108864,
900
+ "records": [
901
+ {
902
+ "name": "model.layers.4.mlp.gate_up_proj.weight",
903
+ "shape": [
904
+ 16384,
905
+ 2048
906
+ ],
907
+ "dtype": "bfloat16",
908
+ "format": "raw",
909
+ "nbytes": 67108864,
910
+ "byteOffset": 0
911
+ }
912
+ ],
913
+ "md5sum": "ac207948c6dd9f8c618249b5c8370101"
914
+ },
915
+ {
916
+ "dataPath": "params_shard_32.bin",
917
+ "format": "raw-shard",
918
+ "nbytes": 20979712,
919
+ "records": [
920
+ {
921
+ "name": "model.layers.3.self_attn.qkv_proj.weight",
922
+ "shape": [
923
+ 3072,
924
+ 2048
925
+ ],
926
+ "dtype": "bfloat16",
927
+ "format": "raw",
928
+ "nbytes": 12582912,
929
+ "byteOffset": 0
930
+ },
931
+ {
932
+ "name": "model.layers.3.self_attn.o_proj.weight",
933
+ "shape": [
934
+ 2048,
935
+ 2048
936
+ ],
937
+ "dtype": "bfloat16",
938
+ "format": "raw",
939
+ "nbytes": 8388608,
940
+ "byteOffset": 12582912
941
+ },
942
+ {
943
+ "name": "model.layers.4.input_layernorm.weight",
944
+ "shape": [
945
+ 2048
946
+ ],
947
+ "dtype": "bfloat16",
948
+ "format": "raw",
949
+ "nbytes": 4096,
950
+ "byteOffset": 20971520
951
+ },
952
+ {
953
+ "name": "model.layers.4.post_attention_layernorm.weight",
954
+ "shape": [
955
+ 2048
956
+ ],
957
+ "dtype": "bfloat16",
958
+ "format": "raw",
959
+ "nbytes": 4096,
960
+ "byteOffset": 20975616
961
+ }
962
+ ],
963
+ "md5sum": "5fd4c85aebd5f940fb7cb90a06bc0e3d"
964
+ },
965
+ {
966
+ "dataPath": "params_shard_33.bin",
967
+ "format": "raw-shard",
968
+ "nbytes": 33554432,
969
+ "records": [
970
+ {
971
+ "name": "model.layers.5.mlp.down_proj.weight",
972
+ "shape": [
973
+ 2048,
974
+ 8192
975
+ ],
976
+ "dtype": "bfloat16",
977
+ "format": "raw",
978
+ "nbytes": 33554432,
979
+ "byteOffset": 0
980
+ }
981
+ ],
982
+ "md5sum": "385fbfd45620a6a7c7d555b8cf7d35ff"
983
+ },
984
+ {
985
+ "dataPath": "params_shard_34.bin",
986
+ "format": "raw-shard",
987
+ "nbytes": 67108864,
988
+ "records": [
989
+ {
990
+ "name": "model.layers.5.mlp.gate_up_proj.weight",
991
+ "shape": [
992
+ 16384,
993
+ 2048
994
+ ],
995
+ "dtype": "bfloat16",
996
+ "format": "raw",
997
+ "nbytes": 67108864,
998
+ "byteOffset": 0
999
+ }
1000
+ ],
1001
+ "md5sum": "9d34ce6dc3d3bcecc868378358d66bcb"
1002
+ },
1003
+ {
1004
+ "dataPath": "params_shard_35.bin",
1005
+ "format": "raw-shard",
1006
+ "nbytes": 20979712,
1007
+ "records": [
1008
+ {
1009
+ "name": "model.layers.4.self_attn.qkv_proj.weight",
1010
+ "shape": [
1011
+ 3072,
1012
+ 2048
1013
+ ],
1014
+ "dtype": "bfloat16",
1015
+ "format": "raw",
1016
+ "nbytes": 12582912,
1017
+ "byteOffset": 0
1018
+ },
1019
+ {
1020
+ "name": "model.layers.4.self_attn.o_proj.weight",
1021
+ "shape": [
1022
+ 2048,
1023
+ 2048
1024
+ ],
1025
+ "dtype": "bfloat16",
1026
+ "format": "raw",
1027
+ "nbytes": 8388608,
1028
+ "byteOffset": 12582912
1029
+ },
1030
+ {
1031
+ "name": "model.layers.5.input_layernorm.weight",
1032
+ "shape": [
1033
+ 2048
1034
+ ],
1035
+ "dtype": "bfloat16",
1036
+ "format": "raw",
1037
+ "nbytes": 4096,
1038
+ "byteOffset": 20971520
1039
+ },
1040
+ {
1041
+ "name": "model.layers.5.post_attention_layernorm.weight",
1042
+ "shape": [
1043
+ 2048
1044
+ ],
1045
+ "dtype": "bfloat16",
1046
+ "format": "raw",
1047
+ "nbytes": 4096,
1048
+ "byteOffset": 20975616
1049
+ }
1050
+ ],
1051
+ "md5sum": "24b869dad75eb377412439976e3ae012"
1052
+ },
1053
+ {
1054
+ "dataPath": "params_shard_36.bin",
1055
+ "format": "raw-shard",
1056
+ "nbytes": 33554432,
1057
+ "records": [
1058
+ {
1059
+ "name": "model.layers.6.mlp.down_proj.weight",
1060
+ "shape": [
1061
+ 2048,
1062
+ 8192
1063
+ ],
1064
+ "dtype": "bfloat16",
1065
+ "format": "raw",
1066
+ "nbytes": 33554432,
1067
+ "byteOffset": 0
1068
+ }
1069
+ ],
1070
+ "md5sum": "0168cb04de60c94a82da930ef69dfc79"
1071
+ },
1072
+ {
1073
+ "dataPath": "params_shard_37.bin",
1074
+ "format": "raw-shard",
1075
+ "nbytes": 67108864,
1076
+ "records": [
1077
+ {
1078
+ "name": "model.layers.6.mlp.gate_up_proj.weight",
1079
+ "shape": [
1080
+ 16384,
1081
+ 2048
1082
+ ],
1083
+ "dtype": "bfloat16",
1084
+ "format": "raw",
1085
+ "nbytes": 67108864,
1086
+ "byteOffset": 0
1087
+ }
1088
+ ],
1089
+ "md5sum": "f38fe5a46d483f933a79ce6577bd5236"
1090
+ },
1091
+ {
1092
+ "dataPath": "params_shard_38.bin",
1093
+ "format": "raw-shard",
1094
+ "nbytes": 20979712,
1095
+ "records": [
1096
+ {
1097
+ "name": "model.layers.5.self_attn.qkv_proj.weight",
1098
+ "shape": [
1099
+ 3072,
1100
+ 2048
1101
+ ],
1102
+ "dtype": "bfloat16",
1103
+ "format": "raw",
1104
+ "nbytes": 12582912,
1105
+ "byteOffset": 0
1106
+ },
1107
+ {
1108
+ "name": "model.layers.5.self_attn.o_proj.weight",
1109
+ "shape": [
1110
+ 2048,
1111
+ 2048
1112
+ ],
1113
+ "dtype": "bfloat16",
1114
+ "format": "raw",
1115
+ "nbytes": 8388608,
1116
+ "byteOffset": 12582912
1117
+ },
1118
+ {
1119
+ "name": "model.layers.6.input_layernorm.weight",
1120
+ "shape": [
1121
+ 2048
1122
+ ],
1123
+ "dtype": "bfloat16",
1124
+ "format": "raw",
1125
+ "nbytes": 4096,
1126
+ "byteOffset": 20971520
1127
+ },
1128
+ {
1129
+ "name": "model.layers.6.post_attention_layernorm.weight",
1130
+ "shape": [
1131
+ 2048
1132
+ ],
1133
+ "dtype": "bfloat16",
1134
+ "format": "raw",
1135
+ "nbytes": 4096,
1136
+ "byteOffset": 20975616
1137
+ }
1138
+ ],
1139
+ "md5sum": "49401c003986778a26ab6e1d17dea5a9"
1140
+ },
1141
+ {
1142
+ "dataPath": "params_shard_39.bin",
1143
+ "format": "raw-shard",
1144
+ "nbytes": 33554432,
1145
+ "records": [
1146
+ {
1147
+ "name": "model.layers.7.mlp.down_proj.weight",
1148
+ "shape": [
1149
+ 2048,
1150
+ 8192
1151
+ ],
1152
+ "dtype": "bfloat16",
1153
+ "format": "raw",
1154
+ "nbytes": 33554432,
1155
+ "byteOffset": 0
1156
+ }
1157
+ ],
1158
+ "md5sum": "c9edef3101ccb32cf23b9342315b1a38"
1159
+ },
1160
+ {
1161
+ "dataPath": "params_shard_40.bin",
1162
+ "format": "raw-shard",
1163
+ "nbytes": 67108864,
1164
+ "records": [
1165
+ {
1166
+ "name": "model.layers.7.mlp.gate_up_proj.weight",
1167
+ "shape": [
1168
+ 16384,
1169
+ 2048
1170
+ ],
1171
+ "dtype": "bfloat16",
1172
+ "format": "raw",
1173
+ "nbytes": 67108864,
1174
+ "byteOffset": 0
1175
+ }
1176
+ ],
1177
+ "md5sum": "c7b95c82540aad932bca9ebac8caa96e"
1178
+ },
1179
+ {
1180
+ "dataPath": "params_shard_41.bin",
1181
+ "format": "raw-shard",
1182
+ "nbytes": 20979712,
1183
+ "records": [
1184
+ {
1185
+ "name": "model.layers.6.self_attn.qkv_proj.weight",
1186
+ "shape": [
1187
+ 3072,
1188
+ 2048
1189
+ ],
1190
+ "dtype": "bfloat16",
1191
+ "format": "raw",
1192
+ "nbytes": 12582912,
1193
+ "byteOffset": 0
1194
+ },
1195
+ {
1196
+ "name": "model.layers.6.self_attn.o_proj.weight",
1197
+ "shape": [
1198
+ 2048,
1199
+ 2048
1200
+ ],
1201
+ "dtype": "bfloat16",
1202
+ "format": "raw",
1203
+ "nbytes": 8388608,
1204
+ "byteOffset": 12582912
1205
+ },
1206
+ {
1207
+ "name": "model.layers.7.input_layernorm.weight",
1208
+ "shape": [
1209
+ 2048
1210
+ ],
1211
+ "dtype": "bfloat16",
1212
+ "format": "raw",
1213
+ "nbytes": 4096,
1214
+ "byteOffset": 20971520
1215
+ },
1216
+ {
1217
+ "name": "model.layers.7.post_attention_layernorm.weight",
1218
+ "shape": [
1219
+ 2048
1220
+ ],
1221
+ "dtype": "bfloat16",
1222
+ "format": "raw",
1223
+ "nbytes": 4096,
1224
+ "byteOffset": 20975616
1225
+ }
1226
+ ],
1227
+ "md5sum": "e44904f0c432ad45b9919a22b590ae9a"
1228
+ },
1229
+ {
1230
+ "dataPath": "params_shard_42.bin",
1231
+ "format": "raw-shard",
1232
+ "nbytes": 33554432,
1233
+ "records": [
1234
+ {
1235
+ "name": "model.layers.8.mlp.down_proj.weight",
1236
+ "shape": [
1237
+ 2048,
1238
+ 8192
1239
+ ],
1240
+ "dtype": "bfloat16",
1241
+ "format": "raw",
1242
+ "nbytes": 33554432,
1243
+ "byteOffset": 0
1244
+ }
1245
+ ],
1246
+ "md5sum": "3e337579a791f65389f4a5a93dc95633"
1247
+ },
1248
+ {
1249
+ "dataPath": "params_shard_43.bin",
1250
+ "format": "raw-shard",
1251
+ "nbytes": 67108864,
1252
+ "records": [
1253
+ {
1254
+ "name": "model.layers.8.mlp.gate_up_proj.weight",
1255
+ "shape": [
1256
+ 16384,
1257
+ 2048
1258
+ ],
1259
+ "dtype": "bfloat16",
1260
+ "format": "raw",
1261
+ "nbytes": 67108864,
1262
+ "byteOffset": 0
1263
+ }
1264
+ ],
1265
+ "md5sum": "79c0f7d1fa6519b778d30ef952522121"
1266
+ },
1267
+ {
1268
+ "dataPath": "params_shard_44.bin",
1269
+ "format": "raw-shard",
1270
+ "nbytes": 20979712,
1271
+ "records": [
1272
+ {
1273
+ "name": "model.layers.7.self_attn.qkv_proj.weight",
1274
+ "shape": [
1275
+ 3072,
1276
+ 2048
1277
+ ],
1278
+ "dtype": "bfloat16",
1279
+ "format": "raw",
1280
+ "nbytes": 12582912,
1281
+ "byteOffset": 0
1282
+ },
1283
+ {
1284
+ "name": "model.layers.7.self_attn.o_proj.weight",
1285
+ "shape": [
1286
+ 2048,
1287
+ 2048
1288
+ ],
1289
+ "dtype": "bfloat16",
1290
+ "format": "raw",
1291
+ "nbytes": 8388608,
1292
+ "byteOffset": 12582912
1293
+ },
1294
+ {
1295
+ "name": "model.layers.8.input_layernorm.weight",
1296
+ "shape": [
1297
+ 2048
1298
+ ],
1299
+ "dtype": "bfloat16",
1300
+ "format": "raw",
1301
+ "nbytes": 4096,
1302
+ "byteOffset": 20971520
1303
+ },
1304
+ {
1305
+ "name": "model.layers.8.post_attention_layernorm.weight",
1306
+ "shape": [
1307
+ 2048
1308
+ ],
1309
+ "dtype": "bfloat16",
1310
+ "format": "raw",
1311
+ "nbytes": 4096,
1312
+ "byteOffset": 20975616
1313
+ }
1314
+ ],
1315
+ "md5sum": "5a57388997fefe29f3788be628569150"
1316
+ },
1317
+ {
1318
+ "dataPath": "params_shard_45.bin",
1319
+ "format": "raw-shard",
1320
+ "nbytes": 33554432,
1321
+ "records": [
1322
+ {
1323
+ "name": "model.layers.9.mlp.down_proj.weight",
1324
+ "shape": [
1325
+ 2048,
1326
+ 8192
1327
+ ],
1328
+ "dtype": "bfloat16",
1329
+ "format": "raw",
1330
+ "nbytes": 33554432,
1331
+ "byteOffset": 0
1332
+ }
1333
+ ],
1334
+ "md5sum": "e2d663fbf063a76d8845a1117834610f"
1335
+ },
1336
+ {
1337
+ "dataPath": "params_shard_46.bin",
1338
+ "format": "raw-shard",
1339
+ "nbytes": 67108864,
1340
+ "records": [
1341
+ {
1342
+ "name": "model.layers.9.mlp.gate_up_proj.weight",
1343
+ "shape": [
1344
+ 16384,
1345
+ 2048
1346
+ ],
1347
+ "dtype": "bfloat16",
1348
+ "format": "raw",
1349
+ "nbytes": 67108864,
1350
+ "byteOffset": 0
1351
+ }
1352
+ ],
1353
+ "md5sum": "50e4119a748b23e67bfa10d1ec8e1f00"
1354
+ },
1355
+ {
1356
+ "dataPath": "params_shard_47.bin",
1357
+ "format": "raw-shard",
1358
+ "nbytes": 20979712,
1359
+ "records": [
1360
+ {
1361
+ "name": "model.layers.8.self_attn.qkv_proj.weight",
1362
+ "shape": [
1363
+ 3072,
1364
+ 2048
1365
+ ],
1366
+ "dtype": "bfloat16",
1367
+ "format": "raw",
1368
+ "nbytes": 12582912,
1369
+ "byteOffset": 0
1370
+ },
1371
+ {
1372
+ "name": "model.layers.8.self_attn.o_proj.weight",
1373
+ "shape": [
1374
+ 2048,
1375
+ 2048
1376
+ ],
1377
+ "dtype": "bfloat16",
1378
+ "format": "raw",
1379
+ "nbytes": 8388608,
1380
+ "byteOffset": 12582912
1381
+ },
1382
+ {
1383
+ "name": "model.layers.9.input_layernorm.weight",
1384
+ "shape": [
1385
+ 2048
1386
+ ],
1387
+ "dtype": "bfloat16",
1388
+ "format": "raw",
1389
+ "nbytes": 4096,
1390
+ "byteOffset": 20971520
1391
+ },
1392
+ {
1393
+ "name": "model.layers.9.post_attention_layernorm.weight",
1394
+ "shape": [
1395
+ 2048
1396
+ ],
1397
+ "dtype": "bfloat16",
1398
+ "format": "raw",
1399
+ "nbytes": 4096,
1400
+ "byteOffset": 20975616
1401
+ }
1402
+ ],
1403
+ "md5sum": "c7f736b657c13231c2c106aab8827f06"
1404
+ },
1405
+ {
1406
+ "dataPath": "params_shard_48.bin",
1407
+ "format": "raw-shard",
1408
+ "nbytes": 20975616,
1409
+ "records": [
1410
+ {
1411
+ "name": "model.layers.9.self_attn.qkv_proj.weight",
1412
+ "shape": [
1413
+ 3072,
1414
+ 2048
1415
+ ],
1416
+ "dtype": "bfloat16",
1417
+ "format": "raw",
1418
+ "nbytes": 12582912,
1419
+ "byteOffset": 0
1420
+ },
1421
+ {
1422
+ "name": "model.layers.9.self_attn.o_proj.weight",
1423
+ "shape": [
1424
+ 2048,
1425
+ 2048
1426
+ ],
1427
+ "dtype": "bfloat16",
1428
+ "format": "raw",
1429
+ "nbytes": 8388608,
1430
+ "byteOffset": 12582912
1431
+ },
1432
+ {
1433
+ "name": "model.norm.weight",
1434
+ "shape": [
1435
+ 2048
1436
+ ],
1437
+ "dtype": "bfloat16",
1438
+ "format": "raw",
1439
+ "nbytes": 4096,
1440
+ "byteOffset": 20971520
1441
+ }
1442
+ ],
1443
+ "md5sum": "2484e69deb5afeb91287322eac29c4f8"
1444
+ }
1445
+ ]
1446
+ }
ndarray-cache.json ADDED
@@ -0,0 +1,1446 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "ParamSize": 98,
4
+ "ParamBytes": 4943257600.0,
5
+ "BitsPerParam": 32.0
6
+ },
7
+ "records": [
8
+ {
9
+ "dataPath": "params_shard_0.bin",
10
+ "format": "raw-shard",
11
+ "nbytes": 525336576,
12
+ "records": [
13
+ {
14
+ "name": "model.embed_tokens.weight",
15
+ "shape": [
16
+ 128256,
17
+ 2048
18
+ ],
19
+ "dtype": "float32",
20
+ "format": "f32-to-bf16",
21
+ "nbytes": 525336576,
22
+ "byteOffset": 0
23
+ }
24
+ ],
25
+ "md5sum": "2097305f352cca56c3dc0ad1edb5bcbf"
26
+ },
27
+ {
28
+ "dataPath": "params_shard_1.bin",
29
+ "format": "raw-shard",
30
+ "nbytes": 33554432,
31
+ "records": [
32
+ {
33
+ "name": "model.layers.0.mlp.down_proj.weight",
34
+ "shape": [
35
+ 2048,
36
+ 8192
37
+ ],
38
+ "dtype": "float32",
39
+ "format": "f32-to-bf16",
40
+ "nbytes": 33554432,
41
+ "byteOffset": 0
42
+ }
43
+ ],
44
+ "md5sum": "800df08c617c74598d4ca6af76553de0"
45
+ },
46
+ {
47
+ "dataPath": "params_shard_2.bin",
48
+ "format": "raw-shard",
49
+ "nbytes": 67108864,
50
+ "records": [
51
+ {
52
+ "name": "model.layers.0.mlp.gate_up_proj.weight",
53
+ "shape": [
54
+ 16384,
55
+ 2048
56
+ ],
57
+ "dtype": "float32",
58
+ "format": "f32-to-bf16",
59
+ "nbytes": 67108864,
60
+ "byteOffset": 0
61
+ }
62
+ ],
63
+ "md5sum": "eb3b214c8e085725718f03cf804164ae"
64
+ },
65
+ {
66
+ "dataPath": "params_shard_3.bin",
67
+ "format": "raw-shard",
68
+ "nbytes": 33554432,
69
+ "records": [
70
+ {
71
+ "name": "model.layers.1.mlp.down_proj.weight",
72
+ "shape": [
73
+ 2048,
74
+ 8192
75
+ ],
76
+ "dtype": "float32",
77
+ "format": "f32-to-bf16",
78
+ "nbytes": 33554432,
79
+ "byteOffset": 0
80
+ }
81
+ ],
82
+ "md5sum": "6a9a2411562a216b805451d914163a8c"
83
+ },
84
+ {
85
+ "dataPath": "params_shard_4.bin",
86
+ "format": "raw-shard",
87
+ "nbytes": 67108864,
88
+ "records": [
89
+ {
90
+ "name": "model.layers.1.mlp.gate_up_proj.weight",
91
+ "shape": [
92
+ 16384,
93
+ 2048
94
+ ],
95
+ "dtype": "float32",
96
+ "format": "f32-to-bf16",
97
+ "nbytes": 67108864,
98
+ "byteOffset": 0
99
+ }
100
+ ],
101
+ "md5sum": "32b4b83389df3b212ef853f8f19fc2d1"
102
+ },
103
+ {
104
+ "dataPath": "params_shard_5.bin",
105
+ "format": "raw-shard",
106
+ "nbytes": 20987904,
107
+ "records": [
108
+ {
109
+ "name": "model.layers.0.input_layernorm.weight",
110
+ "shape": [
111
+ 2048
112
+ ],
113
+ "dtype": "float32",
114
+ "format": "f32-to-bf16",
115
+ "nbytes": 4096,
116
+ "byteOffset": 0
117
+ },
118
+ {
119
+ "name": "model.layers.0.post_attention_layernorm.weight",
120
+ "shape": [
121
+ 2048
122
+ ],
123
+ "dtype": "float32",
124
+ "format": "f32-to-bf16",
125
+ "nbytes": 4096,
126
+ "byteOffset": 4096
127
+ },
128
+ {
129
+ "name": "model.layers.0.self_attn.qkv_proj.weight",
130
+ "shape": [
131
+ 3072,
132
+ 2048
133
+ ],
134
+ "dtype": "float32",
135
+ "format": "f32-to-bf16",
136
+ "nbytes": 12582912,
137
+ "byteOffset": 8192
138
+ },
139
+ {
140
+ "name": "model.layers.0.self_attn.o_proj.weight",
141
+ "shape": [
142
+ 2048,
143
+ 2048
144
+ ],
145
+ "dtype": "float32",
146
+ "format": "f32-to-bf16",
147
+ "nbytes": 8388608,
148
+ "byteOffset": 12591104
149
+ },
150
+ {
151
+ "name": "model.layers.1.input_layernorm.weight",
152
+ "shape": [
153
+ 2048
154
+ ],
155
+ "dtype": "float32",
156
+ "format": "f32-to-bf16",
157
+ "nbytes": 4096,
158
+ "byteOffset": 20979712
159
+ },
160
+ {
161
+ "name": "model.layers.1.post_attention_layernorm.weight",
162
+ "shape": [
163
+ 2048
164
+ ],
165
+ "dtype": "float32",
166
+ "format": "f32-to-bf16",
167
+ "nbytes": 4096,
168
+ "byteOffset": 20983808
169
+ }
170
+ ],
171
+ "md5sum": "35b8ff220ef5e2e6627dcfb621fb832d"
172
+ },
173
+ {
174
+ "dataPath": "params_shard_6.bin",
175
+ "format": "raw-shard",
176
+ "nbytes": 33554432,
177
+ "records": [
178
+ {
179
+ "name": "model.layers.10.mlp.down_proj.weight",
180
+ "shape": [
181
+ 2048,
182
+ 8192
183
+ ],
184
+ "dtype": "float32",
185
+ "format": "f32-to-bf16",
186
+ "nbytes": 33554432,
187
+ "byteOffset": 0
188
+ }
189
+ ],
190
+ "md5sum": "9d8db30764216ea077a052dcd20fb2c4"
191
+ },
192
+ {
193
+ "dataPath": "params_shard_7.bin",
194
+ "format": "raw-shard",
195
+ "nbytes": 67108864,
196
+ "records": [
197
+ {
198
+ "name": "model.layers.10.mlp.gate_up_proj.weight",
199
+ "shape": [
200
+ 16384,
201
+ 2048
202
+ ],
203
+ "dtype": "float32",
204
+ "format": "f32-to-bf16",
205
+ "nbytes": 67108864,
206
+ "byteOffset": 0
207
+ }
208
+ ],
209
+ "md5sum": "0856245486dd0d049581359e670f33e0"
210
+ },
211
+ {
212
+ "dataPath": "params_shard_8.bin",
213
+ "format": "raw-shard",
214
+ "nbytes": 20979712,
215
+ "records": [
216
+ {
217
+ "name": "model.layers.1.self_attn.qkv_proj.weight",
218
+ "shape": [
219
+ 3072,
220
+ 2048
221
+ ],
222
+ "dtype": "float32",
223
+ "format": "f32-to-bf16",
224
+ "nbytes": 12582912,
225
+ "byteOffset": 0
226
+ },
227
+ {
228
+ "name": "model.layers.1.self_attn.o_proj.weight",
229
+ "shape": [
230
+ 2048,
231
+ 2048
232
+ ],
233
+ "dtype": "float32",
234
+ "format": "f32-to-bf16",
235
+ "nbytes": 8388608,
236
+ "byteOffset": 12582912
237
+ },
238
+ {
239
+ "name": "model.layers.10.input_layernorm.weight",
240
+ "shape": [
241
+ 2048
242
+ ],
243
+ "dtype": "float32",
244
+ "format": "f32-to-bf16",
245
+ "nbytes": 4096,
246
+ "byteOffset": 20971520
247
+ },
248
+ {
249
+ "name": "model.layers.10.post_attention_layernorm.weight",
250
+ "shape": [
251
+ 2048
252
+ ],
253
+ "dtype": "float32",
254
+ "format": "f32-to-bf16",
255
+ "nbytes": 4096,
256
+ "byteOffset": 20975616
257
+ }
258
+ ],
259
+ "md5sum": "af71427b909126ccc9a9635a8279e0a9"
260
+ },
261
+ {
262
+ "dataPath": "params_shard_9.bin",
263
+ "format": "raw-shard",
264
+ "nbytes": 33554432,
265
+ "records": [
266
+ {
267
+ "name": "model.layers.11.mlp.down_proj.weight",
268
+ "shape": [
269
+ 2048,
270
+ 8192
271
+ ],
272
+ "dtype": "float32",
273
+ "format": "f32-to-bf16",
274
+ "nbytes": 33554432,
275
+ "byteOffset": 0
276
+ }
277
+ ],
278
+ "md5sum": "795af0ca2c531da387572abece860a7e"
279
+ },
280
+ {
281
+ "dataPath": "params_shard_10.bin",
282
+ "format": "raw-shard",
283
+ "nbytes": 67108864,
284
+ "records": [
285
+ {
286
+ "name": "model.layers.11.mlp.gate_up_proj.weight",
287
+ "shape": [
288
+ 16384,
289
+ 2048
290
+ ],
291
+ "dtype": "float32",
292
+ "format": "f32-to-bf16",
293
+ "nbytes": 67108864,
294
+ "byteOffset": 0
295
+ }
296
+ ],
297
+ "md5sum": "4c8d8ed56e53eb99608130db0c1de05d"
298
+ },
299
+ {
300
+ "dataPath": "params_shard_11.bin",
301
+ "format": "raw-shard",
302
+ "nbytes": 20979712,
303
+ "records": [
304
+ {
305
+ "name": "model.layers.10.self_attn.qkv_proj.weight",
306
+ "shape": [
307
+ 3072,
308
+ 2048
309
+ ],
310
+ "dtype": "float32",
311
+ "format": "f32-to-bf16",
312
+ "nbytes": 12582912,
313
+ "byteOffset": 0
314
+ },
315
+ {
316
+ "name": "model.layers.10.self_attn.o_proj.weight",
317
+ "shape": [
318
+ 2048,
319
+ 2048
320
+ ],
321
+ "dtype": "float32",
322
+ "format": "f32-to-bf16",
323
+ "nbytes": 8388608,
324
+ "byteOffset": 12582912
325
+ },
326
+ {
327
+ "name": "model.layers.11.input_layernorm.weight",
328
+ "shape": [
329
+ 2048
330
+ ],
331
+ "dtype": "float32",
332
+ "format": "f32-to-bf16",
333
+ "nbytes": 4096,
334
+ "byteOffset": 20971520
335
+ },
336
+ {
337
+ "name": "model.layers.11.post_attention_layernorm.weight",
338
+ "shape": [
339
+ 2048
340
+ ],
341
+ "dtype": "float32",
342
+ "format": "f32-to-bf16",
343
+ "nbytes": 4096,
344
+ "byteOffset": 20975616
345
+ }
346
+ ],
347
+ "md5sum": "f7628a591cb04c54546aaaa4f9398a68"
348
+ },
349
+ {
350
+ "dataPath": "params_shard_12.bin",
351
+ "format": "raw-shard",
352
+ "nbytes": 33554432,
353
+ "records": [
354
+ {
355
+ "name": "model.layers.12.mlp.down_proj.weight",
356
+ "shape": [
357
+ 2048,
358
+ 8192
359
+ ],
360
+ "dtype": "float32",
361
+ "format": "f32-to-bf16",
362
+ "nbytes": 33554432,
363
+ "byteOffset": 0
364
+ }
365
+ ],
366
+ "md5sum": "64f4c86756397349e929cd1f2f849e5c"
367
+ },
368
+ {
369
+ "dataPath": "params_shard_13.bin",
370
+ "format": "raw-shard",
371
+ "nbytes": 67108864,
372
+ "records": [
373
+ {
374
+ "name": "model.layers.12.mlp.gate_up_proj.weight",
375
+ "shape": [
376
+ 16384,
377
+ 2048
378
+ ],
379
+ "dtype": "float32",
380
+ "format": "f32-to-bf16",
381
+ "nbytes": 67108864,
382
+ "byteOffset": 0
383
+ }
384
+ ],
385
+ "md5sum": "ea4ff7cec9e3c5ec3c805a6586bdd2ee"
386
+ },
387
+ {
388
+ "dataPath": "params_shard_14.bin",
389
+ "format": "raw-shard",
390
+ "nbytes": 20979712,
391
+ "records": [
392
+ {
393
+ "name": "model.layers.11.self_attn.qkv_proj.weight",
394
+ "shape": [
395
+ 3072,
396
+ 2048
397
+ ],
398
+ "dtype": "float32",
399
+ "format": "f32-to-bf16",
400
+ "nbytes": 12582912,
401
+ "byteOffset": 0
402
+ },
403
+ {
404
+ "name": "model.layers.11.self_attn.o_proj.weight",
405
+ "shape": [
406
+ 2048,
407
+ 2048
408
+ ],
409
+ "dtype": "float32",
410
+ "format": "f32-to-bf16",
411
+ "nbytes": 8388608,
412
+ "byteOffset": 12582912
413
+ },
414
+ {
415
+ "name": "model.layers.12.input_layernorm.weight",
416
+ "shape": [
417
+ 2048
418
+ ],
419
+ "dtype": "float32",
420
+ "format": "f32-to-bf16",
421
+ "nbytes": 4096,
422
+ "byteOffset": 20971520
423
+ },
424
+ {
425
+ "name": "model.layers.12.post_attention_layernorm.weight",
426
+ "shape": [
427
+ 2048
428
+ ],
429
+ "dtype": "float32",
430
+ "format": "f32-to-bf16",
431
+ "nbytes": 4096,
432
+ "byteOffset": 20975616
433
+ }
434
+ ],
435
+ "md5sum": "90f829661b428a1095ad0216de8f525e"
436
+ },
437
+ {
438
+ "dataPath": "params_shard_15.bin",
439
+ "format": "raw-shard",
440
+ "nbytes": 33554432,
441
+ "records": [
442
+ {
443
+ "name": "model.layers.13.mlp.down_proj.weight",
444
+ "shape": [
445
+ 2048,
446
+ 8192
447
+ ],
448
+ "dtype": "float32",
449
+ "format": "f32-to-bf16",
450
+ "nbytes": 33554432,
451
+ "byteOffset": 0
452
+ }
453
+ ],
454
+ "md5sum": "fb6f9e666f95e8149da896c79bbb730c"
455
+ },
456
+ {
457
+ "dataPath": "params_shard_16.bin",
458
+ "format": "raw-shard",
459
+ "nbytes": 67108864,
460
+ "records": [
461
+ {
462
+ "name": "model.layers.13.mlp.gate_up_proj.weight",
463
+ "shape": [
464
+ 16384,
465
+ 2048
466
+ ],
467
+ "dtype": "float32",
468
+ "format": "f32-to-bf16",
469
+ "nbytes": 67108864,
470
+ "byteOffset": 0
471
+ }
472
+ ],
473
+ "md5sum": "097a18cf907839e45e7bceedcfeee09c"
474
+ },
475
+ {
476
+ "dataPath": "params_shard_17.bin",
477
+ "format": "raw-shard",
478
+ "nbytes": 20979712,
479
+ "records": [
480
+ {
481
+ "name": "model.layers.12.self_attn.qkv_proj.weight",
482
+ "shape": [
483
+ 3072,
484
+ 2048
485
+ ],
486
+ "dtype": "float32",
487
+ "format": "f32-to-bf16",
488
+ "nbytes": 12582912,
489
+ "byteOffset": 0
490
+ },
491
+ {
492
+ "name": "model.layers.12.self_attn.o_proj.weight",
493
+ "shape": [
494
+ 2048,
495
+ 2048
496
+ ],
497
+ "dtype": "float32",
498
+ "format": "f32-to-bf16",
499
+ "nbytes": 8388608,
500
+ "byteOffset": 12582912
501
+ },
502
+ {
503
+ "name": "model.layers.13.input_layernorm.weight",
504
+ "shape": [
505
+ 2048
506
+ ],
507
+ "dtype": "float32",
508
+ "format": "f32-to-bf16",
509
+ "nbytes": 4096,
510
+ "byteOffset": 20971520
511
+ },
512
+ {
513
+ "name": "model.layers.13.post_attention_layernorm.weight",
514
+ "shape": [
515
+ 2048
516
+ ],
517
+ "dtype": "float32",
518
+ "format": "f32-to-bf16",
519
+ "nbytes": 4096,
520
+ "byteOffset": 20975616
521
+ }
522
+ ],
523
+ "md5sum": "8c3f48c9a1fb32d02410baeb954f1bb6"
524
+ },
525
+ {
526
+ "dataPath": "params_shard_18.bin",
527
+ "format": "raw-shard",
528
+ "nbytes": 33554432,
529
+ "records": [
530
+ {
531
+ "name": "model.layers.14.mlp.down_proj.weight",
532
+ "shape": [
533
+ 2048,
534
+ 8192
535
+ ],
536
+ "dtype": "float32",
537
+ "format": "f32-to-bf16",
538
+ "nbytes": 33554432,
539
+ "byteOffset": 0
540
+ }
541
+ ],
542
+ "md5sum": "ab16992964f5135c087f55a6c96eaf30"
543
+ },
544
+ {
545
+ "dataPath": "params_shard_19.bin",
546
+ "format": "raw-shard",
547
+ "nbytes": 67108864,
548
+ "records": [
549
+ {
550
+ "name": "model.layers.14.mlp.gate_up_proj.weight",
551
+ "shape": [
552
+ 16384,
553
+ 2048
554
+ ],
555
+ "dtype": "float32",
556
+ "format": "f32-to-bf16",
557
+ "nbytes": 67108864,
558
+ "byteOffset": 0
559
+ }
560
+ ],
561
+ "md5sum": "75e6fc44ba742deec3aea8e1bd310f22"
562
+ },
563
+ {
564
+ "dataPath": "params_shard_20.bin",
565
+ "format": "raw-shard",
566
+ "nbytes": 20979712,
567
+ "records": [
568
+ {
569
+ "name": "model.layers.13.self_attn.qkv_proj.weight",
570
+ "shape": [
571
+ 3072,
572
+ 2048
573
+ ],
574
+ "dtype": "float32",
575
+ "format": "f32-to-bf16",
576
+ "nbytes": 12582912,
577
+ "byteOffset": 0
578
+ },
579
+ {
580
+ "name": "model.layers.13.self_attn.o_proj.weight",
581
+ "shape": [
582
+ 2048,
583
+ 2048
584
+ ],
585
+ "dtype": "float32",
586
+ "format": "f32-to-bf16",
587
+ "nbytes": 8388608,
588
+ "byteOffset": 12582912
589
+ },
590
+ {
591
+ "name": "model.layers.14.input_layernorm.weight",
592
+ "shape": [
593
+ 2048
594
+ ],
595
+ "dtype": "float32",
596
+ "format": "f32-to-bf16",
597
+ "nbytes": 4096,
598
+ "byteOffset": 20971520
599
+ },
600
+ {
601
+ "name": "model.layers.14.post_attention_layernorm.weight",
602
+ "shape": [
603
+ 2048
604
+ ],
605
+ "dtype": "float32",
606
+ "format": "f32-to-bf16",
607
+ "nbytes": 4096,
608
+ "byteOffset": 20975616
609
+ }
610
+ ],
611
+ "md5sum": "a2c7db5bc8440c57f50b3c38a403c9f8"
612
+ },
613
+ {
614
+ "dataPath": "params_shard_21.bin",
615
+ "format": "raw-shard",
616
+ "nbytes": 33554432,
617
+ "records": [
618
+ {
619
+ "name": "model.layers.15.mlp.down_proj.weight",
620
+ "shape": [
621
+ 2048,
622
+ 8192
623
+ ],
624
+ "dtype": "float32",
625
+ "format": "f32-to-bf16",
626
+ "nbytes": 33554432,
627
+ "byteOffset": 0
628
+ }
629
+ ],
630
+ "md5sum": "3fe8a1e2c9e49e659f3411c7e138cd9f"
631
+ },
632
+ {
633
+ "dataPath": "params_shard_22.bin",
634
+ "format": "raw-shard",
635
+ "nbytes": 67108864,
636
+ "records": [
637
+ {
638
+ "name": "model.layers.15.mlp.gate_up_proj.weight",
639
+ "shape": [
640
+ 16384,
641
+ 2048
642
+ ],
643
+ "dtype": "float32",
644
+ "format": "f32-to-bf16",
645
+ "nbytes": 67108864,
646
+ "byteOffset": 0
647
+ }
648
+ ],
649
+ "md5sum": "6929ff78c04aaf58088e2b0fd33c61c4"
650
+ },
651
+ {
652
+ "dataPath": "params_shard_23.bin",
653
+ "format": "raw-shard",
654
+ "nbytes": 20979712,
655
+ "records": [
656
+ {
657
+ "name": "model.layers.14.self_attn.qkv_proj.weight",
658
+ "shape": [
659
+ 3072,
660
+ 2048
661
+ ],
662
+ "dtype": "float32",
663
+ "format": "f32-to-bf16",
664
+ "nbytes": 12582912,
665
+ "byteOffset": 0
666
+ },
667
+ {
668
+ "name": "model.layers.14.self_attn.o_proj.weight",
669
+ "shape": [
670
+ 2048,
671
+ 2048
672
+ ],
673
+ "dtype": "float32",
674
+ "format": "f32-to-bf16",
675
+ "nbytes": 8388608,
676
+ "byteOffset": 12582912
677
+ },
678
+ {
679
+ "name": "model.layers.15.input_layernorm.weight",
680
+ "shape": [
681
+ 2048
682
+ ],
683
+ "dtype": "float32",
684
+ "format": "f32-to-bf16",
685
+ "nbytes": 4096,
686
+ "byteOffset": 20971520
687
+ },
688
+ {
689
+ "name": "model.layers.15.post_attention_layernorm.weight",
690
+ "shape": [
691
+ 2048
692
+ ],
693
+ "dtype": "float32",
694
+ "format": "f32-to-bf16",
695
+ "nbytes": 4096,
696
+ "byteOffset": 20975616
697
+ }
698
+ ],
699
+ "md5sum": "50aa881a0f07e3bf087ec22fd3c2c7f7"
700
+ },
701
+ {
702
+ "dataPath": "params_shard_24.bin",
703
+ "format": "raw-shard",
704
+ "nbytes": 33554432,
705
+ "records": [
706
+ {
707
+ "name": "model.layers.2.mlp.down_proj.weight",
708
+ "shape": [
709
+ 2048,
710
+ 8192
711
+ ],
712
+ "dtype": "float32",
713
+ "format": "f32-to-bf16",
714
+ "nbytes": 33554432,
715
+ "byteOffset": 0
716
+ }
717
+ ],
718
+ "md5sum": "5814d5a86c183a63f441e473b96eac9f"
719
+ },
720
+ {
721
+ "dataPath": "params_shard_25.bin",
722
+ "format": "raw-shard",
723
+ "nbytes": 67108864,
724
+ "records": [
725
+ {
726
+ "name": "model.layers.2.mlp.gate_up_proj.weight",
727
+ "shape": [
728
+ 16384,
729
+ 2048
730
+ ],
731
+ "dtype": "float32",
732
+ "format": "f32-to-bf16",
733
+ "nbytes": 67108864,
734
+ "byteOffset": 0
735
+ }
736
+ ],
737
+ "md5sum": "66938847fe03b3b7beb0c34e0ea3d146"
738
+ },
739
+ {
740
+ "dataPath": "params_shard_26.bin",
741
+ "format": "raw-shard",
742
+ "nbytes": 20979712,
743
+ "records": [
744
+ {
745
+ "name": "model.layers.15.self_attn.qkv_proj.weight",
746
+ "shape": [
747
+ 3072,
748
+ 2048
749
+ ],
750
+ "dtype": "float32",
751
+ "format": "f32-to-bf16",
752
+ "nbytes": 12582912,
753
+ "byteOffset": 0
754
+ },
755
+ {
756
+ "name": "model.layers.15.self_attn.o_proj.weight",
757
+ "shape": [
758
+ 2048,
759
+ 2048
760
+ ],
761
+ "dtype": "float32",
762
+ "format": "f32-to-bf16",
763
+ "nbytes": 8388608,
764
+ "byteOffset": 12582912
765
+ },
766
+ {
767
+ "name": "model.layers.2.input_layernorm.weight",
768
+ "shape": [
769
+ 2048
770
+ ],
771
+ "dtype": "float32",
772
+ "format": "f32-to-bf16",
773
+ "nbytes": 4096,
774
+ "byteOffset": 20971520
775
+ },
776
+ {
777
+ "name": "model.layers.2.post_attention_layernorm.weight",
778
+ "shape": [
779
+ 2048
780
+ ],
781
+ "dtype": "float32",
782
+ "format": "f32-to-bf16",
783
+ "nbytes": 4096,
784
+ "byteOffset": 20975616
785
+ }
786
+ ],
787
+ "md5sum": "60e2dec95a7f652b11879a58814a0b72"
788
+ },
789
+ {
790
+ "dataPath": "params_shard_27.bin",
791
+ "format": "raw-shard",
792
+ "nbytes": 33554432,
793
+ "records": [
794
+ {
795
+ "name": "model.layers.3.mlp.down_proj.weight",
796
+ "shape": [
797
+ 2048,
798
+ 8192
799
+ ],
800
+ "dtype": "float32",
801
+ "format": "f32-to-bf16",
802
+ "nbytes": 33554432,
803
+ "byteOffset": 0
804
+ }
805
+ ],
806
+ "md5sum": "daca4f0d1145ceba60aba195b6e4b717"
807
+ },
808
+ {
809
+ "dataPath": "params_shard_28.bin",
810
+ "format": "raw-shard",
811
+ "nbytes": 67108864,
812
+ "records": [
813
+ {
814
+ "name": "model.layers.3.mlp.gate_up_proj.weight",
815
+ "shape": [
816
+ 16384,
817
+ 2048
818
+ ],
819
+ "dtype": "float32",
820
+ "format": "f32-to-bf16",
821
+ "nbytes": 67108864,
822
+ "byteOffset": 0
823
+ }
824
+ ],
825
+ "md5sum": "a7d140e2aa8117b4e0251ab7d4f1e894"
826
+ },
827
+ {
828
+ "dataPath": "params_shard_29.bin",
829
+ "format": "raw-shard",
830
+ "nbytes": 20979712,
831
+ "records": [
832
+ {
833
+ "name": "model.layers.2.self_attn.qkv_proj.weight",
834
+ "shape": [
835
+ 3072,
836
+ 2048
837
+ ],
838
+ "dtype": "float32",
839
+ "format": "f32-to-bf16",
840
+ "nbytes": 12582912,
841
+ "byteOffset": 0
842
+ },
843
+ {
844
+ "name": "model.layers.2.self_attn.o_proj.weight",
845
+ "shape": [
846
+ 2048,
847
+ 2048
848
+ ],
849
+ "dtype": "float32",
850
+ "format": "f32-to-bf16",
851
+ "nbytes": 8388608,
852
+ "byteOffset": 12582912
853
+ },
854
+ {
855
+ "name": "model.layers.3.input_layernorm.weight",
856
+ "shape": [
857
+ 2048
858
+ ],
859
+ "dtype": "float32",
860
+ "format": "f32-to-bf16",
861
+ "nbytes": 4096,
862
+ "byteOffset": 20971520
863
+ },
864
+ {
865
+ "name": "model.layers.3.post_attention_layernorm.weight",
866
+ "shape": [
867
+ 2048
868
+ ],
869
+ "dtype": "float32",
870
+ "format": "f32-to-bf16",
871
+ "nbytes": 4096,
872
+ "byteOffset": 20975616
873
+ }
874
+ ],
875
+ "md5sum": "aa3290f5cbda1c5d43f00d0846fb3c5d"
876
+ },
877
+ {
878
+ "dataPath": "params_shard_30.bin",
879
+ "format": "raw-shard",
880
+ "nbytes": 33554432,
881
+ "records": [
882
+ {
883
+ "name": "model.layers.4.mlp.down_proj.weight",
884
+ "shape": [
885
+ 2048,
886
+ 8192
887
+ ],
888
+ "dtype": "float32",
889
+ "format": "f32-to-bf16",
890
+ "nbytes": 33554432,
891
+ "byteOffset": 0
892
+ }
893
+ ],
894
+ "md5sum": "6f6f0eafe2557a8793292023a93fc44d"
895
+ },
896
+ {
897
+ "dataPath": "params_shard_31.bin",
898
+ "format": "raw-shard",
899
+ "nbytes": 67108864,
900
+ "records": [
901
+ {
902
+ "name": "model.layers.4.mlp.gate_up_proj.weight",
903
+ "shape": [
904
+ 16384,
905
+ 2048
906
+ ],
907
+ "dtype": "float32",
908
+ "format": "f32-to-bf16",
909
+ "nbytes": 67108864,
910
+ "byteOffset": 0
911
+ }
912
+ ],
913
+ "md5sum": "ac207948c6dd9f8c618249b5c8370101"
914
+ },
915
+ {
916
+ "dataPath": "params_shard_32.bin",
917
+ "format": "raw-shard",
918
+ "nbytes": 20979712,
919
+ "records": [
920
+ {
921
+ "name": "model.layers.3.self_attn.qkv_proj.weight",
922
+ "shape": [
923
+ 3072,
924
+ 2048
925
+ ],
926
+ "dtype": "float32",
927
+ "format": "f32-to-bf16",
928
+ "nbytes": 12582912,
929
+ "byteOffset": 0
930
+ },
931
+ {
932
+ "name": "model.layers.3.self_attn.o_proj.weight",
933
+ "shape": [
934
+ 2048,
935
+ 2048
936
+ ],
937
+ "dtype": "float32",
938
+ "format": "f32-to-bf16",
939
+ "nbytes": 8388608,
940
+ "byteOffset": 12582912
941
+ },
942
+ {
943
+ "name": "model.layers.4.input_layernorm.weight",
944
+ "shape": [
945
+ 2048
946
+ ],
947
+ "dtype": "float32",
948
+ "format": "f32-to-bf16",
949
+ "nbytes": 4096,
950
+ "byteOffset": 20971520
951
+ },
952
+ {
953
+ "name": "model.layers.4.post_attention_layernorm.weight",
954
+ "shape": [
955
+ 2048
956
+ ],
957
+ "dtype": "float32",
958
+ "format": "f32-to-bf16",
959
+ "nbytes": 4096,
960
+ "byteOffset": 20975616
961
+ }
962
+ ],
963
+ "md5sum": "5fd4c85aebd5f940fb7cb90a06bc0e3d"
964
+ },
965
+ {
966
+ "dataPath": "params_shard_33.bin",
967
+ "format": "raw-shard",
968
+ "nbytes": 33554432,
969
+ "records": [
970
+ {
971
+ "name": "model.layers.5.mlp.down_proj.weight",
972
+ "shape": [
973
+ 2048,
974
+ 8192
975
+ ],
976
+ "dtype": "float32",
977
+ "format": "f32-to-bf16",
978
+ "nbytes": 33554432,
979
+ "byteOffset": 0
980
+ }
981
+ ],
982
+ "md5sum": "385fbfd45620a6a7c7d555b8cf7d35ff"
983
+ },
984
+ {
985
+ "dataPath": "params_shard_34.bin",
986
+ "format": "raw-shard",
987
+ "nbytes": 67108864,
988
+ "records": [
989
+ {
990
+ "name": "model.layers.5.mlp.gate_up_proj.weight",
991
+ "shape": [
992
+ 16384,
993
+ 2048
994
+ ],
995
+ "dtype": "float32",
996
+ "format": "f32-to-bf16",
997
+ "nbytes": 67108864,
998
+ "byteOffset": 0
999
+ }
1000
+ ],
1001
+ "md5sum": "9d34ce6dc3d3bcecc868378358d66bcb"
1002
+ },
1003
+ {
1004
+ "dataPath": "params_shard_35.bin",
1005
+ "format": "raw-shard",
1006
+ "nbytes": 20979712,
1007
+ "records": [
1008
+ {
1009
+ "name": "model.layers.4.self_attn.qkv_proj.weight",
1010
+ "shape": [
1011
+ 3072,
1012
+ 2048
1013
+ ],
1014
+ "dtype": "float32",
1015
+ "format": "f32-to-bf16",
1016
+ "nbytes": 12582912,
1017
+ "byteOffset": 0
1018
+ },
1019
+ {
1020
+ "name": "model.layers.4.self_attn.o_proj.weight",
1021
+ "shape": [
1022
+ 2048,
1023
+ 2048
1024
+ ],
1025
+ "dtype": "float32",
1026
+ "format": "f32-to-bf16",
1027
+ "nbytes": 8388608,
1028
+ "byteOffset": 12582912
1029
+ },
1030
+ {
1031
+ "name": "model.layers.5.input_layernorm.weight",
1032
+ "shape": [
1033
+ 2048
1034
+ ],
1035
+ "dtype": "float32",
1036
+ "format": "f32-to-bf16",
1037
+ "nbytes": 4096,
1038
+ "byteOffset": 20971520
1039
+ },
1040
+ {
1041
+ "name": "model.layers.5.post_attention_layernorm.weight",
1042
+ "shape": [
1043
+ 2048
1044
+ ],
1045
+ "dtype": "float32",
1046
+ "format": "f32-to-bf16",
1047
+ "nbytes": 4096,
1048
+ "byteOffset": 20975616
1049
+ }
1050
+ ],
1051
+ "md5sum": "24b869dad75eb377412439976e3ae012"
1052
+ },
1053
+ {
1054
+ "dataPath": "params_shard_36.bin",
1055
+ "format": "raw-shard",
1056
+ "nbytes": 33554432,
1057
+ "records": [
1058
+ {
1059
+ "name": "model.layers.6.mlp.down_proj.weight",
1060
+ "shape": [
1061
+ 2048,
1062
+ 8192
1063
+ ],
1064
+ "dtype": "float32",
1065
+ "format": "f32-to-bf16",
1066
+ "nbytes": 33554432,
1067
+ "byteOffset": 0
1068
+ }
1069
+ ],
1070
+ "md5sum": "0168cb04de60c94a82da930ef69dfc79"
1071
+ },
1072
+ {
1073
+ "dataPath": "params_shard_37.bin",
1074
+ "format": "raw-shard",
1075
+ "nbytes": 67108864,
1076
+ "records": [
1077
+ {
1078
+ "name": "model.layers.6.mlp.gate_up_proj.weight",
1079
+ "shape": [
1080
+ 16384,
1081
+ 2048
1082
+ ],
1083
+ "dtype": "float32",
1084
+ "format": "f32-to-bf16",
1085
+ "nbytes": 67108864,
1086
+ "byteOffset": 0
1087
+ }
1088
+ ],
1089
+ "md5sum": "f38fe5a46d483f933a79ce6577bd5236"
1090
+ },
1091
+ {
1092
+ "dataPath": "params_shard_38.bin",
1093
+ "format": "raw-shard",
1094
+ "nbytes": 20979712,
1095
+ "records": [
1096
+ {
1097
+ "name": "model.layers.5.self_attn.qkv_proj.weight",
1098
+ "shape": [
1099
+ 3072,
1100
+ 2048
1101
+ ],
1102
+ "dtype": "float32",
1103
+ "format": "f32-to-bf16",
1104
+ "nbytes": 12582912,
1105
+ "byteOffset": 0
1106
+ },
1107
+ {
1108
+ "name": "model.layers.5.self_attn.o_proj.weight",
1109
+ "shape": [
1110
+ 2048,
1111
+ 2048
1112
+ ],
1113
+ "dtype": "float32",
1114
+ "format": "f32-to-bf16",
1115
+ "nbytes": 8388608,
1116
+ "byteOffset": 12582912
1117
+ },
1118
+ {
1119
+ "name": "model.layers.6.input_layernorm.weight",
1120
+ "shape": [
1121
+ 2048
1122
+ ],
1123
+ "dtype": "float32",
1124
+ "format": "f32-to-bf16",
1125
+ "nbytes": 4096,
1126
+ "byteOffset": 20971520
1127
+ },
1128
+ {
1129
+ "name": "model.layers.6.post_attention_layernorm.weight",
1130
+ "shape": [
1131
+ 2048
1132
+ ],
1133
+ "dtype": "float32",
1134
+ "format": "f32-to-bf16",
1135
+ "nbytes": 4096,
1136
+ "byteOffset": 20975616
1137
+ }
1138
+ ],
1139
+ "md5sum": "49401c003986778a26ab6e1d17dea5a9"
1140
+ },
1141
+ {
1142
+ "dataPath": "params_shard_39.bin",
1143
+ "format": "raw-shard",
1144
+ "nbytes": 33554432,
1145
+ "records": [
1146
+ {
1147
+ "name": "model.layers.7.mlp.down_proj.weight",
1148
+ "shape": [
1149
+ 2048,
1150
+ 8192
1151
+ ],
1152
+ "dtype": "float32",
1153
+ "format": "f32-to-bf16",
1154
+ "nbytes": 33554432,
1155
+ "byteOffset": 0
1156
+ }
1157
+ ],
1158
+ "md5sum": "c9edef3101ccb32cf23b9342315b1a38"
1159
+ },
1160
+ {
1161
+ "dataPath": "params_shard_40.bin",
1162
+ "format": "raw-shard",
1163
+ "nbytes": 67108864,
1164
+ "records": [
1165
+ {
1166
+ "name": "model.layers.7.mlp.gate_up_proj.weight",
1167
+ "shape": [
1168
+ 16384,
1169
+ 2048
1170
+ ],
1171
+ "dtype": "float32",
1172
+ "format": "f32-to-bf16",
1173
+ "nbytes": 67108864,
1174
+ "byteOffset": 0
1175
+ }
1176
+ ],
1177
+ "md5sum": "c7b95c82540aad932bca9ebac8caa96e"
1178
+ },
1179
+ {
1180
+ "dataPath": "params_shard_41.bin",
1181
+ "format": "raw-shard",
1182
+ "nbytes": 20979712,
1183
+ "records": [
1184
+ {
1185
+ "name": "model.layers.6.self_attn.qkv_proj.weight",
1186
+ "shape": [
1187
+ 3072,
1188
+ 2048
1189
+ ],
1190
+ "dtype": "float32",
1191
+ "format": "f32-to-bf16",
1192
+ "nbytes": 12582912,
1193
+ "byteOffset": 0
1194
+ },
1195
+ {
1196
+ "name": "model.layers.6.self_attn.o_proj.weight",
1197
+ "shape": [
1198
+ 2048,
1199
+ 2048
1200
+ ],
1201
+ "dtype": "float32",
1202
+ "format": "f32-to-bf16",
1203
+ "nbytes": 8388608,
1204
+ "byteOffset": 12582912
1205
+ },
1206
+ {
1207
+ "name": "model.layers.7.input_layernorm.weight",
1208
+ "shape": [
1209
+ 2048
1210
+ ],
1211
+ "dtype": "float32",
1212
+ "format": "f32-to-bf16",
1213
+ "nbytes": 4096,
1214
+ "byteOffset": 20971520
1215
+ },
1216
+ {
1217
+ "name": "model.layers.7.post_attention_layernorm.weight",
1218
+ "shape": [
1219
+ 2048
1220
+ ],
1221
+ "dtype": "float32",
1222
+ "format": "f32-to-bf16",
1223
+ "nbytes": 4096,
1224
+ "byteOffset": 20975616
1225
+ }
1226
+ ],
1227
+ "md5sum": "e44904f0c432ad45b9919a22b590ae9a"
1228
+ },
1229
+ {
1230
+ "dataPath": "params_shard_42.bin",
1231
+ "format": "raw-shard",
1232
+ "nbytes": 33554432,
1233
+ "records": [
1234
+ {
1235
+ "name": "model.layers.8.mlp.down_proj.weight",
1236
+ "shape": [
1237
+ 2048,
1238
+ 8192
1239
+ ],
1240
+ "dtype": "float32",
1241
+ "format": "f32-to-bf16",
1242
+ "nbytes": 33554432,
1243
+ "byteOffset": 0
1244
+ }
1245
+ ],
1246
+ "md5sum": "3e337579a791f65389f4a5a93dc95633"
1247
+ },
1248
+ {
1249
+ "dataPath": "params_shard_43.bin",
1250
+ "format": "raw-shard",
1251
+ "nbytes": 67108864,
1252
+ "records": [
1253
+ {
1254
+ "name": "model.layers.8.mlp.gate_up_proj.weight",
1255
+ "shape": [
1256
+ 16384,
1257
+ 2048
1258
+ ],
1259
+ "dtype": "float32",
1260
+ "format": "f32-to-bf16",
1261
+ "nbytes": 67108864,
1262
+ "byteOffset": 0
1263
+ }
1264
+ ],
1265
+ "md5sum": "79c0f7d1fa6519b778d30ef952522121"
1266
+ },
1267
+ {
1268
+ "dataPath": "params_shard_44.bin",
1269
+ "format": "raw-shard",
1270
+ "nbytes": 20979712,
1271
+ "records": [
1272
+ {
1273
+ "name": "model.layers.7.self_attn.qkv_proj.weight",
1274
+ "shape": [
1275
+ 3072,
1276
+ 2048
1277
+ ],
1278
+ "dtype": "float32",
1279
+ "format": "f32-to-bf16",
1280
+ "nbytes": 12582912,
1281
+ "byteOffset": 0
1282
+ },
1283
+ {
1284
+ "name": "model.layers.7.self_attn.o_proj.weight",
1285
+ "shape": [
1286
+ 2048,
1287
+ 2048
1288
+ ],
1289
+ "dtype": "float32",
1290
+ "format": "f32-to-bf16",
1291
+ "nbytes": 8388608,
1292
+ "byteOffset": 12582912
1293
+ },
1294
+ {
1295
+ "name": "model.layers.8.input_layernorm.weight",
1296
+ "shape": [
1297
+ 2048
1298
+ ],
1299
+ "dtype": "float32",
1300
+ "format": "f32-to-bf16",
1301
+ "nbytes": 4096,
1302
+ "byteOffset": 20971520
1303
+ },
1304
+ {
1305
+ "name": "model.layers.8.post_attention_layernorm.weight",
1306
+ "shape": [
1307
+ 2048
1308
+ ],
1309
+ "dtype": "float32",
1310
+ "format": "f32-to-bf16",
1311
+ "nbytes": 4096,
1312
+ "byteOffset": 20975616
1313
+ }
1314
+ ],
1315
+ "md5sum": "5a57388997fefe29f3788be628569150"
1316
+ },
1317
+ {
1318
+ "dataPath": "params_shard_45.bin",
1319
+ "format": "raw-shard",
1320
+ "nbytes": 33554432,
1321
+ "records": [
1322
+ {
1323
+ "name": "model.layers.9.mlp.down_proj.weight",
1324
+ "shape": [
1325
+ 2048,
1326
+ 8192
1327
+ ],
1328
+ "dtype": "float32",
1329
+ "format": "f32-to-bf16",
1330
+ "nbytes": 33554432,
1331
+ "byteOffset": 0
1332
+ }
1333
+ ],
1334
+ "md5sum": "e2d663fbf063a76d8845a1117834610f"
1335
+ },
1336
+ {
1337
+ "dataPath": "params_shard_46.bin",
1338
+ "format": "raw-shard",
1339
+ "nbytes": 67108864,
1340
+ "records": [
1341
+ {
1342
+ "name": "model.layers.9.mlp.gate_up_proj.weight",
1343
+ "shape": [
1344
+ 16384,
1345
+ 2048
1346
+ ],
1347
+ "dtype": "float32",
1348
+ "format": "f32-to-bf16",
1349
+ "nbytes": 67108864,
1350
+ "byteOffset": 0
1351
+ }
1352
+ ],
1353
+ "md5sum": "50e4119a748b23e67bfa10d1ec8e1f00"
1354
+ },
1355
+ {
1356
+ "dataPath": "params_shard_47.bin",
1357
+ "format": "raw-shard",
1358
+ "nbytes": 20979712,
1359
+ "records": [
1360
+ {
1361
+ "name": "model.layers.8.self_attn.qkv_proj.weight",
1362
+ "shape": [
1363
+ 3072,
1364
+ 2048
1365
+ ],
1366
+ "dtype": "float32",
1367
+ "format": "f32-to-bf16",
1368
+ "nbytes": 12582912,
1369
+ "byteOffset": 0
1370
+ },
1371
+ {
1372
+ "name": "model.layers.8.self_attn.o_proj.weight",
1373
+ "shape": [
1374
+ 2048,
1375
+ 2048
1376
+ ],
1377
+ "dtype": "float32",
1378
+ "format": "f32-to-bf16",
1379
+ "nbytes": 8388608,
1380
+ "byteOffset": 12582912
1381
+ },
1382
+ {
1383
+ "name": "model.layers.9.input_layernorm.weight",
1384
+ "shape": [
1385
+ 2048
1386
+ ],
1387
+ "dtype": "float32",
1388
+ "format": "f32-to-bf16",
1389
+ "nbytes": 4096,
1390
+ "byteOffset": 20971520
1391
+ },
1392
+ {
1393
+ "name": "model.layers.9.post_attention_layernorm.weight",
1394
+ "shape": [
1395
+ 2048
1396
+ ],
1397
+ "dtype": "float32",
1398
+ "format": "f32-to-bf16",
1399
+ "nbytes": 4096,
1400
+ "byteOffset": 20975616
1401
+ }
1402
+ ],
1403
+ "md5sum": "c7f736b657c13231c2c106aab8827f06"
1404
+ },
1405
+ {
1406
+ "dataPath": "params_shard_48.bin",
1407
+ "format": "raw-shard",
1408
+ "nbytes": 20975616,
1409
+ "records": [
1410
+ {
1411
+ "name": "model.layers.9.self_attn.qkv_proj.weight",
1412
+ "shape": [
1413
+ 3072,
1414
+ 2048
1415
+ ],
1416
+ "dtype": "float32",
1417
+ "format": "f32-to-bf16",
1418
+ "nbytes": 12582912,
1419
+ "byteOffset": 0
1420
+ },
1421
+ {
1422
+ "name": "model.layers.9.self_attn.o_proj.weight",
1423
+ "shape": [
1424
+ 2048,
1425
+ 2048
1426
+ ],
1427
+ "dtype": "float32",
1428
+ "format": "f32-to-bf16",
1429
+ "nbytes": 8388608,
1430
+ "byteOffset": 12582912
1431
+ },
1432
+ {
1433
+ "name": "model.norm.weight",
1434
+ "shape": [
1435
+ 2048
1436
+ ],
1437
+ "dtype": "float32",
1438
+ "format": "f32-to-bf16",
1439
+ "nbytes": 4096,
1440
+ "byteOffset": 20971520
1441
+ }
1442
+ ],
1443
+ "md5sum": "2484e69deb5afeb91287322eac29c4f8"
1444
+ }
1445
+ ]
1446
+ }
params_shard_0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b111063841df6851fa28395ca43f6570c81486a16d4830d0026cb62cf14a7b5d
3
+ size 525336576
params_shard_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2751b067ffdfc7c9b3501307f9b0dedf655c2a2ced5c13e67a27b02a497dbb30
3
+ size 33554432
params_shard_10.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fec79d8e298aac768aaeaa71871877fe60291bcc14c236c56c345bbab5a00b2
3
+ size 67108864
params_shard_11.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0f2f6e8a15dd3ef655d7f76d3ef58cd208b5f34058ffe88f43242ddc8cbe955
3
+ size 20979712
params_shard_12.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9eca1a84eb57c68fb3c4b33a1c5f6688a8605c31bd42dd1af5c04063cbfb378
3
+ size 33554432
params_shard_13.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee0d12905ad47315b08849029622d665c60e97396f35b5307c25079c6afefe5b
3
+ size 67108864
params_shard_14.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d3e0f88920cc5b66aa7b0fa5ba87d51e1bbdb21de6c6227f93e0a5a87d9070f
3
+ size 20979712
params_shard_15.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1eda51a024f20c599fbf8dd0041a16419676661415019a4a050722b56dc82a9e
3
+ size 33554432
params_shard_16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4dd082bad03d500d58887307c0f8eba7036173bcfc6cd0b0acd2ad4023231e04
3
+ size 67108864
params_shard_17.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d66d0cc6d0128c6e83b7bff692922161ddb1350d0f467c09ffd024890d26421
3
+ size 20979712
params_shard_18.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc01f83069720ca78330773cc349bd8bd9ce27511a594f0a874ad81dd1892b5c
3
+ size 33554432
params_shard_19.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:619592501a586e71828897bba1dd0447f675ffece9aecf135fb317d40bbf5728
3
+ size 67108864
params_shard_2.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76f4c8e627a778e1ffa7369a3a32981e90f2a104319753aefac4c7565c6fa731
3
+ size 67108864
params_shard_20.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d958501265d40f9ef4cd9ae95c472d2fe74fe0695e7173f8d7369d58cf04683e
3
+ size 20979712
params_shard_21.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:284ca029330d3ce6413f8ecd0c38dd6bd0dd0077d2facf182bbfe2074827e72a
3
+ size 33554432
params_shard_22.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01d69ffac141f44459044e61873d16f8d9fff64f978e4ef06cc7a62dee0e891c
3
+ size 67108864
params_shard_23.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8ffd629d0ed973ae01b4f97d87cecb48ccfebf7ae94f8ec171b51d22046fb7a
3
+ size 20979712
params_shard_24.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6c322e8f8a3641339375d21dfa0e52b6bec3976f5a8ede49f200dec8bc38149
3
+ size 33554432
params_shard_25.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b68611f11765f27e65c60bc5a4391b9e7bffbd62da6a0039c01f8d7e39ee210
3
+ size 67108864
params_shard_26.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23673cab99976fa38cee9b9bc53c9f1d40b5e9686ad21d9b32a7827f3dfccb59
3
+ size 20979712
params_shard_27.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87981c1be8b95a602ed901fa3fc525b4ba83e2e1ef4769b9f382073b408e17f7
3
+ size 33554432
params_shard_28.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:712c6f7d3358449a33d0c42f5563a7df91315bb1c8208392722eb8d329e9bd81
3
+ size 67108864
params_shard_29.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f85f4f5c404a1548aac2a8b6589c26e79e6a70550411b0ad6ee14822122ecc5
3
+ size 20979712
params_shard_3.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a813f0b5f69255660f70c184476fe1e07380eae849ebb82ef774d923b2ec1bc
3
+ size 33554432
params_shard_30.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d40fe993e7ed4a0b51d71b02bf8b16761ce56b36f232f14e3c31c394f1c0804e
3
+ size 33554432
params_shard_31.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b762587add02e7b63f8ffab05a46f30d3cb3fa04710db88f0894a78ce192abb8
3
+ size 67108864
params_shard_32.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f869262e90c2e4ed78d52bb227b57bb6731a3c85b1b52c2f2dbb42aaf6be21c
3
+ size 20979712
params_shard_33.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eac72c3b0d2484e25de526c0869435f2581722547ca2ff19526a9f28ec06a2c4
3
+ size 33554432
params_shard_34.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cf1039cd6f774e0d4a9ff4111a2abce43c46bf07b1f7c472a44d4f780c23663
3
+ size 67108864
params_shard_35.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b98d5d31457778c761b58f658a49afb0076be3f7ecf55aeceec3fae7110da0c
3
+ size 20979712
params_shard_36.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21d3162b619ec7e50359470bca94bb1a9900866179068ef7c0b6d577afd24c69
3
+ size 33554432
params_shard_37.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b154094b220e60f814e0892f52004fd27d05fe46af23e4483fe70d7fff31ae1
3
+ size 67108864
params_shard_38.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d0b88b362fac5b32e9609c0ab26197277b118a43a61bac5210045317a3e9a5e
3
+ size 20979712
params_shard_39.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7c35432c958b20627cb04fa50b56429affc65085baebc8ba3603af31bb928f2
3
+ size 33554432
params_shard_4.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bb6ade0e13361d86784563d721c7141e58f9b17cbd5df261de1a0e861a16b7a
3
+ size 67108864
params_shard_40.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:185db985b99858d6f3beeace72f38e639528e0b943a8a1cd81b101d3b3cae471
3
+ size 67108864
params_shard_41.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b74abe9959a32f904b1156963e46ae08e33f317055c2653d5a639bc4d2959e79
3
+ size 20979712
params_shard_42.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f665309b6599288f471acd10c5c6d109799dfa1087693b549fdf76fdf735843c
3
+ size 33554432
params_shard_43.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0c4cc3b469adfe6521251ec0afdfbed7fc1981b9b80a6bbb8f6eed7f04795da
3
+ size 67108864
params_shard_44.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d36b1e0c801344cfcaefa2f98ce5e9187b17d4ec42b042d924eed4ceaaeea72b
3
+ size 20979712
params_shard_45.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1607dace1e9c113055b5d232141dd8938d84fc11c9e82ac50ab68830504f5da7
3
+ size 33554432
params_shard_46.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:678cc0c4b0deccc816101952e4129511543cefb40a47af72a34cf7d34279c16d
3
+ size 67108864
params_shard_47.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cedef3e2ff9d4c9778dda120ab80c82ab7fb09a360ad33b648d1cf98563aecf1
3
+ size 20979712
params_shard_48.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e2cf0d445902f6dfcff13a47076c2f908adc1b2b99b276cd1eed41ec6529c76
3
+ size 20975616
params_shard_5.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21c5a7920cfcc502e7e7c8faf63c7a74826a9a6636df11a703c86f700e7ac0c9
3
+ size 20987904
params_shard_6.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39d89336af2cf06d717decc432cdf30706c798f77af5699a5de78ff08910091b
3
+ size 33554432
params_shard_7.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:155426c2069b8426af85978fb0ae63d6e6e09969144fe2758966d984abd42a51
3
+ size 67108864
params_shard_8.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97453c83cc7bc80bb0e705aa8134d91cad229e2528e3675161b9cf02cd2bf243
3
+ size 20979712