geonmin-kim commited on
Commit
a78d3ba
1 Parent(s): 4789993

Upload folder using huggingface_hub

Browse files
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
mlc-chat-config.json ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "0.1.0",
3
+ "model_type": "qwen2",
4
+ "quantization": "q0f16",
5
+ "model_config": {
6
+ "hidden_act": "silu",
7
+ "hidden_size": 3584,
8
+ "intermediate_size": 18944,
9
+ "num_attention_heads": 28,
10
+ "num_hidden_layers": 4,
11
+ "num_key_value_heads": 4,
12
+ "rms_norm_eps": 1e-06,
13
+ "rope_theta": 1000000.0,
14
+ "vocab_size": 152064,
15
+ "tie_word_embeddings": false,
16
+ "context_window_size": 32768,
17
+ "prefill_chunk_size": 8192,
18
+ "tensor_parallel_shards": 1,
19
+ "head_dim": 128,
20
+ "dtype": "float32",
21
+ "max_batch_size": 128
22
+ },
23
+ "vocab_size": 152064,
24
+ "context_window_size": 32768,
25
+ "sliding_window_size": -1,
26
+ "prefill_chunk_size": 8192,
27
+ "attention_sink_size": -1,
28
+ "tensor_parallel_shards": 1,
29
+ "pipeline_parallel_stages": 1,
30
+ "temperature": 0.7,
31
+ "presence_penalty": 0.0,
32
+ "frequency_penalty": 0.0,
33
+ "repetition_penalty": 1.05,
34
+ "top_p": 0.8,
35
+ "tokenizer_files": [
36
+ "tokenizer.json",
37
+ "vocab.json",
38
+ "merges.txt",
39
+ "tokenizer_config.json"
40
+ ],
41
+ "tokenizer_info": {
42
+ "token_postproc_method": "byte_level",
43
+ "prepend_space_in_encode": false,
44
+ "strip_space_in_decode": false
45
+ },
46
+ "conv_template": {
47
+ "name": "qwen2",
48
+ "system_template": "<|im_start|>system\n{system_message}<|im_end|>\n",
49
+ "system_message": "You are a helpful assistant.",
50
+ "system_prefix_token_ids": null,
51
+ "add_role_after_system_message": true,
52
+ "roles": {
53
+ "user": "<|im_start|>user",
54
+ "assistant": "<|im_start|>assistant"
55
+ },
56
+ "role_templates": {
57
+ "user": "{user_message}",
58
+ "assistant": "{assistant_message}",
59
+ "tool": "{tool_message}"
60
+ },
61
+ "messages": [],
62
+ "seps": [
63
+ "<|im_end|>\n"
64
+ ],
65
+ "role_content_sep": "\n",
66
+ "role_empty_sep": "\n",
67
+ "stop_str": [
68
+ "<|endoftext|>",
69
+ "<|im_end|>"
70
+ ],
71
+ "stop_token_ids": [
72
+ 151643,
73
+ 151645
74
+ ],
75
+ "function_string": "",
76
+ "use_function_calling": false
77
+ },
78
+ "pad_token_id": 151643,
79
+ "bos_token_id": 151643,
80
+ "eos_token_id": [
81
+ 151645,
82
+ 151643
83
+ ]
84
+ }
ndarray-cache.json ADDED
@@ -0,0 +1,481 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "ParamSize": 31,
4
+ "ParamBytes": 4044459008.0,
5
+ "BitsPerParam": 16.0
6
+ },
7
+ "records": [
8
+ {
9
+ "dataPath": "params_shard_0.bin",
10
+ "format": "raw-shard",
11
+ "nbytes": 1089994752,
12
+ "records": [
13
+ {
14
+ "name": "model.embed_tokens.weight",
15
+ "shape": [
16
+ 152064,
17
+ 3584
18
+ ],
19
+ "dtype": "float16",
20
+ "format": "f32-to-bf16",
21
+ "nbytes": 1089994752,
22
+ "byteOffset": 0
23
+ }
24
+ ],
25
+ "md5sum": "1096c3043fce52d07e686e7d70c5687e"
26
+ },
27
+ {
28
+ "dataPath": "params_shard_1.bin",
29
+ "format": "raw-shard",
30
+ "nbytes": 25690112,
31
+ "records": [
32
+ {
33
+ "name": "model.layers.0.self_attn.o_proj.weight",
34
+ "shape": [
35
+ 3584,
36
+ 3584
37
+ ],
38
+ "dtype": "float16",
39
+ "format": "f32-to-bf16",
40
+ "nbytes": 25690112,
41
+ "byteOffset": 0
42
+ }
43
+ ],
44
+ "md5sum": "fa12518f256dab0e627e73cd62f21382"
45
+ },
46
+ {
47
+ "dataPath": "params_shard_2.bin",
48
+ "format": "raw-shard",
49
+ "nbytes": 271581184,
50
+ "records": [
51
+ {
52
+ "name": "model.layers.0.mlp.gate_up_proj.weight",
53
+ "shape": [
54
+ 37888,
55
+ 3584
56
+ ],
57
+ "dtype": "float16",
58
+ "format": "f32-to-bf16",
59
+ "nbytes": 271581184,
60
+ "byteOffset": 0
61
+ }
62
+ ],
63
+ "md5sum": "7c93523d9c9ec575cc22618781e70be9"
64
+ },
65
+ {
66
+ "dataPath": "params_shard_3.bin",
67
+ "format": "raw-shard",
68
+ "nbytes": 135790592,
69
+ "records": [
70
+ {
71
+ "name": "model.layers.0.mlp.down_proj.weight",
72
+ "shape": [
73
+ 3584,
74
+ 18944
75
+ ],
76
+ "dtype": "float16",
77
+ "format": "f32-to-bf16",
78
+ "nbytes": 135790592,
79
+ "byteOffset": 0
80
+ }
81
+ ],
82
+ "md5sum": "24b1add61255660dbf0e160b5f01eae0"
83
+ },
84
+ {
85
+ "dataPath": "params_shard_4.bin",
86
+ "format": "raw-shard",
87
+ "nbytes": 33030144,
88
+ "records": [
89
+ {
90
+ "name": "model.layers.1.self_attn.c_attn.weight",
91
+ "shape": [
92
+ 4608,
93
+ 3584
94
+ ],
95
+ "dtype": "float16",
96
+ "format": "f32-to-bf16",
97
+ "nbytes": 33030144,
98
+ "byteOffset": 0
99
+ }
100
+ ],
101
+ "md5sum": "c0eb62c7ee3c6ab51d17be1cedd4f64f"
102
+ },
103
+ {
104
+ "dataPath": "params_shard_5.bin",
105
+ "format": "raw-shard",
106
+ "nbytes": 25690112,
107
+ "records": [
108
+ {
109
+ "name": "model.layers.1.self_attn.o_proj.weight",
110
+ "shape": [
111
+ 3584,
112
+ 3584
113
+ ],
114
+ "dtype": "float16",
115
+ "format": "f32-to-bf16",
116
+ "nbytes": 25690112,
117
+ "byteOffset": 0
118
+ }
119
+ ],
120
+ "md5sum": "9722bac502585ec42f5af5a0a7b00046"
121
+ },
122
+ {
123
+ "dataPath": "params_shard_6.bin",
124
+ "format": "raw-shard",
125
+ "nbytes": 271581184,
126
+ "records": [
127
+ {
128
+ "name": "model.layers.1.mlp.gate_up_proj.weight",
129
+ "shape": [
130
+ 37888,
131
+ 3584
132
+ ],
133
+ "dtype": "float16",
134
+ "format": "f32-to-bf16",
135
+ "nbytes": 271581184,
136
+ "byteOffset": 0
137
+ }
138
+ ],
139
+ "md5sum": "a19d16581a1cbb884505e130b635797e"
140
+ },
141
+ {
142
+ "dataPath": "params_shard_7.bin",
143
+ "format": "raw-shard",
144
+ "nbytes": 135790592,
145
+ "records": [
146
+ {
147
+ "name": "model.layers.1.mlp.down_proj.weight",
148
+ "shape": [
149
+ 3584,
150
+ 18944
151
+ ],
152
+ "dtype": "float16",
153
+ "format": "f32-to-bf16",
154
+ "nbytes": 135790592,
155
+ "byteOffset": 0
156
+ }
157
+ ],
158
+ "md5sum": "14ccce743b3fc0660e2b0132db110ba5"
159
+ },
160
+ {
161
+ "dataPath": "params_shard_8.bin",
162
+ "format": "raw-shard",
163
+ "nbytes": 33030144,
164
+ "records": [
165
+ {
166
+ "name": "model.layers.2.self_attn.c_attn.weight",
167
+ "shape": [
168
+ 4608,
169
+ 3584
170
+ ],
171
+ "dtype": "float16",
172
+ "format": "f32-to-bf16",
173
+ "nbytes": 33030144,
174
+ "byteOffset": 0
175
+ }
176
+ ],
177
+ "md5sum": "f2a26b2d7170a6ed2cf070f9c0815f33"
178
+ },
179
+ {
180
+ "dataPath": "params_shard_9.bin",
181
+ "format": "raw-shard",
182
+ "nbytes": 25690112,
183
+ "records": [
184
+ {
185
+ "name": "model.layers.2.self_attn.o_proj.weight",
186
+ "shape": [
187
+ 3584,
188
+ 3584
189
+ ],
190
+ "dtype": "float16",
191
+ "format": "f32-to-bf16",
192
+ "nbytes": 25690112,
193
+ "byteOffset": 0
194
+ }
195
+ ],
196
+ "md5sum": "ff56b50721f25ec8693491f6776565bc"
197
+ },
198
+ {
199
+ "dataPath": "params_shard_10.bin",
200
+ "format": "raw-shard",
201
+ "nbytes": 271581184,
202
+ "records": [
203
+ {
204
+ "name": "model.layers.2.mlp.gate_up_proj.weight",
205
+ "shape": [
206
+ 37888,
207
+ 3584
208
+ ],
209
+ "dtype": "float16",
210
+ "format": "f32-to-bf16",
211
+ "nbytes": 271581184,
212
+ "byteOffset": 0
213
+ }
214
+ ],
215
+ "md5sum": "1de3679fdbffb14849545694266a49a4"
216
+ },
217
+ {
218
+ "dataPath": "params_shard_11.bin",
219
+ "format": "raw-shard",
220
+ "nbytes": 135790592,
221
+ "records": [
222
+ {
223
+ "name": "model.layers.2.mlp.down_proj.weight",
224
+ "shape": [
225
+ 3584,
226
+ 18944
227
+ ],
228
+ "dtype": "float16",
229
+ "format": "f32-to-bf16",
230
+ "nbytes": 135790592,
231
+ "byteOffset": 0
232
+ }
233
+ ],
234
+ "md5sum": "a1ac20b3b6a4dbac225c68dd1e484e0f"
235
+ },
236
+ {
237
+ "dataPath": "params_shard_12.bin",
238
+ "format": "raw-shard",
239
+ "nbytes": 33030144,
240
+ "records": [
241
+ {
242
+ "name": "model.layers.3.self_attn.c_attn.weight",
243
+ "shape": [
244
+ 4608,
245
+ 3584
246
+ ],
247
+ "dtype": "float16",
248
+ "format": "f32-to-bf16",
249
+ "nbytes": 33030144,
250
+ "byteOffset": 0
251
+ }
252
+ ],
253
+ "md5sum": "b87a624913f69fc8783afa80e04d5655"
254
+ },
255
+ {
256
+ "dataPath": "params_shard_13.bin",
257
+ "format": "raw-shard",
258
+ "nbytes": 25690112,
259
+ "records": [
260
+ {
261
+ "name": "model.layers.3.self_attn.o_proj.weight",
262
+ "shape": [
263
+ 3584,
264
+ 3584
265
+ ],
266
+ "dtype": "float16",
267
+ "format": "f32-to-bf16",
268
+ "nbytes": 25690112,
269
+ "byteOffset": 0
270
+ }
271
+ ],
272
+ "md5sum": "13d09067604b7f0c1d63c98937b4e174"
273
+ },
274
+ {
275
+ "dataPath": "params_shard_14.bin",
276
+ "format": "raw-shard",
277
+ "nbytes": 271581184,
278
+ "records": [
279
+ {
280
+ "name": "model.layers.3.mlp.gate_up_proj.weight",
281
+ "shape": [
282
+ 37888,
283
+ 3584
284
+ ],
285
+ "dtype": "float16",
286
+ "format": "f32-to-bf16",
287
+ "nbytes": 271581184,
288
+ "byteOffset": 0
289
+ }
290
+ ],
291
+ "md5sum": "b6e2d8049e71f27bdd1a704f6e8f537a"
292
+ },
293
+ {
294
+ "dataPath": "params_shard_15.bin",
295
+ "format": "raw-shard",
296
+ "nbytes": 135790592,
297
+ "records": [
298
+ {
299
+ "name": "model.layers.3.mlp.down_proj.weight",
300
+ "shape": [
301
+ 3584,
302
+ 18944
303
+ ],
304
+ "dtype": "float16",
305
+ "format": "f32-to-bf16",
306
+ "nbytes": 135790592,
307
+ "byteOffset": 0
308
+ }
309
+ ],
310
+ "md5sum": "165dc6852e9590980d6b93e38b55e09f"
311
+ },
312
+ {
313
+ "dataPath": "params_shard_16.bin",
314
+ "format": "raw-shard",
315
+ "nbytes": 1089994752,
316
+ "records": [
317
+ {
318
+ "name": "lm_head.weight",
319
+ "shape": [
320
+ 152064,
321
+ 3584
322
+ ],
323
+ "dtype": "float16",
324
+ "format": "f32-to-bf16",
325
+ "nbytes": 1089994752,
326
+ "byteOffset": 0
327
+ }
328
+ ],
329
+ "md5sum": "b0a68a612362cfcfb3d746bbd9bbb73d"
330
+ },
331
+ {
332
+ "dataPath": "params_shard_17.bin",
333
+ "format": "raw-shard",
334
+ "nbytes": 33131520,
335
+ "records": [
336
+ {
337
+ "name": "model.layers.0.self_attn.c_attn.weight",
338
+ "shape": [
339
+ 4608,
340
+ 3584
341
+ ],
342
+ "dtype": "float16",
343
+ "format": "f32-to-bf16",
344
+ "nbytes": 33030144,
345
+ "byteOffset": 0
346
+ },
347
+ {
348
+ "name": "model.layers.0.self_attn.c_attn.bias",
349
+ "shape": [
350
+ 4608
351
+ ],
352
+ "dtype": "float16",
353
+ "format": "f32-to-bf16",
354
+ "nbytes": 9216,
355
+ "byteOffset": 33030144
356
+ },
357
+ {
358
+ "name": "model.layers.0.input_layernorm.weight",
359
+ "shape": [
360
+ 3584
361
+ ],
362
+ "dtype": "float16",
363
+ "format": "f32-to-bf16",
364
+ "nbytes": 7168,
365
+ "byteOffset": 33039360
366
+ },
367
+ {
368
+ "name": "model.layers.0.post_attention_layernorm.weight",
369
+ "shape": [
370
+ 3584
371
+ ],
372
+ "dtype": "float16",
373
+ "format": "f32-to-bf16",
374
+ "nbytes": 7168,
375
+ "byteOffset": 33046528
376
+ },
377
+ {
378
+ "name": "model.layers.1.self_attn.c_attn.bias",
379
+ "shape": [
380
+ 4608
381
+ ],
382
+ "dtype": "float16",
383
+ "format": "f32-to-bf16",
384
+ "nbytes": 9216,
385
+ "byteOffset": 33053696
386
+ },
387
+ {
388
+ "name": "model.layers.1.input_layernorm.weight",
389
+ "shape": [
390
+ 3584
391
+ ],
392
+ "dtype": "float16",
393
+ "format": "f32-to-bf16",
394
+ "nbytes": 7168,
395
+ "byteOffset": 33062912
396
+ },
397
+ {
398
+ "name": "model.layers.1.post_attention_layernorm.weight",
399
+ "shape": [
400
+ 3584
401
+ ],
402
+ "dtype": "float16",
403
+ "format": "f32-to-bf16",
404
+ "nbytes": 7168,
405
+ "byteOffset": 33070080
406
+ },
407
+ {
408
+ "name": "model.layers.2.self_attn.c_attn.bias",
409
+ "shape": [
410
+ 4608
411
+ ],
412
+ "dtype": "float16",
413
+ "format": "f32-to-bf16",
414
+ "nbytes": 9216,
415
+ "byteOffset": 33077248
416
+ },
417
+ {
418
+ "name": "model.layers.2.input_layernorm.weight",
419
+ "shape": [
420
+ 3584
421
+ ],
422
+ "dtype": "float16",
423
+ "format": "f32-to-bf16",
424
+ "nbytes": 7168,
425
+ "byteOffset": 33086464
426
+ },
427
+ {
428
+ "name": "model.layers.2.post_attention_layernorm.weight",
429
+ "shape": [
430
+ 3584
431
+ ],
432
+ "dtype": "float16",
433
+ "format": "f32-to-bf16",
434
+ "nbytes": 7168,
435
+ "byteOffset": 33093632
436
+ },
437
+ {
438
+ "name": "model.layers.3.self_attn.c_attn.bias",
439
+ "shape": [
440
+ 4608
441
+ ],
442
+ "dtype": "float16",
443
+ "format": "f32-to-bf16",
444
+ "nbytes": 9216,
445
+ "byteOffset": 33100800
446
+ },
447
+ {
448
+ "name": "model.layers.3.input_layernorm.weight",
449
+ "shape": [
450
+ 3584
451
+ ],
452
+ "dtype": "float16",
453
+ "format": "f32-to-bf16",
454
+ "nbytes": 7168,
455
+ "byteOffset": 33110016
456
+ },
457
+ {
458
+ "name": "model.layers.3.post_attention_layernorm.weight",
459
+ "shape": [
460
+ 3584
461
+ ],
462
+ "dtype": "float16",
463
+ "format": "f32-to-bf16",
464
+ "nbytes": 7168,
465
+ "byteOffset": 33117184
466
+ },
467
+ {
468
+ "name": "model.norm.weight",
469
+ "shape": [
470
+ 3584
471
+ ],
472
+ "dtype": "float16",
473
+ "format": "f32-to-bf16",
474
+ "nbytes": 7168,
475
+ "byteOffset": 33124352
476
+ }
477
+ ],
478
+ "md5sum": "418e357fec9f143413cbad405043e1a3"
479
+ }
480
+ ]
481
+ }
params_shard_0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36da690306d53161bf9bafc286ba6e3f7cdf86087e4108f222808eef912b95e9
3
+ size 1089994752
params_shard_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a95701b4ac52cf96783ae59da8cc4bda3108a3735f7acbeb8dc94dc090162602
3
+ size 25690112
params_shard_10.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c97007a80f5d663c8f29200aecf25dc4fe86d1577f5419007097a1704ec31ad
3
+ size 271581184
params_shard_11.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bed4ca735d520d7b65d3c3749e3c7ab57ba8084f293cb360e7ad9b7b28a690e
3
+ size 135790592
params_shard_12.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:243b5570437c89ec3390cfd78d7516be819dd63e47d63dca30168f92b8508175
3
+ size 33030144
params_shard_13.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f59e59ab6e7aac7019a2a49cc5edc0ee345305c4a3df8d44f223c2e250ee86ac
3
+ size 25690112
params_shard_14.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68124542428386161db487e9847876100a03a9439b4b498c97b50ddccac01fd4
3
+ size 271581184
params_shard_15.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:146c9e250131dfbc9dbd9f9471725dd52a0e8ccaa871fc50f27630f69122b71b
3
+ size 135790592
params_shard_16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e9086fecf0ed82fb9bacf4c0d7b0df7bccbf6913c96b66d0aea0d584f9b6a07
3
+ size 1089994752
params_shard_17.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebfc478a389b18f545317ef4ac22c0e3b41c7a7256a990298074a17571d86595
3
+ size 33131520
params_shard_2.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9395feaec3fe572f6a6897e71ed4d03bd1c14af64ab276b004be27717e229299
3
+ size 271581184
params_shard_3.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6069620d271b1b3ab4c7b9de62818c3696473df8e3bc4c25bddeb5eae920475
3
+ size 135790592
params_shard_4.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b45d4d3d3fcaa4fa0f37f548f3b621267c9dfae22139e6db1e0b238482aa9a7
3
+ size 33030144
params_shard_5.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eeb491ebaf8a3548196dcb0f9d1a6b6daf4026f58dd5122ad00682660dc43e38
3
+ size 25690112
params_shard_6.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c68843cacfd63f02cdc10845444bbb5e231402b8f6050e5bd44aebbc4a5141a
3
+ size 271581184
params_shard_7.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:882c9177364e87fc41a818b4c1483d37693c8877e8d9772f37522851ad7faf49
3
+ size 135790592
params_shard_8.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:935739307f975adf6512eb1192068918a29675314f7e8fdffa267d14591c60a6
3
+ size 33030144
params_shard_9.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea386c8e0b5cc577e38921f05ea61c7332d26abec37350b8352740c7f5ffa697
3
+ size 25690112
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "151643": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "151644": {
13
+ "content": "<|im_start|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "151645": {
21
+ "content": "<|im_end|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ }
28
+ },
29
+ "additional_special_tokens": ["<|im_start|>", "<|im_end|>"],
30
+ "bos_token": null,
31
+ "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
32
+ "clean_up_tokenization_spaces": false,
33
+ "eos_token": "<|im_end|>",
34
+ "errors": "replace",
35
+ "model_max_length": 131072,
36
+ "pad_token": "<|endoftext|>",
37
+ "split_special_tokens": false,
38
+ "tokenizer_class": "Qwen2Tokenizer",
39
+ "unk_token": null
40
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff