geonmin-kim committed on
Commit
c84b8ab
1 Parent(s): b215bf1

Upload folder using huggingface_hub

Browse files
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
mlc-chat-config.json ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "0.1.0",
3
+ "model_type": "qwen2",
4
+ "quantization": "q0f16",
5
+ "model_config": {
6
+ "hidden_act": "silu",
7
+ "hidden_size": 3584,
8
+ "intermediate_size": 18944,
9
+ "num_attention_heads": 28,
10
+ "num_hidden_layers": 4,
11
+ "num_key_value_heads": 4,
12
+ "rms_norm_eps": 1e-06,
13
+ "rope_theta": 1000000.0,
14
+ "vocab_size": 136488,
15
+ "tie_word_embeddings": false,
16
+ "context_window_size": 256,
17
+ "prefill_chunk_size": 128,
18
+ "tensor_parallel_shards": 1,
19
+ "head_dim": 128,
20
+ "dtype": "float32",
21
+ "max_batch_size": 1
22
+ },
23
+ "vocab_size": 136488,
24
+ "context_window_size": 256,
25
+ "sliding_window_size": -1,
26
+ "prefill_chunk_size": 128,
27
+ "attention_sink_size": -1,
28
+ "tensor_parallel_shards": 1,
29
+ "pipeline_parallel_stages": 1,
30
+ "temperature": 0.7,
31
+ "presence_penalty": 0.0,
32
+ "frequency_penalty": 0.0,
33
+ "repetition_penalty": 1.05,
34
+ "top_p": 0.8,
35
+ "tokenizer_files": [
36
+ "tokenizer.json",
37
+ "vocab.json",
38
+ "merges.txt",
39
+ "tokenizer_config.json"
40
+ ],
41
+ "tokenizer_info": {
42
+ "token_postproc_method": "byte_level",
43
+ "prepend_space_in_encode": false,
44
+ "strip_space_in_decode": false
45
+ },
46
+ "conv_template": {
47
+ "name": "qwen2",
48
+ "system_template": "<|im_start|>system\n{system_message}<|im_end|>\n",
49
+ "system_message": "You are a helpful assistant.",
50
+ "system_prefix_token_ids": null,
51
+ "add_role_after_system_message": true,
52
+ "roles": {
53
+ "user": "<|im_start|>user",
54
+ "assistant": "<|im_start|>assistant"
55
+ },
56
+ "role_templates": {
57
+ "user": "{user_message}",
58
+ "assistant": "{assistant_message}",
59
+ "tool": "{tool_message}"
60
+ },
61
+ "messages": [],
62
+ "seps": [
63
+ "<|im_end|>\n"
64
+ ],
65
+ "role_content_sep": "\n",
66
+ "role_empty_sep": "\n",
67
+ "stop_str": [
68
+ "<|endoftext|>",
69
+ "<|im_end|>"
70
+ ],
71
+ "stop_token_ids": [
72
+ 136478,
73
+ 136480
74
+ ],
75
+ "function_string": "",
76
+ "use_function_calling": false
77
+ },
78
+ "pad_token_id": 136478,
79
+ "bos_token_id": 136478,
80
+ "eos_token_id": 136480
81
+ }
ndarray-cache.json ADDED
@@ -0,0 +1,481 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "ParamSize": 31,
4
+ "ParamBytes": 3821161472.0,
5
+ "BitsPerParam": 16.0
6
+ },
7
+ "records": [
8
+ {
9
+ "dataPath": "params_shard_0.bin",
10
+ "format": "raw-shard",
11
+ "nbytes": 978345984,
12
+ "records": [
13
+ {
14
+ "name": "lm_head.weight",
15
+ "shape": [
16
+ 136488,
17
+ 3584
18
+ ],
19
+ "dtype": "float16",
20
+ "format": "f32-to-bf16",
21
+ "nbytes": 978345984,
22
+ "byteOffset": 0
23
+ }
24
+ ],
25
+ "md5sum": "f24bc2a390a565ec24307333d9aa9b5b"
26
+ },
27
+ {
28
+ "dataPath": "params_shard_1.bin",
29
+ "format": "raw-shard",
30
+ "nbytes": 135790592,
31
+ "records": [
32
+ {
33
+ "name": "model.layers.3.mlp.down_proj.weight",
34
+ "shape": [
35
+ 3584,
36
+ 18944
37
+ ],
38
+ "dtype": "float16",
39
+ "format": "f32-to-bf16",
40
+ "nbytes": 135790592,
41
+ "byteOffset": 0
42
+ }
43
+ ],
44
+ "md5sum": "8463225b5d39d8aa6befc7e394ff9a80"
45
+ },
46
+ {
47
+ "dataPath": "params_shard_2.bin",
48
+ "format": "raw-shard",
49
+ "nbytes": 271581184,
50
+ "records": [
51
+ {
52
+ "name": "model.layers.3.mlp.gate_up_proj.weight",
53
+ "shape": [
54
+ 37888,
55
+ 3584
56
+ ],
57
+ "dtype": "float16",
58
+ "format": "f32-to-bf16",
59
+ "nbytes": 271581184,
60
+ "byteOffset": 0
61
+ }
62
+ ],
63
+ "md5sum": "ce9430ea30508f4c11646d9f6987c7d5"
64
+ },
65
+ {
66
+ "dataPath": "params_shard_3.bin",
67
+ "format": "raw-shard",
68
+ "nbytes": 978345984,
69
+ "records": [
70
+ {
71
+ "name": "model.embed_tokens.weight",
72
+ "shape": [
73
+ 136488,
74
+ 3584
75
+ ],
76
+ "dtype": "float16",
77
+ "format": "f32-to-bf16",
78
+ "nbytes": 978345984,
79
+ "byteOffset": 0
80
+ }
81
+ ],
82
+ "md5sum": "2034469190648e591720406f8f5d6931"
83
+ },
84
+ {
85
+ "dataPath": "params_shard_4.bin",
86
+ "format": "raw-shard",
87
+ "nbytes": 135790592,
88
+ "records": [
89
+ {
90
+ "name": "model.layers.0.mlp.down_proj.weight",
91
+ "shape": [
92
+ 3584,
93
+ 18944
94
+ ],
95
+ "dtype": "float16",
96
+ "format": "f32-to-bf16",
97
+ "nbytes": 135790592,
98
+ "byteOffset": 0
99
+ }
100
+ ],
101
+ "md5sum": "d8c5636ba8c54a1bf9a9be52028edd24"
102
+ },
103
+ {
104
+ "dataPath": "params_shard_5.bin",
105
+ "format": "raw-shard",
106
+ "nbytes": 271581184,
107
+ "records": [
108
+ {
109
+ "name": "model.layers.0.mlp.gate_up_proj.weight",
110
+ "shape": [
111
+ 37888,
112
+ 3584
113
+ ],
114
+ "dtype": "float16",
115
+ "format": "f32-to-bf16",
116
+ "nbytes": 271581184,
117
+ "byteOffset": 0
118
+ }
119
+ ],
120
+ "md5sum": "eab8149d005afce177602e3a0502e2d5"
121
+ },
122
+ {
123
+ "dataPath": "params_shard_6.bin",
124
+ "format": "raw-shard",
125
+ "nbytes": 25690112,
126
+ "records": [
127
+ {
128
+ "name": "model.layers.0.self_attn.o_proj.weight",
129
+ "shape": [
130
+ 3584,
131
+ 3584
132
+ ],
133
+ "dtype": "float16",
134
+ "format": "f32-to-bf16",
135
+ "nbytes": 25690112,
136
+ "byteOffset": 0
137
+ }
138
+ ],
139
+ "md5sum": "23278f4c2c2bd26e27c57bf98ad4ef8c"
140
+ },
141
+ {
142
+ "dataPath": "params_shard_7.bin",
143
+ "format": "raw-shard",
144
+ "nbytes": 135790592,
145
+ "records": [
146
+ {
147
+ "name": "model.layers.1.mlp.down_proj.weight",
148
+ "shape": [
149
+ 3584,
150
+ 18944
151
+ ],
152
+ "dtype": "float16",
153
+ "format": "f32-to-bf16",
154
+ "nbytes": 135790592,
155
+ "byteOffset": 0
156
+ }
157
+ ],
158
+ "md5sum": "ccb811f73867d1f386520f524f2c01a2"
159
+ },
160
+ {
161
+ "dataPath": "params_shard_8.bin",
162
+ "format": "raw-shard",
163
+ "nbytes": 271581184,
164
+ "records": [
165
+ {
166
+ "name": "model.layers.1.mlp.gate_up_proj.weight",
167
+ "shape": [
168
+ 37888,
169
+ 3584
170
+ ],
171
+ "dtype": "float16",
172
+ "format": "f32-to-bf16",
173
+ "nbytes": 271581184,
174
+ "byteOffset": 0
175
+ }
176
+ ],
177
+ "md5sum": "92a7bea5fcddf575091c43b1c4df714c"
178
+ },
179
+ {
180
+ "dataPath": "params_shard_9.bin",
181
+ "format": "raw-shard",
182
+ "nbytes": 33030144,
183
+ "records": [
184
+ {
185
+ "name": "model.layers.1.self_attn.c_attn.weight",
186
+ "shape": [
187
+ 4608,
188
+ 3584
189
+ ],
190
+ "dtype": "float16",
191
+ "format": "f32-to-bf16",
192
+ "nbytes": 33030144,
193
+ "byteOffset": 0
194
+ }
195
+ ],
196
+ "md5sum": "c03154c2e7e28390bd1554a72a582034"
197
+ },
198
+ {
199
+ "dataPath": "params_shard_10.bin",
200
+ "format": "raw-shard",
201
+ "nbytes": 25690112,
202
+ "records": [
203
+ {
204
+ "name": "model.layers.1.self_attn.o_proj.weight",
205
+ "shape": [
206
+ 3584,
207
+ 3584
208
+ ],
209
+ "dtype": "float16",
210
+ "format": "f32-to-bf16",
211
+ "nbytes": 25690112,
212
+ "byteOffset": 0
213
+ }
214
+ ],
215
+ "md5sum": "d46398b81504bd2d82830108afdd8407"
216
+ },
217
+ {
218
+ "dataPath": "params_shard_11.bin",
219
+ "format": "raw-shard",
220
+ "nbytes": 135790592,
221
+ "records": [
222
+ {
223
+ "name": "model.layers.2.mlp.down_proj.weight",
224
+ "shape": [
225
+ 3584,
226
+ 18944
227
+ ],
228
+ "dtype": "float16",
229
+ "format": "f32-to-bf16",
230
+ "nbytes": 135790592,
231
+ "byteOffset": 0
232
+ }
233
+ ],
234
+ "md5sum": "dd79708597c0102a78c1c7487114aafe"
235
+ },
236
+ {
237
+ "dataPath": "params_shard_12.bin",
238
+ "format": "raw-shard",
239
+ "nbytes": 271581184,
240
+ "records": [
241
+ {
242
+ "name": "model.layers.2.mlp.gate_up_proj.weight",
243
+ "shape": [
244
+ 37888,
245
+ 3584
246
+ ],
247
+ "dtype": "float16",
248
+ "format": "f32-to-bf16",
249
+ "nbytes": 271581184,
250
+ "byteOffset": 0
251
+ }
252
+ ],
253
+ "md5sum": "56b5611e7e548cbe8da292871a122b53"
254
+ },
255
+ {
256
+ "dataPath": "params_shard_13.bin",
257
+ "format": "raw-shard",
258
+ "nbytes": 33030144,
259
+ "records": [
260
+ {
261
+ "name": "model.layers.2.self_attn.c_attn.weight",
262
+ "shape": [
263
+ 4608,
264
+ 3584
265
+ ],
266
+ "dtype": "float16",
267
+ "format": "f32-to-bf16",
268
+ "nbytes": 33030144,
269
+ "byteOffset": 0
270
+ }
271
+ ],
272
+ "md5sum": "8af6d905b41911171901c0c9b1309afb"
273
+ },
274
+ {
275
+ "dataPath": "params_shard_14.bin",
276
+ "format": "raw-shard",
277
+ "nbytes": 25690112,
278
+ "records": [
279
+ {
280
+ "name": "model.layers.2.self_attn.o_proj.weight",
281
+ "shape": [
282
+ 3584,
283
+ 3584
284
+ ],
285
+ "dtype": "float16",
286
+ "format": "f32-to-bf16",
287
+ "nbytes": 25690112,
288
+ "byteOffset": 0
289
+ }
290
+ ],
291
+ "md5sum": "126e17f7b67d299092e34621d3d01ba5"
292
+ },
293
+ {
294
+ "dataPath": "params_shard_15.bin",
295
+ "format": "raw-shard",
296
+ "nbytes": 33030144,
297
+ "records": [
298
+ {
299
+ "name": "model.layers.3.self_attn.c_attn.weight",
300
+ "shape": [
301
+ 4608,
302
+ 3584
303
+ ],
304
+ "dtype": "float16",
305
+ "format": "f32-to-bf16",
306
+ "nbytes": 33030144,
307
+ "byteOffset": 0
308
+ }
309
+ ],
310
+ "md5sum": "10c68f4a89d8c9e5ac3fd00b5ce2ee47"
311
+ },
312
+ {
313
+ "dataPath": "params_shard_16.bin",
314
+ "format": "raw-shard",
315
+ "nbytes": 25690112,
316
+ "records": [
317
+ {
318
+ "name": "model.layers.3.self_attn.o_proj.weight",
319
+ "shape": [
320
+ 3584,
321
+ 3584
322
+ ],
323
+ "dtype": "float16",
324
+ "format": "f32-to-bf16",
325
+ "nbytes": 25690112,
326
+ "byteOffset": 0
327
+ }
328
+ ],
329
+ "md5sum": "e655468aacdcefbc5172993aea2634e2"
330
+ },
331
+ {
332
+ "dataPath": "params_shard_17.bin",
333
+ "format": "raw-shard",
334
+ "nbytes": 33131520,
335
+ "records": [
336
+ {
337
+ "name": "model.layers.3.input_layernorm.weight",
338
+ "shape": [
339
+ 3584
340
+ ],
341
+ "dtype": "float16",
342
+ "format": "f32-to-bf16",
343
+ "nbytes": 7168,
344
+ "byteOffset": 0
345
+ },
346
+ {
347
+ "name": "model.layers.3.post_attention_layernorm.weight",
348
+ "shape": [
349
+ 3584
350
+ ],
351
+ "dtype": "float16",
352
+ "format": "f32-to-bf16",
353
+ "nbytes": 7168,
354
+ "byteOffset": 7168
355
+ },
356
+ {
357
+ "name": "model.norm.weight",
358
+ "shape": [
359
+ 3584
360
+ ],
361
+ "dtype": "float16",
362
+ "format": "f32-to-bf16",
363
+ "nbytes": 7168,
364
+ "byteOffset": 14336
365
+ },
366
+ {
367
+ "name": "model.layers.0.input_layernorm.weight",
368
+ "shape": [
369
+ 3584
370
+ ],
371
+ "dtype": "float16",
372
+ "format": "f32-to-bf16",
373
+ "nbytes": 7168,
374
+ "byteOffset": 21504
375
+ },
376
+ {
377
+ "name": "model.layers.0.post_attention_layernorm.weight",
378
+ "shape": [
379
+ 3584
380
+ ],
381
+ "dtype": "float16",
382
+ "format": "f32-to-bf16",
383
+ "nbytes": 7168,
384
+ "byteOffset": 28672
385
+ },
386
+ {
387
+ "name": "model.layers.0.self_attn.c_attn.bias",
388
+ "shape": [
389
+ 4608
390
+ ],
391
+ "dtype": "float16",
392
+ "format": "f32-to-bf16",
393
+ "nbytes": 9216,
394
+ "byteOffset": 35840
395
+ },
396
+ {
397
+ "name": "model.layers.0.self_attn.c_attn.weight",
398
+ "shape": [
399
+ 4608,
400
+ 3584
401
+ ],
402
+ "dtype": "float16",
403
+ "format": "f32-to-bf16",
404
+ "nbytes": 33030144,
405
+ "byteOffset": 45056
406
+ },
407
+ {
408
+ "name": "model.layers.1.input_layernorm.weight",
409
+ "shape": [
410
+ 3584
411
+ ],
412
+ "dtype": "float16",
413
+ "format": "f32-to-bf16",
414
+ "nbytes": 7168,
415
+ "byteOffset": 33075200
416
+ },
417
+ {
418
+ "name": "model.layers.1.post_attention_layernorm.weight",
419
+ "shape": [
420
+ 3584
421
+ ],
422
+ "dtype": "float16",
423
+ "format": "f32-to-bf16",
424
+ "nbytes": 7168,
425
+ "byteOffset": 33082368
426
+ },
427
+ {
428
+ "name": "model.layers.1.self_attn.c_attn.bias",
429
+ "shape": [
430
+ 4608
431
+ ],
432
+ "dtype": "float16",
433
+ "format": "f32-to-bf16",
434
+ "nbytes": 9216,
435
+ "byteOffset": 33089536
436
+ },
437
+ {
438
+ "name": "model.layers.2.input_layernorm.weight",
439
+ "shape": [
440
+ 3584
441
+ ],
442
+ "dtype": "float16",
443
+ "format": "f32-to-bf16",
444
+ "nbytes": 7168,
445
+ "byteOffset": 33098752
446
+ },
447
+ {
448
+ "name": "model.layers.2.post_attention_layernorm.weight",
449
+ "shape": [
450
+ 3584
451
+ ],
452
+ "dtype": "float16",
453
+ "format": "f32-to-bf16",
454
+ "nbytes": 7168,
455
+ "byteOffset": 33105920
456
+ },
457
+ {
458
+ "name": "model.layers.2.self_attn.c_attn.bias",
459
+ "shape": [
460
+ 4608
461
+ ],
462
+ "dtype": "float16",
463
+ "format": "f32-to-bf16",
464
+ "nbytes": 9216,
465
+ "byteOffset": 33113088
466
+ },
467
+ {
468
+ "name": "model.layers.3.self_attn.c_attn.bias",
469
+ "shape": [
470
+ 4608
471
+ ],
472
+ "dtype": "float16",
473
+ "format": "f32-to-bf16",
474
+ "nbytes": 9216,
475
+ "byteOffset": 33122304
476
+ }
477
+ ],
478
+ "md5sum": "8024ca13796190a329892865444cd3d9"
479
+ }
480
+ ]
481
+ }
params_shard_0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:027bda185ff72d37320fa16426ab0f7c7128aab2ac9eb72d8657340e48d90e4d
3
+ size 978345984
params_shard_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89b4abaaa000ea810b43c6a40e7215ef23bf9774e4d533534fdef78066e023b9
3
+ size 135790592
params_shard_10.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0db85a6033bbbb1b6e4d82705b9c21312baf17aeeb65ecf05f39efc956208eb
3
+ size 25690112
params_shard_11.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3334a58bdaffbd3b97c0f533e85ed2ae0f243de58dad10edf9541285f95428ae
3
+ size 135790592
params_shard_12.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63ac62dd8140674016640232e972f3380a5282f1044d9766d064e8fc28c314b0
3
+ size 271581184
params_shard_13.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e10b68357f9f8d8ea44347b78c4ddd51a120f2402b8f157d732087e37d2e993
3
+ size 33030144
params_shard_14.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47cfd3df2045c382bd859dc5255ebf516b4bf8c8dc4c5453385b86e36102d9f5
3
+ size 25690112
params_shard_15.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b27adf93985b8861804b61c9ca36d86afd88dbdd1a16256a20bf0539dae8084
3
+ size 33030144
params_shard_16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa46bfc1cd163cbaaa7fcc46c8599c529c757c59a3235a026692034ec84a65c8
3
+ size 25690112
params_shard_17.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d21c896dba637025e09d1bc08af03e6399a8c2c1e5c385aa70dd90232cfbc9c7
3
+ size 33131520
params_shard_2.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c08916833966babd12d02deefb4f0745b0f6e095a6fc08fb142e3228799cdaa
3
+ size 271581184
params_shard_3.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fe753df73072b4952a933e0600c8e4b8cf9f2be71a6025d53ef87f54b967d64
3
+ size 978345984
params_shard_4.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e351746ffadbc023756d7247b2a8b19eb7a77bccf1a20447a8e90e8b20f2a04c
3
+ size 135790592
params_shard_5.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a184d95f1ec81da8b912cc4f609c0c6dda201ee367c813d9f247aadf97d3e500
3
+ size 271581184
params_shard_6.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63265fae190fd35ce4b7952fd15222dc24e73e41176bc971d653fbe0fb182f28
3
+ size 25690112
params_shard_7.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8e2465d97a182f75ad0488a460fac33a7f77a05749ac6b2309141c6f44ddc16
3
+ size 135790592
params_shard_8.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48f19809f0965814f1caea799976e36c80c8b0350abccc96833fe1de430322a2
3
+ size 271581184
params_shard_9.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f66f118bf0e88c2995e92e965c6cf745181d48c54429ab8c19cfea311eedea2c
3
+ size 33030144
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"add_prefix_space": false, "added_tokens_decoder": {"136478": {"content": "<|endoftext|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "136479": {"content": "<|im_start|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "136480": {"content": "<|im_end|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}}, "additional_special_tokens": ["<|im_start|>", "<|im_end|>"], "bos_token": null, "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", "clean_up_tokenization_spaces": false, "eos_token": "<|im_end|>", "errors": "replace", "model_max_length": 8192, "pad_token": "<|endoftext|>", "split_special_tokens": false, "tokenizer_class": "Qwen2Tokenizer", "unk_token": null}
vocab.json ADDED
The diff for this file is too large to render. See raw diff