bjoernp committed on
Commit
bff6ac3
1 Parent(s): 8cd25a4

Update tokenizer_config.json

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +20 -1113
tokenizer_config.json CHANGED
@@ -1,6 +1,4 @@
1
  {
2
- "add_bos_token": true,
3
- "add_eos_token": false,
4
  "added_tokens_decoder": {
5
  "0": {
6
  "content": "<unk>",
@@ -27,7 +25,7 @@
27
  "special": true
28
  },
29
  "32000": {
30
- "content": "<|im_start|>",
31
  "lstrip": false,
32
  "normalized": false,
33
  "rstrip": false,
@@ -35,7 +33,7 @@
35
  "special": true
36
  },
37
  "32001": {
38
- "content": "<|im_end|>",
39
  "lstrip": false,
40
  "normalized": false,
41
  "rstrip": false,
@@ -43,15 +41,15 @@
43
  "special": true
44
  },
45
  "32002": {
46
- "content": "<|padding_token_0|>",
47
  "lstrip": false,
48
- "normalized": false,
49
  "rstrip": false,
50
  "single_word": false,
51
- "special": true
52
  },
53
  "32003": {
54
- "content": "<|padding_token_1|>",
55
  "lstrip": false,
56
  "normalized": false,
57
  "rstrip": false,
@@ -59,7 +57,7 @@
59
  "special": true
60
  },
61
  "32004": {
62
- "content": "<|padding_token_2|>",
63
  "lstrip": false,
64
  "normalized": false,
65
  "rstrip": false,
@@ -67,7 +65,7 @@
67
  "special": true
68
  },
69
  "32005": {
70
- "content": "<|padding_token_3|>",
71
  "lstrip": false,
72
  "normalized": false,
73
  "rstrip": false,
@@ -75,975 +73,7 @@
75
  "special": true
76
  },
77
  "32006": {
78
- "content": "<|padding_token_4|>",
79
- "lstrip": false,
80
- "normalized": false,
81
- "rstrip": false,
82
- "single_word": false,
83
- "special": true
84
- },
85
- "32007": {
86
- "content": "<|padding_token_5|>",
87
- "lstrip": false,
88
- "normalized": false,
89
- "rstrip": false,
90
- "single_word": false,
91
- "special": true
92
- },
93
- "32008": {
94
- "content": "<|padding_token_6|>",
95
- "lstrip": false,
96
- "normalized": false,
97
- "rstrip": false,
98
- "single_word": false,
99
- "special": true
100
- },
101
- "32009": {
102
- "content": "<|padding_token_7|>",
103
- "lstrip": false,
104
- "normalized": false,
105
- "rstrip": false,
106
- "single_word": false,
107
- "special": true
108
- },
109
- "32010": {
110
- "content": "<|padding_token_8|>",
111
- "lstrip": false,
112
- "normalized": false,
113
- "rstrip": false,
114
- "single_word": false,
115
- "special": true
116
- },
117
- "32011": {
118
- "content": "<|padding_token_9|>",
119
- "lstrip": false,
120
- "normalized": false,
121
- "rstrip": false,
122
- "single_word": false,
123
- "special": true
124
- },
125
- "32012": {
126
- "content": "<|padding_token_10|>",
127
- "lstrip": false,
128
- "normalized": false,
129
- "rstrip": false,
130
- "single_word": false,
131
- "special": true
132
- },
133
- "32013": {
134
- "content": "<|padding_token_11|>",
135
- "lstrip": false,
136
- "normalized": false,
137
- "rstrip": false,
138
- "single_word": false,
139
- "special": true
140
- },
141
- "32014": {
142
- "content": "<|padding_token_12|>",
143
- "lstrip": false,
144
- "normalized": false,
145
- "rstrip": false,
146
- "single_word": false,
147
- "special": true
148
- },
149
- "32015": {
150
- "content": "<|padding_token_13|>",
151
- "lstrip": false,
152
- "normalized": false,
153
- "rstrip": false,
154
- "single_word": false,
155
- "special": true
156
- },
157
- "32016": {
158
- "content": "<|padding_token_14|>",
159
- "lstrip": false,
160
- "normalized": false,
161
- "rstrip": false,
162
- "single_word": false,
163
- "special": true
164
- },
165
- "32017": {
166
- "content": "<|padding_token_15|>",
167
- "lstrip": false,
168
- "normalized": false,
169
- "rstrip": false,
170
- "single_word": false,
171
- "special": true
172
- },
173
- "32018": {
174
- "content": "<|padding_token_16|>",
175
- "lstrip": false,
176
- "normalized": false,
177
- "rstrip": false,
178
- "single_word": false,
179
- "special": true
180
- },
181
- "32019": {
182
- "content": "<|padding_token_17|>",
183
- "lstrip": false,
184
- "normalized": false,
185
- "rstrip": false,
186
- "single_word": false,
187
- "special": true
188
- },
189
- "32020": {
190
- "content": "<|padding_token_18|>",
191
- "lstrip": false,
192
- "normalized": false,
193
- "rstrip": false,
194
- "single_word": false,
195
- "special": true
196
- },
197
- "32021": {
198
- "content": "<|padding_token_19|>",
199
- "lstrip": false,
200
- "normalized": false,
201
- "rstrip": false,
202
- "single_word": false,
203
- "special": true
204
- },
205
- "32022": {
206
- "content": "<|padding_token_20|>",
207
- "lstrip": false,
208
- "normalized": false,
209
- "rstrip": false,
210
- "single_word": false,
211
- "special": true
212
- },
213
- "32023": {
214
- "content": "<|padding_token_21|>",
215
- "lstrip": false,
216
- "normalized": false,
217
- "rstrip": false,
218
- "single_word": false,
219
- "special": true
220
- },
221
- "32024": {
222
- "content": "<|padding_token_22|>",
223
- "lstrip": false,
224
- "normalized": false,
225
- "rstrip": false,
226
- "single_word": false,
227
- "special": true
228
- },
229
- "32025": {
230
- "content": "<|padding_token_23|>",
231
- "lstrip": false,
232
- "normalized": false,
233
- "rstrip": false,
234
- "single_word": false,
235
- "special": true
236
- },
237
- "32026": {
238
- "content": "<|padding_token_24|>",
239
- "lstrip": false,
240
- "normalized": false,
241
- "rstrip": false,
242
- "single_word": false,
243
- "special": true
244
- },
245
- "32027": {
246
- "content": "<|padding_token_25|>",
247
- "lstrip": false,
248
- "normalized": false,
249
- "rstrip": false,
250
- "single_word": false,
251
- "special": true
252
- },
253
- "32028": {
254
- "content": "<|padding_token_26|>",
255
- "lstrip": false,
256
- "normalized": false,
257
- "rstrip": false,
258
- "single_word": false,
259
- "special": true
260
- },
261
- "32029": {
262
- "content": "<|padding_token_27|>",
263
- "lstrip": false,
264
- "normalized": false,
265
- "rstrip": false,
266
- "single_word": false,
267
- "special": true
268
- },
269
- "32030": {
270
- "content": "<|padding_token_28|>",
271
- "lstrip": false,
272
- "normalized": false,
273
- "rstrip": false,
274
- "single_word": false,
275
- "special": true
276
- },
277
- "32031": {
278
- "content": "<|padding_token_29|>",
279
- "lstrip": false,
280
- "normalized": false,
281
- "rstrip": false,
282
- "single_word": false,
283
- "special": true
284
- },
285
- "32032": {
286
- "content": "<|padding_token_30|>",
287
- "lstrip": false,
288
- "normalized": false,
289
- "rstrip": false,
290
- "single_word": false,
291
- "special": true
292
- },
293
- "32033": {
294
- "content": "<|padding_token_31|>",
295
- "lstrip": false,
296
- "normalized": false,
297
- "rstrip": false,
298
- "single_word": false,
299
- "special": true
300
- },
301
- "32034": {
302
- "content": "<|padding_token_32|>",
303
- "lstrip": false,
304
- "normalized": false,
305
- "rstrip": false,
306
- "single_word": false,
307
- "special": true
308
- },
309
- "32035": {
310
- "content": "<|padding_token_33|>",
311
- "lstrip": false,
312
- "normalized": false,
313
- "rstrip": false,
314
- "single_word": false,
315
- "special": true
316
- },
317
- "32036": {
318
- "content": "<|padding_token_34|>",
319
- "lstrip": false,
320
- "normalized": false,
321
- "rstrip": false,
322
- "single_word": false,
323
- "special": true
324
- },
325
- "32037": {
326
- "content": "<|padding_token_35|>",
327
- "lstrip": false,
328
- "normalized": false,
329
- "rstrip": false,
330
- "single_word": false,
331
- "special": true
332
- },
333
- "32038": {
334
- "content": "<|padding_token_36|>",
335
- "lstrip": false,
336
- "normalized": false,
337
- "rstrip": false,
338
- "single_word": false,
339
- "special": true
340
- },
341
- "32039": {
342
- "content": "<|padding_token_37|>",
343
- "lstrip": false,
344
- "normalized": false,
345
- "rstrip": false,
346
- "single_word": false,
347
- "special": true
348
- },
349
- "32040": {
350
- "content": "<|padding_token_38|>",
351
- "lstrip": false,
352
- "normalized": false,
353
- "rstrip": false,
354
- "single_word": false,
355
- "special": true
356
- },
357
- "32041": {
358
- "content": "<|padding_token_39|>",
359
- "lstrip": false,
360
- "normalized": false,
361
- "rstrip": false,
362
- "single_word": false,
363
- "special": true
364
- },
365
- "32042": {
366
- "content": "<|padding_token_40|>",
367
- "lstrip": false,
368
- "normalized": false,
369
- "rstrip": false,
370
- "single_word": false,
371
- "special": true
372
- },
373
- "32043": {
374
- "content": "<|padding_token_41|>",
375
- "lstrip": false,
376
- "normalized": false,
377
- "rstrip": false,
378
- "single_word": false,
379
- "special": true
380
- },
381
- "32044": {
382
- "content": "<|padding_token_42|>",
383
- "lstrip": false,
384
- "normalized": false,
385
- "rstrip": false,
386
- "single_word": false,
387
- "special": true
388
- },
389
- "32045": {
390
- "content": "<|padding_token_43|>",
391
- "lstrip": false,
392
- "normalized": false,
393
- "rstrip": false,
394
- "single_word": false,
395
- "special": true
396
- },
397
- "32046": {
398
- "content": "<|padding_token_44|>",
399
- "lstrip": false,
400
- "normalized": false,
401
- "rstrip": false,
402
- "single_word": false,
403
- "special": true
404
- },
405
- "32047": {
406
- "content": "<|padding_token_45|>",
407
- "lstrip": false,
408
- "normalized": false,
409
- "rstrip": false,
410
- "single_word": false,
411
- "special": true
412
- },
413
- "32048": {
414
- "content": "<|padding_token_46|>",
415
- "lstrip": false,
416
- "normalized": false,
417
- "rstrip": false,
418
- "single_word": false,
419
- "special": true
420
- },
421
- "32049": {
422
- "content": "<|padding_token_47|>",
423
- "lstrip": false,
424
- "normalized": false,
425
- "rstrip": false,
426
- "single_word": false,
427
- "special": true
428
- },
429
- "32050": {
430
- "content": "<|padding_token_48|>",
431
- "lstrip": false,
432
- "normalized": false,
433
- "rstrip": false,
434
- "single_word": false,
435
- "special": true
436
- },
437
- "32051": {
438
- "content": "<|padding_token_49|>",
439
- "lstrip": false,
440
- "normalized": false,
441
- "rstrip": false,
442
- "single_word": false,
443
- "special": true
444
- },
445
- "32052": {
446
- "content": "<|padding_token_50|>",
447
- "lstrip": false,
448
- "normalized": false,
449
- "rstrip": false,
450
- "single_word": false,
451
- "special": true
452
- },
453
- "32053": {
454
- "content": "<|padding_token_51|>",
455
- "lstrip": false,
456
- "normalized": false,
457
- "rstrip": false,
458
- "single_word": false,
459
- "special": true
460
- },
461
- "32054": {
462
- "content": "<|padding_token_52|>",
463
- "lstrip": false,
464
- "normalized": false,
465
- "rstrip": false,
466
- "single_word": false,
467
- "special": true
468
- },
469
- "32055": {
470
- "content": "<|padding_token_53|>",
471
- "lstrip": false,
472
- "normalized": false,
473
- "rstrip": false,
474
- "single_word": false,
475
- "special": true
476
- },
477
- "32056": {
478
- "content": "<|padding_token_54|>",
479
- "lstrip": false,
480
- "normalized": false,
481
- "rstrip": false,
482
- "single_word": false,
483
- "special": true
484
- },
485
- "32057": {
486
- "content": "<|padding_token_55|>",
487
- "lstrip": false,
488
- "normalized": false,
489
- "rstrip": false,
490
- "single_word": false,
491
- "special": true
492
- },
493
- "32058": {
494
- "content": "<|padding_token_56|>",
495
- "lstrip": false,
496
- "normalized": false,
497
- "rstrip": false,
498
- "single_word": false,
499
- "special": true
500
- },
501
- "32059": {
502
- "content": "<|padding_token_57|>",
503
- "lstrip": false,
504
- "normalized": false,
505
- "rstrip": false,
506
- "single_word": false,
507
- "special": true
508
- },
509
- "32060": {
510
- "content": "<|padding_token_58|>",
511
- "lstrip": false,
512
- "normalized": false,
513
- "rstrip": false,
514
- "single_word": false,
515
- "special": true
516
- },
517
- "32061": {
518
- "content": "<|padding_token_59|>",
519
- "lstrip": false,
520
- "normalized": false,
521
- "rstrip": false,
522
- "single_word": false,
523
- "special": true
524
- },
525
- "32062": {
526
- "content": "<|padding_token_60|>",
527
- "lstrip": false,
528
- "normalized": false,
529
- "rstrip": false,
530
- "single_word": false,
531
- "special": true
532
- },
533
- "32063": {
534
- "content": "<|padding_token_61|>",
535
- "lstrip": false,
536
- "normalized": false,
537
- "rstrip": false,
538
- "single_word": false,
539
- "special": true
540
- },
541
- "32064": {
542
- "content": "<|padding_token_62|>",
543
- "lstrip": false,
544
- "normalized": false,
545
- "rstrip": false,
546
- "single_word": false,
547
- "special": true
548
- },
549
- "32065": {
550
- "content": "<|padding_token_63|>",
551
- "lstrip": false,
552
- "normalized": false,
553
- "rstrip": false,
554
- "single_word": false,
555
- "special": true
556
- },
557
- "32066": {
558
- "content": "<|padding_token_64|>",
559
- "lstrip": false,
560
- "normalized": false,
561
- "rstrip": false,
562
- "single_word": false,
563
- "special": true
564
- },
565
- "32067": {
566
- "content": "<|padding_token_65|>",
567
- "lstrip": false,
568
- "normalized": false,
569
- "rstrip": false,
570
- "single_word": false,
571
- "special": true
572
- },
573
- "32068": {
574
- "content": "<|padding_token_66|>",
575
- "lstrip": false,
576
- "normalized": false,
577
- "rstrip": false,
578
- "single_word": false,
579
- "special": true
580
- },
581
- "32069": {
582
- "content": "<|padding_token_67|>",
583
- "lstrip": false,
584
- "normalized": false,
585
- "rstrip": false,
586
- "single_word": false,
587
- "special": true
588
- },
589
- "32070": {
590
- "content": "<|padding_token_68|>",
591
- "lstrip": false,
592
- "normalized": false,
593
- "rstrip": false,
594
- "single_word": false,
595
- "special": true
596
- },
597
- "32071": {
598
- "content": "<|padding_token_69|>",
599
- "lstrip": false,
600
- "normalized": false,
601
- "rstrip": false,
602
- "single_word": false,
603
- "special": true
604
- },
605
- "32072": {
606
- "content": "<|padding_token_70|>",
607
- "lstrip": false,
608
- "normalized": false,
609
- "rstrip": false,
610
- "single_word": false,
611
- "special": true
612
- },
613
- "32073": {
614
- "content": "<|padding_token_71|>",
615
- "lstrip": false,
616
- "normalized": false,
617
- "rstrip": false,
618
- "single_word": false,
619
- "special": true
620
- },
621
- "32074": {
622
- "content": "<|padding_token_72|>",
623
- "lstrip": false,
624
- "normalized": false,
625
- "rstrip": false,
626
- "single_word": false,
627
- "special": true
628
- },
629
- "32075": {
630
- "content": "<|padding_token_73|>",
631
- "lstrip": false,
632
- "normalized": false,
633
- "rstrip": false,
634
- "single_word": false,
635
- "special": true
636
- },
637
- "32076": {
638
- "content": "<|padding_token_74|>",
639
- "lstrip": false,
640
- "normalized": false,
641
- "rstrip": false,
642
- "single_word": false,
643
- "special": true
644
- },
645
- "32077": {
646
- "content": "<|padding_token_75|>",
647
- "lstrip": false,
648
- "normalized": false,
649
- "rstrip": false,
650
- "single_word": false,
651
- "special": true
652
- },
653
- "32078": {
654
- "content": "<|padding_token_76|>",
655
- "lstrip": false,
656
- "normalized": false,
657
- "rstrip": false,
658
- "single_word": false,
659
- "special": true
660
- },
661
- "32079": {
662
- "content": "<|padding_token_77|>",
663
- "lstrip": false,
664
- "normalized": false,
665
- "rstrip": false,
666
- "single_word": false,
667
- "special": true
668
- },
669
- "32080": {
670
- "content": "<|padding_token_78|>",
671
- "lstrip": false,
672
- "normalized": false,
673
- "rstrip": false,
674
- "single_word": false,
675
- "special": true
676
- },
677
- "32081": {
678
- "content": "<|padding_token_79|>",
679
- "lstrip": false,
680
- "normalized": false,
681
- "rstrip": false,
682
- "single_word": false,
683
- "special": true
684
- },
685
- "32082": {
686
- "content": "<|padding_token_80|>",
687
- "lstrip": false,
688
- "normalized": false,
689
- "rstrip": false,
690
- "single_word": false,
691
- "special": true
692
- },
693
- "32083": {
694
- "content": "<|padding_token_81|>",
695
- "lstrip": false,
696
- "normalized": false,
697
- "rstrip": false,
698
- "single_word": false,
699
- "special": true
700
- },
701
- "32084": {
702
- "content": "<|padding_token_82|>",
703
- "lstrip": false,
704
- "normalized": false,
705
- "rstrip": false,
706
- "single_word": false,
707
- "special": true
708
- },
709
- "32085": {
710
- "content": "<|padding_token_83|>",
711
- "lstrip": false,
712
- "normalized": false,
713
- "rstrip": false,
714
- "single_word": false,
715
- "special": true
716
- },
717
- "32086": {
718
- "content": "<|padding_token_84|>",
719
- "lstrip": false,
720
- "normalized": false,
721
- "rstrip": false,
722
- "single_word": false,
723
- "special": true
724
- },
725
- "32087": {
726
- "content": "<|padding_token_85|>",
727
- "lstrip": false,
728
- "normalized": false,
729
- "rstrip": false,
730
- "single_word": false,
731
- "special": true
732
- },
733
- "32088": {
734
- "content": "<|padding_token_86|>",
735
- "lstrip": false,
736
- "normalized": false,
737
- "rstrip": false,
738
- "single_word": false,
739
- "special": true
740
- },
741
- "32089": {
742
- "content": "<|padding_token_87|>",
743
- "lstrip": false,
744
- "normalized": false,
745
- "rstrip": false,
746
- "single_word": false,
747
- "special": true
748
- },
749
- "32090": {
750
- "content": "<|padding_token_88|>",
751
- "lstrip": false,
752
- "normalized": false,
753
- "rstrip": false,
754
- "single_word": false,
755
- "special": true
756
- },
757
- "32091": {
758
- "content": "<|padding_token_89|>",
759
- "lstrip": false,
760
- "normalized": false,
761
- "rstrip": false,
762
- "single_word": false,
763
- "special": true
764
- },
765
- "32092": {
766
- "content": "<|padding_token_90|>",
767
- "lstrip": false,
768
- "normalized": false,
769
- "rstrip": false,
770
- "single_word": false,
771
- "special": true
772
- },
773
- "32093": {
774
- "content": "<|padding_token_91|>",
775
- "lstrip": false,
776
- "normalized": false,
777
- "rstrip": false,
778
- "single_word": false,
779
- "special": true
780
- },
781
- "32094": {
782
- "content": "<|padding_token_92|>",
783
- "lstrip": false,
784
- "normalized": false,
785
- "rstrip": false,
786
- "single_word": false,
787
- "special": true
788
- },
789
- "32095": {
790
- "content": "<|padding_token_93|>",
791
- "lstrip": false,
792
- "normalized": false,
793
- "rstrip": false,
794
- "single_word": false,
795
- "special": true
796
- },
797
- "32096": {
798
- "content": "<|padding_token_94|>",
799
- "lstrip": false,
800
- "normalized": false,
801
- "rstrip": false,
802
- "single_word": false,
803
- "special": true
804
- },
805
- "32097": {
806
- "content": "<|padding_token_95|>",
807
- "lstrip": false,
808
- "normalized": false,
809
- "rstrip": false,
810
- "single_word": false,
811
- "special": true
812
- },
813
- "32098": {
814
- "content": "<|padding_token_96|>",
815
- "lstrip": false,
816
- "normalized": false,
817
- "rstrip": false,
818
- "single_word": false,
819
- "special": true
820
- },
821
- "32099": {
822
- "content": "<|padding_token_97|>",
823
- "lstrip": false,
824
- "normalized": false,
825
- "rstrip": false,
826
- "single_word": false,
827
- "special": true
828
- },
829
- "32100": {
830
- "content": "<|padding_token_98|>",
831
- "lstrip": false,
832
- "normalized": false,
833
- "rstrip": false,
834
- "single_word": false,
835
- "special": true
836
- },
837
- "32101": {
838
- "content": "<|padding_token_99|>",
839
- "lstrip": false,
840
- "normalized": false,
841
- "rstrip": false,
842
- "single_word": false,
843
- "special": true
844
- },
845
- "32102": {
846
- "content": "<|padding_token_100|>",
847
- "lstrip": false,
848
- "normalized": false,
849
- "rstrip": false,
850
- "single_word": false,
851
- "special": true
852
- },
853
- "32103": {
854
- "content": "<|padding_token_101|>",
855
- "lstrip": false,
856
- "normalized": false,
857
- "rstrip": false,
858
- "single_word": false,
859
- "special": true
860
- },
861
- "32104": {
862
- "content": "<|padding_token_102|>",
863
- "lstrip": false,
864
- "normalized": false,
865
- "rstrip": false,
866
- "single_word": false,
867
- "special": true
868
- },
869
- "32105": {
870
- "content": "<|padding_token_103|>",
871
- "lstrip": false,
872
- "normalized": false,
873
- "rstrip": false,
874
- "single_word": false,
875
- "special": true
876
- },
877
- "32106": {
878
- "content": "<|padding_token_104|>",
879
- "lstrip": false,
880
- "normalized": false,
881
- "rstrip": false,
882
- "single_word": false,
883
- "special": true
884
- },
885
- "32107": {
886
- "content": "<|padding_token_105|>",
887
- "lstrip": false,
888
- "normalized": false,
889
- "rstrip": false,
890
- "single_word": false,
891
- "special": true
892
- },
893
- "32108": {
894
- "content": "<|padding_token_106|>",
895
- "lstrip": false,
896
- "normalized": false,
897
- "rstrip": false,
898
- "single_word": false,
899
- "special": true
900
- },
901
- "32109": {
902
- "content": "<|padding_token_107|>",
903
- "lstrip": false,
904
- "normalized": false,
905
- "rstrip": false,
906
- "single_word": false,
907
- "special": true
908
- },
909
- "32110": {
910
- "content": "<|padding_token_108|>",
911
- "lstrip": false,
912
- "normalized": false,
913
- "rstrip": false,
914
- "single_word": false,
915
- "special": true
916
- },
917
- "32111": {
918
- "content": "<|padding_token_109|>",
919
- "lstrip": false,
920
- "normalized": false,
921
- "rstrip": false,
922
- "single_word": false,
923
- "special": true
924
- },
925
- "32112": {
926
- "content": "<|padding_token_110|>",
927
- "lstrip": false,
928
- "normalized": false,
929
- "rstrip": false,
930
- "single_word": false,
931
- "special": true
932
- },
933
- "32113": {
934
- "content": "<|padding_token_111|>",
935
- "lstrip": false,
936
- "normalized": false,
937
- "rstrip": false,
938
- "single_word": false,
939
- "special": true
940
- },
941
- "32114": {
942
- "content": "<|padding_token_112|>",
943
- "lstrip": false,
944
- "normalized": false,
945
- "rstrip": false,
946
- "single_word": false,
947
- "special": true
948
- },
949
- "32115": {
950
- "content": "<|padding_token_113|>",
951
- "lstrip": false,
952
- "normalized": false,
953
- "rstrip": false,
954
- "single_word": false,
955
- "special": true
956
- },
957
- "32116": {
958
- "content": "<|padding_token_114|>",
959
- "lstrip": false,
960
- "normalized": false,
961
- "rstrip": false,
962
- "single_word": false,
963
- "special": true
964
- },
965
- "32117": {
966
- "content": "<|padding_token_115|>",
967
- "lstrip": false,
968
- "normalized": false,
969
- "rstrip": false,
970
- "single_word": false,
971
- "special": true
972
- },
973
- "32118": {
974
- "content": "<|padding_token_116|>",
975
- "lstrip": false,
976
- "normalized": false,
977
- "rstrip": false,
978
- "single_word": false,
979
- "special": true
980
- },
981
- "32119": {
982
- "content": "<|padding_token_117|>",
983
- "lstrip": false,
984
- "normalized": false,
985
- "rstrip": false,
986
- "single_word": false,
987
- "special": true
988
- },
989
- "32120": {
990
- "content": "<|padding_token_118|>",
991
- "lstrip": false,
992
- "normalized": false,
993
- "rstrip": false,
994
- "single_word": false,
995
- "special": true
996
- },
997
- "32121": {
998
- "content": "<|padding_token_119|>",
999
- "lstrip": false,
1000
- "normalized": false,
1001
- "rstrip": false,
1002
- "single_word": false,
1003
- "special": true
1004
- },
1005
- "32122": {
1006
- "content": "<|padding_token_120|>",
1007
- "lstrip": false,
1008
- "normalized": false,
1009
- "rstrip": false,
1010
- "single_word": false,
1011
- "special": true
1012
- },
1013
- "32123": {
1014
- "content": "<|padding_token_121|>",
1015
- "lstrip": false,
1016
- "normalized": false,
1017
- "rstrip": false,
1018
- "single_word": false,
1019
- "special": true
1020
- },
1021
- "32124": {
1022
- "content": "<|padding_token_122|>",
1023
- "lstrip": false,
1024
- "normalized": false,
1025
- "rstrip": false,
1026
- "single_word": false,
1027
- "special": true
1028
- },
1029
- "32125": {
1030
- "content": "<|padding_token_123|>",
1031
- "lstrip": false,
1032
- "normalized": false,
1033
- "rstrip": false,
1034
- "single_word": false,
1035
- "special": true
1036
- },
1037
- "32126": {
1038
- "content": "<|padding_token_124|>",
1039
- "lstrip": false,
1040
- "normalized": false,
1041
- "rstrip": false,
1042
- "single_word": false,
1043
- "special": true
1044
- },
1045
- "32127": {
1046
- "content": "<|padding_token_125|>",
1047
  "lstrip": false,
1048
  "normalized": false,
1049
  "rstrip": false,
@@ -1053,144 +83,21 @@
1053
  },
1054
  "additional_special_tokens": [
1055
  "<|im_start|>",
1056
- "<|im_end|>",
1057
- "<|padding_token_0|>",
1058
- "<|padding_token_1|>",
1059
- "<|padding_token_2|>",
1060
- "<|padding_token_3|>",
1061
- "<|padding_token_4|>",
1062
- "<|padding_token_5|>",
1063
- "<|padding_token_6|>",
1064
- "<|padding_token_7|>",
1065
- "<|padding_token_8|>",
1066
- "<|padding_token_9|>",
1067
- "<|padding_token_10|>",
1068
- "<|padding_token_11|>",
1069
- "<|padding_token_12|>",
1070
- "<|padding_token_13|>",
1071
- "<|padding_token_14|>",
1072
- "<|padding_token_15|>",
1073
- "<|padding_token_16|>",
1074
- "<|padding_token_17|>",
1075
- "<|padding_token_18|>",
1076
- "<|padding_token_19|>",
1077
- "<|padding_token_20|>",
1078
- "<|padding_token_21|>",
1079
- "<|padding_token_22|>",
1080
- "<|padding_token_23|>",
1081
- "<|padding_token_24|>",
1082
- "<|padding_token_25|>",
1083
- "<|padding_token_26|>",
1084
- "<|padding_token_27|>",
1085
- "<|padding_token_28|>",
1086
- "<|padding_token_29|>",
1087
- "<|padding_token_30|>",
1088
- "<|padding_token_31|>",
1089
- "<|padding_token_32|>",
1090
- "<|padding_token_33|>",
1091
- "<|padding_token_34|>",
1092
- "<|padding_token_35|>",
1093
- "<|padding_token_36|>",
1094
- "<|padding_token_37|>",
1095
- "<|padding_token_38|>",
1096
- "<|padding_token_39|>",
1097
- "<|padding_token_40|>",
1098
- "<|padding_token_41|>",
1099
- "<|padding_token_42|>",
1100
- "<|padding_token_43|>",
1101
- "<|padding_token_44|>",
1102
- "<|padding_token_45|>",
1103
- "<|padding_token_46|>",
1104
- "<|padding_token_47|>",
1105
- "<|padding_token_48|>",
1106
- "<|padding_token_49|>",
1107
- "<|padding_token_50|>",
1108
- "<|padding_token_51|>",
1109
- "<|padding_token_52|>",
1110
- "<|padding_token_53|>",
1111
- "<|padding_token_54|>",
1112
- "<|padding_token_55|>",
1113
- "<|padding_token_56|>",
1114
- "<|padding_token_57|>",
1115
- "<|padding_token_58|>",
1116
- "<|padding_token_59|>",
1117
- "<|padding_token_60|>",
1118
- "<|padding_token_61|>",
1119
- "<|padding_token_62|>",
1120
- "<|padding_token_63|>",
1121
- "<|padding_token_64|>",
1122
- "<|padding_token_65|>",
1123
- "<|padding_token_66|>",
1124
- "<|padding_token_67|>",
1125
- "<|padding_token_68|>",
1126
- "<|padding_token_69|>",
1127
- "<|padding_token_70|>",
1128
- "<|padding_token_71|>",
1129
- "<|padding_token_72|>",
1130
- "<|padding_token_73|>",
1131
- "<|padding_token_74|>",
1132
- "<|padding_token_75|>",
1133
- "<|padding_token_76|>",
1134
- "<|padding_token_77|>",
1135
- "<|padding_token_78|>",
1136
- "<|padding_token_79|>",
1137
- "<|padding_token_80|>",
1138
- "<|padding_token_81|>",
1139
- "<|padding_token_82|>",
1140
- "<|padding_token_83|>",
1141
- "<|padding_token_84|>",
1142
- "<|padding_token_85|>",
1143
- "<|padding_token_86|>",
1144
- "<|padding_token_87|>",
1145
- "<|padding_token_88|>",
1146
- "<|padding_token_89|>",
1147
- "<|padding_token_90|>",
1148
- "<|padding_token_91|>",
1149
- "<|padding_token_92|>",
1150
- "<|padding_token_93|>",
1151
- "<|padding_token_94|>",
1152
- "<|padding_token_95|>",
1153
- "<|padding_token_96|>",
1154
- "<|padding_token_97|>",
1155
- "<|padding_token_98|>",
1156
- "<|padding_token_99|>",
1157
- "<|padding_token_100|>",
1158
- "<|padding_token_101|>",
1159
- "<|padding_token_102|>",
1160
- "<|padding_token_103|>",
1161
- "<|padding_token_104|>",
1162
- "<|padding_token_105|>",
1163
- "<|padding_token_106|>",
1164
- "<|padding_token_107|>",
1165
- "<|padding_token_108|>",
1166
- "<|padding_token_109|>",
1167
- "<|padding_token_110|>",
1168
- "<|padding_token_111|>",
1169
- "<|padding_token_112|>",
1170
- "<|padding_token_113|>",
1171
- "<|padding_token_114|>",
1172
- "<|padding_token_115|>",
1173
- "<|padding_token_116|>",
1174
- "<|padding_token_117|>",
1175
- "<|padding_token_118|>",
1176
- "<|padding_token_119|>",
1177
- "<|padding_token_120|>",
1178
- "<|padding_token_121|>",
1179
- "<|padding_token_122|>",
1180
- "<|padding_token_123|>",
1181
- "<|padding_token_124|>",
1182
- "<|padding_token_125|>"
1183
  ],
1184
- "bos_token": "<s>",
 
1185
  "clean_up_tokenization_spaces": false,
1186
- "eos_token": "</s>",
1187
- "legacy": false,
 
 
1188
  "model_max_length": 1000000000000000019884624838656,
1189
- "pad_token": null,
1190
  "padding_side": "right",
 
1191
  "sp_model_kwargs": {},
1192
- "spaces_between_special_tokens": false,
1193
  "tokenizer_class": "LlamaTokenizer",
1194
  "unk_token": "<unk>",
1195
- "use_default_system_prompt": true
1196
- }
 
1
  {
 
 
2
  "added_tokens_decoder": {
3
  "0": {
4
  "content": "<unk>",
 
25
  "special": true
26
  },
27
  "32000": {
28
+ "content": "<CLS>",
29
  "lstrip": false,
30
  "normalized": false,
31
  "rstrip": false,
 
33
  "special": true
34
  },
35
  "32001": {
36
+ "content": "<SEP>",
37
  "lstrip": false,
38
  "normalized": false,
39
  "rstrip": false,
 
41
  "special": true
42
  },
43
  "32002": {
44
+ "content": "<EOD>",
45
  "lstrip": false,
46
+ "normalized": true,
47
  "rstrip": false,
48
  "single_word": false,
49
+ "special": false
50
  },
51
  "32003": {
52
+ "content": "<MASK>",
53
  "lstrip": false,
54
  "normalized": false,
55
  "rstrip": false,
 
57
  "special": true
58
  },
59
  "32004": {
60
+ "content": "<PAD>",
61
  "lstrip": false,
62
  "normalized": false,
63
  "rstrip": false,
 
65
  "special": true
66
  },
67
  "32005": {
68
+ "content": "<|im_start|>",
69
  "lstrip": false,
70
  "normalized": false,
71
  "rstrip": false,
 
73
  "special": true
74
  },
75
  "32006": {
76
+ "content": "<|im_end|>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  "lstrip": false,
78
  "normalized": false,
79
  "rstrip": false,
 
83
  },
84
  "additional_special_tokens": [
85
  "<|im_start|>",
86
+ "<|im_end|>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  ],
88
+ "bos_token": "<|im_start|>",
89
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
90
  "clean_up_tokenization_spaces": false,
91
+ "cls_token": "<CLS>",
92
+ "eos_token": "<|im_end|>",
93
+ "legacy": true,
94
+ "mask_token": "<MASK>",
95
  "model_max_length": 1000000000000000019884624838656,
96
+ "pad_token": "<PAD>",
97
  "padding_side": "right",
98
+ "sep_token": "<SEP>",
99
  "sp_model_kwargs": {},
 
100
  "tokenizer_class": "LlamaTokenizer",
101
  "unk_token": "<unk>",
102
+ "use_default_system_prompt": false
103
+ }