HiroseKoichi committed on
Commit
da32cb9
1 Parent(s): a7ed461

Update tokenizer.json

Browse files

This fixes the tokenizer so that the model can be converted using `convert-hf-to-gguf.py`.

I used this fix to make non-Imatrix quants here: https://huggingface.co/HiroseKoichi/Llama-3-Lumimaid-8B-v0.1-OAS-GGUF

Files changed (1) hide show
  1. tokenizer.json +63 -4
tokenizer.json CHANGED
@@ -2336,10 +2336,69 @@
2336
  ]
2337
  },
2338
  "post_processor": {
2339
- "type": "ByteLevel",
2340
- "add_prefix_space": true,
2341
- "trim_offsets": false,
2342
- "use_regex": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2343
  },
2344
  "decoder": {
2345
  "type": "ByteLevel",
 
2336
  ]
2337
  },
2338
  "post_processor": {
2339
+ "type": "Sequence",
2340
+ "processors": [
2341
+ {
2342
+ "type": "ByteLevel",
2343
+ "add_prefix_space": true,
2344
+ "trim_offsets": false,
2345
+ "use_regex": true
2346
+ },
2347
+ {
2348
+ "type": "TemplateProcessing",
2349
+ "single": [
2350
+ {
2351
+ "SpecialToken": {
2352
+ "id": "<|begin_of_text|>",
2353
+ "type_id": 0
2354
+ }
2355
+ },
2356
+ {
2357
+ "Sequence": {
2358
+ "id": "A",
2359
+ "type_id": 0
2360
+ }
2361
+ }
2362
+ ],
2363
+ "pair": [
2364
+ {
2365
+ "SpecialToken": {
2366
+ "id": "<|begin_of_text|>",
2367
+ "type_id": 0
2368
+ }
2369
+ },
2370
+ {
2371
+ "Sequence": {
2372
+ "id": "A",
2373
+ "type_id": 0
2374
+ }
2375
+ },
2376
+ {
2377
+ "SpecialToken": {
2378
+ "id": "<|begin_of_text|>",
2379
+ "type_id": 1
2380
+ }
2381
+ },
2382
+ {
2383
+ "Sequence": {
2384
+ "id": "B",
2385
+ "type_id": 1
2386
+ }
2387
+ }
2388
+ ],
2389
+ "special_tokens": {
2390
+ "<|begin_of_text|>": {
2391
+ "id": "<|begin_of_text|>",
2392
+ "ids": [
2393
+ 128000
2394
+ ],
2395
+ "tokens": [
2396
+ "<|begin_of_text|>"
2397
+ ]
2398
+ }
2399
+ }
2400
+ }
2401
+ ]
2402
  },
2403
  "decoder": {
2404
  "type": "ByteLevel",