Update tokenizer_config.json

#2
Files changed (1) hide show
  1. tokenizer_config.json +3 -46
tokenizer_config.json CHANGED
@@ -26,57 +26,14 @@
26
  "single_word": false,
27
  "special": true
28
  },
29
- "32000": {
30
- "content": "<CLS>",
31
- "lstrip": false,
32
- "normalized": false,
33
- "rstrip": false,
34
- "single_word": false,
35
- "special": true
36
- },
37
- "32001": {
38
- "content": "<SEP>",
39
- "lstrip": false,
40
- "normalized": false,
41
- "rstrip": false,
42
- "single_word": false,
43
- "special": true
44
- },
45
- "32002": {
46
- "content": "<EOD>",
47
- "lstrip": false,
48
- "normalized": false,
49
- "rstrip": false,
50
- "single_word": false,
51
- "special": true
52
- },
53
- "32003": {
54
- "content": "<MASK>",
55
- "lstrip": false,
56
- "normalized": false,
57
- "rstrip": false,
58
- "single_word": false,
59
- "special": true
60
- },
61
- "32004": {
62
- "content": "<PAD>",
63
- "lstrip": false,
64
- "normalized": false,
65
- "rstrip": false,
66
- "single_word": false,
67
- "special": true
68
- }
69
  },
70
  "bos_token": "<s>",
71
  "clean_up_tokenization_spaces": false,
72
- "cls_token": "<CLS>",
73
  "eos_token": "</s>",
74
  "legacy": false,
75
- "mask_token": "<MASK>",
76
- "model_max_length": 1000000000000000019884624838656,
77
- "pad_token": "<PAD>",
78
- "padding_side": "right",
79
- "sep_token": "<SEP>",
80
  "sp_model_kwargs": {},
81
  "tokenizer_class": "LlamaTokenizer",
82
  "unk_token": "<unk>",
 
26
  "single_word": false,
27
  "special": true
28
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  },
30
  "bos_token": "<s>",
31
  "clean_up_tokenization_spaces": false,
 
32
  "eos_token": "</s>",
33
  "legacy": false,
34
+ "model_max_length": 4096,
35
+ "pad_token": "</s>",
36
+ "padding_side": "left",
 
 
37
  "sp_model_kwargs": {},
38
  "tokenizer_class": "LlamaTokenizer",
39
  "unk_token": "<unk>",