NowaBwagel0 committed on
Commit
b1d9ada
1 Parent(s): 2db415d

Upload tokenizer

Browse files
added_tokens.json CHANGED
@@ -2,7 +2,6 @@
2
  "</s>": 5,
3
  "<s>": 4,
4
  "<unk>": 3,
5
- "<|endoftext|>": 25000,
6
  "<|im_begin|>": 0,
7
  "<|im_end|>": 1,
8
  "[PAD]": 2
 
2
  "</s>": 5,
3
  "<s>": 4,
4
  "<unk>": 3,
 
5
  "<|im_begin|>": 0,
6
  "<|im_end|>": 1,
7
  "[PAD]": 2
special_tokens_map.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "bos_token": "<|endoftext|>",
3
- "eos_token": "<|endoftext|>",
4
- "unk_token": "<|endoftext|>"
5
  }
 
1
  {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
+ "unk_token": "<unk>"
5
  }
tokenizer.json CHANGED
@@ -56,15 +56,6 @@
56
  "rstrip": false,
57
  "normalized": false,
58
  "special": true
59
- },
60
- {
61
- "id": 25000,
62
- "content": "<|endoftext|>",
63
- "single_word": false,
64
- "lstrip": false,
65
- "rstrip": false,
66
- "normalized": false,
67
- "special": true
68
  }
69
  ],
70
  "normalizer": null,
 
56
  "rstrip": false,
57
  "normalized": false,
58
  "special": true
 
 
 
 
 
 
 
 
 
59
  }
60
  ],
61
  "normalizer": null,
tokenizer_config.json CHANGED
@@ -48,21 +48,13 @@
48
  "rstrip": false,
49
  "single_word": false,
50
  "special": true
51
- },
52
- "25000": {
53
- "content": "<|endoftext|>",
54
- "lstrip": false,
55
- "normalized": false,
56
- "rstrip": false,
57
- "single_word": false,
58
- "special": true
59
  }
60
  },
61
  "additional_special_tokens": [],
62
- "bos_token": "<|endoftext|>",
63
  "clean_up_tokenization_spaces": true,
64
- "eos_token": "<|endoftext|>",
65
  "model_max_length": 1000000000000000019884624838656,
66
  "tokenizer_class": "GPT2Tokenizer",
67
- "unk_token": "<|endoftext|>"
68
  }
 
48
  "rstrip": false,
49
  "single_word": false,
50
  "special": true
 
 
 
 
 
 
 
 
51
  }
52
  },
53
  "additional_special_tokens": [],
54
+ "bos_token": "<s>",
55
  "clean_up_tokenization_spaces": true,
56
+ "eos_token": "</s>",
57
  "model_max_length": 1000000000000000019884624838656,
58
  "tokenizer_class": "GPT2Tokenizer",
59
+ "unk_token": "<unk>"
60
  }