usoni1 committed on
Commit
7ddaf41
1 Parent(s): 07dd28d

Update tokenizer_config.json

Browse files

Need to add bos_token, eos_token and unk_token explicitly. Otherwise, it causes maximum recursion depth error. See https://github.com/EleutherAI/lm-evaluation-harness/issues/442

Files changed (1) hide show
  1. tokenizer_config.json +3 -3
tokenizer_config.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "bos_token": "",
3
- "eos_token": "",
4
  "model_max_length": 512,
5
  "padding_side": "right",
6
  "special_tokens_map_file": "/root/.cache/huggingface/hub/models--decapoda-research--llama-13b-hf/snapshots/438770a656712a5072229b62256521845d4de5ce/special_tokens_map.json",
7
  "tokenizer_class": "LlamaTokenizer",
8
- "unk_token": ""
9
  }
 
1
  {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
  "model_max_length": 512,
5
  "padding_side": "right",
6
  "special_tokens_map_file": "/root/.cache/huggingface/hub/models--decapoda-research--llama-13b-hf/snapshots/438770a656712a5072229b62256521845d4de5ce/special_tokens_map.json",
7
  "tokenizer_class": "LlamaTokenizer",
8
+ "unk_token": "<unk>"
9
  }