Molbap HF staff committed on
Commit
e5a97f3
1 Parent(s): e16acb1

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +10 -0
  2. tokenizer_config.json +1 -1
tokenizer.json CHANGED
@@ -6946,6 +6946,10 @@
6946
  "normalizer": {
6947
  "type": "Sequence",
6948
  "normalizers": [
 
 
 
 
6949
  {
6950
  "type": "Replace",
6951
  "pattern": {
@@ -7025,6 +7029,12 @@
7025
  },
7026
  {
7027
  "type": "Fuse"
 
 
 
 
 
 
7028
  }
7029
  ]
7030
  },
 
6946
  "normalizer": {
6947
  "type": "Sequence",
6948
  "normalizers": [
6949
+ {
6950
+ "type": "Prepend",
6951
+ "prepend": "▁"
6952
+ },
6953
  {
6954
  "type": "Replace",
6955
  "pattern": {
 
7029
  },
7030
  {
7031
  "type": "Fuse"
7032
+ },
7033
+ {
7034
+ "type": "Strip",
7035
+ "content": " ",
7036
+ "start": 1,
7037
+ "stop": 0
7038
  }
7039
  ]
7040
  },
tokenizer_config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "add_bos_token": true,
3
  "add_eos_token": false,
4
- "add_prefix_space": null,
5
  "added_tokens_decoder": {
6
  "0": {
7
  "content": "<unk>",
 
1
  {
2
  "add_bos_token": true,
3
  "add_eos_token": false,
4
+ "add_prefix_space": true,
5
  "added_tokens_decoder": {
6
  "0": {
7
  "content": "<unk>",