dgnk007 committed on
Commit
2942187
1 Parent(s): 85c2024

Upload tokenizer

Browse files
Files changed (1) hide show
  1. tokenizer.json +55 -4
tokenizer.json CHANGED
@@ -48,10 +48,61 @@
48
  "use_regex": true
49
  },
50
  "post_processor": {
51
- "type": "ByteLevel",
52
- "add_prefix_space": true,
53
- "trim_offsets": false,
54
- "use_regex": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  },
56
  "decoder": {
57
  "type": "ByteLevel",
 
48
  "use_regex": true
49
  },
50
  "post_processor": {
51
+ "type": "TemplateProcessing",
52
+ "single": [
53
+ {
54
+ "SpecialToken": {
55
+ "id": "<s>",
56
+ "type_id": 0
57
+ }
58
+ },
59
+ {
60
+ "Sequence": {
61
+ "id": "A",
62
+ "type_id": 0
63
+ }
64
+ },
65
+ {
66
+ "SpecialToken": {
67
+ "id": "###",
68
+ "type_id": 0
69
+ }
70
+ }
71
+ ],
72
+ "pair": [
73
+ {
74
+ "Sequence": {
75
+ "id": "A",
76
+ "type_id": 0
77
+ }
78
+ },
79
+ {
80
+ "Sequence": {
81
+ "id": "B",
82
+ "type_id": 1
83
+ }
84
+ }
85
+ ],
86
+ "special_tokens": {
87
+ "###": {
88
+ "id": "###",
89
+ "ids": [
90
+ 21017
91
+ ],
92
+ "tokens": [
93
+ "###"
94
+ ]
95
+ },
96
+ "<s>": {
97
+ "id": "<s>",
98
+ "ids": [
99
+ 50257
100
+ ],
101
+ "tokens": [
102
+ "<s>"
103
+ ]
104
+ }
105
+ }
106
  },
107
  "decoder": {
108
  "type": "ByteLevel",