woo2 committed on
Commit
2665651
1 Parent(s): c3101d6

End of training

Browse files
Files changed (5) hide show
  1. README.md +1 -1
  2. added_tokens.json +0 -14
  3. special_tokens_map.json +1 -15
  4. tokenizer.json +0 -126
  5. tokenizer_config.json +1 -127
README.md CHANGED
@@ -40,7 +40,7 @@ The following hyperparameters were used during training:
40
  - seed: 42
41
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
42
  - lr_scheduler_type: linear
43
- - num_epochs: 3
44
 
45
  ### Training results
46
 
 
40
  - seed: 42
41
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
42
  - lr_scheduler_type: linear
43
+ - num_epochs: 6
44
 
45
  ### Training results
46
 
added_tokens.json CHANGED
@@ -1,24 +1,10 @@
1
  {
2
  "</s>": 2,
3
- "</s_address>": 57527,
4
- "</s_company>": 57532,
5
- "</s_date>": 57526,
6
- "</s_description>": 57538,
7
- "</s_item no>": 57535,
8
- "</s_total amount>": 57528,
9
- "</s_total>": 57529,
10
  "<mask>": 57521,
11
  "<pad>": 1,
12
  "<s>": 0,
13
- "<s_address>": 57534,
14
- "<s_company>": 57537,
15
- "<s_date>": 57525,
16
- "<s_description>": 57533,
17
  "<s_iitcdip>": 57523,
18
- "<s_item no>": 57531,
19
  "<s_synthdog>": 57524,
20
- "<s_total amount>": 57530,
21
- "<s_total>": 57536,
22
  "<sep/>": 57522,
23
  "<unk>": 3
24
  }
 
1
  {
2
  "</s>": 2,
 
 
 
 
 
 
 
3
  "<mask>": 57521,
4
  "<pad>": 1,
5
  "<s>": 0,
 
 
 
 
6
  "<s_iitcdip>": 57523,
 
7
  "<s_synthdog>": 57524,
 
 
8
  "<sep/>": 57522,
9
  "<unk>": 3
10
  }
special_tokens_map.json CHANGED
@@ -2,22 +2,8 @@
2
  "additional_special_tokens": [
3
  "<s_iitcdip>",
4
  "<s_synthdog>",
5
- "<s_date>",
6
  "</s>",
7
- "</s_date>",
8
- "<s>",
9
- "</s_address>",
10
- "</s_total amount>",
11
- "</s_total>",
12
- "<s_total amount>",
13
- "<s_item no>",
14
- "</s_company>",
15
- "<s_description>",
16
- "<s_address>",
17
- "</s_item no>",
18
- "<s_total>",
19
- "<s_company>",
20
- "</s_description>"
21
  ],
22
  "bos_token": "<s>",
23
  "cls_token": "<s>",
 
2
  "additional_special_tokens": [
3
  "<s_iitcdip>",
4
  "<s_synthdog>",
 
5
  "</s>",
6
+ "<s>"
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  ],
8
  "bos_token": "<s>",
9
  "cls_token": "<s>",
tokenizer.json CHANGED
@@ -88,132 +88,6 @@
88
  "rstrip": true,
89
  "normalized": false,
90
  "special": true
91
- },
92
- {
93
- "id": 57525,
94
- "content": "<s_date>",
95
- "single_word": false,
96
- "lstrip": true,
97
- "rstrip": true,
98
- "normalized": false,
99
- "special": true
100
- },
101
- {
102
- "id": 57526,
103
- "content": "</s_date>",
104
- "single_word": false,
105
- "lstrip": true,
106
- "rstrip": true,
107
- "normalized": false,
108
- "special": true
109
- },
110
- {
111
- "id": 57527,
112
- "content": "</s_address>",
113
- "single_word": false,
114
- "lstrip": true,
115
- "rstrip": true,
116
- "normalized": false,
117
- "special": true
118
- },
119
- {
120
- "id": 57528,
121
- "content": "</s_total amount>",
122
- "single_word": false,
123
- "lstrip": true,
124
- "rstrip": true,
125
- "normalized": false,
126
- "special": true
127
- },
128
- {
129
- "id": 57529,
130
- "content": "</s_total>",
131
- "single_word": false,
132
- "lstrip": true,
133
- "rstrip": true,
134
- "normalized": false,
135
- "special": true
136
- },
137
- {
138
- "id": 57530,
139
- "content": "<s_total amount>",
140
- "single_word": false,
141
- "lstrip": true,
142
- "rstrip": true,
143
- "normalized": false,
144
- "special": true
145
- },
146
- {
147
- "id": 57531,
148
- "content": "<s_item no>",
149
- "single_word": false,
150
- "lstrip": true,
151
- "rstrip": true,
152
- "normalized": false,
153
- "special": true
154
- },
155
- {
156
- "id": 57532,
157
- "content": "</s_company>",
158
- "single_word": false,
159
- "lstrip": true,
160
- "rstrip": true,
161
- "normalized": false,
162
- "special": true
163
- },
164
- {
165
- "id": 57533,
166
- "content": "<s_description>",
167
- "single_word": false,
168
- "lstrip": true,
169
- "rstrip": true,
170
- "normalized": false,
171
- "special": true
172
- },
173
- {
174
- "id": 57534,
175
- "content": "<s_address>",
176
- "single_word": false,
177
- "lstrip": true,
178
- "rstrip": true,
179
- "normalized": false,
180
- "special": true
181
- },
182
- {
183
- "id": 57535,
184
- "content": "</s_item no>",
185
- "single_word": false,
186
- "lstrip": true,
187
- "rstrip": true,
188
- "normalized": false,
189
- "special": true
190
- },
191
- {
192
- "id": 57536,
193
- "content": "<s_total>",
194
- "single_word": false,
195
- "lstrip": true,
196
- "rstrip": true,
197
- "normalized": false,
198
- "special": true
199
- },
200
- {
201
- "id": 57537,
202
- "content": "<s_company>",
203
- "single_word": false,
204
- "lstrip": true,
205
- "rstrip": true,
206
- "normalized": false,
207
- "special": true
208
- },
209
- {
210
- "id": 57538,
211
- "content": "</s_description>",
212
- "single_word": false,
213
- "lstrip": true,
214
- "rstrip": true,
215
- "normalized": false,
216
- "special": true
217
  }
218
  ],
219
  "normalizer": {
 
88
  "rstrip": true,
89
  "normalized": false,
90
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  }
92
  ],
93
  "normalizer": {
tokenizer_config.json CHANGED
@@ -63,139 +63,13 @@
63
  "rstrip": true,
64
  "single_word": false,
65
  "special": true
66
- },
67
- "57525": {
68
- "content": "<s_date>",
69
- "lstrip": true,
70
- "normalized": false,
71
- "rstrip": true,
72
- "single_word": false,
73
- "special": true
74
- },
75
- "57526": {
76
- "content": "</s_date>",
77
- "lstrip": true,
78
- "normalized": false,
79
- "rstrip": true,
80
- "single_word": false,
81
- "special": true
82
- },
83
- "57527": {
84
- "content": "</s_address>",
85
- "lstrip": true,
86
- "normalized": false,
87
- "rstrip": true,
88
- "single_word": false,
89
- "special": true
90
- },
91
- "57528": {
92
- "content": "</s_total amount>",
93
- "lstrip": true,
94
- "normalized": false,
95
- "rstrip": true,
96
- "single_word": false,
97
- "special": true
98
- },
99
- "57529": {
100
- "content": "</s_total>",
101
- "lstrip": true,
102
- "normalized": false,
103
- "rstrip": true,
104
- "single_word": false,
105
- "special": true
106
- },
107
- "57530": {
108
- "content": "<s_total amount>",
109
- "lstrip": true,
110
- "normalized": false,
111
- "rstrip": true,
112
- "single_word": false,
113
- "special": true
114
- },
115
- "57531": {
116
- "content": "<s_item no>",
117
- "lstrip": true,
118
- "normalized": false,
119
- "rstrip": true,
120
- "single_word": false,
121
- "special": true
122
- },
123
- "57532": {
124
- "content": "</s_company>",
125
- "lstrip": true,
126
- "normalized": false,
127
- "rstrip": true,
128
- "single_word": false,
129
- "special": true
130
- },
131
- "57533": {
132
- "content": "<s_description>",
133
- "lstrip": true,
134
- "normalized": false,
135
- "rstrip": true,
136
- "single_word": false,
137
- "special": true
138
- },
139
- "57534": {
140
- "content": "<s_address>",
141
- "lstrip": true,
142
- "normalized": false,
143
- "rstrip": true,
144
- "single_word": false,
145
- "special": true
146
- },
147
- "57535": {
148
- "content": "</s_item no>",
149
- "lstrip": true,
150
- "normalized": false,
151
- "rstrip": true,
152
- "single_word": false,
153
- "special": true
154
- },
155
- "57536": {
156
- "content": "<s_total>",
157
- "lstrip": true,
158
- "normalized": false,
159
- "rstrip": true,
160
- "single_word": false,
161
- "special": true
162
- },
163
- "57537": {
164
- "content": "<s_company>",
165
- "lstrip": true,
166
- "normalized": false,
167
- "rstrip": true,
168
- "single_word": false,
169
- "special": true
170
- },
171
- "57538": {
172
- "content": "</s_description>",
173
- "lstrip": true,
174
- "normalized": false,
175
- "rstrip": true,
176
- "single_word": false,
177
- "special": true
178
  }
179
  },
180
  "additional_special_tokens": [
181
  "<s_iitcdip>",
182
  "<s_synthdog>",
183
- "<s_date>",
184
  "</s>",
185
- "</s_date>",
186
- "<s>",
187
- "</s_address>",
188
- "</s_total amount>",
189
- "</s_total>",
190
- "<s_total amount>",
191
- "<s_item no>",
192
- "</s_company>",
193
- "<s_description>",
194
- "<s_address>",
195
- "</s_item no>",
196
- "<s_total>",
197
- "<s_company>",
198
- "</s_description>"
199
  ],
200
  "bos_token": "<s>",
201
  "clean_up_tokenization_spaces": true,
 
63
  "rstrip": true,
64
  "single_word": false,
65
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  }
67
  },
68
  "additional_special_tokens": [
69
  "<s_iitcdip>",
70
  "<s_synthdog>",
 
71
  "</s>",
72
+ "<s>"
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  ],
74
  "bos_token": "<s>",
75
  "clean_up_tokenization_spaces": true,