thealper2 committed on
Commit
ec045d7
1 Parent(s): cbc21c5

Upload 4 files

Browse files
Files changed (2) hide show
  1. tokenizer.json +3 -3
  2. tokenizer_config.json +2 -2
tokenizer.json CHANGED
@@ -2,13 +2,13 @@
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
- "max_length": 64,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
9
  "padding": {
10
  "strategy": {
11
- "Fixed": 64
12
  },
13
  "direction": "Right",
14
  "pad_to_multiple_of": null,
@@ -67,7 +67,7 @@
67
  "type": "BertNormalizer",
68
  "clean_text": true,
69
  "handle_chinese_chars": true,
70
- "strip_accents": null,
71
  "lowercase": false
72
  },
73
  "pre_tokenizer": {
 
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
+ "max_length": 128,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
9
  "padding": {
10
  "strategy": {
11
+ "Fixed": 128
12
  },
13
  "direction": "Right",
14
  "pad_to_multiple_of": null,
 
67
  "type": "BertNormalizer",
68
  "clean_text": true,
69
  "handle_chinese_chars": true,
70
+ "strip_accents": false,
71
  "lowercase": false
72
  },
73
  "pre_tokenizer": {
tokenizer_config.json CHANGED
@@ -51,8 +51,8 @@
51
  "never_split": null,
52
  "pad_token": "[PAD]",
53
  "sep_token": "[SEP]",
54
- "strip_accents": null,
55
  "tokenize_chinese_chars": true,
56
- "tokenizer_class": "BertTokenizer",
57
  "unk_token": "[UNK]"
58
  }
 
51
  "never_split": null,
52
  "pad_token": "[PAD]",
53
  "sep_token": "[SEP]",
54
+ "strip_accents": false,
55
  "tokenize_chinese_chars": true,
56
+ "tokenizer_class": "ConvBertTokenizer",
57
  "unk_token": "[UNK]"
58
  }