thealper2
/

aspect-extraction-tokenizer

Model card · Files and versions · Community

thealper2 committed on Aug 8

Commit

ec045d7

•

1 Parent(s): cbc21c5

Upload 4 files

Files changed (2) hide show

tokenizer.json +3 -3
tokenizer_config.json +2 -2

tokenizer.json CHANGED Viewed

@@ -2,13 +2,13 @@
   "version": "1.0",
   "truncation": {
     "direction": "Right",
-    "max_length": 64,
     "strategy": "LongestFirst",
     "stride": 0
   },
   "padding": {
     "strategy": {
-      "Fixed": 64
     },
     "direction": "Right",
     "pad_to_multiple_of": null,
@@ -67,7 +67,7 @@
     "type": "BertNormalizer",
     "clean_text": true,
     "handle_chinese_chars": true,
-    "strip_accents": null,
     "lowercase": false
   },
   "pre_tokenizer": {

   "version": "1.0",
   "truncation": {
     "direction": "Right",
+    "max_length": 128,
     "strategy": "LongestFirst",
     "stride": 0
   },
   "padding": {
     "strategy": {
+      "Fixed": 128
     },
     "direction": "Right",
     "pad_to_multiple_of": null,
     "type": "BertNormalizer",
     "clean_text": true,
     "handle_chinese_chars": true,
+    "strip_accents": false,
     "lowercase": false
   },
   "pre_tokenizer": {

tokenizer_config.json CHANGED Viewed

@@ -51,8 +51,8 @@
   "never_split": null,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
-  "strip_accents": null,
   "tokenize_chinese_chars": true,
-  "tokenizer_class": "BertTokenizer",
   "unk_token": "[UNK]"
 }

   "never_split": null,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
+  "strip_accents": false,
   "tokenize_chinese_chars": true,
+  "tokenizer_class": "ConvBertTokenizer",
   "unk_token": "[UNK]"
 }