benjamin
/

zett-hypernetwork-TinyLlama-1.1B-intermediate-step-1431k-3T

Feature Extraction

Model card Files and versions Community

benjamin committed on May 15

Commit

9bf5852

•

1 Parent(s): a5b6bdf

update tokenizer

Files changed (1) hide show

tokenizer.json +21 -5

tokenizer.json CHANGED Viewed

@@ -31,12 +31,28 @@
       "special": true
     }
   ],
-  "normalizer": null,
   "pre_tokenizer": {
-    "type": "ByteLevel",
-    "add_prefix_space": true,
-    "trim_offsets": true,
-    "use_regex": true
   },
   "post_processor": {
     "type": "TemplateProcessing",

       "special": true
     }
   ],
+  "normalizer": {
+    "type": "Prepend",
+    "prepend": " "
+  },
   "pre_tokenizer": {
+    "type": "Sequence",
+    "pretokenizers": [
+      {
+        "type": "Split",
+        "pattern": {
+          "Regex": "'s|'t|'re|'ve|'m|'ll|'d| ?[\\p{L}\\p{M}]+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+"
+        },
+        "behavior": "Removed",
+        "invert": true
+      },
+      {
+        "type": "ByteLevel",
+        "add_prefix_space": false,
+        "trim_offsets": true,
+        "use_regex": false
+      }
+    ]
   },
   "post_processor": {
     "type": "TemplateProcessing",